diff options
| -rw-r--r-- | fs/ext4/balloc.c | 66 | ||||
| -rw-r--r-- | fs/ext4/dir.c | 3 | ||||
| -rw-r--r-- | fs/ext4/ext4.h | 59 | ||||
| -rw-r--r-- | fs/ext4/ext4_extents.h | 22 | ||||
| -rw-r--r-- | fs/ext4/ext4_jbd2.c | 7 | ||||
| -rw-r--r-- | fs/ext4/ext4_jbd2.h | 4 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 236 | ||||
| -rw-r--r-- | fs/ext4/extents_status.c | 10 | ||||
| -rw-r--r-- | fs/ext4/file.c | 153 | ||||
| -rw-r--r-- | fs/ext4/inline.c | 15 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 90 | ||||
| -rw-r--r-- | fs/ext4/mballoc.c | 8 | ||||
| -rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
| -rw-r--r-- | fs/ext4/mmp.c | 4 | ||||
| -rw-r--r-- | fs/ext4/move_extent.c | 39 | ||||
| -rw-r--r-- | fs/ext4/namei.c | 131 | ||||
| -rw-r--r-- | fs/ext4/page-io.c | 32 | ||||
| -rw-r--r-- | fs/ext4/resize.c | 13 | ||||
| -rw-r--r-- | fs/ext4/super.c | 20 | ||||
| -rw-r--r-- | fs/ext4/xattr.c | 9 | ||||
| -rw-r--r-- | include/linux/page-flags.h | 12 | ||||
| -rw-r--r-- | include/trace/events/ext4.h | 9 | ||||
| -rw-r--r-- | mm/page-writeback.c | 11 | 
23 files changed, 515 insertions, 440 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 5c56785007e0..0762d143e252 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -83,9 +83,9 @@ static inline int ext4_block_in_group(struct super_block *sb,  /* Return the number of clusters used for file system metadata; this   * represents the overhead needed by the file system.   */ -unsigned ext4_num_overhead_clusters(struct super_block *sb, -				    ext4_group_t block_group, -				    struct ext4_group_desc *gdp) +static unsigned ext4_num_overhead_clusters(struct super_block *sb, +					   ext4_group_t block_group, +					   struct ext4_group_desc *gdp)  {  	unsigned num_clusters;  	int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c; @@ -176,9 +176,10 @@ static unsigned int num_clusters_in_group(struct super_block *sb,  }  /* Initializes an uninitialized block bitmap */ -void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, -			    ext4_group_t block_group, -			    struct ext4_group_desc *gdp) +static void ext4_init_block_bitmap(struct super_block *sb, +				   struct buffer_head *bh, +				   ext4_group_t block_group, +				   struct ext4_group_desc *gdp)  {  	unsigned int bit, bit_max;  	struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -307,6 +308,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,  					    ext4_group_t block_group,  					    struct buffer_head *bh)  { +	struct ext4_sb_info *sbi = EXT4_SB(sb);  	ext4_grpblk_t offset;  	ext4_grpblk_t next_zero_bit;  	ext4_fsblk_t blk; @@ -326,14 +328,14 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,  	/* check whether block bitmap block number is set */  	blk = ext4_block_bitmap(sb, desc);  	offset = blk - group_first_block; -	if (!ext4_test_bit(offset, bh->b_data)) +	if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))  		/* bad block bitmap */  		return blk;  	/* check whether the inode bitmap block number is set */  	blk = ext4_inode_bitmap(sb, desc);  	offset = blk - group_first_block; -	if (!ext4_test_bit(offset, bh->b_data)) +	if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))  		/* bad block bitmap */  		return blk; @@ -341,18 +343,19 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,  	blk = ext4_inode_table(sb, desc);  	offset = blk - group_first_block;  	next_zero_bit = ext4_find_next_zero_bit(bh->b_data, -				offset + EXT4_SB(sb)->s_itb_per_group, -				offset); -	if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group) +			EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group), +			EXT4_B2C(sbi, offset)); +	if (next_zero_bit < +	    EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group))  		/* bad bitmap for inode tables */  		return blk;  	return 0;  } -void ext4_validate_block_bitmap(struct super_block *sb, -			       struct ext4_group_desc *desc, -			       ext4_group_t block_group, -			       struct buffer_head *bh) +static void ext4_validate_block_bitmap(struct super_block *sb, +				       struct ext4_group_desc *desc, +				       ext4_group_t block_group, +				       struct buffer_head *bh)  {  	ext4_fsblk_t	blk;  	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); @@ -708,16 +711,6 @@ static inline int test_root(ext4_group_t a, int b)  	}  } -static int ext4_group_sparse(ext4_group_t group) -{ -	if (group <= 1) -		return 1; -	if (!(group & 1)) -		return 0; -	return (test_root(group, 7) || test_root(group, 5) || -		test_root(group, 3)); -} -  /**   *	ext4_bg_has_super - number of blocks used by the superblock in group   *	@sb: superblock for filesystem @@ -728,11 +721,26 @@ static int ext4_group_sparse(ext4_group_t group)   */  int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)  { -	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, -				EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && -			!ext4_group_sparse(group)) +	struct ext4_super_block *es = EXT4_SB(sb)->s_es; + +	if (group == 0) +		return 1; +	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_SPARSE_SUPER2)) { +		if (group == le32_to_cpu(es->s_backup_bgs[0]) || +		    group == le32_to_cpu(es->s_backup_bgs[1])) +			return 1;  		return 0; -	return 1; +	} +	if ((group <= 1) || !EXT4_HAS_RO_COMPAT_FEATURE(sb, +					EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) +		return 1; +	if (!(group & 1)) +		return 0; +	if (test_root(group, 3) || (test_root(group, 5)) || +	    test_root(group, 7)) +		return 1; + +	return 0;  }  static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d638c57e996e..ef1bed66c14f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -105,7 +105,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,  static int ext4_readdir(struct file *file, struct dir_context *ctx)  {  	unsigned int offset; -	int i, stored; +	int i;  	struct ext4_dir_entry_2 *de;  	int err;  	struct inode *inode = file_inode(file); @@ -133,7 +133,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)  			return ret;  	} -	stored = 0;  	offset = ctx->pos & (sb->s_blocksize - 1);  	while (ctx->pos < inode->i_size) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 66946aa62127..1479e2ae00d2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -158,7 +158,6 @@ struct ext4_allocation_request {  #define EXT4_MAP_MAPPED		(1 << BH_Mapped)  #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)  #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary) -#define EXT4_MAP_UNINIT		(1 << BH_Uninit)  /* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of   * ext4_map_blocks wants to know whether or not the underlying cluster has   * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that @@ -169,7 +168,7 @@ struct ext4_allocation_request {  #define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)  #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\  				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ -				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER) +				 EXT4_MAP_FROM_CLUSTER)  struct ext4_map_blocks {  	ext4_fsblk_t m_pblk; @@ -184,7 +183,7 @@ struct ext4_map_blocks {  #define	EXT4_IO_END_UNWRITTEN	0x0001  /* - * For converting uninitialized extents on a work queue. 'handle' is used for + * For converting unwritten extents on a work queue. 'handle' is used for   * buffered writeback.   */  typedef struct ext4_io_end { @@ -537,26 +536,26 @@ enum {  /*   * Flags used by ext4_map_blocks()   */ -	/* Allocate any needed blocks and/or convert an unitialized +	/* Allocate any needed blocks and/or convert an unwritten  	   extent to be an initialized ext4 */  #define EXT4_GET_BLOCKS_CREATE			0x0001 -	/* Request the creation of an unitialized extent */ -#define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002 -#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\ +	/* Request the creation of an unwritten extent */ +#define EXT4_GET_BLOCKS_UNWRIT_EXT		0x0002 +#define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT	(EXT4_GET_BLOCKS_UNWRIT_EXT|\  						 EXT4_GET_BLOCKS_CREATE)  	/* Caller is from the delayed allocation writeout path  	 * finally doing the actual allocation of delayed blocks */  #define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004  	/* caller is from the direct IO path, request to creation of an -	unitialized extents if not allocated, split the uninitialized +	unwritten extents if not allocated, split the unwritten  	extent if blocks has been preallocated already*/  #define EXT4_GET_BLOCKS_PRE_IO			0x0008  #define EXT4_GET_BLOCKS_CONVERT			0x0010  #define EXT4_GET_BLOCKS_IO_CREATE_EXT		(EXT4_GET_BLOCKS_PRE_IO|\ -					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) +					 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT)  	/* Convert extent to initialized after IO complete */  #define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\ -					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) +					 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT)  	/* Eventual metadata allocation (due to growing extent tree)  	 * should not fail, so try to use reserved blocks for that.*/  #define EXT4_GET_BLOCKS_METADATA_NOFAIL		0x0020 @@ -876,6 +875,8 @@ struct ext4_inode_info {  	struct inode vfs_inode;  	struct jbd2_inode *jinode; +	spinlock_t i_raw_lock;	/* protects updates to the raw inode */ +  	/*  	 * File creation time. Its function is same as that of  	 * struct timespec i_{a,c,m}time in the generic inode. @@ -1159,7 +1160,8 @@ struct ext4_super_block {  	__le32	s_usr_quota_inum;	/* inode for tracking user quota */  	__le32	s_grp_quota_inum;	/* inode for tracking group quota */  	__le32	s_overhead_clusters;	/* overhead blocks/clusters in fs */ -	__le32	s_reserved[108];	/* Padding to the end of the block */ +	__le32	s_backup_bgs[2];	/* groups with sparse_super2 SBs */ +	__le32	s_reserved[106];	/* Padding to the end of the block */  	__le32	s_checksum;		/* crc32c(superblock) */  }; @@ -1505,6 +1507,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)  #define EXT4_FEATURE_COMPAT_EXT_ATTR		0x0008  #define EXT4_FEATURE_COMPAT_RESIZE_INODE	0x0010  #define EXT4_FEATURE_COMPAT_DIR_INDEX		0x0020 +#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2	0x0200  #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001  #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE	0x0002 @@ -1953,10 +1956,6 @@ extern void ext4_get_group_no_and_offset(struct super_block *sb,  extern ext4_group_t ext4_get_group_number(struct super_block *sb,  					  ext4_fsblk_t block); -extern void ext4_validate_block_bitmap(struct super_block *sb, -				       struct ext4_group_desc *desc, -				       ext4_group_t block_group, -				       struct buffer_head *bh);  extern unsigned int ext4_block_group(struct super_block *sb,  			ext4_fsblk_t blocknr);  extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, @@ -1985,16 +1984,9 @@ extern int ext4_wait_block_bitmap(struct super_block *sb,  				  struct buffer_head *bh);  extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,  						  ext4_group_t block_group); -extern void ext4_init_block_bitmap(struct super_block *sb, -				   struct buffer_head *bh, -				   ext4_group_t group, -				   struct ext4_group_desc *desc);  extern unsigned ext4_free_clusters_after_init(struct super_block *sb,  					      ext4_group_t block_group,  					      struct ext4_group_desc *gdp); -extern unsigned ext4_num_overhead_clusters(struct super_block *sb, -					   ext4_group_t block_group, -					   struct ext4_group_desc *gdp);  ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);  /* dir.c */ @@ -2137,8 +2129,6 @@ extern int ext4_alloc_da_blocks(struct inode *inode);  extern void ext4_set_aops(struct inode *inode);  extern int ext4_writepage_trans_blocks(struct inode *);  extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); -extern int ext4_block_truncate_page(handle_t *handle, -		struct address_space *mapping, loff_t from);  extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,  			     loff_t lstart, loff_t lend);  extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); @@ -2198,8 +2188,6 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);  /* super.c */  extern int ext4_calculate_overhead(struct super_block *sb); -extern int ext4_superblock_csum_verify(struct super_block *sb, -				       struct ext4_super_block *es);  extern void ext4_superblock_csum_set(struct super_block *sb);  extern void *ext4_kvmalloc(size_t size, gfp_t flags);  extern void *ext4_kvzalloc(size_t size, gfp_t flags); @@ -2571,19 +2559,11 @@ extern const struct file_operations ext4_dir_operations;  extern const struct inode_operations ext4_file_inode_operations;  extern const struct file_operations ext4_file_operations;  extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); -extern void ext4_unwritten_wait(struct inode *inode);  /* inline.c */  extern int ext4_has_inline_data(struct inode *inode); -extern int ext4_get_inline_size(struct inode *inode);  extern int ext4_get_max_inline_size(struct inode *inode);  extern int ext4_find_inline_data_nolock(struct inode *inode); -extern void ext4_write_inline_data(struct inode *inode, -				   struct ext4_iloc *iloc, -				   void *buffer, loff_t pos, -				   unsigned int len); -extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, -				    unsigned int len);  extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,  				 unsigned int len);  extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); @@ -2771,23 +2751,20 @@ extern void ext4_io_submit(struct ext4_io_submit *io);  extern int ext4_bio_write_page(struct ext4_io_submit *io,  			       struct page *page,  			       int len, -			       struct writeback_control *wbc); +			       struct writeback_control *wbc, +			       bool keep_towrite);  /* mmp.c */  extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); -extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); -extern int ext4_mmp_csum_verify(struct super_block *sb, -				struct mmp_struct *mmp);  /*   * Note that these flags will never ever appear in a buffer_head's state flag.   * See EXT4_MAP_... to see where this is used.   */  enum ext4_state_bits { -	BH_Uninit	/* blocks are allocated but uninitialized on disk */ -	 = BH_JBDPrivateStart, -	BH_AllocFromCluster,	/* allocated blocks were part of already +	BH_AllocFromCluster	/* allocated blocks were part of already  				 * allocated cluster. */ +	= BH_JBDPrivateStart  };  /* diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 5074fe23f19e..a867f5ca9991 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -137,21 +137,21 @@ struct ext4_ext_path {   * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an   * initialized extent. This is 2^15 and not (2^16 - 1), since we use the   * MSB of ee_len field in the extent datastructure to signify if this - * particular extent is an initialized extent or an uninitialized (i.e. + * particular extent is an initialized extent or an unwritten (i.e.   * preallocated). - * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an - * uninitialized extent. + * EXT_UNWRITTEN_MAX_LEN is the maximum number of blocks we can have in an + * unwritten extent.   * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an - * uninitialized one. In other words, if MSB of ee_len is set, it is an - * uninitialized extent with only one special scenario when ee_len = 0x8000. - * In this case we can not have an uninitialized extent of zero length and + * unwritten one. In other words, if MSB of ee_len is set, it is an + * unwritten extent with only one special scenario when ee_len = 0x8000. + * In this case we can not have an unwritten extent of zero length and   * thus we make it as a special case of initialized extent with 0x8000 length.   * This way we get better extent-to-group alignment for initialized extents.   * Hence, the maximum number of blocks we can have in an *initialized* - * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). + * extent is 2^15 (32768) and in an *unwritten* extent is 2^15-1 (32767).   */  #define EXT_INIT_MAX_LEN	(1UL << 15) -#define EXT_UNINIT_MAX_LEN	(EXT_INIT_MAX_LEN - 1) +#define EXT_UNWRITTEN_MAX_LEN	(EXT_INIT_MAX_LEN - 1)  #define EXT_FIRST_EXTENT(__hdr__) \ @@ -187,14 +187,14 @@ static inline unsigned short ext_depth(struct inode *inode)  	return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);  } -static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) +static inline void ext4_ext_mark_unwritten(struct ext4_extent *ext)  { -	/* We can not have an uninitialized extent of zero length! */ +	/* We can not have an unwritten extent of zero length! */  	BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);  	ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);  } -static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) +static inline int ext4_ext_is_unwritten(struct ext4_extent *ext)  {  	/* Extent with ee_len of 0x8000 is treated as an initialized extent */  	return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index c3fb607413ed..0074e0d23d6e 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -122,9 +122,10 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,  	return handle;  } -void ext4_journal_abort_handle(const char *caller, unsigned int line, -			       const char *err_fn, struct buffer_head *bh, -			       handle_t *handle, int err) +static void ext4_journal_abort_handle(const char *caller, unsigned int line, +				      const char *err_fn, +				      struct buffer_head *bh, +				      handle_t *handle, int err)  {  	char nbuf[16];  	const char *errstr = ext4_decode_error(NULL, err, nbuf); diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 81cfefa9dc0c..17c00ff202f2 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -231,10 +231,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);  /*   * Wrapper functions with which ext4 calls into JBD.   */ -void ext4_journal_abort_handle(const char *caller, unsigned int line, -			       const char *err_fn, -		struct buffer_head *bh, handle_t *handle, int err); -  int __ext4_journal_get_write_access(const char *where, unsigned int line,  				    handle_t *handle, struct buffer_head *bh); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 01b0c208f625..4da228a0e6d0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -50,8 +50,8 @@   */  #define EXT4_EXT_MAY_ZEROOUT	0x1  /* safe to zeroout if split fails \  					due to ENOSPC */ -#define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */ -#define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */ +#define EXT4_EXT_MARK_UNWRIT1	0x2  /* mark first half unwritten */ +#define EXT4_EXT_MARK_UNWRIT2	0x4  /* mark second half unwritten */  #define EXT4_EXT_DATA_VALID1	0x8  /* first half contains valid data */  #define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */ @@ -143,6 +143,7 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,  {  	if (path->p_bh) {  		/* path points to block */ +		BUFFER_TRACE(path->p_bh, "get_write_access");  		return ext4_journal_get_write_access(handle, path->p_bh);  	}  	/* path points to leaf/index in inode body */ @@ -524,7 +525,7 @@ __read_extent_tree_block(const char *function, unsigned int line,  						     lblk - prev, ~0,  						     EXTENT_STATUS_HOLE); -			if (ext4_ext_is_uninitialized(ex)) +			if (ext4_ext_is_unwritten(ex))  				status = EXTENT_STATUS_UNWRITTEN;  			ext4_es_cache_extent(inode, lblk, len,  					     ext4_ext_pblock(ex), status); @@ -620,7 +621,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)  		} else if (path->p_ext) {  			ext_debug("  %d:[%d]%d:%llu ",  				  le32_to_cpu(path->p_ext->ee_block), -				  ext4_ext_is_uninitialized(path->p_ext), +				  ext4_ext_is_unwritten(path->p_ext),  				  ext4_ext_get_actual_len(path->p_ext),  				  ext4_ext_pblock(path->p_ext));  		} else @@ -646,7 +647,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)  	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {  		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), -			  ext4_ext_is_uninitialized(ex), +			  ext4_ext_is_unwritten(ex),  			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));  	}  	ext_debug("\n"); @@ -677,7 +678,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,  		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",  				le32_to_cpu(ex->ee_block),  				ext4_ext_pblock(ex), -				ext4_ext_is_uninitialized(ex), +				ext4_ext_is_unwritten(ex),  				ext4_ext_get_actual_len(ex),  				newblock);  		ex++; @@ -802,7 +803,7 @@ ext4_ext_binsearch(struct inode *inode,  	ext_debug("  -> %d:%llu:[%d]%d ",  			le32_to_cpu(path->p_ext->ee_block),  			ext4_ext_pblock(path->p_ext), -			ext4_ext_is_uninitialized(path->p_ext), +			ext4_ext_is_unwritten(path->p_ext),  			ext4_ext_get_actual_len(path->p_ext));  #ifdef CHECK_BINSEARCH @@ -1686,11 +1687,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,  	/*  	 * Make sure that both extents are initialized. We don't merge -	 * uninitialized extents so that we can be sure that end_io code has +	 * unwritten extents so that we can be sure that end_io code has  	 * the extent that was written properly split out and conversion to  	 * initialized is trivial.  	 */ -	if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2)) +	if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))  		return 0;  	ext1_ee_len = ext4_ext_get_actual_len(ex1); @@ -1707,10 +1708,10 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,  	 */  	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)  		return 0; -	if (ext4_ext_is_uninitialized(ex1) && +	if (ext4_ext_is_unwritten(ex1) &&  	    (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||  	     atomic_read(&EXT4_I(inode)->i_unwritten) || -	     (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN))) +	     (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)))  		return 0;  #ifdef AGGRESSIVE_TEST  	if (ext1_ee_len >= 4) @@ -1735,7 +1736,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,  {  	struct ext4_extent_header *eh;  	unsigned int depth, len; -	int merge_done = 0, uninit; +	int merge_done = 0, unwritten;  	depth = ext_depth(inode);  	BUG_ON(path[depth].p_hdr == NULL); @@ -1745,11 +1746,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,  		if (!ext4_can_extents_be_merged(inode, ex, ex + 1))  			break;  		/* merge with next extent! */ -		uninit = ext4_ext_is_uninitialized(ex); +		unwritten = ext4_ext_is_unwritten(ex);  		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)  				+ ext4_ext_get_actual_len(ex + 1)); -		if (uninit) -			ext4_ext_mark_uninitialized(ex); +		if (unwritten) +			ext4_ext_mark_unwritten(ex);  		if (ex + 1 < EXT_LAST_EXTENT(eh)) {  			len = (EXT_LAST_EXTENT(eh) - ex - 1) @@ -1903,7 +1904,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,  	struct ext4_ext_path *npath = NULL;  	int depth, len, err;  	ext4_lblk_t next; -	int mb_flags = 0, uninit; +	int mb_flags = 0, unwritten;  	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {  		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); @@ -1943,21 +1944,21 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,  		if (ext4_can_extents_be_merged(inode, ex, newext)) {  			ext_debug("append [%d]%d block to %u:[%d]%d"  				  "(from %llu)\n", -				  ext4_ext_is_uninitialized(newext), +				  ext4_ext_is_unwritten(newext),  				  ext4_ext_get_actual_len(newext),  				  le32_to_cpu(ex->ee_block), -				  ext4_ext_is_uninitialized(ex), +				  ext4_ext_is_unwritten(ex),  				  ext4_ext_get_actual_len(ex),  				  ext4_ext_pblock(ex));  			err = ext4_ext_get_access(handle, inode,  						  path + depth);  			if (err)  				return err; -			uninit = ext4_ext_is_uninitialized(ex); +			unwritten = ext4_ext_is_unwritten(ex);  			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)  					+ ext4_ext_get_actual_len(newext)); -			if (uninit) -				ext4_ext_mark_uninitialized(ex); +			if (unwritten) +				ext4_ext_mark_unwritten(ex);  			eh = path[depth].p_hdr;  			nearex = ex;  			goto merge; @@ -1969,10 +1970,10 @@ prepend:  			ext_debug("prepend %u[%d]%d block to %u:[%d]%d"  				  "(from %llu)\n",  				  le32_to_cpu(newext->ee_block), -				  ext4_ext_is_uninitialized(newext), +				  ext4_ext_is_unwritten(newext),  				  ext4_ext_get_actual_len(newext),  				  le32_to_cpu(ex->ee_block), -				  ext4_ext_is_uninitialized(ex), +				  ext4_ext_is_unwritten(ex),  				  ext4_ext_get_actual_len(ex),  				  ext4_ext_pblock(ex));  			err = ext4_ext_get_access(handle, inode, @@ -1980,13 +1981,13 @@ prepend:  			if (err)  				return err; -			uninit = ext4_ext_is_uninitialized(ex); +			unwritten = ext4_ext_is_unwritten(ex);  			ex->ee_block = newext->ee_block;  			ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));  			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)  					+ ext4_ext_get_actual_len(newext)); -			if (uninit) -				ext4_ext_mark_uninitialized(ex); +			if (unwritten) +				ext4_ext_mark_unwritten(ex);  			eh = path[depth].p_hdr;  			nearex = ex;  			goto merge; @@ -2046,7 +2047,7 @@ has_space:  		ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",  				le32_to_cpu(newext->ee_block),  				ext4_ext_pblock(newext), -				ext4_ext_is_uninitialized(newext), +				ext4_ext_is_unwritten(newext),  				ext4_ext_get_actual_len(newext));  		nearex = EXT_FIRST_EXTENT(eh);  	} else { @@ -2057,7 +2058,7 @@ has_space:  					"nearest %p\n",  					le32_to_cpu(newext->ee_block),  					ext4_ext_pblock(newext), -					ext4_ext_is_uninitialized(newext), +					ext4_ext_is_unwritten(newext),  					ext4_ext_get_actual_len(newext),  					nearex);  			nearex++; @@ -2068,7 +2069,7 @@ has_space:  					"nearest %p\n",  					le32_to_cpu(newext->ee_block),  					ext4_ext_pblock(newext), -					ext4_ext_is_uninitialized(newext), +					ext4_ext_is_unwritten(newext),  					ext4_ext_get_actual_len(newext),  					nearex);  		} @@ -2078,7 +2079,7 @@ has_space:  					"move %d extents from 0x%p to 0x%p\n",  					le32_to_cpu(newext->ee_block),  					ext4_ext_pblock(newext), -					ext4_ext_is_uninitialized(newext), +					ext4_ext_is_unwritten(newext),  					ext4_ext_get_actual_len(newext),  					len, nearex, nearex + 1);  			memmove(nearex + 1, nearex, @@ -2200,7 +2201,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,  			es.es_lblk = le32_to_cpu(ex->ee_block);  			es.es_len = ext4_ext_get_actual_len(ex);  			es.es_pblk = ext4_ext_pblock(ex); -			if (ext4_ext_is_uninitialized(ex)) +			if (ext4_ext_is_unwritten(ex))  				flags |= FIEMAP_EXTENT_UNWRITTEN;  		} @@ -2576,7 +2577,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  	unsigned num;  	ext4_lblk_t ex_ee_block;  	unsigned short ex_ee_len; -	unsigned uninitialized = 0; +	unsigned unwritten = 0;  	struct ext4_extent *ex;  	ext4_fsblk_t pblk; @@ -2623,13 +2624,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  	while (ex >= EXT_FIRST_EXTENT(eh) &&  			ex_ee_block + ex_ee_len > start) { -		if (ext4_ext_is_uninitialized(ex)) -			uninitialized = 1; +		if (ext4_ext_is_unwritten(ex)) +			unwritten = 1;  		else -			uninitialized = 0; +			unwritten = 0;  		ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, -			 uninitialized, ex_ee_len); +			  unwritten, ex_ee_len);  		path[depth].p_ext = ex;  		a = ex_ee_block > start ? ex_ee_block : start; @@ -2701,11 +2702,11 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  		ex->ee_len = cpu_to_le16(num);  		/* -		 * Do not mark uninitialized if all the blocks in the +		 * Do not mark unwritten if all the blocks in the  		 * extent have been removed.  		 */ -		if (uninitialized && num) -			ext4_ext_mark_uninitialized(ex); +		if (unwritten && num) +			ext4_ext_mark_unwritten(ex);  		/*  		 * If the extent was completely released,  		 * we need to remove it from the leaf @@ -2854,9 +2855,9 @@ again:  		    end < ee_block + ext4_ext_get_actual_len(ex) - 1) {  			int split_flag = 0; -			if (ext4_ext_is_uninitialized(ex)) -				split_flag = EXT4_EXT_MARK_UNINIT1 | -					     EXT4_EXT_MARK_UNINIT2; +			if (ext4_ext_is_unwritten(ex)) +				split_flag = EXT4_EXT_MARK_UNWRIT1 | +					     EXT4_EXT_MARK_UNWRIT2;  			/*  			 * Split the extent in two so that 'end' is the last @@ -3113,7 +3114,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)   * @path: the path to the extent   * @split: the logical block where the extent is splitted.   * @split_flags: indicates if the extent could be zeroout if split fails, and - *		 the states(init or uninit) of new extents. + *		 the states(init or unwritten) of new extents.   * @flags: flags used to insert new extent to extent tree.   *   * @@ -3155,10 +3156,10 @@ static int ext4_split_extent_at(handle_t *handle,  	newblock = split - ee_block + ext4_ext_pblock(ex);  	BUG_ON(split < ee_block || split >= (ee_block + ee_len)); -	BUG_ON(!ext4_ext_is_uninitialized(ex) && +	BUG_ON(!ext4_ext_is_unwritten(ex) &&  	       split_flag & (EXT4_EXT_MAY_ZEROOUT | -			     EXT4_EXT_MARK_UNINIT1 | -			     EXT4_EXT_MARK_UNINIT2)); +			     EXT4_EXT_MARK_UNWRIT1 | +			     EXT4_EXT_MARK_UNWRIT2));  	err = ext4_ext_get_access(handle, inode, path + depth);  	if (err) @@ -3170,8 +3171,8 @@ static int ext4_split_extent_at(handle_t *handle,  		 * then we just change the state of the extent, and splitting  		 * is not needed.  		 */ -		if (split_flag & EXT4_EXT_MARK_UNINIT2) -			ext4_ext_mark_uninitialized(ex); +		if (split_flag & EXT4_EXT_MARK_UNWRIT2) +			ext4_ext_mark_unwritten(ex);  		else  			ext4_ext_mark_initialized(ex); @@ -3185,8 +3186,8 @@ static int ext4_split_extent_at(handle_t *handle,  	/* case a */  	memcpy(&orig_ex, ex, sizeof(orig_ex));  	ex->ee_len = cpu_to_le16(split - ee_block); -	if (split_flag & EXT4_EXT_MARK_UNINIT1) -		ext4_ext_mark_uninitialized(ex); +	if (split_flag & EXT4_EXT_MARK_UNWRIT1) +		ext4_ext_mark_unwritten(ex);  	/*  	 * path may lead to new leaf, not to original leaf any more @@ -3200,8 +3201,8 @@ static int ext4_split_extent_at(handle_t *handle,  	ex2->ee_block = cpu_to_le32(split);  	ex2->ee_len   = cpu_to_le16(ee_len - (split - ee_block));  	ext4_ext_store_pblock(ex2, newblock); -	if (split_flag & EXT4_EXT_MARK_UNINIT2) -		ext4_ext_mark_uninitialized(ex2); +	if (split_flag & EXT4_EXT_MARK_UNWRIT2) +		ext4_ext_mark_unwritten(ex2);  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);  	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { @@ -3278,7 +3279,7 @@ static int ext4_split_extent(handle_t *handle,  	struct ext4_extent *ex;  	unsigned int ee_len, depth;  	int err = 0; -	int uninitialized; +	int unwritten;  	int split_flag1, flags1;  	int allocated = map->m_len; @@ -3286,14 +3287,14 @@ static int ext4_split_extent(handle_t *handle,  	ex = path[depth].p_ext;  	ee_block = le32_to_cpu(ex->ee_block);  	ee_len = ext4_ext_get_actual_len(ex); -	uninitialized = ext4_ext_is_uninitialized(ex); +	unwritten = ext4_ext_is_unwritten(ex);  	if (map->m_lblk + map->m_len < ee_block + ee_len) {  		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;  		flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; -		if (uninitialized) -			split_flag1 |= EXT4_EXT_MARK_UNINIT1 | -				       EXT4_EXT_MARK_UNINIT2; +		if (unwritten) +			split_flag1 |= EXT4_EXT_MARK_UNWRIT1 | +				       EXT4_EXT_MARK_UNWRIT2;  		if (split_flag & EXT4_EXT_DATA_VALID2)  			split_flag1 |= EXT4_EXT_DATA_VALID1;  		err = ext4_split_extent_at(handle, inode, path, @@ -3318,15 +3319,15 @@ static int ext4_split_extent(handle_t *handle,  				 (unsigned long) map->m_lblk);  		return -EIO;  	} -	uninitialized = ext4_ext_is_uninitialized(ex); +	unwritten = ext4_ext_is_unwritten(ex);  	split_flag1 = 0;  	if (map->m_lblk >= ee_block) {  		split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; -		if (uninitialized) { -			split_flag1 |= EXT4_EXT_MARK_UNINIT1; +		if (unwritten) { +			split_flag1 |= EXT4_EXT_MARK_UNWRIT1;  			split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | -						     EXT4_EXT_MARK_UNINIT2); +						     EXT4_EXT_MARK_UNWRIT2);  		}  		err = ext4_split_extent_at(handle, inode, path,  				map->m_lblk, split_flag1, flags); @@ -3341,16 +3342,16 @@ out:  /*   * This function is called by ext4_ext_map_blocks() if someone tries to write - * to an uninitialized extent. It may result in splitting the uninitialized + * to an unwritten extent. It may result in splitting the unwritten   * extent into multiple extents (up to three - one initialized and two - * uninitialized). + * unwritten).   * There are three possibilities:   *   a> There is no split required: Entire extent should be initialized   *   b> Splits in two extents: Write is happening at either end of the extent   *   c> Splits in three extents: Somone is writing in middle of the extent   *   * Pre-conditions: - *  - The extent pointed to by 'path' is uninitialized. + *  - The extent pointed to by 'path' is unwritten.   *  - The extent pointed to by 'path' contains a superset   *    of the logical span [map->m_lblk, map->m_lblk + map->m_len).   * @@ -3396,12 +3397,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);  	/* Pre-conditions */ -	BUG_ON(!ext4_ext_is_uninitialized(ex)); +	BUG_ON(!ext4_ext_is_unwritten(ex));  	BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));  	/*  	 * Attempt to transfer newly initialized blocks from the currently -	 * uninitialized extent to its neighbor. This is much cheaper +	 * unwritten extent to its neighbor. This is much cheaper  	 * than an insertion followed by a merge as those involve costly  	 * memmove() calls. Transferring to the left is the common case in  	 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE) @@ -3437,7 +3438,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  		 * - C4: abut_ex can receive the additional blocks without  		 *   overflowing the (initialized) length limit.  		 */ -		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/ +		if ((!ext4_ext_is_unwritten(abut_ex)) &&		/*C1*/  			((prev_lblk + prev_len) == ee_block) &&		/*C2*/  			((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/  			(prev_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/ @@ -3452,7 +3453,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  			ex->ee_block = cpu_to_le32(ee_block + map_len);  			ext4_ext_store_pblock(ex, ee_pblk + map_len);  			ex->ee_len = cpu_to_le16(ee_len - map_len); -			ext4_ext_mark_uninitialized(ex); /* Restore the flag */ +			ext4_ext_mark_unwritten(ex); /* Restore the flag */  			/* Extend abut_ex by 'map_len' blocks */  			abut_ex->ee_len = cpu_to_le16(prev_len + map_len); @@ -3483,7 +3484,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  		 * - C4: abut_ex can receive the additional blocks without  		 *   overflowing the (initialized) length limit.  		 */ -		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/ +		if ((!ext4_ext_is_unwritten(abut_ex)) &&		/*C1*/  		    ((map->m_lblk + map_len) == next_lblk) &&		/*C2*/  		    ((ee_pblk + ee_len) == next_pblk) &&		/*C3*/  		    (next_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/ @@ -3498,7 +3499,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  			abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);  			ext4_ext_store_pblock(abut_ex, next_pblk - map_len);  			ex->ee_len = cpu_to_le16(ee_len - map_len); -			ext4_ext_mark_uninitialized(ex); /* Restore the flag */ +			ext4_ext_mark_unwritten(ex); /* Restore the flag */  			/* Extend abut_ex by 'map_len' blocks */  			abut_ex->ee_len = cpu_to_le16(next_len + map_len); @@ -3603,26 +3604,26 @@ out:  /*   * This function is called by ext4_ext_map_blocks() from   * ext4_get_blocks_dio_write() when DIO to write - * to an uninitialized extent. + * to an unwritten extent.   * - * Writing to an uninitialized extent may result in splitting the uninitialized - * extent into multiple initialized/uninitialized extents (up to three) + * Writing to an unwritten extent may result in splitting the unwritten + * extent into multiple initialized/unwritten extents (up to three)   * There are three possibilities: - *   a> There is no split required: Entire extent should be uninitialized + *   a> There is no split required: Entire extent should be unwritten   *   b> Splits in two extents: Write is happening at either end of the extent   *   c> Splits in three extents: Somone is writing in middle of the extent   *   * This works the same way in the case of initialized -> unwritten conversion.   *   * One of more index blocks maybe needed if the extent tree grow after - * the uninitialized extent split. To prevent ENOSPC occur at the IO - * complete, we need to split the uninitialized extent before DIO submit - * the IO. The uninitialized extent called at this time will be split - * into three uninitialized extent(at most). After IO complete, the part + * the unwritten extent split. To prevent ENOSPC occur at the IO + * complete, we need to split the unwritten extent before DIO submit + * the IO. The unwritten extent called at this time will be split + * into three unwritten extent(at most). After IO complete, the part   * being filled will be convert to initialized by the end_io callback function   * via ext4_convert_unwritten_extents().   * - * Returns the size of uninitialized extent to be written on success. + * Returns the size of unwritten extent to be written on success.   */  static int ext4_split_convert_extents(handle_t *handle,  					struct inode *inode, @@ -3660,7 +3661,7 @@ static int ext4_split_convert_extents(handle_t *handle,  	} else if (flags & EXT4_GET_BLOCKS_CONVERT) {  		split_flag |= ee_block + ee_len <= eof_block ?  			      EXT4_EXT_MAY_ZEROOUT : 0; -		split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); +		split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);  	}  	flags |= EXT4_GET_BLOCKS_PRE_IO;  	return ext4_split_extent(handle, inode, path, map, split_flag, flags); @@ -3710,8 +3711,8 @@ static int ext4_convert_initialized_extents(handle_t *handle,  	err = ext4_ext_get_access(handle, inode, path + depth);  	if (err)  		goto out; -	/* first mark the extent as uninitialized */ -	ext4_ext_mark_uninitialized(ex); +	/* first mark the extent as unwritten */ +	ext4_ext_mark_unwritten(ex);  	/* note: ext4_ext_correct_indexes() isn't needed here because  	 * borders are not changed @@ -3971,10 +3972,10 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,  	/*  	 * Make sure that the extent is no bigger than we support with -	 * uninitialized extent +	 * unwritten extent  	 */ -	if (map->m_len > EXT_UNINIT_MAX_LEN) -		map->m_len = EXT_UNINIT_MAX_LEN / 2; +	if (map->m_len > EXT_UNWRITTEN_MAX_LEN) +		map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;  	ret = ext4_convert_initialized_extents(handle, inode, map,  						path); @@ -3993,7 +3994,7 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,  }  static int -ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, +ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,  			struct ext4_map_blocks *map,  			struct ext4_ext_path *path, int flags,  			unsigned int allocated, ext4_fsblk_t newblock) @@ -4002,23 +4003,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,  	int err = 0;  	ext4_io_end_t *io = ext4_inode_aio(inode); -	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " +	ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "  		  "block %llu, max_blocks %u, flags %x, allocated %u\n",  		  inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,  		  flags, allocated);  	ext4_ext_show_leaf(inode, path);  	/* -	 * When writing into uninitialized space, we should not fail to +	 * When writing into unwritten space, we should not fail to  	 * allocate metadata blocks for the new extent block if needed.  	 */  	flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL; -	trace_ext4_ext_handle_uninitialized_extents(inode, map, flags, +	trace_ext4_ext_handle_unwritten_extents(inode, map, flags,  						    allocated, newblock);  	/* get_block() before submit the IO, split the extent */ -	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { +	if (flags & EXT4_GET_BLOCKS_PRE_IO) {  		ret = ext4_split_convert_extents(handle, inode, map,  					 path, flags | EXT4_GET_BLOCKS_CONVERT);  		if (ret <= 0) @@ -4033,12 +4034,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,  		else  			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);  		map->m_flags |= EXT4_MAP_UNWRITTEN; -		if (ext4_should_dioread_nolock(inode)) -			map->m_flags |= EXT4_MAP_UNINIT;  		goto out;  	}  	/* IO end_io complete, convert the filled extent to written */ -	if ((flags & EXT4_GET_BLOCKS_CONVERT)) { +	if (flags & EXT4_GET_BLOCKS_CONVERT) {  		ret = ext4_convert_unwritten_extents_endio(handle, inode, map,  							path);  		if (ret >= 0) { @@ -4059,7 +4058,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,  	 * repeat fallocate creation request  	 * we already have an unwritten extent  	 */ -	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) { +	if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {  		map->m_flags |= EXT4_MAP_UNWRITTEN;  		goto map_out;  	} @@ -4310,7 +4309,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  		/* -		 * Uninitialized extents are treated as holes, except that +		 * unwritten extents are treated as holes, except that  		 * we split out initialized portions during a write.  		 */  		ee_len = ext4_ext_get_actual_len(ex); @@ -4329,16 +4328,16 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  			 * If the extent is initialized check whether the  			 * caller wants to convert it to unwritten.  			 */ -			if ((!ext4_ext_is_uninitialized(ex)) && +			if ((!ext4_ext_is_unwritten(ex)) &&  			    (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {  				allocated = ext4_ext_convert_initialized_extent(  						handle, inode, map, path, flags,  						allocated, newblock);  				goto out2; -			} else if (!ext4_ext_is_uninitialized(ex)) +			} else if (!ext4_ext_is_unwritten(ex))  				goto out; -			ret = ext4_ext_handle_uninitialized_extents( +			ret = ext4_ext_handle_unwritten_extents(  				handle, inode, map, path, flags,  				allocated, newblock);  			if (ret < 0) @@ -4410,15 +4409,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  	/*  	 * See if request is beyond maximum number of blocks we can have in  	 * a single extent. For an initialized extent this limit is -	 * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is -	 * EXT_UNINIT_MAX_LEN. +	 * EXT_INIT_MAX_LEN and for an unwritten extent this limit is +	 * EXT_UNWRITTEN_MAX_LEN.  	 */  	if (map->m_len > EXT_INIT_MAX_LEN && -	    !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) +	    !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))  		map->m_len = EXT_INIT_MAX_LEN; -	else if (map->m_len > EXT_UNINIT_MAX_LEN && -		 (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) -		map->m_len = EXT_UNINIT_MAX_LEN; +	else if (map->m_len > EXT_UNWRITTEN_MAX_LEN && +		 (flags & EXT4_GET_BLOCKS_UNWRIT_EXT)) +		map->m_len = EXT_UNWRITTEN_MAX_LEN;  	/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */  	newex.ee_len = cpu_to_le16(map->m_len); @@ -4466,21 +4465,19 @@ got_allocated_blocks:  	/* try to insert new extent into found leaf and return */  	ext4_ext_store_pblock(&newex, newblock + offset);  	newex.ee_len = cpu_to_le16(ar.len); -	/* Mark uninitialized */ -	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ -		ext4_ext_mark_uninitialized(&newex); +	/* Mark unwritten */ +	if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){ +		ext4_ext_mark_unwritten(&newex);  		map->m_flags |= EXT4_MAP_UNWRITTEN;  		/*  		 * io_end structure was created for every IO write to an -		 * uninitialized extent. To avoid unnecessary conversion, +		 * unwritten extent. To avoid unnecessary conversion,  		 * here we flag the IO that really needs the conversion.  		 * For non asycn direct IO case, flag the inode state  		 * that we need to perform conversion when IO is done.  		 */ -		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) +		if (flags & EXT4_GET_BLOCKS_PRE_IO)  			set_unwritten = 1; -		if (ext4_should_dioread_nolock(inode)) -			map->m_flags |= EXT4_MAP_UNINIT;  	}  	err = 0; @@ -4607,9 +4604,9 @@ got_allocated_blocks:  	/*  	 * Cache the extent and update transaction to commit on fdatasync only -	 * when it is _not_ an uninitialized extent. +	 * when it is _not_ an unwritten extent.  	 */ -	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) +	if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)  		ext4_update_inode_fsync_trans(handle, inode, 1);  	else  		ext4_update_inode_fsync_trans(handle, inode, 0); @@ -4683,7 +4680,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,  	 * that it doesn't get unnecessarily split into multiple  	 * extents.  	 */ -	if (len <= EXT_UNINIT_MAX_LEN) +	if (len <= EXT_UNWRITTEN_MAX_LEN)  		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;  	/* @@ -4744,6 +4741,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,  	if (!S_ISREG(inode->i_mode))  		return -EINVAL; +	/* Call ext4_force_commit to flush all data in case of data=journal. */ +	if (ext4_should_journal_data(inode)) { +		ret = ext4_force_commit(inode->i_sb); +		if (ret) +			return ret; +	} +  	/*  	 * Write out all dirty pages to avoid race conditions  	 * Then release them. @@ -4775,7 +4779,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,  	else  		max_blocks -= lblk; -	flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | +	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |  		EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;  	if (mode & FALLOC_FL_KEEP_SIZE)  		flags |= EXT4_GET_BLOCKS_KEEP_SIZE; @@ -4918,7 +4922,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)  	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)  		- lblk; -	flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; +	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;  	if (mode & FALLOC_FL_KEEP_SIZE)  		flags |= EXT4_GET_BLOCKS_KEEP_SIZE; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 0ebc21204b51..3f5c188953a4 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -344,8 +344,14 @@ static int ext4_es_can_be_merged(struct extent_status *es1,  	if (ext4_es_status(es1) != ext4_es_status(es2))  		return 0; -	if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) +	if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { +		pr_warn("ES assertion failed when merging extents. " +			"The sum of lengths of es1 (%d) and es2 (%d) " +			"is bigger than allowed file size (%d)\n", +			es1->es_len, es2->es_len, EXT_MAX_BLOCKS); +		WARN_ON(1);  		return 0; +	}  	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)  		return 0; @@ -433,7 +439,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,  		ee_start = ext4_ext_pblock(ex);  		ee_len = ext4_ext_get_actual_len(ex); -		ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; +		ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;  		es_status = ext4_es_is_unwritten(es) ? 1 : 0;  		/* diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 063fc1538355..4e8bc284ec0e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -57,7 +57,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)  	return 0;  } -void ext4_unwritten_wait(struct inode *inode) +static void ext4_unwritten_wait(struct inode *inode)  {  	wait_queue_head_t *wq = ext4_ioend_wq(inode); @@ -92,58 +92,91 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,  }  static ssize_t -ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, -		    unsigned long nr_segs, loff_t pos) +ext4_file_write(struct kiocb *iocb, const struct iovec *iov, +		unsigned long nr_segs, loff_t pos)  {  	struct file *file = iocb->ki_filp; -	struct inode *inode = file->f_mapping->host; +	struct inode *inode = file_inode(iocb->ki_filp); +	struct mutex *aio_mutex = NULL;  	struct blk_plug plug; -	int unaligned_aio = 0; -	ssize_t ret; +	int o_direct = file->f_flags & O_DIRECT;  	int overwrite = 0;  	size_t length = iov_length(iov, nr_segs); +	ssize_t ret; -	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && -	    !is_sync_kiocb(iocb)) -		unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); +	BUG_ON(iocb->ki_pos != pos); -	/* Unaligned direct AIO must be serialized; see comment above */ -	if (unaligned_aio) { -		mutex_lock(ext4_aio_mutex(inode)); +	/* +	 * Unaligned direct AIO must be serialized; see comment above +	 * In the case of O_APPEND, assume that we must always serialize +	 */ +	if (o_direct && +	    ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && +	    !is_sync_kiocb(iocb) && +	    (file->f_flags & O_APPEND || +	     ext4_unaligned_aio(inode, iov, nr_segs, pos))) { +		aio_mutex = ext4_aio_mutex(inode); +		mutex_lock(aio_mutex);  		ext4_unwritten_wait(inode);  	} -	BUG_ON(iocb->ki_pos != pos); -  	mutex_lock(&inode->i_mutex); -	blk_start_plug(&plug); +	if (file->f_flags & O_APPEND) +		iocb->ki_pos = pos = i_size_read(inode); + +	/* +	 * If we have encountered a bitmap-format file, the size limit +	 * is smaller than s_maxbytes, which is for extent-mapped files. +	 */ +	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { +		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -	iocb->private = &overwrite; +		if ((pos > sbi->s_bitmap_maxbytes) || +		    (pos == sbi->s_bitmap_maxbytes && length > 0)) { +			mutex_unlock(&inode->i_mutex); +			ret = -EFBIG; +			goto errout; +		} -	/* check whether we do a DIO overwrite or not */ -	if (ext4_should_dioread_nolock(inode) && !unaligned_aio && -	    !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { -		struct ext4_map_blocks map; -		unsigned int blkbits = inode->i_blkbits; -		int err, len; +		if (pos + length > sbi->s_bitmap_maxbytes) { +			nr_segs = iov_shorten((struct iovec *)iov, nr_segs, +					      sbi->s_bitmap_maxbytes - pos); +		} +	} -		map.m_lblk = pos >> blkbits; -		map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) -			- map.m_lblk; -		len = map.m_len; +	if (o_direct) { +		blk_start_plug(&plug); -		err = ext4_map_blocks(NULL, inode, &map, 0); -		/* -		 * 'err==len' means that all of blocks has been preallocated no -		 * matter they are initialized or not.  For excluding -		 * uninitialized extents, we need to check m_flags.  There are -		 * two conditions that indicate for initialized extents. -		 * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned; -		 * 2) If we do a real lookup, non-flags are returned. -		 * So we should check these two conditions. -		 */ -		if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) -			overwrite = 1; +		iocb->private = &overwrite; + +		/* check whether we do a DIO overwrite or not */ +		if (ext4_should_dioread_nolock(inode) && !aio_mutex && +		    !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { +			struct ext4_map_blocks map; +			unsigned int blkbits = inode->i_blkbits; +			int err, len; + +			map.m_lblk = pos >> blkbits; +			map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) +				- map.m_lblk; +			len = map.m_len; + +			err = ext4_map_blocks(NULL, inode, &map, 0); +			/* +			 * 'err==len' means that all of blocks has +			 * been preallocated no matter they are +			 * initialized or not.  For excluding +			 * unwritten extents, we need to check +			 * m_flags.  There are two conditions that +			 * indicate for initialized extents.  1) If we +			 * hit extent cache, EXT4_MAP_MAPPED flag is +			 * returned; 2) If we do a real lookup, +			 * non-flags are returned.  So we should check +			 * these two conditions. +			 */ +			if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) +				overwrite = 1; +		}  	}  	ret = __generic_file_aio_write(iocb, iov, nr_segs); @@ -156,45 +189,12 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,  		if (err < 0)  			ret = err;  	} -	blk_finish_plug(&plug); - -	if (unaligned_aio) -		mutex_unlock(ext4_aio_mutex(inode)); - -	return ret; -} - -static ssize_t -ext4_file_write(struct kiocb *iocb, const struct iovec *iov, -		unsigned long nr_segs, loff_t pos) -{ -	struct inode *inode = file_inode(iocb->ki_filp); -	ssize_t ret; - -	/* -	 * If we have encountered a bitmap-format file, the size limit -	 * is smaller than s_maxbytes, which is for extent-mapped files. -	 */ - -	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { -		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -		size_t length = iov_length(iov, nr_segs); - -		if ((pos > sbi->s_bitmap_maxbytes || -		    (pos == sbi->s_bitmap_maxbytes && length > 0))) -			return -EFBIG; - -		if (pos + length > sbi->s_bitmap_maxbytes) { -			nr_segs = iov_shorten((struct iovec *)iov, nr_segs, -					      sbi->s_bitmap_maxbytes - pos); -		} -	} - -	if (unlikely(iocb->ki_filp->f_flags & O_DIRECT)) -		ret = ext4_file_dio_write(iocb, iov, nr_segs, pos); -	else -		ret = generic_file_aio_write(iocb, iov, nr_segs, pos); +	if (o_direct) +		blk_finish_plug(&plug); +errout: +	if (aio_mutex) +		mutex_unlock(aio_mutex);  	return ret;  } @@ -244,6 +244,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)  			handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);  			if (IS_ERR(handle))  				return PTR_ERR(handle); +			BUFFER_TRACE(sbi->s_sbh, "get_write_access");  			err = ext4_journal_get_write_access(handle, sbi->s_sbh);  			if (err) {  				ext4_journal_stop(handle); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 82edf5b93352..645205d8ada6 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -22,7 +22,7 @@  #define EXT4_INLINE_DOTDOT_OFFSET	2  #define EXT4_INLINE_DOTDOT_SIZE		4 -int ext4_get_inline_size(struct inode *inode) +static int ext4_get_inline_size(struct inode *inode)  {  	if (EXT4_I(inode)->i_inline_off)  		return EXT4_I(inode)->i_inline_size; @@ -211,8 +211,8 @@ out:   * value since it is already handled by ext4_xattr_ibody_inline_set.   * That saves us one memcpy.   */ -void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, -			    void *buffer, loff_t pos, unsigned int len) +static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, +				   void *buffer, loff_t pos, unsigned int len)  {  	struct ext4_xattr_entry *entry;  	struct ext4_xattr_ibody_header *header; @@ -264,6 +264,7 @@ static int ext4_create_inline_data(handle_t *handle,  	if (error)  		return error; +	BUFFER_TRACE(is.iloc.bh, "get_write_access");  	error = ext4_journal_get_write_access(handle, is.iloc.bh);  	if (error)  		goto out; @@ -347,6 +348,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,  	if (error == -ENODATA)  		goto out; +	BUFFER_TRACE(is.iloc.bh, "get_write_access");  	error = ext4_journal_get_write_access(handle, is.iloc.bh);  	if (error)  		goto out; @@ -373,8 +375,8 @@ out:  	return error;  } -int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, -			     unsigned int len) +static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, +				    unsigned int len)  {  	int ret, size;  	struct ext4_inode_info *ei = EXT4_I(inode); @@ -424,6 +426,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,  	if (error)  		goto out; +	BUFFER_TRACE(is.iloc.bh, "get_write_access");  	error = ext4_journal_get_write_access(handle, is.iloc.bh);  	if (error)  		goto out; @@ -1007,6 +1010,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,  	if (err)  		return err; +	BUFFER_TRACE(iloc->bh, "get_write_access");  	err = ext4_journal_get_write_access(handle, iloc->bh);  	if (err)  		return err; @@ -1669,6 +1673,7 @@ int ext4_delete_inline_entry(handle_t *handle,  				EXT4_MIN_INLINE_DATA_SIZE;  	} +	BUFFER_TRACE(bh, "get_write_access");  	err = ext4_journal_get_write_access(handle, bh);  	if (err)  		goto out; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d7b7462a0e13..7fcd68ee9155 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -148,6 +148,9 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)          int ea_blocks = EXT4_I(inode)->i_file_acl ?  		EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; +	if (ext4_has_inline_data(inode)) +		return 0; +  	return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);  } @@ -443,7 +446,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,  	 * could be converted.  	 */  	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) -		down_read((&EXT4_I(inode)->i_data_sem)); +		down_read(&EXT4_I(inode)->i_data_sem);  	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {  		retval = ext4_ext_map_blocks(handle, inode, map, flags &  					     EXT4_GET_BLOCKS_KEEP_SIZE); @@ -489,8 +492,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,   * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping   * based files   * - * On success, it returns the number of blocks being mapped or allocate. - * if create==0 and the blocks are pre-allocated and uninitialized block, + * On success, it returns the number of blocks being mapped or allocated. + * if create==0 and the blocks are pre-allocated and unwritten block,   * the result buffer head is unmapped. If the create ==1, it will make sure   * the buffer head is mapped.   * @@ -555,7 +558,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,  	 * file system block.  	 */  	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) -		down_read((&EXT4_I(inode)->i_data_sem)); +		down_read(&EXT4_I(inode)->i_data_sem);  	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {  		retval = ext4_ext_map_blocks(handle, inode, map, flags &  					     EXT4_GET_BLOCKS_KEEP_SIZE); @@ -622,12 +625,12 @@ found:  	map->m_flags &= ~EXT4_MAP_FLAGS;  	/* -	 * New blocks allocate and/or writing to uninitialized extent +	 * New blocks allocate and/or writing to unwritten extent  	 * will possibly result in updating i_data, so we take  	 * the write lock of i_data_sem, and call get_blocks()  	 * with create == 1 flag.  	 */ -	down_write((&EXT4_I(inode)->i_data_sem)); +	down_write(&EXT4_I(inode)->i_data_sem);  	/*  	 * if the caller is from delayed allocation writeout path @@ -922,6 +925,7 @@ int do_journal_get_write_access(handle_t *handle,  	 */  	if (dirty)  		clear_buffer_dirty(bh); +	BUFFER_TRACE(bh, "get write access");  	ret = ext4_journal_get_write_access(handle, bh);  	if (!ret && dirty)  		ret = ext4_handle_dirty_metadata(handle, NULL, bh); @@ -1540,7 +1544,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,  		ext4_es_lru_add(inode);  		if (ext4_es_is_hole(&es)) {  			retval = 0; -			down_read((&EXT4_I(inode)->i_data_sem)); +			down_read(&EXT4_I(inode)->i_data_sem);  			goto add_delayed;  		} @@ -1577,7 +1581,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,  	 * Try to see if we can get the block without requesting a new  	 * file system block.  	 */ -	down_read((&EXT4_I(inode)->i_data_sem)); +	down_read(&EXT4_I(inode)->i_data_sem);  	if (ext4_has_inline_data(inode)) {  		/*  		 * We will soon create blocks for this page, and let @@ -1769,6 +1773,7 @@ static int __ext4_journalled_writepage(struct page *page,  	BUG_ON(!ext4_handle_valid(handle));  	if (inline_data) { +		BUFFER_TRACE(inode_bh, "get write access");  		ret = ext4_journal_get_write_access(handle, inode_bh);  		err = ext4_handle_dirty_metadata(handle, inode, inode_bh); @@ -1846,6 +1851,7 @@ static int ext4_writepage(struct page *page,  	struct buffer_head *page_bufs = NULL;  	struct inode *inode = page->mapping->host;  	struct ext4_io_submit io_submit; +	bool keep_towrite = false;  	trace_ext4_writepage(page);  	size = i_size_read(inode); @@ -1876,6 +1882,7 @@ static int ext4_writepage(struct page *page,  			unlock_page(page);  			return 0;  		} +		keep_towrite = true;  	}  	if (PageChecked(page) && ext4_should_journal_data(inode)) @@ -1892,7 +1899,7 @@ static int ext4_writepage(struct page *page,  		unlock_page(page);  		return -ENOMEM;  	} -	ret = ext4_bio_write_page(&io_submit, page, len, wbc); +	ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);  	ext4_io_submit(&io_submit);  	/* Drop io_end reference we got from init */  	ext4_put_io_end_defer(io_submit.io_end); @@ -1911,7 +1918,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)  	else  		len = PAGE_CACHE_SIZE;  	clear_page_dirty_for_io(page); -	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); +	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);  	if (!err)  		mpd->wbc->nr_to_write--;  	mpd->first_page++; @@ -2032,7 +2039,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,   * Scan buffers corresponding to changed extent (we expect corresponding pages   * to be already locked) and update buffer state according to new extent state.   * We map delalloc buffers to their physical location, clear unwritten bits, - * and mark buffers as uninit when we perform writes to uninitialized extents + * and mark buffers as uninit when we perform writes to unwritten extents   * and do extent conversion after IO is finished. If the last page is not fully   * mapped, we update @map to the next extent in the last page that needs   * mapping. Otherwise we submit the page for IO. @@ -2126,12 +2133,12 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)  	struct inode *inode = mpd->inode;  	struct ext4_map_blocks *map = &mpd->map;  	int get_blocks_flags; -	int err; +	int err, dioread_nolock;  	trace_ext4_da_write_pages_extent(inode, map);  	/*  	 * Call ext4_map_blocks() to allocate any delayed allocation blocks, or -	 * to convert an uninitialized extent to be initialized (in the case +	 * to convert an unwritten extent to be initialized (in the case  	 * where we have written into one or more preallocated blocks).  It is  	 * possible that we're going to need more metadata blocks than  	 * previously reserved. However we must not fail because we're in @@ -2148,7 +2155,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)  	 */  	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |  			   EXT4_GET_BLOCKS_METADATA_NOFAIL; -	if (ext4_should_dioread_nolock(inode)) +	dioread_nolock = ext4_should_dioread_nolock(inode); +	if (dioread_nolock)  		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;  	if (map->m_flags & (1 << BH_Delay))  		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; @@ -2156,7 +2164,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)  	err = ext4_map_blocks(handle, inode, map, get_blocks_flags);  	if (err < 0)  		return err; -	if (map->m_flags & EXT4_MAP_UNINIT) { +	if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) {  		if (!mpd->io_submit.io_end->handle &&  		    ext4_handle_valid(handle)) {  			mpd->io_submit.io_end->handle = handle->h_rsv_handle; @@ -3070,9 +3078,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,   * preallocated extents, and those write extend the file, no need to   * fall back to buffered IO.   * - * For holes, we fallocate those blocks, mark them as uninitialized + * For holes, we fallocate those blocks, mark them as unwritten   * If those blocks were preallocated, we mark sure they are split, but - * still keep the range to write as uninitialized. + * still keep the range to write as unwritten.   *   * The unwritten extents will be converted to written when DIO is completed.   * For async direct IO, since the IO may still pending when return, we @@ -3124,12 +3132,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,  	 * We could direct write to holes and fallocate.  	 *  	 * Allocated blocks to fill the hole are marked as -	 * uninitialized to prevent parallel buffered read to expose +	 * unwritten to prevent parallel buffered read to expose  	 * the stale data before DIO complete the data IO.  	 *  	 * As to previously fallocated extents, ext4 get_block will  	 * just simply mark the buffer mapped but still keep the -	 * extents uninitialized. +	 * extents unwritten.  	 *  	 * For non AIO case, we will convert those unwritten extents  	 * to written after return back from blockdev_direct_IO. @@ -3440,7 +3448,7 @@ unlock:   * This required during truncate. We need to physically zero the tail end   * of that block so it doesn't yield old data if the file is later grown.   */ -int ext4_block_truncate_page(handle_t *handle, +static int ext4_block_truncate_page(handle_t *handle,  		struct address_space *mapping, loff_t from)  {  	unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -4304,12 +4312,15 @@ static int ext4_do_update_inode(handle_t *handle,  	struct ext4_inode *raw_inode = ext4_raw_inode(iloc);  	struct ext4_inode_info *ei = EXT4_I(inode);  	struct buffer_head *bh = iloc->bh; +	struct super_block *sb = inode->i_sb;  	int err = 0, rc, block; -	int need_datasync = 0; +	int need_datasync = 0, set_large_file = 0;  	uid_t i_uid;  	gid_t i_gid; -	/* For fields not not tracking in the in-memory inode, +	spin_lock(&ei->i_raw_lock); + +	/* For fields not tracked in the in-memory inode,  	 * initialise them to zero for new inodes. */  	if (ext4_test_inode_state(inode, EXT4_STATE_NEW))  		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); @@ -4347,8 +4358,10 @@ static int ext4_do_update_inode(handle_t *handle,  	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);  	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); -	if (ext4_inode_blocks_set(handle, raw_inode, ei)) +	if (ext4_inode_blocks_set(handle, raw_inode, ei)) { +		spin_unlock(&ei->i_raw_lock);  		goto out_brelse; +	}  	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);  	raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);  	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) @@ -4360,24 +4373,11 @@ static int ext4_do_update_inode(handle_t *handle,  		need_datasync = 1;  	}  	if (ei->i_disksize > 0x7fffffffULL) { -		struct super_block *sb = inode->i_sb;  		if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,  				EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||  				EXT4_SB(sb)->s_es->s_rev_level == -				cpu_to_le32(EXT4_GOOD_OLD_REV)) { -			/* If this is the first large file -			 * created, add a flag to the superblock. -			 */ -			err = ext4_journal_get_write_access(handle, -					EXT4_SB(sb)->s_sbh); -			if (err) -				goto out_brelse; -			ext4_update_dynamic_rev(sb); -			EXT4_SET_RO_COMPAT_FEATURE(sb, -					EXT4_FEATURE_RO_COMPAT_LARGE_FILE); -			ext4_handle_sync(handle); -			err = ext4_handle_dirty_super(handle, sb); -		} +		    cpu_to_le32(EXT4_GOOD_OLD_REV)) +			set_large_file = 1;  	}  	raw_inode->i_generation = cpu_to_le32(inode->i_generation);  	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { @@ -4409,12 +4409,24 @@ static int ext4_do_update_inode(handle_t *handle,  	ext4_inode_csum_set(inode, raw_inode, ei); +	spin_unlock(&ei->i_raw_lock); +  	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");  	rc = ext4_handle_dirty_metadata(handle, NULL, bh);  	if (!err)  		err = rc;  	ext4_clear_inode_state(inode, EXT4_STATE_NEW); - +	if (set_large_file) { +		BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); +		err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); +		if (err) +			goto out_brelse; +		ext4_update_dynamic_rev(sb); +		EXT4_SET_RO_COMPAT_FEATURE(sb, +					   EXT4_FEATURE_RO_COMPAT_LARGE_FILE); +		ext4_handle_sync(handle); +		err = ext4_handle_dirty_super(handle, sb); +	}  	ext4_update_inode_fsync_trans(handle, inode, need_datasync);  out_brelse:  	brelse(bh); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index afe8a133e3d1..59e31622cc6e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2619,7 +2619,7 @@ int ext4_mb_init(struct super_block *sb)  	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);  	if (sbi->s_locality_groups == NULL) {  		ret = -ENOMEM; -		goto out_free_groupinfo_slab; +		goto out;  	}  	for_each_possible_cpu(i) {  		struct ext4_locality_group *lg; @@ -2644,8 +2644,6 @@ int ext4_mb_init(struct super_block *sb)  out_free_locality_groups:  	free_percpu(sbi->s_locality_groups);  	sbi->s_locality_groups = NULL; -out_free_groupinfo_slab: -	ext4_groupinfo_destroy_slabs();  out:  	kfree(sbi->s_mb_offsets);  	sbi->s_mb_offsets = NULL; @@ -2878,6 +2876,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,  	if (!bitmap_bh)  		goto out_err; +	BUFFER_TRACE(bitmap_bh, "getting write access");  	err = ext4_journal_get_write_access(handle, bitmap_bh);  	if (err)  		goto out_err; @@ -2890,6 +2889,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,  	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,  			ext4_free_group_clusters(sb, gdp)); +	BUFFER_TRACE(gdp_bh, "get_write_access");  	err = ext4_journal_get_write_access(handle, gdp_bh);  	if (err)  		goto out_err; @@ -3147,7 +3147,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,  	}  	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&  			start > ac->ac_o_ex.fe_logical); -	BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); +	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));  	/* now prepare goal request */ diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 2ae73a80c19b..ec092437d3e0 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -505,7 +505,7 @@ int ext4_ext_migrate(struct inode *inode)  	 * with i_data_sem held to prevent racing with block  	 * allocation.  	 */ -	down_read((&EXT4_I(inode)->i_data_sem)); +	down_read(&EXT4_I(inode)->i_data_sem);  	ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);  	up_read((&EXT4_I(inode)->i_data_sem)); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 04434ad3e8e0..32bce844c2e1 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -18,7 +18,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)  	return cpu_to_le32(csum);  } -int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) +static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)  {  	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,  				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) @@ -27,7 +27,7 @@ int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)  	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);  } -void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) +static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)  {  	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,  				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 58ee7dc87669..2484c7ec6a72 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -57,8 +57,8 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,  static void  copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)  { -	if (ext4_ext_is_uninitialized(src)) -		ext4_ext_mark_uninitialized(dest); +	if (ext4_ext_is_unwritten(src)) +		ext4_ext_mark_unwritten(dest);  	else  		dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));  } @@ -391,6 +391,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode,  	if (depth) {  		/* Register to journal */ +		BUFFER_TRACE(orig_path->p_bh, "get_write_access");  		ret = ext4_journal_get_write_access(handle, orig_path->p_bh);  		if (ret)  			return ret; @@ -593,14 +594,14 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,   * @inode:		inode in question   * @from:		block offset of inode   * @count:		block count to be checked - * @uninit:		extents expected to be uninitialized + * @unwritten:		extents expected to be unwritten   * @err:		pointer to save error value   *   * Return 1 if all extents in range has expected type, and zero otherwise.   */  static int  mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, -			  int uninit, int *err) +		    int unwritten, int *err)  {  	struct ext4_ext_path *path = NULL;  	struct ext4_extent *ext; @@ -611,7 +612,7 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,  		if (*err)  			goto out;  		ext = path[ext_depth(inode)].p_ext; -		if (uninit != ext4_ext_is_uninitialized(ext)) +		if (unwritten != ext4_ext_is_unwritten(ext))  			goto out;  		from += ext4_ext_get_actual_len(ext);  		ext4_ext_drop_refs(path); @@ -894,7 +895,7 @@ out:   * @orig_page_offset:		page index on original file   * @data_offset_in_page:	block index where data swapping starts   * @block_len_in_page:		the number of blocks to be swapped - * @uninit:			orig extent is uninitialized or not + * @unwritten:			orig extent is unwritten or not   * @err:			pointer to save return value   *   * Save the data in original inode blocks and replace original inode extents @@ -905,7 +906,7 @@ out:  static int  move_extent_per_page(struct file *o_filp, struct inode *donor_inode,  		  pgoff_t orig_page_offset, int data_offset_in_page, -		  int block_len_in_page, int uninit, int *err) +		  int block_len_in_page, int unwritten, int *err)  {  	struct inode *orig_inode = file_inode(o_filp);  	struct page *pagep[2] = {NULL, NULL}; @@ -962,27 +963,27 @@ again:  	if (unlikely(*err < 0))  		goto stop_journal;  	/* -	 * If orig extent was uninitialized it can become initialized +	 * If orig extent was unwritten it can become initialized  	 * at any time after i_data_sem was dropped, in order to  	 * serialize with delalloc we have recheck extent while we  	 * hold page's lock, if it is still the case data copy is not  	 * necessary, just swap data blocks between orig and donor.  	 */ -	if (uninit) { +	if (unwritten) {  		ext4_double_down_write_data_sem(orig_inode, donor_inode);  		/* If any of extents in range became initialized we have to  		 * fallback to data copying */ -		uninit = mext_check_coverage(orig_inode, orig_blk_offset, -					     block_len_in_page, 1, err); +		unwritten = mext_check_coverage(orig_inode, orig_blk_offset, +						block_len_in_page, 1, err);  		if (*err)  			goto drop_data_sem; -		uninit &= mext_check_coverage(donor_inode, orig_blk_offset, -					      block_len_in_page, 1, err); +		unwritten &= mext_check_coverage(donor_inode, orig_blk_offset, +						 block_len_in_page, 1, err);  		if (*err)  			goto drop_data_sem; -		if (!uninit) { +		if (!unwritten) {  			ext4_double_up_write_data_sem(orig_inode, donor_inode);  			goto data_copy;  		} @@ -1259,7 +1260,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;  	int data_offset_in_page;  	int block_len_in_page; -	int uninit; +	int unwritten;  	if (orig_inode->i_sb != donor_inode->i_sb) {  		ext4_debug("ext4 move extent: The argument files " @@ -1391,8 +1392,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  		    !last_extent)  			continue; -		/* Is original extent is uninitialized */ -		uninit = ext4_ext_is_uninitialized(ext_prev); +		/* Is original extent is unwritten */ +		unwritten = ext4_ext_is_unwritten(ext_prev);  		data_offset_in_page = seq_start % blocks_per_page; @@ -1432,8 +1433,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  						o_filp, donor_inode,  						orig_page_offset,  						data_offset_in_page, -						block_len_in_page, uninit, -						&ret); +						block_len_in_page, +						unwritten, &ret);  			/* Count how many blocks we have exchanged */  			*moved_len += block_len_in_page; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1cb84f78909e..3520ab8a6639 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -67,6 +67,7 @@ static struct buffer_head *ext4_append(handle_t *handle,  		return ERR_PTR(err);  	inode->i_size += inode->i_sb->s_blocksize;  	EXT4_I(inode)->i_disksize = inode->i_size; +	BUFFER_TRACE(bh, "get_write_access");  	err = ext4_journal_get_write_access(handle, bh);  	if (err) {  		brelse(bh); @@ -1778,6 +1779,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,  	blocksize =  dir->i_sb->s_blocksize;  	dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); +	BUFFER_TRACE(bh, "get_write_access");  	retval = ext4_journal_get_write_access(handle, bh);  	if (retval) {  		ext4_std_error(dir->i_sb, retval); @@ -2510,8 +2512,7 @@ static int empty_dir(struct inode *inode)  		 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);  	de = ext4_next_entry(de1, sb->s_blocksize);  	while (offset < inode->i_size) { -		if (!bh || -		    (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { +		if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {  			unsigned int lblock;  			err = 0;  			brelse(bh); @@ -2539,26 +2540,37 @@ static int empty_dir(struct inode *inode)  	return 1;  } -/* ext4_orphan_add() links an unlinked or truncated inode into a list of +/* + * ext4_orphan_add() links an unlinked or truncated inode into a list of   * such inodes, starting at the superblock, in case we crash before the   * file is closed/deleted, or in case the inode truncate spans multiple   * transactions and the last transaction is not recovered after a crash.   *   * At filesystem recovery time, we walk this list deleting unlinked   * inodes and truncating linked inodes in ext4_orphan_cleanup(). + * + * Orphan list manipulation functions must be called under i_mutex unless + * we are just creating the inode or deleting it.   */  int ext4_orphan_add(handle_t *handle, struct inode *inode)  {  	struct super_block *sb = inode->i_sb; +	struct ext4_sb_info *sbi = EXT4_SB(sb);  	struct ext4_iloc iloc;  	int err = 0, rc; +	bool dirty = false; -	if (!EXT4_SB(sb)->s_journal) +	if (!sbi->s_journal)  		return 0; -	mutex_lock(&EXT4_SB(sb)->s_orphan_lock); +	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && +		     !mutex_is_locked(&inode->i_mutex)); +	/* +	 * Exit early if inode already is on orphan list. This is a big speedup +	 * since we don't have to contend on the global s_orphan_lock. +	 */  	if (!list_empty(&EXT4_I(inode)->i_orphan)) -		goto out_unlock; +		return 0;  	/*  	 * Orphan handling is only valid for files with data blocks @@ -2569,48 +2581,51 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)  	J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||  		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); -	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); -	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); +	BUFFER_TRACE(sbi->s_sbh, "get_write_access"); +	err = ext4_journal_get_write_access(handle, sbi->s_sbh);  	if (err) -		goto out_unlock; +		goto out;  	err = ext4_reserve_inode_write(handle, inode, &iloc);  	if (err) -		goto out_unlock; +		goto out; + +	mutex_lock(&sbi->s_orphan_lock);  	/*  	 * Due to previous errors inode may be already a part of on-disk  	 * orphan list. If so skip on-disk list modification.  	 */ -	if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= -		(le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) -			goto mem_insert; - -	/* Insert this inode at the head of the on-disk orphan list... */ -	NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); -	EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); -	err = ext4_handle_dirty_super(handle, sb); -	rc = ext4_mark_iloc_dirty(handle, inode, &iloc); -	if (!err) -		err = rc; - -	/* Only add to the head of the in-memory list if all the -	 * previous operations succeeded.  If the orphan_add is going to -	 * fail (possibly taking the journal offline), we can't risk -	 * leaving the inode on the orphan list: stray orphan-list -	 * entries can cause panics at unmount time. -	 * -	 * This is safe: on error we're going to ignore the orphan list -	 * anyway on the next recovery. */ -mem_insert: -	if (!err) -		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - +	if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) > +	    (le32_to_cpu(sbi->s_es->s_inodes_count))) { +		/* Insert this inode at the head of the on-disk orphan list */ +		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); +		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); +		dirty = true; +	} +	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); +	mutex_unlock(&sbi->s_orphan_lock); + +	if (dirty) { +		err = ext4_handle_dirty_super(handle, sb); +		rc = ext4_mark_iloc_dirty(handle, inode, &iloc); +		if (!err) +			err = rc; +		if (err) { +			/* +			 * We have to remove inode from in-memory list if +			 * addition to on disk orphan list failed. Stray orphan +			 * list entries can cause panics at unmount time. +			 */ +			mutex_lock(&sbi->s_orphan_lock); +			list_del(&EXT4_I(inode)->i_orphan); +			mutex_unlock(&sbi->s_orphan_lock); +		} +	}  	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);  	jbd_debug(4, "orphan inode %lu will point to %d\n",  			inode->i_ino, NEXT_ORPHAN(inode)); -out_unlock: -	mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); -	ext4_std_error(inode->i_sb, err); +out: +	ext4_std_error(sb, err);  	return err;  } @@ -2622,45 +2637,51 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)  {  	struct list_head *prev;  	struct ext4_inode_info *ei = EXT4_I(inode); -	struct ext4_sb_info *sbi; +	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);  	__u32 ino_next;  	struct ext4_iloc iloc;  	int err = 0; -	if ((!EXT4_SB(inode->i_sb)->s_journal) && -	    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) +	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))  		return 0; -	mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); +	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && +		     !mutex_is_locked(&inode->i_mutex)); +	/* Do this quick check before taking global s_orphan_lock. */  	if (list_empty(&ei->i_orphan)) -		goto out; +		return 0; -	ino_next = NEXT_ORPHAN(inode); -	prev = ei->i_orphan.prev; -	sbi = EXT4_SB(inode->i_sb); +	if (handle) { +		/* Grab inode buffer early before taking global s_orphan_lock */ +		err = ext4_reserve_inode_write(handle, inode, &iloc); +	} +	mutex_lock(&sbi->s_orphan_lock);  	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); +	prev = ei->i_orphan.prev;  	list_del_init(&ei->i_orphan);  	/* If we're on an error path, we may not have a valid  	 * transaction handle with which to update the orphan list on  	 * disk, but we still need to remove the inode from the linked  	 * list in memory. */ -	if (!handle) -		goto out; - -	err = ext4_reserve_inode_write(handle, inode, &iloc); -	if (err) +	if (!handle || err) { +		mutex_unlock(&sbi->s_orphan_lock);  		goto out_err; +	} +	ino_next = NEXT_ORPHAN(inode);  	if (prev == &sbi->s_orphan) {  		jbd_debug(4, "superblock will point to %u\n", ino_next);  		BUFFER_TRACE(sbi->s_sbh, "get_write_access");  		err = ext4_journal_get_write_access(handle, sbi->s_sbh); -		if (err) +		if (err) { +			mutex_unlock(&sbi->s_orphan_lock);  			goto out_brelse; +		}  		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); +		mutex_unlock(&sbi->s_orphan_lock);  		err = ext4_handle_dirty_super(handle, inode->i_sb);  	} else {  		struct ext4_iloc iloc2; @@ -2670,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)  		jbd_debug(4, "orphan inode %lu will point to %u\n",  			  i_prev->i_ino, ino_next);  		err = ext4_reserve_inode_write(handle, i_prev, &iloc2); -		if (err) +		if (err) { +			mutex_unlock(&sbi->s_orphan_lock);  			goto out_brelse; +		}  		NEXT_ORPHAN(i_prev) = ino_next;  		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); +		mutex_unlock(&sbi->s_orphan_lock);  	}  	if (err)  		goto out_brelse;  	NEXT_ORPHAN(inode) = 0;  	err = ext4_mark_iloc_dirty(handle, inode, &iloc); -  out_err:  	ext4_std_error(inode->i_sb, err); -out: -	mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);  	return err;  out_brelse: diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 1a64e7a52b84..b24a2541a9ba 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -401,7 +401,8 @@ submit_and_retry:  int ext4_bio_write_page(struct ext4_io_submit *io,  			struct page *page,  			int len, -			struct writeback_control *wbc) +			struct writeback_control *wbc, +			bool keep_towrite)  {  	struct inode *inode = page->mapping->host;  	unsigned block_start, blocksize; @@ -414,10 +415,24 @@ int ext4_bio_write_page(struct ext4_io_submit *io,  	BUG_ON(!PageLocked(page));  	BUG_ON(PageWriteback(page)); -	set_page_writeback(page); +	if (keep_towrite) +		set_page_writeback_keepwrite(page); +	else +		set_page_writeback(page);  	ClearPageError(page);  	/* +	 * Comments copied from block_write_full_page: +	 * +	 * The page straddles i_size.  It must be zeroed out on each and every +	 * writepage invocation because it may be mmapped.  "A file is mapped +	 * in multiples of the page size.  For a file that is not a multiple of +	 * the page size, the remaining memory is zeroed when mapped, and +	 * writes to that region are not written out to the file." +	 */ +	if (len < PAGE_CACHE_SIZE) +		zero_user_segment(page, len, PAGE_CACHE_SIZE); +	/*  	 * In the first loop we prepare and mark buffers to submit. We have to  	 * mark all buffers in the page before submitting so that  	 * end_page_writeback() cannot be called from ext4_bio_end_io() when IO @@ -428,19 +443,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,  	do {  		block_start = bh_offset(bh);  		if (block_start >= len) { -			/* -			 * Comments copied from block_write_full_page: -			 * -			 * The page straddles i_size.  It must be zeroed out on -			 * each and every writepage invocation because it may -			 * be mmapped.  "A file is mapped in multiples of the -			 * page size.  For a file that is not a multiple of -			 * the  page size, the remaining memory is zeroed when -			 * mapped, and writes to that region are not written -			 * out to the file." -			 */ -			zero_user_segment(page, block_start, -					  block_start + blocksize);  			clear_buffer_dirty(bh);  			set_buffer_uptodate(bh);  			continue; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 08b3c116915b..bb0e80f03e2e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -348,6 +348,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,  	bh = sb_getblk(sb, blk);  	if (unlikely(!bh))  		return ERR_PTR(-ENOMEM); +	BUFFER_TRACE(bh, "get_write_access");  	if ((err = ext4_journal_get_write_access(handle, bh))) {  		brelse(bh);  		bh = ERR_PTR(err); @@ -426,6 +427,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,  		if (unlikely(!bh))  			return -ENOMEM; +		BUFFER_TRACE(bh, "get_write_access");  		err = ext4_journal_get_write_access(handle, bh);  		if (err)  			return err; @@ -518,6 +520,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,  				goto out;  			} +			BUFFER_TRACE(gdb, "get_write_access");  			err = ext4_journal_get_write_access(handle, gdb);  			if (err) {  				brelse(gdb); @@ -790,14 +793,17 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,  		goto exit_dind;  	} +	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");  	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);  	if (unlikely(err))  		goto exit_dind; +	BUFFER_TRACE(gdb_bh, "get_write_access");  	err = ext4_journal_get_write_access(handle, gdb_bh);  	if (unlikely(err))  		goto exit_dind; +	BUFFER_TRACE(dind, "get_write_access");  	err = ext4_journal_get_write_access(handle, dind);  	if (unlikely(err))  		ext4_std_error(sb, err); @@ -902,6 +908,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,  	EXT4_SB(sb)->s_group_desc = n_group_desc;  	EXT4_SB(sb)->s_gdb_count++;  	ext4_kvfree(o_group_desc); +	BUFFER_TRACE(gdb_bh, "get_write_access");  	err = ext4_journal_get_write_access(handle, gdb_bh);  	if (unlikely(err))  		brelse(gdb_bh); @@ -977,6 +984,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,  	}  	for (i = 0; i < reserved_gdb; i++) { +		BUFFER_TRACE(primary[i], "get_write_access");  		if ((err = ext4_journal_get_write_access(handle, primary[i])))  			goto exit_bh;  	} @@ -1084,6 +1092,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,  		ext4_debug("update metadata backup %llu(+%llu)\n",  			   backup_block, backup_block -  			   ext4_group_first_block_no(sb, group)); +		BUFFER_TRACE(bh, "get_write_access");  		if ((err = ext4_journal_get_write_access(handle, bh)))  			break;  		lock_buffer(bh); @@ -1163,6 +1172,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,  		 */  		if (gdb_off) {  			gdb_bh = sbi->s_group_desc[gdb_num]; +			BUFFER_TRACE(gdb_bh, "get_write_access");  			err = ext4_journal_get_write_access(handle, gdb_bh);  			if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) @@ -1433,6 +1443,7 @@ static int ext4_flex_group_add(struct super_block *sb,  		goto exit;  	} +	BUFFER_TRACE(sbi->s_sbh, "get_write_access");  	err = ext4_journal_get_write_access(handle, sbi->s_sbh);  	if (err)  		goto exit_journal; @@ -1645,6 +1656,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,  		return err;  	} +	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");  	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);  	if (err) {  		ext4_warning(sb, "error %d on journal write access", err); @@ -1804,6 +1816,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)  	if (IS_ERR(handle))  		return PTR_ERR(handle); +	BUFFER_TRACE(sbi->s_sbh, "get_write_access");  	err = ext4_journal_get_write_access(handle, sbi->s_sbh);  	if (err)  		goto errout; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6f9e6fadac04..b9b9aabfb4d2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -138,8 +138,8 @@ static __le32 ext4_superblock_csum(struct super_block *sb,  	return cpu_to_le32(csum);  } -int ext4_superblock_csum_verify(struct super_block *sb, -				struct ext4_super_block *es) +static int ext4_superblock_csum_verify(struct super_block *sb, +				       struct ext4_super_block *es)  {  	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,  				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) @@ -879,6 +879,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)  		return NULL;  	ei->vfs_inode.i_version = 1; +	spin_lock_init(&ei->i_raw_lock);  	INIT_LIST_HEAD(&ei->i_prealloc_list);  	spin_lock_init(&ei->i_prealloc_lock);  	ext4_es_init_tree(&ei->i_es_tree); @@ -1903,7 +1904,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,  	if (!(sbi->s_mount_state & EXT4_VALID_FS))  		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "  			 "running e2fsck is recommended"); -	else if ((sbi->s_mount_state & EXT4_ERROR_FS)) +	else if (sbi->s_mount_state & EXT4_ERROR_FS)  		ext4_msg(sb, KERN_WARNING,  			 "warning: mounting fs with errors, "  			 "running e2fsck is recommended"); @@ -2404,6 +2405,16 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,  	if (ext4_bg_has_super(sb, bg))  		has_super = 1; +	/* +	 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at +	 * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled +	 * on modern mke2fs or blksize > 1k on older mke2fs) then we must +	 * compensate. +	 */ +	if (sb->s_blocksize == 1024 && nr == 0 && +	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) +		has_super++; +  	return (has_super + ext4_group_first_block_no(sb, bg));  } @@ -3337,7 +3348,7 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)  	 * By default we reserve 2% or 4096 clusters, whichever is smaller.  	 * This should cover the situations where we can not afford to run  	 * out of space like for example punch hole, or converting -	 * uninitialized extents in delalloc path. In most cases such +	 * unwritten extents in delalloc path. In most cases such  	 * allocation would require 1, or 2 blocks, higher numbers are  	 * very rare.  	 */ @@ -5370,6 +5381,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,  	bh = ext4_bread(handle, inode, blk, 1, &err);  	if (!bh)  		goto out; +	BUFFER_TRACE(bh, "get write access");  	err = ext4_journal_get_write_access(handle, bh);  	if (err) {  		brelse(bh); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4eec399ec807..e7387337060c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -369,6 +369,9 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,  {  	int error; +	if (strlen(name) > 255) +		return -ERANGE; +  	down_read(&EXT4_I(inode)->xattr_sem);  	error = ext4_xattr_ibody_get(inode, name_index, name, buffer,  				     buffer_size); @@ -513,6 +516,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,  	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))  		return; +	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");  	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {  		EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);  		ext4_handle_dirty_super(handle, sb); @@ -532,6 +536,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,  	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);  	ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); +	BUFFER_TRACE(bh, "get_write_access");  	error = ext4_journal_get_write_access(handle, bh);  	if (error)  		goto out; @@ -774,6 +779,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,  	if (s->base) {  		ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,  					bs->bh->b_blocknr); +		BUFFER_TRACE(bs->bh, "get_write_access");  		error = ext4_journal_get_write_access(handle, bs->bh);  		if (error)  			goto cleanup; @@ -859,6 +865,7 @@ inserted:  						EXT4_C2B(EXT4_SB(sb), 1));  				if (error)  					goto cleanup; +				BUFFER_TRACE(new_bh, "get_write_access");  				error = ext4_journal_get_write_access(handle,  								      new_bh);  				if (error) @@ -896,7 +903,7 @@ inserted:  			 * take i_data_sem because we will test  			 * i_delalloc_reserved_flag in ext4_mb_new_blocks  			 */ -			down_read((&EXT4_I(inode)->i_data_sem)); +			down_read(&EXT4_I(inode)->i_data_sem);  			block = ext4_new_meta_blocks(handle, inode, goal, 0,  						     NULL, &error);  			up_read((&EXT4_I(inode)->i_data_sem)); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2093eb72785e..3c545b48aeab 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -319,13 +319,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)  extern void cancel_dirty_page(struct page *page, unsigned int account_size);  int test_clear_page_writeback(struct page *page); -int test_set_page_writeback(struct page *page); +int __test_set_page_writeback(struct page *page, bool keep_write); + +#define test_set_page_writeback(page)			\ +	__test_set_page_writeback(page, false) +#define test_set_page_writeback_keepwrite(page)	\ +	__test_set_page_writeback(page, true)  static inline void set_page_writeback(struct page *page)  {  	test_set_page_writeback(page);  } +static inline void set_page_writeback_keepwrite(struct page *page) +{ +	test_set_page_writeback_keepwrite(page); +} +  #ifdef CONFIG_PAGEFLAGS_EXTENDED  /*   * System with lots of page flags available. This allows separate diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6a1a0245474f..d4f70a7fe876 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -36,7 +36,7 @@ struct extent_status;  #define show_map_flags(flags) __print_flags(flags, "|",			\  	{ EXT4_GET_BLOCKS_CREATE,		"CREATE" },		\ -	{ EXT4_GET_BLOCKS_UNINIT_EXT,		"UNINIT" },		\ +	{ EXT4_GET_BLOCKS_UNWRIT_EXT,		"UNWRIT" },		\  	{ EXT4_GET_BLOCKS_DELALLOC_RESERVE,	"DELALLOC" },		\  	{ EXT4_GET_BLOCKS_PRE_IO,		"PRE_IO" },		\  	{ EXT4_GET_BLOCKS_CONVERT,		"CONVERT" },		\ @@ -51,7 +51,6 @@ struct extent_status;  	{ EXT4_MAP_MAPPED,	"M" },			\  	{ EXT4_MAP_UNWRITTEN,	"U" },			\  	{ EXT4_MAP_BOUNDARY,	"B" },			\ -	{ EXT4_MAP_UNINIT,	"u" },			\  	{ EXT4_MAP_FROM_CLUSTER, "C" })  #define show_free_flags(flags) __print_flags(flags, "|",	\ @@ -1497,7 +1496,7 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,  	TP_ARGS(inode)  ); -/* 'ux' is the uninitialized extent. */ +/* 'ux' is the unwritten extent. */  TRACE_EVENT(ext4_ext_convert_to_initialized_enter,  	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,  		 struct ext4_extent *ux), @@ -1533,7 +1532,7 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_enter,  );  /* - * 'ux' is the uninitialized extent. + * 'ux' is the unwritten extent.   * 'ix' is the initialized extent to which blocks are transferred.   */  TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath, @@ -1811,7 +1810,7 @@ DEFINE_EVENT(ext4__trim, ext4_trim_all_free,  	TP_ARGS(sb, group, start, len)  ); -TRACE_EVENT(ext4_ext_handle_uninitialized_extents, +TRACE_EVENT(ext4_ext_handle_unwritten_extents,  	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int flags,  		 unsigned int allocated, ext4_fsblk_t newblock), diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7d9a4ef0a078..518e2c3f4c75 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2380,7 +2380,7 @@ int test_clear_page_writeback(struct page *page)  	return ret;  } -int test_set_page_writeback(struct page *page) +int __test_set_page_writeback(struct page *page, bool keep_write)  {  	struct address_space *mapping = page_mapping(page);  	int ret; @@ -2405,9 +2405,10 @@ int test_set_page_writeback(struct page *page)  			radix_tree_tag_clear(&mapping->page_tree,  						page_index(page),  						PAGECACHE_TAG_DIRTY); -		radix_tree_tag_clear(&mapping->page_tree, -				     page_index(page), -				     PAGECACHE_TAG_TOWRITE); +		if (!keep_write) +			radix_tree_tag_clear(&mapping->page_tree, +						page_index(page), +						PAGECACHE_TAG_TOWRITE);  		spin_unlock_irqrestore(&mapping->tree_lock, flags);  	} else {  		ret = TestSetPageWriteback(page); @@ -2418,7 +2419,7 @@ int test_set_page_writeback(struct page *page)  	return ret;  } -EXPORT_SYMBOL(test_set_page_writeback); +EXPORT_SYMBOL(__test_set_page_writeback);  /*   * Return true if any of the pages in the mapping are marked with the  | 
