diff options
| author | Zheng Liu <wenqing.lz@taobao.com> | 2013-02-18 00:29:59 -0500 | 
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2013-02-18 00:29:59 -0500 | 
| commit | d100eef2440fea13e4f09e88b1c8bcbca64beb9f (patch) | |
| tree | 2451dc4582b43a30b414c89108b75148d48c9b57 | |
| parent | f7fec032aa782d3fd7e51fbdf08aa3a296c01500 (diff) | |
ext4: lookup block mapping in extent status tree
After tracking all extent status, we already have an extent cache in
memory.  Every time we want to look up a block mapping, we can first
try to look it up in the extent status tree to avoid a potential disk I/O.
A new function called ext4_es_lookup_extent is defined to do this
work.  When we try to look up a block mapping, we always call
ext4_map_blocks and/or ext4_da_map_blocks.  So in these functions we
first try to look up the block mapping in the extent status tree.
A new flag EXT4_GET_BLOCKS_NO_PUT_HOLE is used in ext4_da_map_blocks
in order not to put a hole into the extent status tree, because this hole
will be converted to a delayed extent in the tree immediately.
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jan Kara <jack@suse.cz>
| -rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 9 | ||||
| -rw-r--r-- | fs/ext4/extents_status.c | 60 | ||||
| -rw-r--r-- | fs/ext4/extents_status.h | 2 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 66 | ||||
| -rw-r--r-- | include/trace/events/ext4.h | 56 | 
6 files changed, 192 insertions, 3 deletions
| diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5c31d6ac9500..329e7fba47d6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -579,6 +579,8 @@ enum {  #define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080  	/* Do not take i_data_sem locking in ext4_map_blocks */  #define EXT4_GET_BLOCKS_NO_LOCK			0x0100 +	/* Do not put hole in extent cache */ +#define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200  /*   * Flags used by ext4_free_blocks diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index be0b1b3eed97..b9d7a2363736 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2167,6 +2167,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,  				block,  				le32_to_cpu(ex->ee_block),  				 ext4_ext_get_actual_len(ex)); +		if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1)) +			ext4_es_insert_extent(inode, lblock, len, ~0, +					      EXTENT_STATUS_HOLE);  	} else if (block >= le32_to_cpu(ex->ee_block)  			+ ext4_ext_get_actual_len(ex)) {  		ext4_lblk_t next; @@ -2180,6 +2183,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,  				block);  		BUG_ON(next == lblock);  		len = next - lblock; +		if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1)) +			ext4_es_insert_extent(inode, lblock, len, ~0, +					      EXTENT_STATUS_HOLE);  	} else {  		lblock = len = 0;  		BUG(); @@ -4018,7 +4024,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  		 * put just found gap into cache to speed up  		 * subsequent requests  		 */ -		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); +		if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0) +			ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);  		goto out2;  	} diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 76f4351ea821..eeb893122d8d 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -461,6 +461,66 @@ error:  	return err;  } +/* + * ext4_es_lookup_extent() looks up an extent in extent status tree. 
+ * + * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks. + * + * Return: 1 on found, 0 on not + */ +int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, +			  struct extent_status *es) +{ +	struct ext4_es_tree *tree; +	struct extent_status *es1 = NULL; +	struct rb_node *node; +	int found = 0; + +	trace_ext4_es_lookup_extent_enter(inode, lblk); +	es_debug("lookup extent in block %u\n", lblk); + +	tree = &EXT4_I(inode)->i_es_tree; +	read_lock(&EXT4_I(inode)->i_es_lock); + +	/* find extent in cache firstly */ +	es->es_lblk = es->es_len = es->es_pblk = 0; +	if (tree->cache_es) { +		es1 = tree->cache_es; +		if (in_range(lblk, es1->es_lblk, es1->es_len)) { +			es_debug("%u cached by [%u/%u)\n", +				 lblk, es1->es_lblk, es1->es_len); +			found = 1; +			goto out; +		} +	} + +	node = tree->root.rb_node; +	while (node) { +		es1 = rb_entry(node, struct extent_status, rb_node); +		if (lblk < es1->es_lblk) +			node = node->rb_left; +		else if (lblk > ext4_es_end(es1)) +			node = node->rb_right; +		else { +			found = 1; +			break; +		} +	} + +out: +	if (found) { +		BUG_ON(!es1); +		es->es_lblk = es1->es_lblk; +		es->es_len = es1->es_len; +		es->es_pblk = es1->es_pblk; +	} + +	read_unlock(&EXT4_I(inode)->i_es_lock); + +	trace_ext4_es_lookup_extent_exit(inode, es, found); +	return found; +} +  static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,  				 ext4_lblk_t end)  { diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 3f69d097c6e7..8ffc90c784fa 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -53,6 +53,8 @@ extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,  				 ext4_lblk_t len);  extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,  					struct extent_status *es); +extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, +				 struct extent_status *es);  static inline int ext4_es_is_written(struct extent_status *es)  
{ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 576b586b61aa..95a0c62c5683 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -507,12 +507,33 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,  int ext4_map_blocks(handle_t *handle, struct inode *inode,  		    struct ext4_map_blocks *map, int flags)  { +	struct extent_status es;  	int retval;  	map->m_flags = 0;  	ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"  		  "logical block %lu\n", inode->i_ino, flags, map->m_len,  		  (unsigned long) map->m_lblk); + +	/* Lookup extent status tree firstly */ +	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { +		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { +			map->m_pblk = ext4_es_pblock(&es) + +					map->m_lblk - es.es_lblk; +			map->m_flags |= ext4_es_is_written(&es) ? +					EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN; +			retval = es.es_len - (map->m_lblk - es.es_lblk); +			if (retval > map->m_len) +				retval = map->m_len; +			map->m_len = retval; +		} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { +			retval = 0; +		} else { +			BUG_ON(1); +		} +		goto found; +	} +  	/*  	 * Try to see if we can get the block without requesting a new  	 * file system block. 
@@ -544,6 +565,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,  	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))  		up_read((&EXT4_I(inode)->i_data_sem)); +found:  	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {  		int ret = check_block_validity(inode, map);  		if (ret != 0) @@ -1743,6 +1765,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,  			      struct ext4_map_blocks *map,  			      struct buffer_head *bh)  { +	struct extent_status es;  	int retval;  	sector_t invalid_block = ~((sector_t) 0xffff); @@ -1753,6 +1776,42 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,  	ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"  		  "logical block %lu\n", inode->i_ino, map->m_len,  		  (unsigned long) map->m_lblk); + +	/* Lookup extent status tree firstly */ +	if (ext4_es_lookup_extent(inode, iblock, &es)) { + +		if (ext4_es_is_hole(&es)) { +			retval = 0; +			down_read((&EXT4_I(inode)->i_data_sem)); +			goto add_delayed; +		} + +		/* +		 * Delayed extent could be allocated by fallocate. +		 * So we need to check it. +		 */ +		if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { +			map_bh(bh, inode->i_sb, invalid_block); +			set_buffer_new(bh); +			set_buffer_delay(bh); +			return 0; +		} + +		map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk; +		retval = es.es_len - (iblock - es.es_lblk); +		if (retval > map->m_len) +			retval = map->m_len; +		map->m_len = retval; +		if (ext4_es_is_written(&es)) +			map->m_flags |= EXT4_MAP_MAPPED; +		else if (ext4_es_is_unwritten(&es)) +			map->m_flags |= EXT4_MAP_UNWRITTEN; +		else +			BUG_ON(1); + +		return retval; +	} +  	/*  	 * Try to see if we can get the block without requesting a new  	 * file system block. 
@@ -1771,10 +1830,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,  			map->m_flags |= EXT4_MAP_FROM_CLUSTER;  		retval = 0;  	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) -		retval = ext4_ext_map_blocks(NULL, inode, map, 0); +		retval = ext4_ext_map_blocks(NULL, inode, map, +					     EXT4_GET_BLOCKS_NO_PUT_HOLE);  	else -		retval = ext4_ind_map_blocks(NULL, inode, map, 0); +		retval = ext4_ind_map_blocks(NULL, inode, map, +					     EXT4_GET_BLOCKS_NO_PUT_HOLE); +add_delayed:  	if (retval == 0) {  		int ret;  		/* diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index c121cdf55ab3..1e590b68cec4 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2199,6 +2199,62 @@ TRACE_EVENT(ext4_es_find_delayed_extent_exit,  		  __entry->pblk, __entry->status)  ); +TRACE_EVENT(ext4_es_lookup_extent_enter, +	TP_PROTO(struct inode *inode, ext4_lblk_t lblk), + +	TP_ARGS(inode, lblk), + +	TP_STRUCT__entry( +		__field(	dev_t,		dev		) +		__field(	ino_t,		ino		) +		__field(	ext4_lblk_t,	lblk		) +	), + +	TP_fast_assign( +		__entry->dev	= inode->i_sb->s_dev; +		__entry->ino	= inode->i_ino; +		__entry->lblk	= lblk; +	), + +	TP_printk("dev %d,%d ino %lu lblk %u", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  (unsigned long) __entry->ino, __entry->lblk) +); + +TRACE_EVENT(ext4_es_lookup_extent_exit, +	TP_PROTO(struct inode *inode, struct extent_status *es, +		 int found), + +	TP_ARGS(inode, es, found), + +	TP_STRUCT__entry( +		__field(	dev_t,		dev		) +		__field(	ino_t,		ino		) +		__field(	ext4_lblk_t,	lblk		) +		__field(	ext4_lblk_t,	len		) +		__field(	ext4_fsblk_t,	pblk		) +		__field(	unsigned long long,	status	) +		__field(	int,		found		) +	), + +	TP_fast_assign( +		__entry->dev	= inode->i_sb->s_dev; +		__entry->ino	= inode->i_ino; +		__entry->lblk	= es->es_lblk; +		__entry->len	= es->es_len; +		__entry->pblk	= ext4_es_pblock(es); +		__entry->status	= ext4_es_status(es); +		__entry->found	= 
found; +	), + +	TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %llx", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  (unsigned long) __entry->ino, __entry->found, +		  __entry->lblk, __entry->len, +		  __entry->found ? __entry->pblk : 0, +		  __entry->found ? __entry->status : 0) +); +  #endif /* _TRACE_EXT4_H */  /* This part must be outside protection */ | 
