diff options
| -rw-r--r-- | Documentation/filesystems/f2fs.txt | 5 | ||||
| -rw-r--r-- | fs/f2fs/acl.c | 6 | ||||
| -rw-r--r-- | fs/f2fs/checkpoint.c | 178 | ||||
| -rw-r--r-- | fs/f2fs/data.c | 59 | ||||
| -rw-r--r-- | fs/f2fs/debug.c | 19 | ||||
| -rw-r--r-- | fs/f2fs/dir.c | 87 | ||||
| -rw-r--r-- | fs/f2fs/f2fs.h | 50 | ||||
| -rw-r--r-- | fs/f2fs/file.c | 45 | ||||
| -rw-r--r-- | fs/f2fs/gc.c | 7 | ||||
| -rw-r--r-- | fs/f2fs/hash.c | 4 | ||||
| -rw-r--r-- | fs/f2fs/inline.c | 1 | ||||
| -rw-r--r-- | fs/f2fs/inode.c | 12 | ||||
| -rw-r--r-- | fs/f2fs/namei.c | 246 | ||||
| -rw-r--r-- | fs/f2fs/node.c | 273 | ||||
| -rw-r--r-- | fs/f2fs/node.h | 7 | ||||
| -rw-r--r-- | fs/f2fs/recovery.c | 22 | ||||
| -rw-r--r-- | fs/f2fs/segment.c | 38 | ||||
| -rw-r--r-- | fs/f2fs/segment.h | 8 | ||||
| -rw-r--r-- | fs/f2fs/super.c | 21 | ||||
| -rw-r--r-- | include/trace/events/f2fs.h | 87 | 
20 files changed, 883 insertions, 292 deletions
| diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 51afba17bbae..a2046a7d0a9d 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -126,6 +126,11 @@ flush_merge	       Merge concurrent cache_flush commands as much as possible                         to eliminate redundant command issues. If the underlying  		       device handles the cache_flush command relatively slowly,  		       recommend to enable this option. +nobarrier              This option can be used if underlying storage guarantees +                       its cached data should be written to the novolatile area. +		       If this option is set, no cache_flush commands are issued +		       but f2fs still guarantees the write ordering of all the +		       data writes.  ================================================================================  DEBUGFS ENTRIES diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index dbe2141d10ad..83b9b5a8d112 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,  	size_t size = 0;  	int error; -	if (acl) { -		error = posix_acl_valid(acl); -		if (error < 0) -			return error; -	} -  	switch (type) {  	case ACL_TYPE_ACCESS:  		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0b4710c1d370..6aeed5bada52 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -22,7 +22,7 @@  #include "segment.h"  #include <trace/events/f2fs.h> -static struct kmem_cache *orphan_entry_slab; +static struct kmem_cache *ino_entry_slab;  static struct kmem_cache *inode_entry_slab;  /* @@ -282,72 +282,120 @@ const struct address_space_operations f2fs_meta_aops = {  	.set_page_dirty	= f2fs_set_meta_page_dirty,  }; +static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +{ +	struct ino_entry *e; +retry: +	spin_lock(&sbi->ino_lock[type]); + +	e = radix_tree_lookup(&sbi->ino_root[type], ino); +	if (!e) { +		e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); +		if (!e) { +			spin_unlock(&sbi->ino_lock[type]); +			goto retry; +		} +		if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { +			spin_unlock(&sbi->ino_lock[type]); +			kmem_cache_free(ino_entry_slab, e); +			goto retry; +		} +		memset(e, 0, sizeof(struct ino_entry)); +		e->ino = ino; + +		list_add_tail(&e->list, &sbi->ino_list[type]); +	} +	spin_unlock(&sbi->ino_lock[type]); +} + +static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +{ +	struct ino_entry *e; + +	spin_lock(&sbi->ino_lock[type]); +	e = radix_tree_lookup(&sbi->ino_root[type], ino); +	if (e) { +		list_del(&e->list); +		radix_tree_delete(&sbi->ino_root[type], ino); +		if (type == ORPHAN_INO) +			sbi->n_orphans--; +		spin_unlock(&sbi->ino_lock[type]); +		kmem_cache_free(ino_entry_slab, e); +		return; +	} +	spin_unlock(&sbi->ino_lock[type]); +} + +void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) +{ +	/* add new dirty ino entry into list */ +	__add_ino_entry(sbi, ino, type); +} + +void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) +{ +	/* remove dirty ino entry from list */ +	__remove_ino_entry(sbi, ino, type); +} + +/* mode should be APPEND_INO or UPDATE_INO */ +bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) +{ +	struct ino_entry *e; +	spin_lock(&sbi->ino_lock[mode]); +	e = radix_tree_lookup(&sbi->ino_root[mode], ino); +	spin_unlock(&sbi->ino_lock[mode]); +	return e ? true : false; +} + +static void release_dirty_inode(struct f2fs_sb_info *sbi) +{ +	struct ino_entry *e, *tmp; +	int i; + +	for (i = APPEND_INO; i <= UPDATE_INO; i++) { +		spin_lock(&sbi->ino_lock[i]); +		list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { +			list_del(&e->list); +			radix_tree_delete(&sbi->ino_root[i], e->ino); +			kmem_cache_free(ino_entry_slab, e); +		} +		spin_unlock(&sbi->ino_lock[i]); +	} +} +  int acquire_orphan_inode(struct f2fs_sb_info *sbi)  {  	int err = 0; -	spin_lock(&sbi->orphan_inode_lock); +	spin_lock(&sbi->ino_lock[ORPHAN_INO]);  	if (unlikely(sbi->n_orphans >= sbi->max_orphans))  		err = -ENOSPC;  	else  		sbi->n_orphans++; -	spin_unlock(&sbi->orphan_inode_lock); +	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);  	return err;  }  void release_orphan_inode(struct f2fs_sb_info *sbi)  { -	spin_lock(&sbi->orphan_inode_lock); +	spin_lock(&sbi->ino_lock[ORPHAN_INO]);  	f2fs_bug_on(sbi->n_orphans == 0);  	sbi->n_orphans--; -	spin_unlock(&sbi->orphan_inode_lock); +	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);  }  void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)  { -	struct list_head *head; -	struct orphan_inode_entry *new, *orphan; - -	new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); -	new->ino = ino; - -	spin_lock(&sbi->orphan_inode_lock); -	head = &sbi->orphan_inode_list; -	list_for_each_entry(orphan, head, list) { -		if (orphan->ino == ino) { -			spin_unlock(&sbi->orphan_inode_lock); -			kmem_cache_free(orphan_entry_slab, new); -			return; -		} - -		if (orphan->ino > ino) -			break; -	} - -	/* add new orphan entry into list which is sorted by inode number */ -	list_add_tail(&new->list, &orphan->list); -	spin_unlock(&sbi->orphan_inode_lock); +	/* add new orphan ino entry into list */ +	__add_ino_entry(sbi, ino, ORPHAN_INO);  }  void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)  { -	struct list_head *head; -	struct orphan_inode_entry *orphan; - -	spin_lock(&sbi->orphan_inode_lock); -	head = &sbi->orphan_inode_list; -	list_for_each_entry(orphan, head, list) { -		if (orphan->ino == ino) { -			list_del(&orphan->list); -			f2fs_bug_on(sbi->n_orphans == 0); -			sbi->n_orphans--; -			spin_unlock(&sbi->orphan_inode_lock); -			kmem_cache_free(orphan_entry_slab, orphan); -			return; -		} -	} -	spin_unlock(&sbi->orphan_inode_lock); +	/* remove orphan entry from orphan list */ +	__remove_ino_entry(sbi, ino, ORPHAN_INO);  }  static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -401,14 +449,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)  	unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +  		(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);  	struct page *page = NULL; -	struct orphan_inode_entry *orphan = NULL; +	struct ino_entry *orphan = NULL;  	for (index = 0; index < orphan_blocks; index++)  		grab_meta_page(sbi, start_blk + index);  	index = 1; -	spin_lock(&sbi->orphan_inode_lock); -	head = &sbi->orphan_inode_list; +	spin_lock(&sbi->ino_lock[ORPHAN_INO]); +	head = &sbi->ino_list[ORPHAN_INO];  	/* loop for each orphan inode entry and write them in Jornal block */  	list_for_each_entry(orphan, head, list) { @@ -448,7 +496,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)  		f2fs_put_page(page, 1);  	} -	spin_unlock(&sbi->orphan_inode_lock); +	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);  }  static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -714,10 +762,10 @@ retry_flush_dents:  	 * until finishing nat/sit flush.  	 */  retry_flush_nodes: -	mutex_lock(&sbi->node_write); +	down_write(&sbi->node_write);  	if (get_pages(sbi, F2FS_DIRTY_NODES)) { -		mutex_unlock(&sbi->node_write); +		up_write(&sbi->node_write);  		sync_node_pages(sbi, 0, &wbc);  		goto retry_flush_nodes;  	} @@ -726,7 +774,7 @@ retry_flush_nodes:  static void unblock_operations(struct f2fs_sb_info *sbi)  { -	mutex_unlock(&sbi->node_write); +	up_write(&sbi->node_write);  	f2fs_unlock_all(sbi);  } @@ -748,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)  static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  {  	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);  	nid_t last_nid = 0;  	block_t start_blk;  	struct page *cp_page; @@ -761,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	 * This avoids to conduct wrong roll-forward operations and uses  	 * metapages, so should be called prior to sync_meta_pages below.  	 */ -	discard_next_dnode(sbi); +	discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));  	/* Flush all the NAT/SIT pages */  	while (get_pages(sbi, F2FS_DIRTY_META)) @@ -885,8 +934,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	/* Here, we only have one bio having CP pack */  	sync_meta_pages(sbi, META_FLUSH, LONG_MAX); -	if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { +	if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {  		clear_prefree_segments(sbi); +		release_dirty_inode(sbi);  		F2FS_RESET_SB_DIRT(sbi);  	}  } @@ -932,31 +982,37 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");  } -void init_orphan_info(struct f2fs_sb_info *sbi) +void init_ino_entry_info(struct f2fs_sb_info *sbi)  { -	spin_lock_init(&sbi->orphan_inode_lock); -	INIT_LIST_HEAD(&sbi->orphan_inode_list); -	sbi->n_orphans = 0; +	int i; + +	for (i = 0; i < MAX_INO_ENTRY; i++) { +		INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); +		spin_lock_init(&sbi->ino_lock[i]); +		INIT_LIST_HEAD(&sbi->ino_list[i]); +	} +  	/*  	 * considering 512 blocks in a segment 8 blocks are needed for cp  	 * and log segment summaries. Remaining blocks are used to keep  	 * orphan entries with the limitation one reserved segment  	 * for cp pack we can have max 1020*504 orphan entries  	 */ +	sbi->n_orphans = 0;  	sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)  				* F2FS_ORPHANS_PER_BLOCK;  }  int __init create_checkpoint_caches(void)  { -	orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", -			sizeof(struct orphan_inode_entry)); -	if (!orphan_entry_slab) +	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry", +			sizeof(struct ino_entry)); +	if (!ino_entry_slab)  		return -ENOMEM;  	inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",  			sizeof(struct dir_inode_entry));  	if (!inode_entry_slab) { -		kmem_cache_destroy(orphan_entry_slab); +		kmem_cache_destroy(ino_entry_slab);  		return -ENOMEM;  	}  	return 0; @@ -964,6 +1020,6 @@ int __init create_checkpoint_caches(void)  void destroy_checkpoint_caches(void)  { -	kmem_cache_destroy(orphan_entry_slab); +	kmem_cache_destroy(ino_entry_slab);  	kmem_cache_destroy(inode_entry_slab);  } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f8cf619edb5f..03313099c51c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,  	/* change META to META_FLUSH in the checkpoint procedure */  	if (type >= META_FLUSH) {  		io->fio.type = META_FLUSH; -		io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; +		if (test_opt(sbi, NOBARRIER)) +			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO; +		else +			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;  	}  	__submit_merged_bio(io);  	up_write(&io->io_rwsem); @@ -626,8 +629,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock,  	if (check_extent_cache(inode, pgofs, bh_result))  		goto out; -	if (create) +	if (create) { +		f2fs_balance_fs(sbi);  		f2fs_lock_op(sbi); +	}  	/* When reading holes, we need its node page */  	set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -784,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)  			!is_cold_data(page) &&  			need_inplace_update(inode))) {  		rewrite_data_page(page, old_blkaddr, fio); +		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);  	} else {  		write_data_page(page, &dn, &new_blkaddr, fio);  		update_extent_cache(new_blkaddr, &dn); +		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);  	}  out_writepage:  	f2fs_put_dnode(&dn); @@ -914,6 +921,16 @@ skip_write:  	return 0;  } +static void f2fs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, inode->i_size); +		truncate_blocks(inode, inode->i_size); +	} +} +  static int f2fs_write_begin(struct file *file, struct address_space *mapping,  		loff_t pos, unsigned len, unsigned flags,  		struct page **pagep, void **fsdata) @@ -931,11 +948,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,  repeat:  	err = f2fs_convert_inline_data(inode, pos + len);  	if (err) -		return err; +		goto fail;  	page = grab_cache_page_write_begin(mapping, index, flags); -	if (!page) -		return -ENOMEM; +	if (!page) { +		err = -ENOMEM; +		goto fail; +	}  	/* to avoid latency during memory pressure */  	unlock_page(page); @@ -949,10 +968,9 @@ repeat:  	set_new_dnode(&dn, inode, NULL, NULL, 0);  	err = f2fs_reserve_block(&dn, index);  	f2fs_unlock_op(sbi); -  	if (err) {  		f2fs_put_page(page, 0); -		return err; +		goto fail;  	}  inline_data:  	lock_page(page); @@ -982,19 +1000,20 @@ inline_data:  			err = f2fs_read_inline_data(inode, page);  			if (err) {  				page_cache_release(page); -				return err; +				goto fail;  			}  		} else {  			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,  							READ_SYNC);  			if (err) -				return err; +				goto fail;  		}  		lock_page(page);  		if (unlikely(!PageUptodate(page))) {  			f2fs_put_page(page, 1); -			return -EIO; +			err = -EIO; +			goto fail;  		}  		if (unlikely(page->mapping != mapping)) {  			f2fs_put_page(page, 1); @@ -1005,6 +1024,9 @@ out:  	SetPageUptodate(page);  	clear_cold_data(page);  	return 0; +fail: +	f2fs_write_failed(mapping, pos + len); +	return err;  }  static int f2fs_write_end(struct file *file, @@ -1016,7 +1038,6 @@ static int f2fs_write_end(struct file *file,  	trace_f2fs_write_end(inode, pos, len, copied); -	SetPageUptodate(page);  	set_page_dirty(page);  	if (pos + copied > i_size_read(inode)) { @@ -1050,7 +1071,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,  		struct iov_iter *iter, loff_t offset)  {  	struct file *file = iocb->ki_filp; -	struct inode *inode = file->f_mapping->host; +	struct address_space *mapping = file->f_mapping; +	struct inode *inode = mapping->host; +	size_t count = iov_iter_count(iter); +	int err;  	/* Let buffer I/O handle the inline data case. */  	if (f2fs_has_inline_data(inode)) @@ -1062,8 +1086,15 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,  	/* clear fsync mark to recover these blocks */  	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); -	return blockdev_direct_IO(rw, iocb, inode, iter, offset, -				  get_data_block); +	trace_f2fs_direct_IO_enter(inode, offset, count, rw); + +	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); +	if (err < 0 && (rw & WRITE)) +		f2fs_write_failed(mapping, offset + count); + +	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); + +	return err;  }  static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b52c12cf5873..a441ba33be11 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -167,7 +167,7 @@ get_cache:  	si->cache_mem += npages << PAGE_CACHE_SHIFT;  	npages = META_MAPPING(sbi)->nrpages;  	si->cache_mem += npages << PAGE_CACHE_SHIFT; -	si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); +	si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);  	si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);  } @@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)  	f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);  	if (!f2fs_debugfs_root) -		goto bail; +		return;  	file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,  			NULL, &stat_fops); -	if (!file) -		goto free_debugfs_dir; - -	return; - -free_debugfs_dir: -	debugfs_remove(f2fs_debugfs_root); - -bail: -	f2fs_debugfs_root = NULL; -	return; +	if (!file) { +		debugfs_remove(f2fs_debugfs_root); +		f2fs_debugfs_root = NULL; +	}  }  void f2fs_destroy_root_stats(void) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a4addd72ebbd..bcf893c3d903 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level,  	return bidx;  } -static bool early_match_name(const char *name, size_t namelen, -			f2fs_hash_t namehash, struct f2fs_dir_entry *de) +static bool early_match_name(size_t namelen, f2fs_hash_t namehash, +				struct f2fs_dir_entry *de)  {  	if (le16_to_cpu(de->name_len) != namelen)  		return false; @@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen,  }  static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, -			const char *name, size_t namelen, int *max_slots, +			struct qstr *name, int *max_slots,  			f2fs_hash_t namehash, struct page **res_page)  {  	struct f2fs_dir_entry *de; @@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,  			continue;  		}  		de = &dentry_blk->dentry[bit_pos]; -		if (early_match_name(name, namelen, namehash, de)) { +		if (early_match_name(name->len, namehash, de)) {  			if (!memcmp(dentry_blk->filename[bit_pos], -							name, namelen)) { +							name->name, +							name->len)) {  				*res_page = dentry_page;  				goto found;  			} @@ -120,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,  			*max_slots = max_len;  			max_len = 0;  		} + +		/* +		 * For the most part, it should be a bug when name_len is zero. +		 * We stop here for figuring out where the bugs are occurred. +		 */ +		f2fs_bug_on(!de->name_len); +  		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));  	} @@ -132,10 +140,10 @@ found:  }  static struct f2fs_dir_entry *find_in_level(struct inode *dir, -		unsigned int level, const char *name, size_t namelen, +			unsigned int level, struct qstr *name,  			f2fs_hash_t namehash, struct page **res_page)  { -	int s = GET_DENTRY_SLOTS(namelen); +	int s = GET_DENTRY_SLOTS(name->len);  	unsigned int nbucket, nblock;  	unsigned int bidx, end_block;  	struct page *dentry_page; @@ -160,8 +168,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,  			continue;  		} -		de = find_in_block(dentry_page, name, namelen, -					&max_slots, namehash, res_page); +		de = find_in_block(dentry_page, name, &max_slots, +					namehash, res_page);  		if (de)  			break; @@ -187,8 +195,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,  struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,  			struct qstr *child, struct page **res_page)  { -	const char *name = child->name; -	size_t namelen = child->len;  	unsigned long npages = dir_blocks(dir);  	struct f2fs_dir_entry *de = NULL;  	f2fs_hash_t name_hash; @@ -200,12 +206,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,  	*res_page = NULL; -	name_hash = f2fs_dentry_hash(name, namelen); +	name_hash = f2fs_dentry_hash(child);  	max_depth = F2FS_I(dir)->i_current_depth;  	for (level = 0; level < max_depth; level++) { -		de = find_in_level(dir, level, name, -				namelen, name_hash, res_page); +		de = find_in_level(dir, level, child, name_hash, res_page);  		if (de)  			break;  	} @@ -298,14 +303,13 @@ static int make_empty_dir(struct inode *inode,  	struct page *dentry_page;  	struct f2fs_dentry_block *dentry_blk;  	struct f2fs_dir_entry *de; -	void *kaddr;  	dentry_page = get_new_data_page(inode, page, 0, true);  	if (IS_ERR(dentry_page))  		return PTR_ERR(dentry_page); -	kaddr = kmap_atomic(dentry_page); -	dentry_blk = (struct f2fs_dentry_block *)kaddr; + +	dentry_blk = kmap_atomic(dentry_page);  	de = &dentry_blk->dentry[0];  	de->name_len = cpu_to_le16(1); @@ -323,7 +327,7 @@ static int make_empty_dir(struct inode *inode,  	test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);  	test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); -	kunmap_atomic(kaddr); +	kunmap_atomic(dentry_blk);  	set_page_dirty(dentry_page);  	f2fs_put_page(dentry_page, 1); @@ -333,11 +337,12 @@ static int make_empty_dir(struct inode *inode,  static struct page *init_inode_metadata(struct inode *inode,  		struct inode *dir, const struct qstr *name)  { +	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	struct page *page;  	int err;  	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { -		page = new_inode_page(inode, name); +		page = new_inode_page(inode);  		if (IS_ERR(page))  			return page; @@ -362,7 +367,8 @@ static struct page *init_inode_metadata(struct inode *inode,  		set_cold_node(inode, page);  	} -	init_dent_inode(name, page); +	if (name) +		init_dent_inode(name, page);  	/*  	 * This file should be checkpointed during fsync. @@ -370,6 +376,12 @@ static struct page *init_inode_metadata(struct inode *inode,  	 */  	if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {  		file_lost_pino(inode); +		/* +		 * If link the tmpfile to alias through linkat path, +		 * we should remove this inode from orphan list. +		 */ +		if (inode->i_nlink == 0) +			remove_orphan_inode(sbi, inode->i_ino);  		inc_nlink(inode);  	}  	return page; @@ -453,7 +465,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,  	int err = 0;  	int i; -	dentry_hash = f2fs_dentry_hash(name->name, name->len); +	dentry_hash = f2fs_dentry_hash(name);  	level = 0;  	current_depth = F2FS_I(dir)->i_current_depth;  	if (F2FS_I(dir)->chash == dentry_hash) { @@ -529,6 +541,27 @@ fail:  	return err;  } +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) +{ +	struct page *page; +	int err = 0; + +	down_write(&F2FS_I(inode)->i_sem); +	page = init_inode_metadata(inode, dir, NULL); +	if (IS_ERR(page)) { +		err = PTR_ERR(page); +		goto fail; +	} +	/* we don't need to mark_inode_dirty now */ +	update_inode(inode, page); +	f2fs_put_page(page, 1); + +	clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); +fail: +	up_write(&F2FS_I(inode)->i_sem); +	return err; +} +  /*   * It only removes the dentry from the dentry page,corresponding name   * entry in name page does not need to be touched during deletion. @@ -541,14 +574,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,  	struct address_space *mapping = page->mapping;  	struct inode *dir = mapping->host;  	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); -	void *kaddr = page_address(page);  	int i;  	lock_page(page);  	f2fs_wait_on_page_writeback(page, DATA); -	dentry_blk = (struct f2fs_dentry_block *)kaddr; -	bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; +	dentry_blk = page_address(page); +	bit_pos = dentry - dentry_blk->dentry;  	for (i = 0; i < slots; i++)  		test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); @@ -603,7 +635,6 @@ bool f2fs_empty_dir(struct inode *dir)  	unsigned long nblock = dir_blocks(dir);  	for (bidx = 0; bidx < nblock; bidx++) { -		void *kaddr;  		dentry_page = get_lock_data_page(dir, bidx);  		if (IS_ERR(dentry_page)) {  			if (PTR_ERR(dentry_page) == -ENOENT) @@ -612,8 +643,8 @@ bool f2fs_empty_dir(struct inode *dir)  				return false;  		} -		kaddr = kmap_atomic(dentry_page); -		dentry_blk = (struct f2fs_dentry_block *)kaddr; + +		dentry_blk = kmap_atomic(dentry_page);  		if (bidx == 0)  			bit_pos = 2;  		else @@ -621,7 +652,7 @@ bool f2fs_empty_dir(struct inode *dir)  		bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,  						NR_DENTRY_IN_BLOCK,  						bit_pos); -		kunmap_atomic(kaddr); +		kunmap_atomic(dentry_blk);  		f2fs_put_page(dentry_page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 58df97e174d0..4dab5338a97a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -41,6 +41,7 @@  #define F2FS_MOUNT_INLINE_XATTR		0x00000080  #define F2FS_MOUNT_INLINE_DATA		0x00000100  #define F2FS_MOUNT_FLUSH_MERGE		0x00000200 +#define F2FS_MOUNT_NOBARRIER		0x00000400  #define clear_opt(sbi, option)	(sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)  #define set_opt(sbi, option)	(sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -99,8 +100,15 @@ enum {  	META_SSA  }; -/* for the list of orphan inodes */ -struct orphan_inode_entry { +/* for the list of ino */ +enum { +	ORPHAN_INO,		/* for orphan ino list */ +	APPEND_INO,		/* for append ino list */ +	UPDATE_INO,		/* for update ino list */ +	MAX_INO_ENTRY,		/* max. list */ +}; + +struct ino_entry {  	struct list_head list;	/* list head */  	nid_t ino;		/* inode number */  }; @@ -256,6 +264,8 @@ struct f2fs_nm_info {  	unsigned int nat_cnt;		/* the # of cached nat entries */  	struct list_head nat_entries;	/* cached nat entry list (clean) */  	struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ +	struct list_head nat_entry_set;	/* nat entry set list */ +	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */  	/* free node ids management */  	struct radix_tree_root free_nid_root;/* root of the free_nid cache */ @@ -442,14 +452,17 @@ struct f2fs_sb_info {  	struct inode *meta_inode;		/* cache meta blocks */  	struct mutex cp_mutex;			/* checkpoint procedure lock */  	struct rw_semaphore cp_rwsem;		/* blocking FS operations */ -	struct mutex node_write;		/* locking node writes */ +	struct rw_semaphore node_write;		/* locking node writes */  	struct mutex writepages;		/* mutex for writepages() */  	bool por_doing;				/* recovery is doing or not */  	wait_queue_head_t cp_wait; -	/* for orphan inode management */ -	struct list_head orphan_inode_list;	/* orphan inode list */ -	spinlock_t orphan_inode_lock;		/* for orphan inode list */ +	/* for inode management */ +	struct radix_tree_root ino_root[MAX_INO_ENTRY];	/* ino entry array */ +	spinlock_t ino_lock[MAX_INO_ENTRY];		/* for ino entry lock */ +	struct list_head ino_list[MAX_INO_ENTRY];	/* inode list head */ + +	/* for orphan inode, use 0'th array */  	unsigned int n_orphans;			/* # of orphan inodes */  	unsigned int max_orphans;		/* max orphan inodes */ @@ -768,7 +781,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)  		if (flag == NAT_BITMAP)  			return &ckpt->sit_nat_version_bitmap;  		else -			return ((unsigned char *)ckpt + F2FS_BLKSIZE); +			return (unsigned char *)ckpt + F2FS_BLKSIZE;  	} else {  		offset = (flag == NAT_BITMAP) ?  			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; @@ -983,11 +996,15 @@ enum {  	FI_NO_EXTENT,		/* not to use the extent cache */  	FI_INLINE_XATTR,	/* used for inline xattr */  	FI_INLINE_DATA,		/* used for inline data*/ +	FI_APPEND_WRITE,	/* inode has appended data */ +	FI_UPDATE_WRITE,	/* inode has in-place-update data */ +	FI_NEED_IPU,		/* used fo ipu for fdatasync */  };  static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)  { -	set_bit(flag, &fi->flags); +	if (!test_bit(flag, &fi->flags)) +		set_bit(flag, &fi->flags);  }  static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) @@ -997,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)  static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)  { -	clear_bit(flag, &fi->flags); +	if (test_bit(flag, &fi->flags)) +		clear_bit(flag, &fi->flags);  }  static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) @@ -1136,6 +1154,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,  int update_dent_inode(struct inode *, const struct qstr *);  int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);  void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int f2fs_do_tmpfile(struct inode *, struct inode *);  int f2fs_make_empty(struct inode *, struct inode *);  bool f2fs_empty_dir(struct inode *); @@ -1155,7 +1174,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);  /*   * hash.c   */ -f2fs_hash_t f2fs_dentry_hash(const char *, size_t); +f2fs_hash_t f2fs_dentry_hash(const struct qstr *);  /*   * node.c @@ -1173,7 +1192,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t);  int truncate_xattr_node(struct inode *, struct page *);  int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);  void remove_inode_page(struct inode *); -struct page *new_inode_page(struct inode *, const struct qstr *); +struct page *new_inode_page(struct inode *);  struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);  void ra_node_page(struct f2fs_sb_info *, nid_t);  struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); @@ -1185,6 +1204,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);  void alloc_nid_failed(struct f2fs_sb_info *, nid_t);  void recover_node_page(struct f2fs_sb_info *, struct page *,  		struct f2fs_summary *, struct node_info *, block_t); +void recover_inline_xattr(struct inode *, struct page *);  bool recover_xattr_data(struct inode *, struct page *, block_t);  int recover_inode_page(struct f2fs_sb_info *, struct page *);  int restore_node_summary(struct f2fs_sb_info *, unsigned int, @@ -1206,7 +1226,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);  void invalidate_blocks(struct f2fs_sb_info *, block_t);  void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);  void clear_prefree_segments(struct f2fs_sb_info *); -void discard_next_dnode(struct f2fs_sb_info *); +void discard_next_dnode(struct f2fs_sb_info *, block_t);  int npages_for_summary_flush(struct f2fs_sb_info *);  void allocate_new_segments(struct f2fs_sb_info *);  struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); @@ -1240,6 +1260,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);  struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);  int ra_meta_pages(struct f2fs_sb_info *, int, int, int);  long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); +void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); +bool exist_written_data(struct f2fs_sb_info *, nid_t, int);  int acquire_orphan_inode(struct f2fs_sb_info *);  void release_orphan_inode(struct f2fs_sb_info *);  void add_orphan_inode(struct f2fs_sb_info *, nid_t); @@ -1251,7 +1274,7 @@ void add_dirty_dir_inode(struct inode *);  void remove_dirty_dir_inode(struct inode *);  void sync_dirty_dir_inodes(struct f2fs_sb_info *);  void write_checkpoint(struct f2fs_sb_info *, bool); -void init_orphan_info(struct f2fs_sb_info *); +void init_ino_entry_info(struct f2fs_sb_info *);  int __init create_checkpoint_caches(void);  void destroy_checkpoint_caches(void); @@ -1295,7 +1318,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *);  struct f2fs_stat_info {  	struct list_head stat_list;  	struct f2fs_sb_info *sbi; -	struct mutex stat_lock;  	int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;  	int main_area_segs, main_area_sections, main_area_zones;  	int hit_ext, total_ext; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7d8b96275092..208f1a9bd569 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -127,12 +127,30 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  		return 0;  	trace_f2fs_sync_file_enter(inode); + +	/* if fdatasync is triggered, let's do in-place-update */ +	if (datasync) +		set_inode_flag(fi, FI_NEED_IPU); +  	ret = filemap_write_and_wait_range(inode->i_mapping, start, end); +	if (datasync) +		clear_inode_flag(fi, FI_NEED_IPU);  	if (ret) {  		trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);  		return ret;  	} +	/* +	 * if there is no written data, don't waste time to write recovery info. +	 */ +	if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && +		!exist_written_data(sbi, inode->i_ino, APPEND_INO)) { +		if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || +			exist_written_data(sbi, inode->i_ino, UPDATE_INO)) +			goto flush_out; +		goto out; +	} +  	/* guarantee free sections for fsync */  	f2fs_balance_fs(sbi); @@ -188,6 +206,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  		ret = wait_on_node_pages_writeback(sbi, inode->i_ino);  		if (ret)  			goto out; + +		/* once recovery info is written, don't need to tack this */ +		remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); +		clear_inode_flag(fi, FI_APPEND_WRITE); +flush_out: +		remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); +		clear_inode_flag(fi, FI_UPDATE_WRITE);  		ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));  	}  out: @@ -206,8 +231,9 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,  	/* find first dirty page index */  	pagevec_init(&pvec, 0); -	nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); -	pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; +	nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, +					PAGECACHE_TAG_DIRTY, 1); +	pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;  	pagevec_release(&pvec);  	return pgofs;  } @@ -272,8 +298,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)  			}  		} -		end_offset = IS_INODE(dn.node_page) ? -			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; +		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));  		/* find data/hole in dnode block */  		for (; dn.ofs_in_node < end_offset; @@ -380,13 +405,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)  		return;  	lock_page(page); -	if (unlikely(page->mapping != inode->i_mapping)) { -		f2fs_put_page(page, 1); -		return; -	} +	if (unlikely(!PageUptodate(page) || +			page->mapping != inode->i_mapping)) +		goto out; +  	f2fs_wait_on_page_writeback(page, DATA);  	zero_user(page, offset, PAGE_CACHE_SIZE - offset);  	set_page_dirty(page); + +out:  	f2fs_put_page(page, 1);  } @@ -645,6 +672,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,  	loff_t off_start, off_end;  	int ret = 0; +	f2fs_balance_fs(sbi); +  	ret = inode_newsize_ok(inode, (len + offset));  	if (ret)  		return ret; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b90dbe55403a..d7947d90ccc3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -186,7 +186,6 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,  static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)  {  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); -	unsigned int hint = 0;  	unsigned int secno;  	/* @@ -194,11 +193,9 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)  	 * selected by background GC before.  	 * Those segments guarantee they have small valid blocks.  	 */ -next: -	secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); -	if (secno < TOTAL_SECS(sbi)) { +	for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) {  		if (sec_usage_check(sbi, secno)) -			goto next; +			continue;  		clear_bit(secno, dirty_i->victim_secmap);  		return secno * sbi->segs_per_sec;  	} diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 6eb8d269b53b..948d17bf7281 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)  		*buf++ = pad;  } -f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)  {  	__u32 hash;  	f2fs_hash_t f2fs_hash;  	const char *p;  	__u32 in[8], buf[4]; +	const char *name = name_info->name; +	size_t len = name_info->len;  	if ((len <= 2) && (name[0] == '.') &&  		(name[1] == '.' || name[1] == '\0')) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1bba5228c197..5beeccef9ae1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -172,6 +172,7 @@ int f2fs_write_inline_data(struct inode *inode,  		stat_inc_inline_inode(inode);  	} +	set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);  	sync_inode_page(&dn);  	f2fs_put_dnode(&dn); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2cf6962f6cc8..2c39999f3868 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -267,13 +267,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)  void f2fs_evict_inode(struct inode *inode)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	nid_t xnid = F2FS_I(inode)->i_xattr_nid;  	trace_f2fs_evict_inode(inode);  	truncate_inode_pages_final(&inode->i_data);  	if (inode->i_ino == F2FS_NODE_INO(sbi) ||  			inode->i_ino == F2FS_META_INO(sbi)) -		goto no_delete; +		goto out_clear;  	f2fs_bug_on(get_dirty_dents(inode));  	remove_dirty_dir_inode(inode); @@ -295,6 +296,13 @@ void f2fs_evict_inode(struct inode *inode)  	sb_end_intwrite(inode->i_sb);  no_delete: -	clear_inode(inode);  	invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); +	if (xnid) +		invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); +	if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE)) +		add_dirty_inode(sbi, inode->i_ino, APPEND_INO); +	if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) +		add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); +out_clear: +	clear_inode(inode);  } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a6bdddc33ce2..27b03776ffd2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -13,6 +13,7 @@  #include <linux/pagemap.h>  #include <linux/sched.h>  #include <linux/ctype.h> +#include <linux/dcache.h>  #include "f2fs.h"  #include "node.h" @@ -22,14 +23,13 @@  static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)  { -	struct super_block *sb = dir->i_sb; -	struct f2fs_sb_info *sbi = F2FS_SB(sb); +	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	nid_t ino;  	struct inode *inode;  	bool nid_free = false;  	int err; -	inode = new_inode(sb); +	inode = new_inode(dir->i_sb);  	if (!inode)  		return ERR_PTR(-ENOMEM); @@ -102,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,  static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,  						bool excl)  { -	struct super_block *sb = dir->i_sb; -	struct f2fs_sb_info *sbi = F2FS_SB(sb); +	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	struct inode *inode;  	nid_t ino = 0;  	int err; @@ -146,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,  		struct dentry *dentry)  {  	struct inode *inode = old_dentry->d_inode; -	struct super_block *sb = dir->i_sb; -	struct f2fs_sb_info *sbi = F2FS_SB(sb); +	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	int err;  	f2fs_balance_fs(sbi); @@ -207,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,  static int f2fs_unlink(struct inode *dir, struct dentry *dentry)  { -	struct super_block *sb = dir->i_sb; -	struct f2fs_sb_info *sbi = F2FS_SB(sb); +	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	struct inode *inode = dentry->d_inode;  	struct f2fs_dir_entry *de;  	struct page *page; @@ -242,8 +239,7 @@ fail:  static int f2fs_symlink(struct inode *dir, struct dentry *dentry,  					const char *symname)  { -	struct super_block *sb = dir->i_sb; -	struct f2fs_sb_info *sbi = F2FS_SB(sb); +	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	struct inode *inode;  	size_t symlen = strlen(symname) + 1;  	int err; @@ -330,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)  static int f2fs_mknod(struct inode *dir, struct dentry *dentry,  				umode_t mode, dev_t rdev)  { -	struct super_block *sb = dir->i_sb; -	struct f2fs_sb_info *sbi = F2FS_SB(sb); +	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	struct inode *inode;  	int err = 0; @@ -369,8 +364,7 @@ out:  static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  			struct inode *new_dir, struct dentry *new_dentry)  { -	struct super_block *sb = old_dir->i_sb; -	struct f2fs_sb_info *sbi = F2FS_SB(sb); +	struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);  	struct inode *old_inode = old_dentry->d_inode;  	struct inode *new_inode = new_dentry->d_inode;  	struct page *old_dir_page; @@ -393,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  			goto out_old;  	} -	f2fs_lock_op(sbi); -  	if (new_inode) {  		err = -ENOTEMPTY; @@ -407,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  		if (!new_entry)  			goto out_dir; +		f2fs_lock_op(sbi); +  		err = acquire_orphan_inode(sbi);  		if (err)  			goto put_out_dir; @@ -435,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  		update_inode_page(old_inode);  		update_inode_page(new_inode);  	} else { +		f2fs_lock_op(sbi); +  		err = f2fs_add_link(new_dentry, old_inode); -		if (err) +		if (err) { +			f2fs_unlock_op(sbi);  			goto out_dir; +		}  		if (old_dir_entry) {  			inc_nlink(new_dir); @@ -472,6 +470,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  	return 0;  put_out_dir: +	f2fs_unlock_op(sbi);  	kunmap(new_page);  	f2fs_put_page(new_page, 0);  out_dir: @@ -479,7 +478,151 @@ out_dir:  		kunmap(old_dir_page);  		f2fs_put_page(old_dir_page, 0);  	} +out_old: +	kunmap(old_page); +	f2fs_put_page(old_page, 0); +out: +	return err; +} + +static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, +			     struct inode *new_dir, struct dentry *new_dentry) +{ +	struct super_block *sb = old_dir->i_sb; +	struct f2fs_sb_info *sbi = F2FS_SB(sb); +	struct inode *old_inode = old_dentry->d_inode; +	struct inode *new_inode = new_dentry->d_inode; +	struct page *old_dir_page, *new_dir_page; +	struct page *old_page, *new_page; +	struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; +	struct f2fs_dir_entry *old_entry, *new_entry; +	int old_nlink = 0, new_nlink = 0; +	int err = -ENOENT; + +	f2fs_balance_fs(sbi); + +	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); +	if (!old_entry) +		goto out; + +	new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); +	if (!new_entry) +		goto out_old; + +	/* prepare for updating ".." directory entry info later */ +	if (old_dir != new_dir) { +		if (S_ISDIR(old_inode->i_mode)) { +			err = -EIO; +			old_dir_entry = f2fs_parent_dir(old_inode, +							&old_dir_page); +			if (!old_dir_entry) +				goto out_new; +		} + +		if (S_ISDIR(new_inode->i_mode)) { +			err = -EIO; +			new_dir_entry = f2fs_parent_dir(new_inode, +							&new_dir_page); +			if (!new_dir_entry) +				goto out_old_dir; +		} +	} + +	/* +	 * If cross rename between file and directory those are not +	 * in the same directory, we will inc nlink of file's parent +	 * later, so we should check upper boundary of its nlink. +	 */ +	if ((!old_dir_entry || !new_dir_entry) && +				old_dir_entry != new_dir_entry) { +		old_nlink = old_dir_entry ? -1 : 1; +		new_nlink = -old_nlink; +		err = -EMLINK; +		if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || +			(new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) +			goto out_new_dir; +	} + +	f2fs_lock_op(sbi); + +	err = update_dent_inode(old_inode, &new_dentry->d_name); +	if (err) +		goto out_unlock; + +	err = update_dent_inode(new_inode, &old_dentry->d_name); +	if (err) +		goto out_undo; + +	/* update ".." directory entry info of old dentry */ +	if (old_dir_entry) +		f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); + +	/* update ".." directory entry info of new dentry */ +	if (new_dir_entry) +		f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir); + +	/* update directory entry info of old dir inode */ +	f2fs_set_link(old_dir, old_entry, old_page, new_inode); + +	down_write(&F2FS_I(old_inode)->i_sem); +	file_lost_pino(old_inode); +	up_write(&F2FS_I(old_inode)->i_sem); + +	update_inode_page(old_inode); + +	old_dir->i_ctime = CURRENT_TIME; +	if (old_nlink) { +		down_write(&F2FS_I(old_dir)->i_sem); +		if (old_nlink < 0) +			drop_nlink(old_dir); +		else +			inc_nlink(old_dir); +		up_write(&F2FS_I(old_dir)->i_sem); +	} +	mark_inode_dirty(old_dir); +	update_inode_page(old_dir); + +	/* update directory entry info of new dir inode */ +	f2fs_set_link(new_dir, new_entry, new_page, old_inode); + +	down_write(&F2FS_I(new_inode)->i_sem); +	file_lost_pino(new_inode); +	up_write(&F2FS_I(new_inode)->i_sem); + +	update_inode_page(new_inode); + +	new_dir->i_ctime = CURRENT_TIME; +	if (new_nlink) { +		down_write(&F2FS_I(new_dir)->i_sem); +		if (new_nlink < 0) +			drop_nlink(new_dir); +		else +			inc_nlink(new_dir); +		up_write(&F2FS_I(new_dir)->i_sem); +	} +	mark_inode_dirty(new_dir); +	update_inode_page(new_dir); + +	f2fs_unlock_op(sbi); +	return 0; +out_undo: +	/* Still we may fail to recover name info of f2fs_inode here */ +	update_dent_inode(old_inode, &old_dentry->d_name); +out_unlock:  	f2fs_unlock_op(sbi); +out_new_dir: +	if (new_dir_entry) { +		kunmap(new_dir_page); +		f2fs_put_page(new_dir_page, 0); +	} +out_old_dir: +	if (old_dir_entry) { +		kunmap(old_dir_page); +		f2fs_put_page(old_dir_page, 0); +	} +out_new: +	kunmap(new_page); +	f2fs_put_page(new_page, 0);  out_old:  	kunmap(old_page);  	f2fs_put_page(old_page, 0); @@ -487,6 +630,71 @@ out:  	return err;  } +static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, +			struct inode *new_dir, struct dentry *new_dentry, +			unsigned int flags) +{ +	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) +		return -EINVAL; + +	if (flags & RENAME_EXCHANGE) { +		return f2fs_cross_rename(old_dir, old_dentry, +					 new_dir, new_dentry); +	} +	/* +	 * VFS has already handled the new dentry existence case, +	 * here, we just deal with "RENAME_NOREPLACE" as regular rename. +	 */ +	return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry); +} + +static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); +	struct inode *inode; +	int err; + +	inode = f2fs_new_inode(dir, mode); +	if (IS_ERR(inode)) +		return PTR_ERR(inode); + +	inode->i_op = &f2fs_file_inode_operations; +	inode->i_fop = &f2fs_file_operations; +	inode->i_mapping->a_ops = &f2fs_dblock_aops; + +	f2fs_lock_op(sbi); +	err = acquire_orphan_inode(sbi); +	if (err) +		goto out; + +	err = f2fs_do_tmpfile(inode, dir); +	if (err) +		goto release_out; + +	/* +	 * add this non-linked tmpfile to orphan list, in this way we could +	 * remove all unused data of tmpfile after abnormal power-off. +	 */ +	add_orphan_inode(sbi, inode->i_ino); +	f2fs_unlock_op(sbi); + +	alloc_nid_done(sbi, inode->i_ino); +	d_tmpfile(dentry, inode); +	unlock_new_inode(inode); +	return 0; + +release_out: +	release_orphan_inode(sbi); +out: +	f2fs_unlock_op(sbi); +	clear_nlink(inode); +	unlock_new_inode(inode); +	make_bad_inode(inode); +	iput(inode); +	alloc_nid_failed(sbi, inode->i_ino); +	return err; +} +  const struct inode_operations f2fs_dir_inode_operations = {  	.create		= f2fs_create,  	.lookup		= f2fs_lookup, @@ -497,6 +705,8 @@ const struct inode_operations f2fs_dir_inode_operations = {  	.rmdir		= f2fs_rmdir,  	.mknod		= f2fs_mknod,  	.rename		= f2fs_rename, +	.rename2	= f2fs_rename2, +	.tmpfile	= f2fs_tmpfile,  	.getattr	= f2fs_getattr,  	.setattr	= f2fs_setattr,  	.get_acl	= f2fs_get_acl, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4b697ccc9b0c..d3d90d284631 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -25,6 +25,7 @@  static struct kmem_cache *nat_entry_slab;  static struct kmem_cache *free_nid_slab; +static struct kmem_cache *nat_entry_set_slab;  bool available_free_memory(struct f2fs_sb_info *sbi, int type)  { @@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)  	/* get current nat block page with lock */  	src_page = get_meta_page(sbi, src_off); - -	/* Dirty src_page means that it is already the new target NAT page. */ -	if (PageDirty(src_page)) -		return src_page; -  	dst_page = grab_meta_page(sbi, dst_off); +	f2fs_bug_on(PageDirty(src_page));  	src_addr = page_address(src_page);  	dst_addr = page_address(dst_page); @@ -845,7 +842,7 @@ void remove_inode_page(struct inode *inode)  	truncate_node(&dn);  } -struct page *new_inode_page(struct inode *inode, const struct qstr *name) +struct page *new_inode_page(struct inode *inode)  {  	struct dnode_of_data dn; @@ -1234,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page,  	if (wbc->for_reclaim)  		goto redirty_out; -	mutex_lock(&sbi->node_write); +	down_read(&sbi->node_write);  	set_page_writeback(page);  	write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);  	set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));  	dec_page_count(sbi, F2FS_DIRTY_NODES); -	mutex_unlock(&sbi->node_write); +	up_read(&sbi->node_write);  	unlock_page(page);  	return 0; @@ -1552,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,  	clear_node_page_dirty(page);  } -static void recover_inline_xattr(struct inode *inode, struct page *page) +void recover_inline_xattr(struct inode *inode, struct page *page)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	void *src_addr, *dst_addr; @@ -1591,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)  	nid_t new_xnid = nid_of_node(page);  	struct node_info ni; -	recover_inline_xattr(inode, page); -  	if (!f2fs_has_xattr_block(ofs_of_node(page)))  		return false; @@ -1744,7 +1739,90 @@ skip:  	return err;  } -static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) +static struct nat_entry_set *grab_nat_entry_set(void) +{ +	struct nat_entry_set *nes = +			f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); + +	nes->entry_cnt = 0; +	INIT_LIST_HEAD(&nes->set_list); +	INIT_LIST_HEAD(&nes->entry_list); +	return nes; +} + +static void release_nat_entry_set(struct nat_entry_set *nes, +						struct f2fs_nm_info *nm_i) +{ +	f2fs_bug_on(!list_empty(&nes->entry_list)); + +	nm_i->dirty_nat_cnt -= nes->entry_cnt; +	list_del(&nes->set_list); +	kmem_cache_free(nat_entry_set_slab, nes); +} + +static void adjust_nat_entry_set(struct nat_entry_set *nes, +						struct list_head *head) +{ +	struct nat_entry_set *next = nes; + +	if (list_is_last(&nes->set_list, head)) +		return; + +	list_for_each_entry_continue(next, head, set_list) +		if (nes->entry_cnt <= next->entry_cnt) +			break; + +	list_move_tail(&nes->set_list, &next->set_list); +} + +static void add_nat_entry(struct nat_entry *ne, struct list_head *head) +{ +	struct nat_entry_set *nes; +	nid_t start_nid = START_NID(ne->ni.nid); + +	list_for_each_entry(nes, head, set_list) { +		if (nes->start_nid == start_nid) { +			list_move_tail(&ne->list, &nes->entry_list); +			nes->entry_cnt++; +			adjust_nat_entry_set(nes, head); +			return; +		} +	} + +	nes = grab_nat_entry_set(); + +	nes->start_nid = start_nid; +	list_move_tail(&ne->list, &nes->entry_list); +	nes->entry_cnt++; +	list_add(&nes->set_list, head); +} + +static void merge_nats_in_set(struct f2fs_sb_info *sbi) +{ +	struct f2fs_nm_info *nm_i = NM_I(sbi); +	struct list_head *dirty_list = &nm_i->dirty_nat_entries; +	struct list_head *set_list = &nm_i->nat_entry_set; +	struct nat_entry *ne, *tmp; + +	write_lock(&nm_i->nat_tree_lock); +	list_for_each_entry_safe(ne, tmp, dirty_list, list) { +		if (nat_get_blkaddr(ne) == NEW_ADDR) +			continue; +		add_nat_entry(ne, set_list); +		nm_i->dirty_nat_cnt++; +	} +	write_unlock(&nm_i->nat_tree_lock); +} + +static bool __has_cursum_space(struct f2fs_summary_block *sum, int size) +{ +	if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES) +		return true; +	else +		return false; +} + +static void remove_nats_in_journal(struct f2fs_sb_info *sbi)  {  	struct f2fs_nm_info *nm_i = NM_I(sbi);  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -1752,12 +1830,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)  	int i;  	mutex_lock(&curseg->curseg_mutex); - -	if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { -		mutex_unlock(&curseg->curseg_mutex); -		return false; -	} -  	for (i = 0; i < nats_in_cursum(sum); i++) {  		struct nat_entry *ne;  		struct f2fs_nat_entry raw_ne; @@ -1767,23 +1839,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)  retry:  		write_lock(&nm_i->nat_tree_lock);  		ne = __lookup_nat_cache(nm_i, nid); -		if (ne) { -			__set_nat_cache_dirty(nm_i, ne); -			write_unlock(&nm_i->nat_tree_lock); -			continue; -		} +		if (ne) +			goto found; +  		ne = grab_nat_entry(nm_i, nid);  		if (!ne) {  			write_unlock(&nm_i->nat_tree_lock);  			goto retry;  		}  		node_info_from_raw_nat(&ne->ni, &raw_ne); +found:  		__set_nat_cache_dirty(nm_i, ne);  		write_unlock(&nm_i->nat_tree_lock);  	}  	update_nats_in_cursum(sum, -i);  	mutex_unlock(&curseg->curseg_mutex); -	return true;  }  /* @@ -1794,80 +1864,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)  	struct f2fs_nm_info *nm_i = NM_I(sbi);  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);  	struct f2fs_summary_block *sum = curseg->sum_blk; -	struct nat_entry *ne, *cur; -	struct page *page = NULL; -	struct f2fs_nat_block *nat_blk = NULL; -	nid_t start_nid = 0, end_nid = 0; -	bool flushed; +	struct nat_entry_set *nes, *tmp; +	struct list_head *head = &nm_i->nat_entry_set; +	bool to_journal = true; -	flushed = flush_nats_in_journal(sbi); - -	if (!flushed) -		mutex_lock(&curseg->curseg_mutex); - -	/* 1) flush dirty nat caches */ -	list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) { -		nid_t nid; -		struct f2fs_nat_entry raw_ne; -		int offset = -1; - -		if (nat_get_blkaddr(ne) == NEW_ADDR) -			continue; +	/* merge nat entries of dirty list to nat entry set temporarily */ +	merge_nats_in_set(sbi); -		nid = nat_get_nid(ne); +	/* +	 * if there are no enough space in journal to store dirty nat +	 * entries, remove all entries from journal and merge them +	 * into nat entry set. +	 */ +	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) { +		remove_nats_in_journal(sbi); -		if (flushed) -			goto to_nat_page; +		/* +		 * merge nat entries of dirty list to nat entry set temporarily +		 */ +		merge_nats_in_set(sbi); +	} -		/* if there is room for nat enries in curseg->sumpage */ -		offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); -		if (offset >= 0) { -			raw_ne = nat_in_journal(sum, offset); -			goto flush_now; -		} -to_nat_page: -		if (!page || (start_nid > nid || nid > end_nid)) { -			if (page) { -				f2fs_put_page(page, 1); -				page = NULL; -			} -			start_nid = START_NID(nid); -			end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; +	if (!nm_i->dirty_nat_cnt) +		return; -			/* -			 * get nat block with dirty flag, increased reference -			 * count, mapped and lock -			 */ +	/* +	 * there are two steps to flush nat entries: +	 * #1, flush nat entries to journal in current hot data summary block. +	 * #2, flush nat entries to nat page. +	 */ +	list_for_each_entry_safe(nes, tmp, head, set_list) { +		struct f2fs_nat_block *nat_blk; +		struct nat_entry *ne, *cur; +		struct page *page; +		nid_t start_nid = nes->start_nid; + +		if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) +			to_journal = false; + +		if (to_journal) { +			mutex_lock(&curseg->curseg_mutex); +		} else {  			page = get_next_nat_page(sbi, start_nid);  			nat_blk = page_address(page); +			f2fs_bug_on(!nat_blk);  		} -		f2fs_bug_on(!nat_blk); -		raw_ne = nat_blk->entries[nid - start_nid]; -flush_now: -		raw_nat_from_node_info(&raw_ne, &ne->ni); - -		if (offset < 0) { -			nat_blk->entries[nid - start_nid] = raw_ne; -		} else { -			nat_in_journal(sum, offset) = raw_ne; -			nid_in_journal(sum, offset) = cpu_to_le32(nid); -		} +		/* flush dirty nats in nat entry set */ +		list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { +			struct f2fs_nat_entry *raw_ne; +			nid_t nid = nat_get_nid(ne); +			int offset; + +			if (to_journal) { +				offset = lookup_journal_in_cursum(sum, +							NAT_JOURNAL, nid, 1); +				f2fs_bug_on(offset < 0); +				raw_ne = &nat_in_journal(sum, offset); +				nid_in_journal(sum, offset) = cpu_to_le32(nid); +			} else { +				raw_ne = &nat_blk->entries[nid - start_nid]; +			} +			raw_nat_from_node_info(raw_ne, &ne->ni); -		if (nat_get_blkaddr(ne) == NULL_ADDR && +			if (nat_get_blkaddr(ne) == NULL_ADDR &&  				add_free_nid(sbi, nid, false) <= 0) { -			write_lock(&nm_i->nat_tree_lock); -			__del_from_nat_cache(nm_i, ne); -			write_unlock(&nm_i->nat_tree_lock); -		} else { -			write_lock(&nm_i->nat_tree_lock); -			__clear_nat_cache_dirty(nm_i, ne); -			write_unlock(&nm_i->nat_tree_lock); +				write_lock(&nm_i->nat_tree_lock); +				__del_from_nat_cache(nm_i, ne); +				write_unlock(&nm_i->nat_tree_lock); +			} else { +				write_lock(&nm_i->nat_tree_lock); +				__clear_nat_cache_dirty(nm_i, ne); +				write_unlock(&nm_i->nat_tree_lock); +			}  		} + +		if (to_journal) +			mutex_unlock(&curseg->curseg_mutex); +		else +			f2fs_put_page(page, 1); + +		release_nat_entry_set(nes, nm_i);  	} -	if (!flushed) -		mutex_unlock(&curseg->curseg_mutex); -	f2fs_put_page(page, 1); + +	f2fs_bug_on(!list_empty(head)); +	f2fs_bug_on(nm_i->dirty_nat_cnt);  }  static int init_node_manager(struct f2fs_sb_info *sbi) @@ -1896,6 +1977,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)  	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);  	INIT_LIST_HEAD(&nm_i->nat_entries);  	INIT_LIST_HEAD(&nm_i->dirty_nat_entries); +	INIT_LIST_HEAD(&nm_i->nat_entry_set);  	mutex_init(&nm_i->build_lock);  	spin_lock_init(&nm_i->free_nid_list_lock); @@ -1976,19 +2058,30 @@ int __init create_node_manager_caches(void)  	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",  			sizeof(struct nat_entry));  	if (!nat_entry_slab) -		return -ENOMEM; +		goto fail;  	free_nid_slab = f2fs_kmem_cache_create("free_nid",  			sizeof(struct free_nid)); -	if (!free_nid_slab) { -		kmem_cache_destroy(nat_entry_slab); -		return -ENOMEM; -	} +	if (!free_nid_slab) +		goto destory_nat_entry; + +	nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", +			sizeof(struct nat_entry_set)); +	if (!nat_entry_set_slab) +		goto destory_free_nid;  	return 0; + +destory_free_nid: +	kmem_cache_destroy(free_nid_slab); +destory_nat_entry: +	kmem_cache_destroy(nat_entry_slab); +fail: +	return -ENOMEM;  }  void destroy_node_manager_caches(void)  { +	kmem_cache_destroy(nat_entry_set_slab);  	kmem_cache_destroy(free_nid_slab);  	kmem_cache_destroy(nat_entry_slab);  } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 7281112cd1c8..8a116a407599 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -89,6 +89,13 @@ enum mem_type {  	DIRTY_DENTS	/* indicates dirty dentry pages */  }; +struct nat_entry_set { +	struct list_head set_list;	/* link with all nat sets */ +	struct list_head entry_list;	/* link with dirty nat entries */ +	nid_t start_nid;		/* start nid of nats in set */ +	unsigned int entry_cnt;		/* the # of nat entries in set */ +}; +  /*   * For free nid mangement   */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index a112368a4a86..fe1c6d921ba2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -300,6 +300,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,  	struct node_info ni;  	int err = 0, recovered = 0; +	recover_inline_xattr(inode, page); +  	if (recover_inline_data(inode, page))  		goto out; @@ -434,7 +436,9 @@ next:  int recover_fsync_data(struct f2fs_sb_info *sbi)  { +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);  	struct list_head inode_list; +	block_t blkaddr;  	int err;  	bool need_writecp = false; @@ -447,6 +451,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)  	/* step #1: find fsynced inode numbers */  	sbi->por_doing = true; + +	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); +  	err = find_fsync_dnodes(sbi, &inode_list);  	if (err)  		goto out; @@ -462,8 +469,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)  out:  	destroy_fsync_dnodes(&inode_list);  	kmem_cache_destroy(fsync_entry_slab); + +	if (err) { +		truncate_inode_pages_final(NODE_MAPPING(sbi)); +		truncate_inode_pages_final(META_MAPPING(sbi)); +	} +  	sbi->por_doing = false; -	if (!err && need_writecp) +	if (err) { +		discard_next_dnode(sbi, blkaddr); + +		/* Flush all the NAT/SIT pages */ +		while (get_pages(sbi, F2FS_DIRTY_META)) +			sync_meta_pages(sbi, META, LONG_MAX); +	} else if (need_writecp) {  		write_checkpoint(sbi, false); +	}  	return err;  } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d04613df710a..0dfeebae2a50 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -239,6 +239,12 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)  	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;  	struct flush_cmd cmd; +	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), +					test_opt(sbi, FLUSH_MERGE)); + +	if (test_opt(sbi, NOBARRIER)) +		return 0; +  	if (!test_opt(sbi, FLUSH_MERGE))  		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); @@ -272,13 +278,13 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)  		return -ENOMEM;  	spin_lock_init(&fcc->issue_lock);  	init_waitqueue_head(&fcc->flush_wait_queue); -	sbi->sm_info->cmd_control_info = fcc; +	SM_I(sbi)->cmd_control_info = fcc;  	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,  				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));  	if (IS_ERR(fcc->f2fs_issue_flush)) {  		err = PTR_ERR(fcc->f2fs_issue_flush);  		kfree(fcc); -		sbi->sm_info->cmd_control_info = NULL; +		SM_I(sbi)->cmd_control_info = NULL;  		return err;  	} @@ -287,13 +293,12 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)  void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)  { -	struct flush_cmd_control *fcc = -				sbi->sm_info->cmd_control_info; +	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;  	if (fcc && fcc->f2fs_issue_flush)  		kthread_stop(fcc->f2fs_issue_flush);  	kfree(fcc); -	sbi->sm_info->cmd_control_info = NULL; +	SM_I(sbi)->cmd_control_info = NULL;  }  static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, @@ -377,11 +382,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,  	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);  } -void discard_next_dnode(struct f2fs_sb_info *sbi) +void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)  { -	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); -	block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); -  	if (f2fs_issue_discard(sbi, blkaddr, 1)) {  		struct page *page = grab_meta_page(sbi, blkaddr);  		/* zero-filled page */ @@ -437,17 +439,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,  static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)  {  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); -	unsigned int segno = -1; +	unsigned int segno;  	unsigned int total_segs = TOTAL_SEGS(sbi);  	mutex_lock(&dirty_i->seglist_lock); -	while (1) { -		segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, -				segno + 1); -		if (segno >= total_segs) -			break; +	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs)  		__set_test_and_free(sbi, segno); -	}  	mutex_unlock(&dirty_i->seglist_lock);  } @@ -974,14 +971,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,  {  	struct sit_info *sit_i = SIT_I(sbi);  	struct curseg_info *curseg; -	unsigned int old_cursegno;  	curseg = CURSEG_I(sbi, type);  	mutex_lock(&curseg->curseg_mutex);  	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); -	old_cursegno = curseg->segno;  	/*  	 * __add_sum_entry should be resided under the curseg_mutex @@ -1002,7 +997,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,  	 * since SSR needs latest valid block information.  	 */  	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); -	locate_dirty_segment(sbi, old_cursegno);  	mutex_unlock(&sit_i->sentry_lock); @@ -1532,7 +1526,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)  	struct page *page = NULL;  	struct f2fs_sit_block *raw_sit = NULL;  	unsigned int start = 0, end = 0; -	unsigned int segno = -1; +	unsigned int segno;  	bool flushed;  	mutex_lock(&curseg->curseg_mutex); @@ -1544,7 +1538,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)  	 */  	flushed = flush_sits_in_journal(sbi); -	while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { +	for_each_set_bit(segno, bitmap, nsegs) {  		struct seg_entry *se = get_seg_entry(sbi, segno);  		int sit_offset, offset; @@ -1703,7 +1697,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)  	struct curseg_info *array;  	int i; -	array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); +	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);  	if (!array)  		return -ENOMEM; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7091204680f4..55973f7b0330 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,  	if (test_and_clear_bit(segno, free_i->free_segmap)) {  		free_i->free_segments++; -		next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), -								start_segno); +		next = find_next_bit(free_i->free_segmap, +				start_segno + sbi->segs_per_sec, start_segno);  		if (next >= start_segno + sbi->segs_per_sec) {  			if (test_and_clear_bit(secno, free_i->free_secmap))  				free_i->free_sections++; @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)  	if (S_ISDIR(inode->i_mode))  		return false; +	/* this is only set during fdatasync */ +	if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) +		return true; +  	switch (SM_I(sbi)->ipu_policy) {  	case F2FS_IPU_FORCE:  		return true; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8f96d9372ade..657582fc7601 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -52,6 +52,7 @@ enum {  	Opt_inline_xattr,  	Opt_inline_data,  	Opt_flush_merge, +	Opt_nobarrier,  	Opt_err,  }; @@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {  	{Opt_inline_xattr, "inline_xattr"},  	{Opt_inline_data, "inline_data"},  	{Opt_flush_merge, "flush_merge"}, +	{Opt_nobarrier, "nobarrier"},  	{Opt_err, NULL},  }; @@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)  		case Opt_flush_merge:  			set_opt(sbi, FLUSH_MERGE);  			break; +		case Opt_nobarrier: +			set_opt(sbi, NOBARRIER); +			break;  		default:  			f2fs_msg(sb, KERN_ERR,  				"Unrecognized mount option \"%s\" or missing value", @@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)  		seq_puts(seq, ",inline_data");  	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))  		seq_puts(seq, ",flush_merge"); +	if (test_opt(sbi, NOBARRIER)) +		seq_puts(seq, ",nobarrier");  	seq_printf(seq, ",active_logs=%u", sbi->active_logs);  	return 0; @@ -615,7 +622,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  	 * Previous and new state of filesystem is RO,  	 * so skip checking GC and FLUSH_MERGE conditions.  	 */ -	if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) +	if (f2fs_readonly(sb) && (*flags & MS_RDONLY))  		goto skip;  	/* @@ -642,8 +649,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  	 */  	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {  		destroy_flush_cmd_control(sbi); -	} else if (test_opt(sbi, FLUSH_MERGE) && -					!sbi->sm_info->cmd_control_info) { +	} else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {  		err = create_flush_cmd_control(sbi);  		if (err)  			goto restore_gc; @@ -947,7 +953,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  	mutex_init(&sbi->gc_mutex);  	mutex_init(&sbi->writepages);  	mutex_init(&sbi->cp_mutex); -	mutex_init(&sbi->node_write); +	init_rwsem(&sbi->node_write);  	sbi->por_doing = false;  	spin_lock_init(&sbi->stat_lock); @@ -997,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  	INIT_LIST_HEAD(&sbi->dir_inode_list);  	spin_lock_init(&sbi->dir_inode_lock); -	init_orphan_info(sbi); +	init_ino_entry_info(sbi);  	/* setup f2fs internal modules */  	err = build_segment_manager(sbi); @@ -1034,8 +1040,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  		goto free_node_inode;  	}  	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { +		iput(root);  		err = -EINVAL; -		goto free_root_inode; +		goto free_node_inode;  	}  	sb->s_root = d_make_root(root); /* allocate root dentry */ @@ -1082,7 +1089,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  	 * If filesystem is not mounted as read-only then  	 * do start the gc_thread.  	 */ -	if (!(sb->s_flags & MS_RDONLY)) { +	if (!f2fs_readonly(sb)) {  		/* After POR, we can run background GC thread.*/  		err = start_gc_thread(sbi);  		if (err) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index b983990b4a9f..d06d44363fea 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -587,6 +587,69 @@ TRACE_EVENT(f2fs_fallocate,  		__entry->ret)  ); +TRACE_EVENT(f2fs_direct_IO_enter, + +	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), + +	TP_ARGS(inode, offset, len, rw), + +	TP_STRUCT__entry( +		__field(dev_t,	dev) +		__field(ino_t,	ino) +		__field(loff_t,	pos) +		__field(unsigned long,	len) +		__field(int,	rw) +	), + +	TP_fast_assign( +		__entry->dev	= inode->i_sb->s_dev; +		__entry->ino	= inode->i_ino; +		__entry->pos	= offset; +		__entry->len	= len; +		__entry->rw	= rw; +	), + +	TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d", +		show_dev_ino(__entry), +		__entry->pos, +		__entry->len, +		__entry->rw) +); + +TRACE_EVENT(f2fs_direct_IO_exit, + +	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, +		 int rw, int ret), + +	TP_ARGS(inode, offset, len, rw, ret), + +	TP_STRUCT__entry( +		__field(dev_t,	dev) +		__field(ino_t,	ino) +		__field(loff_t,	pos) +		__field(unsigned long,	len) +		__field(int,	rw) +		__field(int,	ret) +	), + +	TP_fast_assign( +		__entry->dev	= inode->i_sb->s_dev; +		__entry->ino	= inode->i_ino; +		__entry->pos	= offset; +		__entry->len	= len; +		__entry->rw	= rw; +		__entry->ret	= ret; +	), + +	TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu " +		"rw = %d ret = %d", +		show_dev_ino(__entry), +		__entry->pos, +		__entry->len, +		__entry->rw, +		__entry->ret) +); +  TRACE_EVENT(f2fs_reserve_new_block,  	TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), @@ -926,6 +989,30 @@ TRACE_EVENT(f2fs_issue_discard,  		(unsigned long long)__entry->blkstart,  		(unsigned long long)__entry->blklen)  ); + +TRACE_EVENT(f2fs_issue_flush, + +	TP_PROTO(struct super_block *sb, bool nobarrier, bool flush_merge), + +	TP_ARGS(sb, nobarrier, flush_merge), + +	TP_STRUCT__entry( +		__field(dev_t,	dev) +		__field(bool, nobarrier) +		__field(bool, flush_merge) +	), + +	TP_fast_assign( +		__entry->dev	= sb->s_dev; +		__entry->nobarrier = nobarrier; +		__entry->flush_merge = flush_merge; +	), + +	TP_printk("dev = (%d,%d), %s %s", +		show_dev(__entry), +		__entry->nobarrier ? "skip (nobarrier)" : "issue", +		__entry->flush_merge ? " with flush_merge" : "") +);  #endif /* _TRACE_F2FS_H */   /* This part must be outside protection */ | 
