diff options
author | J. Bruce Fields <bfields@redhat.com> | 2012-10-09 18:35:22 -0400 |
---|---|---|
committer | J. Bruce Fields <bfields@redhat.com> | 2012-10-09 18:35:22 -0400 |
commit | f474af7051212b4efc8267583fad9c4ebf33ccff (patch) | |
tree | 1aa46ebc8065a341f247c2a2d9af2f624ad1d4f8 /fs/btrfs | |
parent | 0d22f68f02c10d5d10ec5712917e5828b001a822 (diff) | |
parent | e3dd9a52cb5552c46c2a4ca7ccdfb4dab5c72457 (diff) |
nfs: disintegrate UAPI for nfs
This is to complete part of the Userspace API (UAPI) disintegration for which
the preparatory patches were pulled recently. After these patches, userspace
headers will be segregated into:
include/uapi/linux/.../foo.h
for the userspace interface stuff, and:
include/linux/.../foo.h
for the strictly kernel internal stuff.
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/acl.c | 8 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 4 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 1 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 9 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 5 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.c | 20 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 163 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.h | 6 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 53 | ||||
-rw-r--r-- | fs/btrfs/disk-io.h | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 123 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 23 | ||||
-rw-r--r-- | fs/btrfs/file-item.c | 4 | ||||
-rw-r--r-- | fs/btrfs/file.c | 2 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 343 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 40 | ||||
-rw-r--r-- | fs/btrfs/locking.c | 2 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 4 | ||||
-rw-r--r-- | fs/btrfs/reada.c | 18 | ||||
-rw-r--r-- | fs/btrfs/root-tree.c | 4 | ||||
-rw-r--r-- | fs/btrfs/super.c | 15 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 3 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 33 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 2 |
24 files changed, 465 insertions, 422 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 761e2cd8fed1..0c16e3dbfd56 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -61,7 +61,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) size = __btrfs_getxattr(inode, name, value, size); } if (size > 0) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); } else if (size == -ENOENT || size == -ENODATA || size == 0) { /* FIXME, who returns -ENOENT? I think nobody */ acl = NULL; @@ -91,7 +91,7 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name, return PTR_ERR(acl); if (acl == NULL) return -ENODATA; - ret = posix_acl_to_xattr(acl, value, size); + ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); posix_acl_release(acl); return ret; @@ -141,7 +141,7 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, goto out; } - ret = posix_acl_to_xattr(acl, value, size); + ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (ret < 0) goto out; } @@ -169,7 +169,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, return -EOPNOTSUPP; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a256f3b2a845..ff6475f409d6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, ret = extent_from_logical(fs_info, logical, path, &found_key); btrfs_release_path(path); - if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) - ret = -EINVAL; if (ret < 0) return ret; + if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) + return -EINVAL; extent_item_pos = logical - found_key.objectid; ret = iterate_extent_inodes(fs_info, found_key.objectid, diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 86eff48dab78..43d1c5a3a030 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace) btrfs_compress_op[idx]->free_workspace(workspace); atomic_dec(alloc_workspace); wake: + smp_mb(); if (waitqueue_active(workspace_wait)) wake_up(workspace_wait); } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9d7621f271ff..6d183f60d63a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->tree_mod_seq_lock); /* - * we removed the lowest blocker from the blocker list, so there may be - * more processible delayed refs. - */ - wake_up(&fs_info->tree_mod_seq_wait); - - /* * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. */ @@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) u32 nritems; int ret; + if (btrfs_header_level(eb) == 0) + return; + nritems = btrfs_header_nritems(eb); for (i = nritems - 1; i >= 0; i--) { ret = tree_mod_log_insert_key_locked(fs_info, eb, i, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4bab807227ad..9821b672f5a2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -116,7 +116,7 @@ struct btrfs_ordered_sum; #define BTRFS_FREE_SPACE_OBJECTID -11ULL /* - * The inode number assigned to the special inode for sotring + * The inode number assigned to the special inode for storing * free ino cache */ #define BTRFS_FREE_INO_OBJECTID -12ULL @@ -1252,7 +1252,6 @@ struct btrfs_fs_info { atomic_t tree_mod_seq; struct list_head tree_mod_seq_list; struct seq_list tree_mod_seq_elem; - wait_queue_head_t tree_mod_seq_wait; /* this protects tree_mod_log */ rwlock_t tree_mod_log_lock; @@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 logical_offset, u32 *dst); + struct bio *bio, u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 335605c8ceab..52c85e2b95d0 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) rb_erase(&delayed_item->rb_node, root); delayed_item->delayed_node->count--; - atomic_dec(&delayed_root->items); - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && + if (atomic_dec_return(&delayed_root->items) < + BTRFS_DELAYED_BACKGROUND && waitqueue_active(&delayed_root->wait)) wake_up(&delayed_root->wait); } @@ -1028,9 +1028,10 @@ do_again: btrfs_release_delayed_item(prev); ret = 0; btrfs_release_path(path); - if (curr) + if (curr) { + mutex_unlock(&node->mutex); goto do_again; - else + } else goto delete_fail; } @@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) delayed_node->count--; delayed_root = delayed_node->root->fs_info->delayed_root; - atomic_dec(&delayed_root->items); - if (atomic_read(&delayed_root->items) < + if (atomic_dec_return(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && waitqueue_active(&delayed_root->wait)) wake_up(&delayed_root->wait); @@ -1715,8 +1715,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *inode_item, struct inode *inode) { - btrfs_set_stack_inode_uid(inode_item, inode->i_uid); - btrfs_set_stack_inode_gid(inode_item, inode->i_gid); + btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode)); + btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode)); btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); btrfs_set_stack_inode_mode(inode_item, inode->i_mode); btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); @@ -1764,8 +1764,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) inode_item = &delayed_node->inode_item; - inode->i_uid = btrfs_stack_inode_uid(inode_item); - inode->i_gid = btrfs_stack_inode_gid(inode_item); + i_uid_write(inode, btrfs_stack_inode_uid(inode_item)); + i_gid_write(inode, btrfs_stack_inode_gid(inode_item)); btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); inode->i_mode = btrfs_stack_inode_mode(inode_item); set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index da7419ed01bb..ae9411773397 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -38,17 +38,14 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, struct btrfs_delayed_tree_ref *ref1) { - if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { - if (ref1->root < ref2->root) - return -1; - if (ref1->root > ref2->root) - return 1; - } else { - if (ref1->parent < ref2->parent) - return -1; - if (ref1->parent > ref2->parent) - return 1; - } + if (ref1->root < ref2->root) + return -1; + if (ref1->root > ref2->root) + return 1; + if (ref1->parent < ref2->parent) + return -1; + if (ref1->parent > ref2->parent) + return 1; return 0; } @@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, * type of the delayed backrefs and content of delayed backrefs. */ static int comp_entry(struct btrfs_delayed_ref_node *ref2, - struct btrfs_delayed_ref_node *ref1) + struct btrfs_delayed_ref_node *ref1, + bool compare_seq) { if (ref1->bytenr < ref2->bytenr) return -1; @@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, if (ref1->type > ref2->type) return 1; /* merging of sequenced refs is not allowed */ - if (ref1->seq < ref2->seq) - return -1; - if (ref1->seq > ref2->seq) - return 1; + if (compare_seq) { + if (ref1->seq < ref2->seq) + return -1; + if (ref1->seq > ref2->seq) + return 1; + } if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), @@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, rb_node); - cmp = comp_entry(entry, ins); + cmp = comp_entry(entry, ins, 1); if (cmp < 0) p = &(*p)->rb_left; else if (cmp > 0) @@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, return 0; } +static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *ref) +{ + rb_erase(&ref->rb_node, &delayed_refs->root); + ref->in_tree = 0; + btrfs_put_delayed_ref(ref); + delayed_refs->num_entries--; + if (trans->delayed_ref_updates) + trans->delayed_ref_updates--; +} + +static int merge_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *ref, u64 seq) +{ + struct rb_node *node; + int merged = 0; + int mod = 0; + int done = 0; + + node = rb_prev(&ref->rb_node); + while (node) { + struct btrfs_delayed_ref_node *next; + + next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + node = rb_prev(node); + if (next->bytenr != ref->bytenr) + break; + if (seq && next->seq >= seq) + break; + if (comp_entry(ref, next, 0)) + continue; + + if (ref->action == next->action) { + mod = next->ref_mod; + } else { + if (ref->ref_mod < next->ref_mod) { + struct btrfs_delayed_ref_node *tmp; + + tmp = ref; + ref = next; + next = tmp; + done = 1; + } + mod = -next->ref_mod; + } + + merged++; + drop_delayed_ref(trans, delayed_refs, next); + ref->ref_mod += mod; + if (ref->ref_mod == 0) { + drop_delayed_ref(trans, delayed_refs, ref); + break; + } else { + /* + * You can't have multiples of the same ref on a tree + * block. + */ + WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || + ref->type == BTRFS_SHARED_BLOCK_REF_KEY); + } + + if (done) + break; + node = rb_prev(&ref->rb_node); + } + + return merged; +} + +void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) +{ + struct rb_node *node; + u64 seq = 0; + + spin_lock(&fs_info->tree_mod_seq_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + struct seq_list *elem; + + elem = list_first_entry(&fs_info->tree_mod_seq_list, + struct seq_list, list); + seq = elem->seq; + } + spin_unlock(&fs_info->tree_mod_seq_lock); + + node = rb_prev(&head->node.rb_node); + while (node) { + struct btrfs_delayed_ref_node *ref; + + ref = rb_entry(node, struct btrfs_delayed_ref_node, + rb_node); + if (ref->bytenr != head->node.bytenr) + break; + + /* We can't merge refs that are outside of our seq count */ + if (seq && ref->seq >= seq) + break; + if (merge_ref(trans, delayed_refs, ref, seq)) + node = rb_prev(&head->node.rb_node); + else + node = rb_prev(node); + } +} + int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, u64 seq) @@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans, * every changing the extent allocation tree. */ existing->ref_mod--; - if (existing->ref_mod == 0) { - rb_erase(&existing->rb_node, - &delayed_refs->root); - existing->in_tree = 0; - btrfs_put_delayed_ref(existing); - delayed_refs->num_entries--; - if (trans->delayed_ref_updates) - trans->delayed_ref_updates--; - } else { + if (existing->ref_mod == 0) + drop_delayed_ref(trans, delayed_refs, existing); + else WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || existing->type == BTRFS_SHARED_BLOCK_REF_KEY); - } } else { WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || existing->type == BTRFS_SHARED_BLOCK_REF_KEY); @@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, level, action, for_cow); - if (!need_ref_seq(for_cow, ref_root) && - waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); if (need_ref_seq(for_cow, ref_root)) btrfs_qgroup_record_ref(trans, &ref->node, extent_op); @@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, action, for_cow); - if (!need_ref_seq(for_cow, ref_root) && - waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); if (need_ref_seq(for_cow, ref_root)) btrfs_qgroup_record_ref(trans, &ref->node, extent_op); @@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, num_bytes, BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data); - if (waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); return 0; } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 0d7c90c366b6..c9d703693df0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -18,7 +18,7 @@ #ifndef __DELAYED_REF__ #define __DELAYED_REF__ -/* these are the possible values of struct btrfs_delayed_ref->action */ +/* these are the possible values of struct btrfs_delayed_ref_node->action */ #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ @@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct btrfs_delayed_extent_op *extent_op); +void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); struct btrfs_delayed_ref_head * btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62e0cafd6e25..22e98e04c2ea 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, ret = read_extent_buffer_pages(io_tree, eb, start, WAIT_COMPLETE, btree_get_extent, mirror_num); - if (!ret && !verify_parent_transid(io_tree, eb, + if (!ret) { + if (!verify_parent_transid(io_tree, eb, parent_transid, 0)) - break; + break; + else + ret = -EIO; + } /* * This buffer's crc is fine, but its contents are corrupted, so @@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work) limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; - atomic_dec(&fs_info->nr_async_submits); - - if (atomic_read(&fs_info->nr_async_submits) < limit && + if (atomic_dec_return(&fs_info->nr_async_submits) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); @@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb, fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; - init_waitqueue_head(&fs_info->tree_mod_seq_wait); - /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); spin_lock_init(&fs_info->reada_lock); @@ -2528,8 +2528,7 @@ retry_root_backup: goto fail_trans_kthread; /* do not make disk changes in broken FS */ - if (btrfs_super_log_root(disk_super) != 0 && - !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { + if (btrfs_super_log_root(disk_super) != 0) { u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { @@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root) /* clear out the rbtree of defraggable inodes */ btrfs_run_defrag_inodes(fs_info); - /* - * Here come 2 situations when btrfs is broken to flip readonly: - * - * 1. when btrfs flips readonly somewhere else before - * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, - * and btrfs will skip to write sb directly to keep - * ERROR state on disk. - * - * 2. when btrfs flips readonly just in btrfs_commit_super, - * and in such case, btrfs cannot write sb via btrfs_commit_super, - * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, - * btrfs will cleanup all FS resources first and write sb then. - */ if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); if (ret) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - ret = btrfs_error_commit_super(root); - if (ret) - printk(KERN_ERR "btrfs: commit super ret %d\n", ret); - } + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + btrfs_error_commit_super(root); btrfs_put_block_group_cache(fs_info); @@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, if (read_only) return 0; - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - printk(KERN_WARNING "warning: mount fs with errors, " - "running btrfsck is recommended\n"); - } - return 0; } -int btrfs_error_commit_super(struct btrfs_root *root) +void btrfs_error_commit_super(struct btrfs_root *root) { - int ret; - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root) /* cleanup FS via transaction */ btrfs_cleanup_transaction(root); - - ret = write_ctree_super(NULL, root, 0); - - return ret; } static void btrfs_destroy_ordered_operations(struct btrfs_root *root) @@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) /* FIXME: cleanup wait for commit */ t->in_commit = 1; t->blocked = 1; + smp_mb(); if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) wake_up(&root->fs_info->transaction_blocked_wait); t->blocked = 0; + smp_mb(); if (waitqueue_active(&root->fs_info->transaction_wait)) wake_up(&root->fs_info->transaction_wait); t->commit_done = 1; + smp_mb(); if (waitqueue_active(&t->commit_wait)) wake_up(&t->commit_wait); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 95e147eea239..c5b00a735fef 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors); struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_commit_super(struct btrfs_root *root); -int btrfs_error_commit_super(struct btrfs_root *root); +void btrfs_error_commit_super(struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4e1b153b7c47..ba58024d40d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } /* + * We need to try and merge add/drops of the same ref since we + * can run into issues with relocate dropping the implicit ref + * and then it being added back again before the drop can + * finish. If we merged anything we need to re-loop so we can + * get a good ref. + */ + btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, + locked_ref); + + /* * locked_ref is the head node, so we have to go one * node back for any delayed ref updates */ @@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ref->in_tree = 0; rb_erase(&ref->rb_node, &delayed_refs->root); delayed_refs->num_entries--; - /* - * we modified num_entries, but as we're currently running - * delayed refs, skip - * wake_up(&delayed_refs->seq_wait); - * here. - */ + if (locked_ref) { + /* + * when we play the delayed ref, also correct the + * ref_mod on head + */ + switch (ref->action) { + case BTRFS_ADD_DELAYED_REF: + case BTRFS_ADD_DELAYED_EXTENT: + locked_ref->node.ref_mod -= ref->ref_mod; + break; + case BTRFS_DROP_DELAYED_REF: + locked_ref->node.ref_mod += ref->ref_mod; + break; + default: + WARN_ON(1); + } + } spin_unlock(&delayed_refs->lock); ret = run_one_delayed_ref(trans, root, ref, extent_op, @@ -2350,22 +2371,6 @@ next: return count; } -static void wait_for_more_refs(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - unsigned long num_refs, - struct list_head *first_seq) -{ - spin_unlock(&delayed_refs->lock); - pr_debug("waiting for more refs (num %ld, first %p)\n", - num_refs, first_seq); - wait_event(fs_info->tree_mod_seq_wait, - num_refs != delayed_refs->num_entries || - fs_info->tree_mod_seq_list.next != first_seq); - pr_debug("done waiting for more refs (num %ld, first %p)\n", - delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); - spin_lock(&delayed_refs->lock); -} - #ifdef SCRAMBLE_DELAYED_REFS /* * Normally delayed refs get processed in ascending bytenr order. This @@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; struct list_head cluster; - struct list_head *first_seq = NULL; int ret; u64 delayed_start; int run_all = count == (unsigned long)-1; int run_most = 0; - unsigned long num_refs = 0; - int consider_waiting; + int loops; /* We'll clean this up in btrfs_cleanup_transaction */ if (trans->aborted) @@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, delayed_refs = &trans->transaction->delayed_refs; INIT_LIST_HEAD(&cluster); again: - consider_waiting = 0; + loops = 0; spin_lock(&delayed_refs->lock); #ifdef SCRAMBLE_DELAYED_REFS @@ -2512,31 +2515,6 @@ again: if (ret) break; - if (delayed_start >= delayed_refs->run_delayed_start) { - if (consider_waiting == 0) { - /* - * btrfs_find_ref_cluster looped. let's do one - * more cycle. if we don't run any delayed ref - * during that cycle (because we can't because - * all of them are blocked) and if the number of - * refs doesn't change, we avoid busy waiting. - */ - consider_waiting = 1; - num_refs = delayed_refs->num_entries; - first_seq = root->fs_info->tree_mod_seq_list.next; - } else { - wait_for_more_refs(root->fs_info, delayed_refs, - num_refs, first_seq); - /* - * after waiting, things have changed. we - * dropped the lock and someone else might have - * run some refs, built new clusters and so on. - * therefore, we restart staleness detection. - */ - consider_waiting = 0; - } - } - ret = run_clustered_refs(trans, root, &cluster); if (ret < 0) { spin_unlock(&delayed_refs->lock); @@ -2549,9 +2527,26 @@ again: if (count == 0) break; - if (ret || delayed_refs->run_delayed_start == 0) { + if (delayed_start >= delayed_refs->run_delayed_start) { + if (loops == 0) { + /* + * btrfs_find_ref_cluster looped. let's do one + * more cycle. if we don't run any delayed ref + * during that cycle (because we can't because + * all of them are blocked), bail out. + */ + loops = 1; + } else { + /* + * no runnable refs left, stop trying + */ + BUG_ON(run_all); + break; + } + } + if (ret) { /* refs were run, let's reset staleness detection */ - consider_waiting = 0; + loops = 0; } } @@ -3007,17 +3002,16 @@ again: } spin_unlock(&block_group->lock); - num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); + /* + * Try to preallocate enough space based on how big the block group is. + * Keep in mind this has to include any pinned space which could end up + * taking up quite a bit since it's not folded into the other space + * cache. + */ + num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024); if (!num_pages) num_pages = 1; - /* - * Just to make absolutely sure we have enough space, we're going to - * preallocate 12 pages worth of space for each block group. In - * practice we ought to use at most 8, but we need extra space so we can - * add our header and have a terminator between the extents and the - * bitmaps. - */ num_pages *= 16; num_pages *= PAGE_CACHE_SIZE; @@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (root->fs_info->quota_enabled) { ret = btrfs_qgroup_reserve(root, num_bytes + nr_extents * root->leafsize); - if (ret) + if (ret) { + mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); return ret; + } } ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); @@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, rb_erase(&head->node.rb_node, &delayed_refs->root); delayed_refs->num_entries--; - smp_mb(); - if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) - wake_up(&root->fs_info->tree_mod_seq_wait); /* * we don't take a ref on the node because we're removing it from the diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 45c81bb4ac82..b08ea4717e9d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -107,6 +107,12 @@ void extent_io_exit(void) list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } + + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); if (extent_state_cache) kmem_cache_destroy(extent_state_cache); if (extent_buffer_cache) @@ -2330,23 +2336,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, state, mirror); - if (ret) { - /* no IO indicated but software detected errors - * in the block, either checksum errors or - * issues with the contents */ - struct btrfs_root *root = - BTRFS_I(page->mapping->host)->root; - struct btrfs_device *device; - + if (ret) uptodate = 0; - device = btrfs_find_device_for_logical( - root, start, mirror); - if (device) - btrfs_dev_stat_inc_and_print(device, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - } else { + else clean_io_failure(start, page); - } } if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b45b9de0c21d..857d93cd01dc 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, } int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 offset, u32 *dst) + struct bio *bio, u64 offset) { - return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); + return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5caf285c6e4d..f6b40e86121b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1599,6 +1599,7 @@ out: static const struct vm_operations_struct btrfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = btrfs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) @@ -1610,7 +1611,6 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) file_accessed(filp); vma->vm_ops = &btrfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6e8f416773d4..a6ed6944e50c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; - atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); - - if (atomic_read(&root->fs_info->async_delalloc_pages) < + if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) < 5 * 1024 * 1024 && waitqueue_active(&root->fs_info->async_submit_wait)) wake_up(&root->fs_info->async_submit_wait); @@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) trans = btrfs_join_transaction_nolock(root); else trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) /* -ENOMEM or corruption */ @@ -1970,8 +1971,8 @@ out: ordered_extent->len - 1, NULL, GFP_NOFS); /* - * This needs to be dont to make sure anybody waiting knows we are done - * upating everything for this ordered extent. + * This needs to be done to make sure anybody waiting knows we are done + * updating everything for this ordered extent. */ btrfs_remove_ordered_extent(inode, ordered_extent); @@ -2571,8 +2572,8 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item); inode->i_mode = btrfs_inode_mode(leaf, inode_item); set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); - inode->i_uid = btrfs_inode_uid(leaf, inode_item); - inode->i_gid = btrfs_inode_gid(leaf, inode_item); + i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); + i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); tspec = btrfs_inode_atime(inode_item); @@ -2650,8 +2651,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *item, struct inode *inode) { - btrfs_set_inode_uid(leaf, item, inode->i_uid); - btrfs_set_inode_gid(leaf, item, inode->i_gid); + btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); + btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); btrfs_set_inode_mode(leaf, item, inode->i_mode); btrfs_set_inode_nlink(leaf, item, inode->i_nlink); @@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_i_size_write(dir, dir->i_size - name_len * 2); inode_inc_iversion(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, dir); + ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) btrfs_abort_transaction(trans, root, ret); out: @@ -5774,18 +5775,112 @@ out: return ret; } +static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, + struct extent_state **cached_state, int writing) +{ + struct btrfs_ordered_extent *ordered; + int ret = 0; + + while (1) { + lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, + 0, cached_state); + /* + * We're concerned with the entire range that we're going to be + * doing DIO to, so we need to make sure theres no ordered + * extents in this range. + */ + ordered = btrfs_lookup_ordered_range(inode, lockstart, + lockend - lockstart + 1); + + /* + * We need to make sure there are no buffered pages in this + * range either, we could have raced between the invalidate in + * generic_file_direct_write and locking the extent. The + * invalidate needs to happen so that reads after a write do not + * get stale data. + */ + if (!ordered && (!writing || + !test_range_bit(&BTRFS_I(inode)->io_tree, + lockstart, lockend, EXTENT_UPTODATE, 0, + *cached_state))) + break; + + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, + cached_state, GFP_NOFS); + + if (ordered) { + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } else { + /* Screw you mmap */ + ret = filemap_write_and_wait_range(inode->i_mapping, + lockstart, + lockend); + if (ret) + break; + + /* + * If we found a page that couldn't be invalidated just + * fall back to buffered. + */ + ret = invalidate_inode_pages2_range(inode->i_mapping, + lockstart >> PAGE_CACHE_SHIFT, + lockend >> PAGE_CACHE_SHIFT); + if (ret) + break; + } + + cond_resched(); + } + + return ret; +} + static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_state *cached_state = NULL; u64 start = iblock << inode->i_blkbits; + u64 lockstart, lockend; u64 len = bh_result->b_size; struct btrfs_trans_handle *trans; + int unlock_bits = EXTENT_LOCKED; + int ret; + + if (create) { + ret = btrfs_delalloc_reserve_space(inode, len); + if (ret) + return ret; + unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; + } else { + len = min_t(u64, len, root->sectorsize); + } + + lockstart = start; + lockend = start + len - 1; + + /* + * If this errors out it's because we couldn't invalidate pagecache for + * this range and we need to fallback to buffered. + */ + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) + return -ENOTBLK; + + if (create) { + ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockend, EXTENT_DELALLOC, NULL, + &cached_state, GFP_NOFS); + if (ret) + goto unlock_err; + } em = btrfs_get_extent(inode, NULL, 0, start, len, 0); - if (IS_ERR(em)) - return PTR_ERR(em); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto unlock_err; + } /* * Ok for INLINE and COMPRESSED extents we need to fallback on buffered @@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || em->block_start == EXTENT_MAP_INLINE) { free_extent_map(em); - return -ENOTBLK; + ret = -ENOTBLK; + goto unlock_err; } /* Just a good old fashioned hole, return */ if (!create && (em->block_start == EXTENT_MAP_HOLE || test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { free_extent_map(em); - /* DIO will do one hole at a time, so just unlock a sector */ - unlock_extent(&BTRFS_I(inode)->io_tree, start, - start + root->sectorsize - 1); - return 0; + ret = 0; + goto unlock_err; } /* @@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * */ if (!create) { - len = em->len - (start - em->start); - goto map; + len = min(len, em->len - (start - em->start)); + lockstart = start + len; + goto unlock; } if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || @@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, btrfs_end_transaction(trans, root); if (ret) { free_extent_map(em); - return ret; + goto unlock_err; } goto unlock; } @@ -5873,14 +5968,12 @@ must_cow: */ len = bh_result->b_size; em = btrfs_new_extent_direct(inode, em, start, len); - if (IS_ERR(em)) - return PTR_ERR(em); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto unlock_err; + } len = min(len, em->len - (start - em->start)); unlock: - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, - EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, - 0, NULL, GFP_NOFS); -map: bh_result->b_blocknr = (em->block_start + (start - em->start)) >> inode->i_blkbits; bh_result->b_size = len; @@ -5898,9 +5991,44 @@ map: i_size_write(inode, start + len); } + /* + * In the case of write we need to clear and unlock the entire range, + * in the case of read we need to unlock only the end area that we + * aren't using if there is any left over space. + */ + if (lockstart < lockend) { + if (create && len < lockend - lockstart) { + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockstart + len - 1, unlock_bits, 1, 0, + &cached_state, GFP_NOFS); + /* + * Beside unlock, we also need to cleanup reserved space + * for the left range by attaching EXTENT_DO_ACCOUNTING. + */ + clear_extent_bit(&BTRFS_I(inode)->io_tree, + lockstart + len, lockend, + unlock_bits | EXTENT_DO_ACCOUNTING, + 1, 0, NULL, GFP_NOFS); + } else { + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockend, unlock_bits, 1, 0, + &cached_state, GFP_NOFS); + } + } else { + free_extent_state(cached_state); + } + free_extent_map(em); return 0; + +unlock_err: + if (create) + unlock_bits |= EXTENT_DO_ACCOUNTING; + + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, + unlock_bits, 1, 0, &cached_state, GFP_NOFS); + return ret; } struct btrfs_dio_private { @@ -5908,7 +6036,6 @@ struct btrfs_dio_private { u64 logical_offset; u64 disk_bytenr; u64 bytes; - u32 *csums; void *private; /* number of bios pending for this dio */ @@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; u64 start; - u32 *private = dip->csums; start = dip->logical_offset; do { @@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct page *page = bvec->bv_page; char *kaddr; u32 csum = ~(u32)0; + u64 private = ~(u32)0; unsigned long flags; + if (get_state_private(&BTRFS_I(inode)->io_tree, + start, &private)) + goto failed; local_irq_save(flags); kaddr = kmap_atomic(page); csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, @@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) local_irq_restore(flags); flush_dcache_page(bvec->bv_page); - if (csum != *private) { + if (csum != private) { +failed: printk(KERN_ERR "btrfs csum failed ino %llu off" " %llu csum %u private %u\n", (unsigned long long)btrfs_ino(inode), (unsigned long long)start, - csum, *private); + csum, (unsigned)private); err = -EIO; } } start += bvec->bv_len; - private++; bvec++; } while (bvec <= bvec_end); @@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) dip->logical_offset + dip->bytes - 1); bio->bi_private = dip->private; - kfree(dip->csums); kfree(dip); /* If we had a csum failure make sure to clear the uptodate flag */ @@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int rw, u64 file_offset, int skip_sum, - u32 *csums, int async_submit) + int async_submit) { int write = rw & REQ_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (ret) goto err; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums_dio(root, inode, bio, - file_offset, csums); + ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); if (ret) goto err; } @@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, u64 submit_len = 0; u64 map_length; int nr_pages = 0; - u32 *csums = dip->csums; int ret = 0; int async_submit = 0; - int write = rw & REQ_WRITE; map_length = orig_bio->bi_size; ret = btrfs_map_block(map_tree, READ, start_sector << 9, @@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, atomic_inc(&dip->pending_bios); ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, - csums, async_submit); + async_submit); if (ret) { bio_put(bio); atomic_dec(&dip->pending_bios); goto out_err; } - /* Write's use the ordered csums */ - if (!write && !skip_sum) - csums = csums + nr_pages; start_sector += submit_len >> 9; file_offset += submit_len; @@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, submit: ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, - csums, async_submit); + async_submit); if (!ret) return 0; @@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, ret = -ENOMEM; goto free_ordered; } - dip->csums = NULL; - - /* Write's use the ordered csum stuff, so we don't need dip->csums */ - if (!write && !skip_sum) { - dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); - if (!dip->csums) { - kfree(dip); - ret = -ENOMEM; - goto free_ordered; - } - } dip->private = bio->bi_private; dip->inode = inode; @@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io out: return retval; } + static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - struct btrfs_ordered_extent *ordered; - struct extent_state *cached_state = NULL; - u64 lockstart, lockend; - ssize_t ret; - int writing = rw & WRITE; - int write_bits = 0; - size_t count = iov_length(iov, nr_segs); if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, - offset, nr_segs)) { + offset, nr_segs)) return 0; - } - - lockstart = offset; - lockend = offset + count - 1; - if (writing) { - ret = btrfs_delalloc_reserve_space(inode, count); - if (ret) - goto out; - } - - while (1) { - lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, - 0, &cached_state); - /* - * We're concerned with the entire range that we're going to be - * doing DIO to, so we need to make sure theres no ordered - * extents in this range. - */ - ordered = btrfs_lookup_ordered_range(inode, lockstart, - lockend - lockstart + 1); - - /* - * We need to make sure there are no buffered pages in this - * range either, we could have raced between the invalidate in - * generic_file_direct_write and locking the extent. The - * invalidate needs to happen so that reads after a write do not - * get stale data. - */ - if (!ordered && (!writing || - !test_range_bit(&BTRFS_I(inode)->io_tree, - lockstart, lockend, EXTENT_UPTODATE, 0, - cached_state))) - break; - - unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, - &cached_state, GFP_NOFS); - - if (ordered) { - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } else { - /* Screw you mmap */ - ret = filemap_write_and_wait_range(file->f_mapping, - lockstart, - lockend); - if (ret) - goto out; - - /* - * If we found a page that couldn't be invalidated just - * fall back to buffered. - */ - ret = invalidate_inode_pages2_range(file->f_mapping, - lockstart >> PAGE_CACHE_SHIFT, - lockend >> PAGE_CACHE_SHIFT); - if (ret) { - if (ret == -EBUSY) - ret = 0; - goto out; - } - } - - cond_resched(); - } - - /* - * we don't use btrfs_set_extent_delalloc because we don't want - * the dirty or uptodate bits - */ - if (writing) { - write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; - ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - EXTENT_DELALLOC, NULL, &cached_state, - GFP_NOFS); - if (ret) { - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, - lockend, EXTENT_LOCKED | write_bits, - 1, 0, &cached_state, GFP_NOFS); - goto out; - } - } - - free_extent_state(cached_state); - cached_state = NULL; - - ret = __blockdev_direct_IO(rw, iocb, inode, + return __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, 0); - - if (ret < 0 && ret != -EIOCBQUEUED) { - clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, - offset + iov_length(iov, nr_segs) - 1, - EXTENT_LOCKED | write_bits, 1, 0, - &cached_state, GFP_NOFS); - } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { - /* - * We're falling back to buffered, unlock the section we didn't - * do IO on. - */ - clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, - offset + iov_length(iov, nr_segs) - 1, - EXTENT_LOCKED | write_bits, 1, 0, - &cached_state, GFP_NOFS); - } -out: - free_extent_state(cached_state); - return ret; } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, @@ -7074,6 +7076,11 @@ static void init_once(void *foo) void btrfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (btrfs_inode_cachep) kmem_cache_destroy(btrfs_inode_cachep); if (btrfs_trans_handle_cachep) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7bb755677a22..47127c1bd290 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root, uuid_le_gen(&new_uuid); memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); - root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); + root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); root_item.ctime = root_item.otime; btrfs_set_root_ctransid(&root_item, trans->transid); btrfs_set_root_otransid(&root_item, trans->transid); @@ -575,13 +575,13 @@ fail: */ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) { - uid_t fsuid = current_fsuid(); + kuid_t fsuid = current_fsuid(); if (!(dir->i_mode & S_ISVTX)) return 0; - if (inode->i_uid == fsuid) + if (uid_eq(inode->i_uid, fsuid)) return 0; - if (dir->i_uid == fsuid) + if (uid_eq(dir->i_uid, fsuid)) return 0; return !capable(CAP_FOWNER); } @@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, u64 *transid, bool readonly, struct btrfs_qgroup_inherit **inherit) { - struct file *src_file; int namelen; int ret = 0; @@ -1421,25 +1420,24 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, ret = btrfs_mksubvol(&file->f_path, name, namelen, NULL, transid, readonly, inherit); } else { + struct fd src = fdget(fd); struct inode *src_inode; - src_file = fget(fd); - if (!src_file) { + if (!src.file) { ret = -EINVAL; goto out_drop_write; } - src_inode = src_file->f_path.dentry->d_inode; + src_inode = src.file->f_path.dentry->d_inode; if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); ret = -EINVAL; - fput(src_file); - goto out_drop_write; + } else { + ret = btrfs_mksubvol(&file->f_path, name, namelen, + BTRFS_I(src_inode)->root, + transid, readonly, inherit); } - ret = btrfs_mksubvol(&file->f_path, name, namelen, - BTRFS_I(src_inode)->root, - transid, readonly, inherit); - fput(src_file); + fdput(src); } out_drop_write: mnt_drop_write_file(file); @@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct file *src_file; + struct fd src_file; struct inode *src; struct btrfs_trans_handle *trans; struct btrfs_path *path; @@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (ret) return ret; - src_file = fget(srcfd); - if (!src_file) { + src_file = fdget(srcfd); + if (!src_file.file) { ret = -EBADF; goto out_drop_write; } ret = -EXDEV; - if (src_file->f_path.mnt != file->f_path.mnt) + if (src_file.file->f_path.mnt != file->f_path.mnt) goto out_fput; - src = src_file->f_dentry->d_inode; + src = src_file.file->f_dentry->d_inode; ret = -EINVAL; if (src == inode) goto out_fput; /* the src must be open for reading */ - if (!(src_file->f_mode & FMODE_READ)) + if (!(src_file.file->f_mode & FMODE_READ)) goto out_fput; /* don't make the dst file partly checksummed */ @@ -2724,7 +2722,7 @@ out_unlock: vfree(buf); btrfs_free_path(path); out_fput: - fput(src_file); + fdput(src_file); out_drop_write: mnt_drop_write_file(file); return ret; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index a44eff074805..2a1762c66041 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) { if (eb->lock_nested) { read_lock(&eb->lock); - if (&eb->lock_nested && current->pid == eb->lock_owner) { + if (eb->lock_nested && current->pid == eb->lock_owner) { read_unlock(&eb->lock); return; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bc424ae5a81a..b65015581744 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1364,8 +1364,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, spin_lock(&fs_info->qgroup_lock); dstgroup = add_qgroup_rb(fs_info, objectid); - if (!dstgroup) + if (IS_ERR(dstgroup)) { + ret = PTR_ERR(dstgroup); goto unlock; + } if (srcid) { srcgroup = find_qgroup_rb(fs_info, srcid); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 48a4882d8ad5..a955669519a2 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -68,7 +68,7 @@ struct reada_extent { u32 blocksize; int err; struct list_head extctl; - struct kref refcnt; + int refcnt; spinlock_t lock; struct reada_zone *zones[BTRFS_MAX_MIRRORS]; int nzones; @@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); if (!re) @@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); if (re) @@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, re->top = *top; INIT_LIST_HEAD(&re->extctl); spin_lock_init(&re->lock); - kref_init(&re->refcnt); + re->refcnt = 1; /* * map block @@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, if (ret == -EEXIST) { re_exist = radix_tree_lookup(&fs_info->reada_tree, index); BUG_ON(!re_exist); - kref_get(&re_exist->refcnt); + re_exist->refcnt++; spin_unlock(&fs_info->reada_lock); goto error; } @@ -465,10 +465,6 @@ error: return re_exist; } -static void reada_kref_dummy(struct kref *kr) -{ -} - static void reada_extent_put(struct btrfs_fs_info *fs_info, struct reada_extent *re) { @@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, unsigned long index = re->logical >> PAGE_CACHE_SHIFT; spin_lock(&fs_info->reada_lock); - if (!kref_put(&re->refcnt, reada_kref_dummy)) { + if (--re->refcnt) { spin_unlock(&fs_info->reada_lock); return; } @@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, return 0; } dev->reada_next = re->logical + re->blocksize; - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6bb465cca20f..10d8e4d88071 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct timespec ct = CURRENT_TIME; spin_lock(&root->root_times_lock); - item->ctransid = trans->transid; + item->ctransid = cpu_to_le64(trans->transid); item->ctime.sec = cpu_to_le64(ct.tv_sec); - item->ctime.nsec = cpu_to_le64(ct.tv_nsec); + item->ctime.nsec = cpu_to_le32(ct.tv_nsec); spin_unlock(&root->root_times_lock); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f2eb24c477a3..83d6f9f9c220 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; - int ret; trace_btrfs_sync_fs(wait); @@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait) btrfs_wait_ordered_extents(root, 0, 0); - trans = btrfs_start_transaction(root, 0); + spin_lock(&fs_info->trans_lock); + if (!fs_info->running_transaction) { + spin_unlock(&fs_info->trans_lock); + return 0; + } + spin_unlock(&fs_info->trans_lock); + + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - return ret; + return btrfs_commit_transaction(trans, root); } static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) @@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) while (cur_devices) { head = &cur_devices->devices; list_for_each_entry(dev, head, dev_list) { + if (dev->missing) + continue; if (!first_dev || dev->devid < first_dev->devid) first_dev = dev; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 17be3dedacba..27c26004e050 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); + parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, parent_root, parent_inode); if (ret) goto abort_trans_dput; @@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, memcpy(new_root_item->parent_uuid, root->root_item.uuid, BTRFS_UUID_SIZE); new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); - new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); + new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); btrfs_set_root_otransid(new_root_item, trans->transid); memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e86ae04abe6a..88b969aeeb71 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -227,9 +227,8 @@ loop_lock: cur = pending; pending = pending->bi_next; cur->bi_next = NULL; - atomic_dec(&fs_info->nr_async_bios); - if (atomic_read(&fs_info->nr_async_bios) < limit && + if (atomic_dec_return(&fs_info->nr_async_bios) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); @@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) memcpy(new_device, device, sizeof(*new_device)); /* Safe because we are under uuid_mutex */ - name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ - rcu_assign_pointer(new_device->name, name); + if (device->name) { + name = rcu_string_strdup(device->name->str, GFP_NOFS); + BUG_ON(device->name && !name); /* -ENOMEM */ + rcu_assign_pointer(new_device->name, name); + } new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; @@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) return ret; } -struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, - u64 logical, int mirror_num) -{ - struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; - int ret; - u64 map_length = 0; - struct btrfs_bio *bbio = NULL; - struct btrfs_device *device; - - BUG_ON(mirror_num == 0); - ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, - mirror_num); - if (ret) { - BUG_ON(bbio != NULL); - return NULL; - } - BUG_ON(mirror_num != bbio->mirror_num); - device = bbio->stripes[mirror_num - 1].dev; - kfree(bbio); - return device; -} - int btrfs_read_chunk_tree(struct btrfs_root *root) { struct btrfs_path *path; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5479325987b3..53c06af92e8d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); -struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, - u64 logical, int mirror_num); void btrfs_dev_stat_print_on_error(struct btrfs_device *device); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_root *root, |