diff options
Diffstat (limited to 'fs/bcachefs/fs.c')
-rw-r--r-- | fs/bcachefs/fs.c | 551 |
1 files changed, 433 insertions, 118 deletions
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index fc834bdf1f52..687af0eea0c2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -33,6 +33,7 @@ #include <linux/backing-dev.h> #include <linux/exportfs.h> #include <linux/fiemap.h> +#include <linux/fileattr.h> #include <linux/fs_context.h> #include <linux/module.h> #include <linux/pagemap.h> @@ -51,6 +52,24 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_subvolume *); +/* Set VFS inode flags from bcachefs inode: */ +static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode) +{ + static const __maybe_unused unsigned bch_flags_to_vfs[] = { + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, + }; + + set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); + + if (bch2_inode_casefold(c, &inode->ei_inode)) + inode->v.i_flags |= S_CASEFOLD; + else + inode->v.i_flags &= ~S_CASEFOLD; +} + void bch2_inode_update_after_write(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -79,7 +98,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans, inode->ei_inode = *bi; - bch2_inode_flags_to_vfs(inode); + bch2_inode_flags_to_vfs(c, inode); } int __must_check bch2_write_inode(struct bch_fs *c, @@ -88,7 +107,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, void *p, unsigned fields) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bch_inode_unpacked inode_u; int ret; retry: @@ -105,8 +124,9 @@ retry: goto err; struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u); + bool rebalance_changed = memcmp(&old_r, &new_r, sizeof(new_r)); - if (memcmp(&old_r, &new_r, sizeof(new_r))) { + if (rebalance_changed) { ret = bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum); if (ret) goto err; @@ -127,6 +147,9 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; + if (rebalance_changed) + bch2_rebalance_wakeup(c); + bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c, "%s: inode %llu:%llu not found when updating", bch2_err_str(ret), @@ -172,11 +195,6 @@ int bch2_fs_quota_transfer(struct bch_fs *c, return ret; } -static bool subvol_inum_eq(subvol_inum a, subvol_inum b) -{ - return a.subvol == b.subvol && a.inum == b.inum; -} - static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed) { const subvol_inum *inum = data; @@ -333,9 +351,8 @@ repeat: if (!trans) { __wait_on_freeing_inode(c, inode, inum); } else { - bch2_trans_unlock(trans); - __wait_on_freeing_inode(c, inode, inum); - int ret = bch2_trans_relock(trans); + int ret = drop_locks_do(trans, + (__wait_on_freeing_inode(c, inode, inum), 0)); if (ret) return ERR_PTR(ret); } @@ -631,13 +648,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter dirent_iter = {}; subvol_inum inum = {}; struct printbuf buf = PRINTBUF; + struct qstr lookup_name; + int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); + if (ret) + return ERR_PTR(ret); + + struct btree_iter dirent_iter = {}; struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc, - dir_hash_info, dir, name, 0); - int ret = bkey_err(k); + dir_hash_info, dir, &lookup_name, 0); + ret = bkey_err(k); if (ret) return ERR_PTR(ret); @@ -700,7 +722,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, if (IS_ERR(inode)) inode = NULL; -#ifdef CONFIG_UNICODE if (!inode && IS_CASEFOLDED(vdir)) { /* * Do not cache a negative dentry in casefolded directories @@ -715,7 +736,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, */ return NULL; } -#endif return d_splice_alias(&inode->v, dentry); } @@ -825,6 +845,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, */ set_nlink(&inode->v, 0); } + + if (IS_CASEFOLDED(vdir)) + d_invalidate(dentry); err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -1075,7 +1098,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; struct btree_trans *trans; - struct btree_iter inode_iter = { NULL }; + struct btree_iter inode_iter = {}; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; kuid_t kuid; @@ -1235,10 +1258,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, return finish_open_simple(file, 0); } +struct bch_fiemap_extent { + struct bkey_buf kbuf; + unsigned flags; +}; + static int bch2_fill_extent(struct bch_fs *c, struct fiemap_extent_info *info, - struct bkey_s_c k, unsigned flags) + struct bch_fiemap_extent *fe) { + struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k); + unsigned flags = fe->flags; + + BUG_ON(!k.k->size); + if (bkey_extent_is_direct_data(k.k)) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1291,110 +1324,225 @@ static int bch2_fill_extent(struct bch_fs *c, } } -static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - u64 start, u64 len) +/* + * Scan a range of an inode for data in pagecache. + * + * Intended to be retryable, so don't modify the output params until success is + * imminent. + */ +static int +bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end, + bool nonblock) { - struct bch_fs *c = vinode->i_sb->s_fs_info; - struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_buf cur, prev; - bool have_extent = false; - int ret = 0; + loff_t dstart, dend; - ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); - if (ret) + dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock); + if (dstart < 0) + return dstart; + + if (dstart == *end) { + *start = dstart; + return 0; + } + + dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock); + if (dend < 0) + return dend; + + /* race */ + BUG_ON(dstart == dend); + + *start = dstart; + *end = dend; + return 0; +} + +/* + * Scan a range of pagecache that corresponds to a file mapping hole in the + * extent btree. If data is found, fake up an extent key so it looks like a + * delalloc extent to the rest of the fiemap processing code. + */ +static int +bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode, + u64 start, u64 end, struct bch_fiemap_extent *cur) +{ + struct bch_fs *c = trans->c; + struct bkey_i_extent *delextent; + struct bch_extent_ptr ptr = {}; + loff_t dstart = start << 9, dend = end << 9; + int ret; + + /* + * We hold btree locks here so we cannot block on folio locks without + * dropping trans locks first. Run a nonblocking scan for the common + * case of no folios over holes and fall back on failure. + * + * Note that dropping locks like this is technically racy against + * writeback inserting to the extent tree, but a non-sync fiemap scan is + * fundamentally racy with writeback anyways. Therefore, just report the + * range as delalloc regardless of whether we have to cycle trans locks. + */ + ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true); + if (ret == -EAGAIN) + ret = drop_locks_do(trans, + bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false)); + if (ret < 0) return ret; - struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); - if (start + len < start) - return -EINVAL; + /* + * Create a fake extent key in the buffer. We have to add a dummy extent + * pointer for the fill code to add an extent entry. It's explicitly + * zeroed to reflect delayed allocation (i.e. phys offset 0). + */ + bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64)); + delextent = bkey_extent_init(cur->kbuf.k); + delextent->k.p = POS(inode->ei_inum.inum, dend >> 9); + delextent->k.size = (dend - dstart) >> 9; + bch2_bkey_append_ptr(&delextent->k_i, ptr); - start >>= 9; + cur->flags = FIEMAP_EXTENT_DELALLOC; - bch2_bkey_buf_init(&cur); - bch2_bkey_buf_init(&prev); - trans = bch2_trans_get(c); + return 0; +} + +static int bch2_next_fiemap_extent(struct btree_trans *trans, + struct bch_inode_info *inode, + u64 start, u64 end, + struct bch_fiemap_extent *cur) +{ + u32 snapshot; + int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot); + if (ret) + return ret; + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - POS(ei->v.i_ino, start), 0); + SPOS(inode->ei_inum.inum, start, snapshot), 0); - while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - enum btree_id data_btree = BTREE_ID_extents; + struct bkey_s_c k = + bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end)); + ret = bkey_err(k); + if (ret) + goto err; - bch2_trans_begin(trans); + u64 pagecache_end = k.k ? max(start, bkey_start_offset(k.k)) : end; - u32 snapshot; - ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot); - if (ret) - continue; + ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, pagecache_end, cur); + if (ret) + goto err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k); - k = bch2_btree_iter_peek_max(&iter, end); - ret = bkey_err(k); + /* + * Does the pagecache or the btree take precedence? + * + * It _should_ be the pagecache, so that we correctly report delalloc + * extents when dirty in the pagecache (we're COW, after all). + * + * But we'd have to add per-sector writeback tracking to + * bch_folio_state, otherwise we report delalloc extents for clean + * cached data in the pagecache. + * + * We should do this, but even then fiemap won't report stable mappings: + * on bcachefs data moves around in the background (copygc, rebalance) + * and we don't provide a way for userspace to lock that out. + */ + if (k.k && + bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)), + pagecache_start)) { + bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k); + bch2_cut_front(iter.pos, cur->kbuf.k); + bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k); + cur->flags = 0; + } else if (k.k) { + bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k); + } + + if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) { + unsigned sectors = cur->kbuf.k->k.size; + s64 offset_into_extent = 0; + enum btree_id data_btree = BTREE_ID_extents; + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); if (ret) - continue; + goto err; - if (!k.k) - break; + struct bkey_i *k = cur->kbuf.k; + sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent); - if (!bkey_extent_is_data(k.k) && - k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(&iter); - continue; - } + bch2_cut_front(POS(k->k.p.inode, + bkey_start_offset(&k->k) + offset_into_extent), + k); + bch2_key_resize(&k->k, sectors); + k->k.p = iter.pos; + k->k.p.offset += k->k.size; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} - s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); - unsigned sectors = k.k->size - offset_into_extent; +static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + u64 start, u64 len) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *ei = to_bch_ei(vinode); + struct btree_trans *trans; + struct bch_fiemap_extent cur, prev; + int ret = 0; + + ret = fiemap_prep(&ei->v, info, start, &len, 0); + if (ret) + return ret; + + if (start + len < start) + return -EINVAL; + + start >>= 9; + u64 end = (start + len) >> 9; + + bch2_bkey_buf_init(&cur.kbuf); + bch2_bkey_buf_init(&prev.kbuf); + bkey_init(&prev.kbuf.k->k); - bch2_bkey_buf_reassemble(&cur, c, k); + trans = bch2_trans_get(c); - ret = bch2_read_indirect_extent(trans, &data_btree, - &offset_into_extent, &cur); + while (start < end) { + ret = lockrestart_do(trans, + bch2_next_fiemap_extent(trans, ei, start, end, &cur)); if (ret) - continue; + goto err; - k = bkey_i_to_s_c(cur.k); - bch2_bkey_buf_realloc(&prev, c, k.k->u64s); + BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start); + BUG_ON(cur.kbuf.k->k.p.offset > end); - sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + if (bkey_start_offset(&cur.kbuf.k->k) == end) + break; - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + - offset_into_extent), - cur.k); - bch2_key_resize(&cur.k->k, sectors); - cur.k->k.p = iter.pos; - cur.k->k.p.offset += cur.k->k.size; + start = cur.kbuf.k->k.p.offset; - if (have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(prev.k), 0); + ret = bch2_fill_extent(c, info, &prev); if (ret) - break; + goto err; } - bkey_copy(prev.k, cur.k); - have_extent = true; - - bch2_btree_iter_set_pos(&iter, - POS(iter.pos.inode, iter.pos.offset + sectors)); + bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k); + prev.flags = cur.flags; } - bch2_trans_iter_exit(trans, &iter); - if (!ret && have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), - FIEMAP_EXTENT_LAST); + prev.flags |= FIEMAP_EXTENT_LAST; + ret = bch2_fill_extent(c, info, &prev); } - +err: bch2_trans_put(trans); - bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); - return ret < 0 ? ret : 0; + bch2_bkey_buf_exit(&cur.kbuf, c); + bch2_bkey_buf_exit(&prev.kbuf, c); + + return bch2_err_class(ret < 0 ? ret : 0); } static const struct vm_operations_struct bch_vm_ops = { @@ -1403,11 +1551,11 @@ static const struct vm_operations_struct bch_vm_ops = { .page_mkwrite = bch2_page_mkwrite, }; -static int bch2_mmap(struct file *file, struct vm_area_struct *vma) +static int bch2_mmap_prepare(struct vm_area_desc *desc) { - file_accessed(file); + file_accessed(desc->file); - vma->vm_ops = &bch_vm_ops; + desc->vm_ops = &bch_vm_ops; return 0; } @@ -1423,11 +1571,12 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); if (!dir_emit_dots(file, ctx)) return 0; - int ret = bch2_readdir(c, inode_inum(inode), ctx); + int ret = bch2_readdir(c, inode_inum(inode), &hash, ctx); bch_err_fn(c, ret); return bch2_err_class(ret); @@ -1449,12 +1598,148 @@ static int bch2_open(struct inode *vinode, struct file *file) return generic_file_open(vinode, file); } +/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ +static const __maybe_unused unsigned bch_flags_to_uflags[] = { + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, +}; + +/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ +static const __maybe_unused unsigned bch_flags_to_xflags[] = { + [__BCH_INODE_sync] = FS_XFLAG_SYNC, + [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_append] = FS_XFLAG_APPEND, + [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, + [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, +}; + +static int bch2_fileattr_get(struct dentry *dentry, + struct file_kattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags)); + + if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) + fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; + + if (bch2_inode_casefold(c, &inode->ei_inode)) + fa->flags |= FS_CASEFOLD_FL; + + fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ]; + return 0; +} + +struct flags_set { + unsigned mask; + unsigned flags; + unsigned projid; + bool set_project; + bool set_casefold; + bool casefold; +}; + +static int fssetxattr_inode_update_fn(struct btree_trans *trans, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = trans->c; + struct flags_set *s = p; + + /* + * We're relying on btree locking here for exclusion with other ioctl + * calls - use the flags in the btree (@bi), not inode->i_flags: + */ + if (!S_ISREG(bi->bi_mode) && + !S_ISDIR(bi->bi_mode) && + (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags) + return -EINVAL; + + if (s->casefold != bch2_inode_casefold(c, bi)) { + int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->casefold); + if (ret) + return ret; + } + + if (s->set_project) { + bi->bi_project = s->projid; + bi->bi_fields_set |= BIT(Inode_opt_project); + } + + bi->bi_flags &= ~s->mask; + bi->bi_flags |= s->flags; + + bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); + return 0; +} + +static int bch2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, + struct file_kattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct flags_set s = {}; + int ret; + + if (fa->fsx_valid) { + fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT; + + s.mask = map_defined(bch_flags_to_xflags); + s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags); + if (fa->fsx_xflags) + return -EOPNOTSUPP; + + if (fa->fsx_projid >= U32_MAX) + return -EINVAL; + + /* + * inode fields accessible via the xattr interface are stored with a +1 + * bias, so that 0 means unset: + */ + if ((inode->ei_inode.bi_project || + fa->fsx_projid) && + inode->ei_inode.bi_project != fa->fsx_projid + 1) { + s.projid = fa->fsx_projid + 1; + s.set_project = true; + } + } + + if (fa->flags_valid) { + s.mask = map_defined(bch_flags_to_uflags); + + s.set_casefold = true; + s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0; + fa->flags &= ~FS_CASEFOLD_FL; + + s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags); + if (fa->flags) + return -EOPNOTSUPP; + } + + mutex_lock(&inode->ei_update_lock); + ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: + (s.set_project + ? bch2_set_projid(c, inode, fa->fsx_projid) + : 0) ?: + bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, + ATTR_CTIME); + mutex_unlock(&inode->ei_update_lock); + + return bch2_err_class(ret); +} + static const struct file_operations bch_file_operations = { .open = bch2_open, .llseek = bch2_llseek, .read_iter = bch2_read_iter, .write_iter = bch2_write_iter, - .mmap = bch2_mmap, + .mmap_prepare = bch2_mmap_prepare, .get_unmapped_area = thp_get_unmapped_area, .fsync = bch2_fsync, .splice_read = filemap_splice_read, @@ -1476,6 +1761,8 @@ static const struct inode_operations bch_file_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct inode_operations bch_dir_inode_operations = { @@ -1496,6 +1783,8 @@ static const struct inode_operations bch_dir_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct file_operations bch_dir_file_operations = { @@ -1518,6 +1807,8 @@ static const struct inode_operations bch_symlink_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct inode_operations bch_special_inode_operations = { @@ -1528,6 +1819,8 @@ static const struct inode_operations bch_special_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct address_space_operations bch_address_space_operations = { @@ -1697,30 +1990,30 @@ retry: if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter1, snapshot); - bch2_btree_iter_set_snapshot(&iter2, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter1, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter2, snapshot); ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u); if (ret) goto err; if (inode_u.bi_dir == dir->ei_inode.bi_inum) { - bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); + bch2_btree_iter_set_pos(trans, &iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); - k = bch2_btree_iter_peek_slot(&iter1); + k = bch2_btree_iter_peek_slot(trans, &iter1); ret = bkey_err(k); if (ret) goto err; if (k.k->type != KEY_TYPE_dirent) { - ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; + ret = bch_err_throw(c, ENOENT_dirent_doesnt_match_inode); goto err; } d = bkey_s_c_to_dirent(k); ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target); if (ret > 0) - ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; + ret = bch_err_throw(c, ENOENT_dirent_doesnt_match_inode); if (ret) goto err; @@ -1731,7 +2024,7 @@ retry: * File with multiple hardlinks and our backref is to the wrong * directory - linear search: */ - for_each_btree_key_continue_norestart(iter2, 0, k, ret) { + for_each_btree_key_continue_norestart(trans, iter2, 0, k, ret) { if (k.k->p.inode > dir->ei_inode.bi_inum) break; @@ -1886,7 +2179,13 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, KEY_TYPE_QUOTA_WARN); - bch2_inode_rm(c, inode_inum(inode)); + int ret = bch2_inode_rm(c, inode_inum(inode)); + if (ret && !bch2_err_matches(ret, EROFS)) { + bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu", + inode->ei_inum.subvol, + inode->ei_inum.inum); + bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm); + } /* * If we are deleting, we need it present in the vfs hash table @@ -2033,7 +2332,8 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root) struct bch_fs *c = root->d_sb->s_fs_info; bool first = true; - for_each_online_member(c, ca) { + guard(rcu)(); + for_each_online_member_rcu(c, ca) { if (!first) seq_putc(seq, ':'); first = false; @@ -2145,7 +2445,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) struct inode *vinode; struct bch2_opts_parse *opts_parse = fc->fs_private; struct bch_opts opts = opts_parse->opts; - darray_str devs; + darray_const_str devs; darray_fs devs_to_fs = {}; int ret; @@ -2169,7 +2469,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) if (!IS_ERR(sb)) goto got_sb; - c = bch2_fs_open(devs.data, devs.nr, opts); + c = bch2_fs_open(&devs, &opts); ret = PTR_ERR_OR_ZERO(c); if (ret) goto err; @@ -2185,10 +2485,17 @@ static int bch2_fs_get_tree(struct fs_context *fc) bch2_opts_apply(&c->opts, opts); + ret = bch2_fs_start(c); + if (ret) + goto err_stop_fs; + /* - * need to initialise sb and set c->vfs_sb _before_ starting fs, - * for blk_holder_ops + * We might be doing a RO mount because other options required it, or we + * have no alloc info and it's a small image with no room to regenerate + * it */ + if (c->opts.read_only) + fc->sb_flags |= SB_RDONLY; sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); @@ -2220,7 +2527,12 @@ got_sb: sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid)); - super_set_sysfs_name_uuid(sb); + + if (c->sb.multi_device) + super_set_sysfs_name_uuid(sb); + else + strscpy(sb->s_sysfs_name, c->name, sizeof(sb->s_sysfs_name)); + sb->s_shrink->seeks = 0; c->vfs_sb = sb; strscpy(sb->s_id, c->name, sizeof(sb->s_id)); @@ -2231,14 +2543,15 @@ got_sb: sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; - for_each_online_member(c, ca) { - struct block_device *bdev = ca->disk_sb.bdev; + scoped_guard(rcu) { + for_each_online_member_rcu(c, ca) { + struct block_device *bdev = ca->disk_sb.bdev; - /* XXX: create an anonymous device for multi device filesystems */ - sb->s_bdev = bdev; - sb->s_dev = bdev->bd_dev; - percpu_ref_put(&ca->io_ref); - break; + /* XXX: create an anonymous device for multi device filesystems */ + sb->s_bdev = bdev; + sb->s_dev = bdev->bd_dev; + break; + } } c->dev = sb->s_dev; @@ -2250,9 +2563,11 @@ got_sb: sb->s_shrink->seeks = 0; - ret = bch2_fs_start(c); - if (ret) - goto err_put_super; +#ifdef CONFIG_UNICODE + if (bch2_fs_casefold_enabled(c)) + sb->s_encoding = c->cf_encoding; + generic_set_sb_d_ops(sb); +#endif vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); |