diff options
Diffstat (limited to 'fs/ext2')
| -rw-r--r-- | fs/ext2/Kconfig | 35 | ||||
| -rw-r--r-- | fs/ext2/Makefile | 7 | ||||
| -rw-r--r-- | fs/ext2/acl.c | 232 | ||||
| -rw-r--r-- | fs/ext2/acl.h | 12 | ||||
| -rw-r--r-- | fs/ext2/balloc.c | 289 | ||||
| -rw-r--r-- | fs/ext2/dir.c | 463 | ||||
| -rw-r--r-- | fs/ext2/ext2.h | 122 | ||||
| -rw-r--r-- | fs/ext2/file.c | 306 | ||||
| -rw-r--r-- | fs/ext2/ialloc.c | 43 | ||||
| -rw-r--r-- | fs/ext2/inode.c | 500 | ||||
| -rw-r--r-- | fs/ext2/ioctl.c | 115 | ||||
| -rw-r--r-- | fs/ext2/namei.c | 269 | ||||
| -rw-r--r-- | fs/ext2/super.c | 1053 | ||||
| -rw-r--r-- | fs/ext2/symlink.c | 28 | ||||
| -rw-r--r-- | fs/ext2/trace.c | 6 | ||||
| -rw-r--r-- | fs/ext2/trace.h | 94 | ||||
| -rw-r--r-- | fs/ext2/xattr.c | 504 | ||||
| -rw-r--r-- | fs/ext2/xattr.h | 29 | ||||
| -rw-r--r-- | fs/ext2/xattr_security.c | 41 | ||||
| -rw-r--r-- | fs/ext2/xattr_trusted.c | 39 | ||||
| -rw-r--r-- | fs/ext2/xattr_user.c | 43 | ||||
| -rw-r--r-- | fs/ext2/xip.c | 90 | ||||
| -rw-r--r-- | fs/ext2/xip.h | 26 |
23 files changed, 2313 insertions, 2033 deletions
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig index 14a6780fd034..d5bce83ad905 100644 --- a/fs/ext2/Kconfig +++ b/fs/ext2/Kconfig @@ -1,20 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-only config EXT2_FS - tristate "Second extended fs support" + tristate "Second extended fs support (DEPRECATED)" + select BUFFER_HEAD + select FS_IOMAP help Ext2 is a standard Linux file system for hard disks. - To compile this file system support as a module, choose M here: the - module will be called ext2. + This filesystem driver is deprecated because it does not properly + support inode time stamps beyond 03:14:07 UTC on 19 January 2038. - If unsure, say Y. + Ext2 users are advised to use ext4 driver to access their filesystem. + The driver is fully compatible, supports filesystems without journal + or extents, and also supports larger time stamps if the filesystem + is created with at least 256 byte inodes. + + This code is kept as a simple reference for filesystem developers. + + If unsure, say N. config EXT2_FS_XATTR bool "Ext2 extended attributes" depends on EXT2_FS help Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). + the kernel or by users (see the attr(5) manual page for details). If unsure, say N. @@ -26,9 +35,6 @@ config EXT2_FS_POSIX_ACL Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the Posix ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N config EXT2_FS_SECURITY @@ -42,14 +48,3 @@ config EXT2_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. - -config EXT2_FS_XIP - bool "Ext2 execute in place support" - depends on EXT2_FS && MMU - help - Execute in place can be used on memory-backed block devices. If you - enable this option, you can select to mount block devices which are - capable of this feature without using the page cache. - - If you do not use a block device that is capable of using this, - or if unsure, say N. diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index f42af45cfd88..8860948ef9ca 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the linux ext2-filesystem routines. # @@ -5,9 +6,11 @@ obj-$(CONFIG_EXT2_FS) += ext2.o ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o + ioctl.o namei.o super.o symlink.o trace.o + +# For tracepoints to include our trace.h from tracepoint infrastructure +CFLAGS_trace.o := -I$(src) ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o -ext2-$(CONFIG_EXT2_FS_XIP) += xip.o diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 110b6b371a4e..7e54c31589c7 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -1,10 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/acl.c * * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> */ -#include <linux/capability.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/slab.h> @@ -141,19 +141,15 @@ fail: * inode->i_mutex: don't care */ struct posix_acl * -ext2_get_acl(struct inode *inode, int type) +ext2_get_acl(struct inode *inode, int type, bool rcu) { int name_index; char *value = NULL; struct posix_acl *acl; int retval; - if (!test_opt(inode->i_sb, POSIX_ACL)) - return NULL; - - acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) - return acl; + if (rcu) + return ERR_PTR(-ECHILD); switch (type) { case ACL_TYPE_ACCESS: @@ -180,42 +176,20 @@ ext2_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } -/* - * inode->i_mutex: down - */ static int -ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) +__ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int name_index; void *value = NULL; size_t size = 0; int error; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - switch(type) { case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return error; - else { - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - if (error == 0) - acl = NULL; - } - } break; case ACL_TYPE_DEFAULT: @@ -242,177 +216,61 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) } /* - * Initialize the ACLs of a new inode. Called from ext2_new_inode. - * - * dir->i_mutex: down - * inode->i_mutex: up (access to inode is still exclusive) + * inode->i_mutex: down */ int -ext2_init_acl(struct inode *inode, struct inode *dir) +ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, + struct posix_acl *acl, int type) { - struct posix_acl *acl = NULL; - int error = 0; - - if (!S_ISLNK(inode->i_mode)) { - if (test_opt(dir->i_sb, POSIX_ACL)) { - acl = ext2_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - } - if (!acl) - inode->i_mode &= ~current_umask(); - } - if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - if (S_ISDIR(inode->i_mode)) { - error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto cleanup; - } - error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); - if (error < 0) + int error; + int update_mode = 0; + struct inode *inode = d_inode(dentry); + umode_t mode = inode->i_mode; + + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, + &acl); + if (error) return error; - if (error > 0) { - /* This is an extended ACL */ - error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); - } + update_mode = 1; + } + error = __ext2_set_acl(inode, acl, type); + if (!error && update_mode) { + inode->i_mode = mode; + inode_set_ctime_current(inode); + mark_inode_dirty(inode); } -cleanup: - posix_acl_release(acl); - return error; + return error; } /* - * Does chmod for an inode that may have an Access Control List. The - * inode->i_mode field must be updated to the desired value by the caller - * before calling this function. - * Returns 0 on success, or a negative error number. - * - * We change the ACL rather than storing some ACL entries in the file - * mode permission bits (which would be more efficient), because that - * would break once additional permissions (like ACL_APPEND, ACL_DELETE - * for directories) are added. There are no more bits available in the - * file mode. + * Initialize the ACLs of a new inode. Called from ext2_new_inode. * - * inode->i_mutex: down + * dir->i_mutex: down + * inode->i_mutex: up (access to inode is still exclusive) */ int -ext2_acl_chmod(struct inode *inode) +ext2_init_acl(struct inode *inode, struct inode *dir) { - struct posix_acl *acl; - int error; + struct posix_acl *default_acl, *acl; + int error; - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (error) return error; - error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); - posix_acl_release(acl); - return error; -} -/* - * Extended attribut handlers - */ -static size_t -ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) -{ - const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - - if (!test_opt(dentry->d_sb, POSIX_ACL)) - return 0; - if (list && size <= list_size) - memcpy(list, POSIX_ACL_XATTR_ACCESS, size); - return size; -} - -static size_t -ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) -{ - const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - - if (!test_opt(dentry->d_sb, POSIX_ACL)) - return 0; - if (list && size <= list_size) - memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); - return size; -} - -static int -ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, - size_t size, int type) -{ - struct posix_acl *acl; - int error; - - if (strcmp(name, "") != 0) - return -EINVAL; - if (!test_opt(dentry->d_sb, POSIX_ACL)) - return -EOPNOTSUPP; - - acl = ext2_get_acl(dentry->d_inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); - posix_acl_release(acl); - - return error; -} - -static int -ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags, int type) -{ - struct posix_acl *acl; - int error; - - if (strcmp(name, "") != 0) - return -EINVAL; - if (!test_opt(dentry->d_sb, POSIX_ACL)) - return -EOPNOTSUPP; - if (!inode_owner_or_capable(dentry->d_inode)) - return -EPERM; - - if (value) { - acl = posix_acl_from_xattr(&init_user_ns, value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - else if (acl) { - error = posix_acl_valid(acl); - if (error) - goto release_and_out; - } - } else - acl = NULL; - - error = ext2_set_acl(dentry->d_inode, type, acl); - -release_and_out: - posix_acl_release(acl); + if (default_acl) { + error = __ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + posix_acl_release(default_acl); + } else { + inode->i_default_acl = NULL; + } + if (acl) { + if (!error) + error = __ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); + posix_acl_release(acl); + } else { + inode->i_acl = NULL; + } return error; } - -const struct xattr_handler ext2_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .flags = ACL_TYPE_ACCESS, - .list = ext2_xattr_list_acl_access, - .get = ext2_xattr_get_acl, - .set = ext2_xattr_set_acl, -}; - -const struct xattr_handler ext2_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .list = ext2_xattr_list_acl_default, - .get = ext2_xattr_get_acl, - .set = ext2_xattr_set_acl, -}; diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 503bfb0ed79b..4a8443a2b8ec 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* File: fs/ext2/acl.h @@ -54,8 +55,9 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL /* acl.c */ -extern struct posix_acl *ext2_get_acl(struct inode *inode, int type); -extern int ext2_acl_chmod (struct inode *); +extern struct posix_acl *ext2_get_acl(struct inode *inode, int type, bool rcu); +extern int ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, + struct posix_acl *acl, int type); extern int ext2_init_acl (struct inode *, struct inode *); #else @@ -63,12 +65,6 @@ extern int ext2_init_acl (struct inode *, struct inode *); #define ext2_get_acl NULL #define ext2_set_acl NULL -static inline int -ext2_acl_chmod (struct inode *inode) -{ - return 0; -} - static inline int ext2_init_acl (struct inode *inode, struct inode *dir) { return 0; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 9f9992b37924..b8cfab8f98b9 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/balloc.c * @@ -15,6 +16,7 @@ #include <linux/quotaops.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/cred.h> #include <linux/buffer_head.h> #include <linux/capability.h> @@ -34,8 +36,6 @@ */ -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh) @@ -46,10 +46,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, struct ext2_sb_info *sbi = EXT2_SB(sb); if (block_group >= sbi->s_groups_count) { - ext2_error (sb, "ext2_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", - block_group, sbi->s_groups_count); + WARN(1, "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); return NULL; } @@ -57,10 +56,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext2_error (sb, "ext2_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); + WARN(1, "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); return NULL; } @@ -79,26 +77,33 @@ static int ext2_valid_block_bitmap(struct super_block *sb, ext2_grpblk_t next_zero_bit; ext2_fsblk_t bitmap_blk; ext2_fsblk_t group_first_block; + ext2_grpblk_t max_bit; group_first_block = ext2_group_first_block_no(sb, block_group); + max_bit = ext2_group_last_block_no(sb, block_group) - group_first_block; /* check whether block bitmap block number is set */ bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); offset = bitmap_blk - group_first_block; - if (!ext2_test_bit(offset, bh->b_data)) + if (offset < 0 || offset > max_bit || + !ext2_test_bit(offset, bh->b_data)) /* bad block bitmap */ goto err_out; /* check whether the inode bitmap block number is set */ bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap); offset = bitmap_blk - group_first_block; - if (!ext2_test_bit(offset, bh->b_data)) + if (offset < 0 || offset > max_bit || + !ext2_test_bit(offset, bh->b_data)) /* bad block bitmap */ goto err_out; /* check whether the inode table block number is set */ bitmap_blk = le32_to_cpu(desc->bg_inode_table); offset = bitmap_blk - group_first_block; + if (offset < 0 || offset > max_bit || + offset + EXT2_SB(sb)->s_itb_per_group - 1 > max_bit) + goto err_out; next_zero_bit = ext2_find_next_zero_bit(bh->b_data, offset + EXT2_SB(sb)->s_itb_per_group, offset); @@ -126,6 +131,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) struct ext2_group_desc * desc; struct buffer_head * bh = NULL; ext2_fsblk_t bitmap_blk; + int ret; desc = ext2_get_group_desc(sb, block_group, NULL); if (!desc) @@ -139,10 +145,10 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) block_group, le32_to_cpu(desc->bg_block_bitmap)); return NULL; } - if (likely(bh_uptodate_or_lock(bh))) + ret = bh_read(bh, 0); + if (ret > 0) return bh; - - if (bh_submit_read(bh) < 0) { + if (ret < 0) { brelse(bh); ext2_error(sb, __func__, "Cannot read block bitmap - " @@ -187,7 +193,7 @@ static void group_adjust_blocks(struct super_block *sb, int group_no, /** * __rsv_window_dump() -- Dump the filesystem block allocation reservation map - * @rb_root: root of per-filesystem reservation rb tree + * @root: root of per-filesystem reservation rb tree * @verbose: verbose mode * @fn: function which wishes to dump the reservation map * @@ -267,7 +273,7 @@ goal_in_my_reservation(struct ext2_reserve_window *rsv, ext2_grpblk_t grp_goal, ext2_fsblk_t group_first_block, group_last_block; group_first_block = ext2_group_first_block_no(sb, group); - group_last_block = group_first_block + EXT2_BLOCKS_PER_GROUP(sb) - 1; + group_last_block = ext2_group_last_block_no(sb, group); if ((rsv->_rsv_start > group_last_block) || (rsv->_rsv_end < group_first_block)) @@ -280,7 +286,7 @@ goal_in_my_reservation(struct ext2_reserve_window *rsv, ext2_grpblk_t grp_goal, /** * search_reserve_window() - * @rb_root: root of reservation tree + * @root: root of reservation tree * @goal: target allocation block * * Find the reserved window which includes the goal, or the previous one @@ -413,7 +419,7 @@ void ext2_init_block_alloc_info(struct inode *inode) struct ext2_block_alloc_info *block_i; struct super_block *sb = inode->i_sb; - block_i = kmalloc(sizeof(*block_i), GFP_NOFS); + block_i = kmalloc(sizeof(*block_i), GFP_KERNEL); if (block_i) { struct ext2_reserve_window_node *rsv = &block_i->rsv_window_node; @@ -473,8 +479,8 @@ void ext2_discard_reservation(struct inode *inode) * @block: start physical block to free * @count: number of blocks to free */ -void ext2_free_blocks (struct inode * inode, unsigned long block, - unsigned long count) +void ext2_free_blocks(struct inode * inode, ext2_fsblk_t block, + unsigned long count) { struct buffer_head *bitmap_bh = NULL; struct buffer_head * bh2; @@ -488,9 +494,7 @@ void ext2_free_blocks (struct inode * inode, unsigned long block, struct ext2_super_block * es = sbi->s_es; unsigned freed = 0, group_freed; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - block + count > le32_to_cpu(es->s_blocks_count)) { + if (!ext2_data_block_valid(sbi, block, count)) { ext2_error (sb, "ext2_free_blocks", "Freeing blocks not in datazone - " "block = %lu, count = %lu", block, count); @@ -546,7 +550,7 @@ do_more: } mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); group_adjust_blocks(sb, block_group, desc, bh2, group_freed); @@ -666,37 +670,24 @@ ext2_try_to_allocate(struct super_block *sb, int group, unsigned long *count, struct ext2_reserve_window *my_rsv) { - ext2_fsblk_t group_first_block; - ext2_grpblk_t start, end; + ext2_fsblk_t group_first_block = ext2_group_first_block_no(sb, group); + ext2_fsblk_t group_last_block = ext2_group_last_block_no(sb, group); + ext2_grpblk_t start, end; unsigned long num = 0; + start = 0; + end = group_last_block - group_first_block + 1; /* we do allocation within the reservation window if we have a window */ if (my_rsv) { - group_first_block = ext2_group_first_block_no(sb, group); if (my_rsv->_rsv_start >= group_first_block) start = my_rsv->_rsv_start - group_first_block; - else - /* reservation window cross group boundary */ - start = 0; - end = my_rsv->_rsv_end - group_first_block + 1; - if (end > EXT2_BLOCKS_PER_GROUP(sb)) - /* reservation window crosses group boundary */ - end = EXT2_BLOCKS_PER_GROUP(sb); - if ((start <= grp_goal) && (grp_goal < end)) - start = grp_goal; - else + if (my_rsv->_rsv_end < group_last_block) + end = my_rsv->_rsv_end - group_first_block + 1; + if (grp_goal < start || grp_goal >= end) grp_goal = -1; - } else { - if (grp_goal > 0) - start = grp_goal; - else - start = 0; - end = EXT2_BLOCKS_PER_GROUP(sb); } - BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb)); -repeat: if (grp_goal < 0) { grp_goal = find_next_usable_block(start, bitmap_bh, end); if (grp_goal < 0) @@ -711,67 +702,55 @@ repeat: ; } } - start = grp_goal; - if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), grp_goal, - bitmap_bh->b_data)) { - /* - * The block was allocated by another thread, or it was - * allocated and then freed by another thread - */ - start++; - grp_goal++; - if (start >= end) - goto fail_access; - goto repeat; - } - num++; - grp_goal++; - while (num < *count && grp_goal < end - && !ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), + for (; num < *count && grp_goal < end; grp_goal++) { + if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), grp_goal, bitmap_bh->b_data)) { + if (num == 0) + continue; + break; + } num++; - grp_goal++; } + + if (num == 0) + goto fail_access; + *count = num; return grp_goal - num; fail_access: - *count = num; return -1; } /** - * find_next_reservable_window(): - * find a reservable space within the given range. - * It does not allocate the reservation window for now: - * alloc_new_reservation() will do the work later. - * - * @search_head: the head of the searching list; - * This is not necessarily the list head of the whole filesystem + * find_next_reservable_window - Find a reservable space within the given range. + * @search_head: The list to search. + * @my_rsv: The reservation we're currently using. + * @sb: The super block. + * @start_block: The first block we consider to start the real search from + * @last_block: The maximum block number that our goal reservable space + * could start from. * - * We have both head and start_block to assist the search - * for the reservable space. The list starts from head, - * but we will shift to the place where start_block is, - * then start from there, when looking for a reservable space. + * It does not allocate the reservation window: alloc_new_reservation() + * will do the work later. * - * @size: the target new reservation window size + * We search the given range, rather than the whole reservation double + * linked list, (start_block, last_block) to find a free region that is + * of my size and has not been reserved. * - * @group_first_block: the first block we consider to start - * the real search from + * @search_head is not necessarily the list head of the whole filesystem. + * We have both head and @start_block to assist the search for the + * reservable space. The list starts from head, but we will shift to + * the place where start_block is, then start from there, when looking + * for a reservable space. * - * @last_block: - * the maximum block number that our goal reservable space - * could start from. This is normally the last block in this - * group. The search will end when we found the start of next - * possible reservable space is out of this boundary. - * This could handle the cross boundary reservation window - * request. - * - * basically we search from the given range, rather than the whole - * reservation double linked list, (start_block, last_block) - * to find a free region that is of my size and has not - * been reserved. + * @last_block is normally the last block in this group. The search will end + * when we found the start of next possible reservable space is out + * of this boundary. This could handle the cross boundary reservation + * window request. * + * Return: -1 if we could not find a range of sufficient size. If we could, + * return 0 and fill in @my_rsv with the range information. */ static int find_next_reservable_window( struct ext2_reserve_window_node *search_head, @@ -859,41 +838,34 @@ static int find_next_reservable_window( } /** - * alloc_new_reservation()--allocate a new reservation window - * - * To make a new reservation, we search part of the filesystem - * reservation list (the list that inside the group). We try to - * allocate a new reservation window near the allocation goal, - * or the beginning of the group, if there is no goal. - * - * We first find a reservable space after the goal, then from - * there, we check the bitmap for the first free block after - * it. If there is no free block until the end of group, then the - * whole group is full, we failed. Otherwise, check if the free - * block is inside the expected reservable space, if so, we - * succeed. - * If the first free block is outside the reservable space, then - * start from the first free block, we search for next available - * space, and go on. + * alloc_new_reservation - Allocate a new reservation window. + * @my_rsv: The reservation we're currently using. + * @grp_goal: The goal block relative to the start of the group. + * @sb: The super block. + * @group: The group we are trying to allocate in. + * @bitmap_bh: The block group block bitmap. * - * on succeed, a new reservation will be found and inserted into the list - * It contains at least one free block, and it does not overlap with other - * reservation windows. + * To make a new reservation, we search part of the filesystem reservation + * list (the list inside the group). We try to allocate a new + * reservation window near @grp_goal, or the beginning of the + * group, if @grp_goal is negative. * - * failed: we failed to find a reservation window in this group + * We first find a reservable space after the goal, then from there, + * we check the bitmap for the first free block after it. If there is + * no free block until the end of group, then the whole group is full, + * we failed. Otherwise, check if the free block is inside the expected + * reservable space, if so, we succeed. * - * @rsv: the reservation + * If the first free block is outside the reservable space, then start + * from the first free block, we search for next available space, and + * go on. * - * @grp_goal: The goal (group-relative). It is where the search for a - * free reservable space should start from. - * if we have a goal(goal >0 ), then start from there, - * no goal(goal = -1), we start from the first block - * of the group. - * - * @sb: the super block - * @group: the group we are trying to allocate in - * @bitmap_bh: the block group block bitmap + * on succeed, a new reservation will be found and inserted into the + * list. It contains at least one free block, and it does not overlap + * with other reservation windows. * + * Return: 0 on success, -1 if we failed to find a reservation window + * in this group */ static int alloc_new_reservation(struct ext2_reserve_window_node *my_rsv, ext2_grpblk_t grp_goal, struct super_block *sb, @@ -908,7 +880,7 @@ static int alloc_new_reservation(struct ext2_reserve_window_node *my_rsv, spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock; group_first_block = ext2_group_first_block_no(sb, group); - group_end_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); + group_end_block = ext2_group_last_block_no(sb, group); if (grp_goal < 0) start_block = group_first_block; @@ -1115,7 +1087,7 @@ ext2_try_to_allocate_with_rsv(struct super_block *sb, unsigned int group, * first block is the block number of the first block in this group */ group_first_block = ext2_group_first_block_no(sb, group); - group_last_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); + group_last_block = ext2_group_last_block_no(sb, group); /* * Basically we will allocate a new block from inode's reservation @@ -1157,8 +1129,13 @@ ext2_try_to_allocate_with_rsv(struct super_block *sb, unsigned int group, if ((my_rsv->rsv_start > group_last_block) || (my_rsv->rsv_end < group_first_block)) { + ext2_error(sb, __func__, + "Reservation out of group %u range goal %d fsb[%lu,%lu] rsv[%lu, %lu]", + group, grp_goal, group_first_block, + group_last_block, my_rsv->rsv_start, + my_rsv->rsv_end); rsv_window_dump(&EXT2_SB(sb)->s_rsv_window_root, 1); - BUG(); + return -1; } ret = ext2_try_to_allocate(sb, group, bitmap_bh, grp_goal, &num, &my_rsv->rsv_window); @@ -1194,11 +1171,32 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi) } /* + * Returns 1 if the passed-in block region is valid; 0 if some part overlaps + * with filesystem metadata blocks. + */ +int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, + unsigned int count) +{ + if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (start_blk + count - 1 < start_blk) || + (start_blk + count - 1 >= le32_to_cpu(sbi->s_es->s_blocks_count))) + return 0; + + /* Ensure we do not step over superblock */ + if ((start_blk <= sbi->s_sb_block) && + (start_blk + count - 1 >= sbi->s_sb_block)) + return 0; + + return 1; +} + +/* * ext2_new_blocks() -- core block(s) allocation function * @inode: file inode * @goal: given target block(filesystem wide) * @count: target number of blocks to allocate * @errp: error code + * @flags: allocate flags * * ext2_new_blocks uses a goal block to assist allocation. If the goal is * free, or there is a free block within 32 blocks of the goal, that block @@ -1208,7 +1206,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi) * This function also updates quota and i_blocks field. */ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, - unsigned long *count, int *errp) + unsigned long *count, int *errp, unsigned int flags) { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gdp_bh; @@ -1247,15 +1245,15 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, es = EXT2_SB(sb)->s_es; ext2_debug("goal=%lu.\n", goal); /* - * Allocate a block from reservation only when - * filesystem is mounted with reservation(default,-o reservation), and - * it's a regular file, and - * the desired window size is greater than 0 (One could use ioctl - * command EXT2_IOC_SETRSVSZ to set the window size to 0 to turn off - * reservation on that particular file) + * Allocate a block from reservation only when the filesystem is + * mounted with reservation(default,-o reservation), and it's a regular + * file, and the desired window size is greater than 0 (One could use + * ioctl command EXT2_IOC_SETRSVSZ to set the window size to 0 to turn + * off reservation on that particular file). Also do not use the + * reservation window if the caller asked us not to do it. */ block_i = EXT2_I(inode)->i_block_alloc_info; - if (block_i) { + if (!(flags & EXT2_ALLOC_NORESERVE) && block_i) { windowsz = block_i->rsv_window_node.rsv_goal_size; if (windowsz > 0) my_rsv = &block_i->rsv_window_node; @@ -1293,6 +1291,13 @@ retry_alloc: if (free_blocks > 0) { grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % EXT2_BLOCKS_PER_GROUP(sb)); + /* + * In case we retry allocation (due to fs reservation not + * working out or fs corruption), the bitmap_bh is non-null + * pointer and we have to release it before calling + * read_block_bitmap(). + */ + brelse(bitmap_bh); bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; @@ -1384,6 +1389,7 @@ allocated: * use. So we may want to selectively mark some of the blocks * as free */ + num = *count; goto retry_alloc; } @@ -1401,7 +1407,7 @@ allocated: percpu_counter_sub(&sbi->s_freeblocks_counter, num); mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); *errp = 0; @@ -1427,13 +1433,6 @@ out: return 0; } -ext2_fsblk_t ext2_new_block(struct inode *inode, unsigned long goal, int *errp) -{ - unsigned long count = 1; - - return ext2_new_blocks(inode, goal, &count, errp); -} - #ifdef EXT2FS_DEBUG unsigned long ext2_count_free(struct buffer_head *map, unsigned int numchars) @@ -1477,11 +1476,11 @@ unsigned long ext2_count_free_blocks (struct super_block * sb) desc_count, bitmap_count); return bitmap_count; #else - for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { - desc = ext2_get_group_desc (sb, i, NULL); - if (!desc) - continue; - desc_count += le16_to_cpu(desc->bg_free_blocks_count); + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + desc = ext2_get_group_desc(sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_blocks_count); } return desc_count; #endif diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 6e1d4ab09d72..b07b3b369710 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/dir.c * @@ -25,6 +26,7 @@ #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/swap.h> +#include <linux/iversion.h> typedef struct ext2_dir_entry_2 ext2_dirent; @@ -37,7 +39,7 @@ static inline unsigned ext2_rec_len_from_disk(__le16 dlen) { unsigned len = le16_to_cpu(dlen); -#if (PAGE_CACHE_SIZE >= 65536) +#if (PAGE_SIZE >= 65536) if (len == EXT2_MAX_REC_LEN) return 1 << 16; #endif @@ -46,7 +48,7 @@ static inline unsigned ext2_rec_len_from_disk(__le16 dlen) static inline __le16 ext2_rec_len_to_disk(unsigned len) { -#if (PAGE_CACHE_SIZE >= 65536) +#if (PAGE_SIZE >= 65536) if (len == (1 << 16)) return cpu_to_le16(EXT2_MAX_REC_LEN); else @@ -64,17 +66,6 @@ static inline unsigned ext2_chunk_size(struct inode *inode) return inode->i_sb->s_blocksize; } -static inline void ext2_put_page(struct page *page) -{ - kunmap(page); - page_cache_release(page); -} - -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. @@ -84,51 +75,40 @@ ext2_last_byte(struct inode *inode, unsigned long page_nr) { unsigned last_byte = inode->i_size; - last_byte -= page_nr << PAGE_CACHE_SHIFT; - if (last_byte > PAGE_CACHE_SIZE) - last_byte = PAGE_CACHE_SIZE; + last_byte -= page_nr << PAGE_SHIFT; + if (last_byte > PAGE_SIZE) + last_byte = PAGE_SIZE; return last_byte; } -static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) +static void ext2_commit_chunk(struct folio *folio, loff_t pos, unsigned len) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct inode *dir = mapping->host; - int err = 0; - dir->i_version++; - block_write_end(NULL, mapping, pos, len, len, page, NULL); + inode_inc_iversion(dir); + block_write_end(pos, len, len, folio); if (pos+len > dir->i_size) { i_size_write(dir, pos+len); mark_inode_dirty(dir); } - - if (IS_DIRSYNC(dir)) { - err = write_one_page(page, 1); - if (!err) - err = sync_inode_metadata(dir, 1); - } else { - unlock_page(page); - } - - return err; + folio_unlock(folio); } -static void ext2_check_page(struct page *page, int quiet) +static bool ext2_check_folio(struct folio *folio, int quiet, char *kaddr) { - struct inode *dir = page->mapping->host; + struct inode *dir = folio->mapping->host; struct super_block *sb = dir->i_sb; unsigned chunk_size = ext2_chunk_size(dir); - char *kaddr = page_address(page); u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count); unsigned offs, rec_len; - unsigned limit = PAGE_CACHE_SIZE; + unsigned limit = folio_size(folio); ext2_dirent *p; char *error; - if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_CACHE_MASK; + if (dir->i_size < folio_pos(folio) + limit) { + limit = offset_in_folio(folio, dir->i_size); if (limit & (chunk_size - 1)) goto Ebadsize; if (!limit) @@ -152,8 +132,8 @@ static void ext2_check_page(struct page *page, int quiet) if (offs != limit) goto Eend; out: - SetPageChecked(page); - return; + folio_set_checked(folio); + return true; /* Too bad, we had an error */ @@ -180,41 +160,51 @@ Einumber: bad_entry: if (!quiet) ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - " - "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + "offset=%llu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error, folio_pos(folio) + offs, (unsigned long) le32_to_cpu(p->inode), rec_len, p->name_len); goto fail; Eend: if (!quiet) { p = (ext2_dirent *)(kaddr + offs); - ext2_error(sb, "ext2_check_page", + ext2_error(sb, "ext2_check_folio", "entry in directory #%lu spans the page boundary" - "offset=%lu, inode=%lu", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, + "offset=%llu, inode=%lu", + dir->i_ino, folio_pos(folio) + offs, (unsigned long) le32_to_cpu(p->inode)); } fail: - SetPageChecked(page); - SetPageError(page); + return false; } -static struct page * ext2_get_page(struct inode *dir, unsigned long n, - int quiet) +/* + * Calls to ext2_get_folio()/folio_release_kmap() must be nested according + * to the rules documented in kmap_local_folio()/kunmap_local(). + * + * NOTE: ext2_find_entry() and ext2_dotdot() act as a call + * to folio_release_kmap() and should be treated as a call to + * folio_release_kmap() for nesting purposes. + */ +static void *ext2_get_folio(struct inode *dir, unsigned long n, + int quiet, struct folio **foliop) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_mapping_page(mapping, n, NULL); - if (!IS_ERR(page)) { - kmap(page); - if (!PageChecked(page)) - ext2_check_page(page, quiet); - if (PageError(page)) + struct folio *folio = read_mapping_folio(mapping, n, NULL); + void *kaddr; + + if (IS_ERR(folio)) + return ERR_CAST(folio); + kaddr = kmap_local_folio(folio, 0); + if (unlikely(!folio_test_checked(folio))) { + if (!ext2_check_folio(folio, quiet, kaddr)) goto fail; } - return page; + *foliop = folio; + return kaddr; fail: - ext2_put_page(page); + folio_release_kmap(folio, kaddr); return ERR_PTR(-EIO); } @@ -252,36 +242,13 @@ ext2_validate_entry(char *base, unsigned offset, unsigned mask) break; p = ext2_next_entry(p); } - return (char *)p - base; + return offset_in_page(p); } -static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { - [EXT2_FT_UNKNOWN] = DT_UNKNOWN, - [EXT2_FT_REG_FILE] = DT_REG, - [EXT2_FT_DIR] = DT_DIR, - [EXT2_FT_CHRDEV] = DT_CHR, - [EXT2_FT_BLKDEV] = DT_BLK, - [EXT2_FT_FIFO] = DT_FIFO, - [EXT2_FT_SOCK] = DT_SOCK, - [EXT2_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = EXT2_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = EXT2_FT_DIR, - [S_IFCHR >> S_SHIFT] = EXT2_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = EXT2_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = EXT2_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = EXT2_FT_SOCK, - [S_IFLNK >> S_SHIFT] = EXT2_FT_SYMLINK, -}; - static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) { - umode_t mode = inode->i_mode; if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) - de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; + de->file_type = fs_umode_to_ftype(inode->i_mode); else de->file_type = 0; } @@ -292,39 +259,39 @@ ext2_readdir(struct file *file, struct dir_context *ctx) loff_t pos = ctx->pos; struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; - unsigned int offset = pos & ~PAGE_CACHE_MASK; - unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = pos & ~PAGE_MASK; + unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); - unsigned char *types = NULL; - int need_revalidate = file->f_version != inode->i_version; + bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data); + bool has_filetype; if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) return 0; - if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) - types = ext2_filetype_table; + has_filetype = + EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE); for ( ; n < npages; n++, offset = 0) { - char *kaddr, *limit; ext2_dirent *de; - struct page *page = ext2_get_page(inode, n, 0); + struct folio *folio; + char *kaddr = ext2_get_folio(inode, n, 0, &folio); + char *limit; - if (IS_ERR(page)) { + if (IS_ERR(kaddr)) { ext2_error(sb, __func__, "bad page in #%lu", inode->i_ino); - ctx->pos += PAGE_CACHE_SIZE - offset; - return PTR_ERR(page); + ctx->pos += PAGE_SIZE - offset; + return PTR_ERR(kaddr); } - kaddr = page_address(page); if (unlikely(need_revalidate)) { if (offset) { offset = ext2_validate_entry(kaddr, offset, chunk_mask); - ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; + ctx->pos = (n<<PAGE_SHIFT) + offset; } - file->f_version = inode->i_version; - need_revalidate = 0; + *(u64 *)file->private_data = inode_query_iversion(inode); + need_revalidate = false; } de = (ext2_dirent *)(kaddr+offset); limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); @@ -332,25 +299,25 @@ ext2_readdir(struct file *file, struct dir_context *ctx) if (de->rec_len == 0) { ext2_error(sb, __func__, "zero-length directory entry"); - ext2_put_page(page); + folio_release_kmap(folio, de); return -EIO; } if (de->inode) { unsigned char d_type = DT_UNKNOWN; - if (types && de->file_type < EXT2_FT_MAX) - d_type = types[de->file_type]; + if (has_filetype) + d_type = fs_ftype_to_dtype(de->file_type); if (!dir_emit(ctx, de->name, de->name_len, le32_to_cpu(de->inode), d_type)) { - ext2_put_page(page); + folio_release_kmap(folio, de); return 0; } } ctx->pos += ext2_rec_len_from_disk(de->rec_len); } - ext2_put_page(page); + folio_release_kmap(folio, kaddr); } return 0; } @@ -362,56 +329,58 @@ ext2_readdir(struct file *file, struct dir_context *ctx) * returns the page in which the entry was found (as a parameter - res_page), * and the entry itself. Page is returned mapped and unlocked. * Entry is guaranteed to be valid. + * + * On Success folio_release_kmap() should be called on *foliop. + * + * NOTE: Calls to ext2_get_folio()/folio_release_kmap() must be nested + * according to the rules documented in kmap_local_folio()/kunmap_local(). + * + * ext2_find_entry() and ext2_dotdot() act as a call to ext2_get_folio() + * and should be treated as a call to ext2_get_folio() for nesting + * purposes. */ -struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir, - struct qstr *child, struct page ** res_page) +struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir, + const struct qstr *child, struct folio **foliop) { const char *name = child->name; int namelen = child->len; unsigned reclen = EXT2_DIR_REC_LEN(namelen); unsigned long start, n; unsigned long npages = dir_pages(dir); - struct page *page = NULL; struct ext2_inode_info *ei = EXT2_I(dir); ext2_dirent * de; - int dir_has_error = 0; if (npages == 0) goto out; - /* OFFSET_CACHE */ - *res_page = NULL; - start = ei->i_dir_start_lookup; if (start >= npages) start = 0; n = start; do { - char *kaddr; - page = ext2_get_page(dir, n, dir_has_error); - if (!IS_ERR(page)) { - kaddr = page_address(page); - de = (ext2_dirent *) kaddr; - kaddr += ext2_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - ext2_error(dir->i_sb, __func__, - "zero-length directory entry"); - ext2_put_page(page); - goto out; - } - if (ext2_match (namelen, name, de)) - goto found; - de = ext2_next_entry(de); + char *kaddr = ext2_get_folio(dir, n, 0, foliop); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (ext2_dirent *) kaddr; + kaddr += ext2_last_byte(dir, n) - reclen; + while ((char *) de <= kaddr) { + if (de->rec_len == 0) { + ext2_error(dir->i_sb, __func__, + "zero-length directory entry"); + folio_release_kmap(*foliop, de); + goto out; } - ext2_put_page(page); - } else - dir_has_error = 1; + if (ext2_match(namelen, name, de)) + goto found; + de = ext2_next_entry(de); + } + folio_release_kmap(*foliop, kaddr); if (++n >= npages) n = 0; - /* next page is past the blocks we've got */ - if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { + /* next folio is past the blocks we've got */ + if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) { ext2_error(dir->i_sb, __func__, "dir %lu size %lld exceeds block count %llu", dir->i_ino, dir->i_size, @@ -420,65 +389,85 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: - *res_page = page; ei->i_dir_start_lookup = n; return de; } -struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) +/* + * Return the '..' directory entry and the page in which the entry was found + * (as a parameter - p). + * + * On Success folio_release_kmap() should be called on *foliop. + * + * NOTE: Calls to ext2_get_folio()/folio_release_kmap() must be nested + * according to the rules documented in kmap_local_folio()/kunmap_local(). + * + * ext2_find_entry() and ext2_dotdot() act as a call to ext2_get_folio() + * and should be treated as a call to ext2_get_folio() for nesting + * purposes. + */ +struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct folio **foliop) { - struct page *page = ext2_get_page(dir, 0, 0); - ext2_dirent *de = NULL; + ext2_dirent *de = ext2_get_folio(dir, 0, 0, foliop); - if (!IS_ERR(page)) { - de = ext2_next_entry((ext2_dirent *) page_address(page)); - *p = page; - } - return de; + if (!IS_ERR(de)) + return ext2_next_entry(de); + return NULL; } -ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) +int ext2_inode_by_name(struct inode *dir, const struct qstr *child, ino_t *ino) { - ino_t res = 0; struct ext2_dir_entry_2 *de; - struct page *page; - - de = ext2_find_entry (dir, child, &page); - if (de) { - res = le32_to_cpu(de->inode); - ext2_put_page(page); - } - return res; + struct folio *folio; + + de = ext2_find_entry(dir, child, &folio); + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le32_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } -static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len) +static int ext2_prepare_chunk(struct folio *folio, loff_t pos, unsigned len) { - return __block_write_begin(page, pos, len, ext2_get_block); + return __block_write_begin(folio, pos, len, ext2_get_block); +} + +static int ext2_handle_dirsync(struct inode *dir) +{ + int err; + + err = filemap_write_and_wait(dir->i_mapping); + if (!err) + err = sync_inode_metadata(dir, 1); + return err; } -/* Releases the page */ -void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, struct inode *inode, int update_times) +int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, + struct folio *folio, struct inode *inode, bool update_times) { - loff_t pos = page_offset(page) + - (char *) de - (char *) page_address(page); + loff_t pos = folio_pos(folio) + offset_in_folio(folio, de); unsigned len = ext2_rec_len_from_disk(de->rec_len); int err; - lock_page(page); - err = ext2_prepare_chunk(page, pos, len); - BUG_ON(err); + folio_lock(folio); + err = ext2_prepare_chunk(folio, pos, len); + if (err) { + folio_unlock(folio); + return err; + } de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type(de, inode); - err = ext2_commit_chunk(page, pos, len); - ext2_put_page(page); + ext2_commit_chunk(folio, pos, len); if (update_times) - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); + return ext2_handle_dirsync(dir); } /* @@ -486,37 +475,34 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, */ int ext2_add_link (struct dentry *dentry, struct inode *inode) { - struct inode *dir = dentry->d_parent->d_inode; + struct inode *dir = d_inode(dentry->d_parent); const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; unsigned chunk_size = ext2_chunk_size(dir); unsigned reclen = EXT2_DIR_REC_LEN(namelen); unsigned short rec_len, name_len; - struct page *page = NULL; + struct folio *folio = NULL; ext2_dirent * de; unsigned long npages = dir_pages(dir); unsigned long n; - char *kaddr; loff_t pos; int err; /* * We take care of directory expansion in the same loop. - * This code plays outside i_size, so it locks the page + * This code plays outside i_size, so it locks the folio * to protect that region. */ for (n = 0; n <= npages; n++) { + char *kaddr = ext2_get_folio(dir, n, 0, &folio); char *dir_end; - page = ext2_get_page(dir, n, 0); - err = PTR_ERR(page); - if (IS_ERR(page)) - goto out; - lock_page(page); - kaddr = page_address(page); + if (IS_ERR(kaddr)) + return PTR_ERR(kaddr); + folio_lock(folio); dir_end = kaddr + ext2_last_byte(dir, n); de = (ext2_dirent *)kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; + kaddr += folio_size(folio) - reclen; while ((char *)de <= kaddr) { if ((char *)de == dir_end) { /* We hit i_size */ @@ -543,16 +529,15 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) goto got_it; de = (ext2_dirent *) ((char *) de + rec_len); } - unlock_page(page); - ext2_put_page(page); + folio_unlock(folio); + folio_release_kmap(folio, kaddr); } BUG(); return -EINVAL; got_it: - pos = page_offset(page) + - (char*)de - (char*)page_address(page); - err = ext2_prepare_chunk(page, pos, rec_len); + pos = folio_pos(folio) + offset_in_folio(folio, de); + err = ext2_prepare_chunk(folio, pos, rec_len); if (err) goto out_unlock; if (de->inode) { @@ -565,62 +550,65 @@ got_it: memcpy(de->name, name, namelen); de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type (de, inode); - err = ext2_commit_chunk(page, pos, rec_len); - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + ext2_commit_chunk(folio, pos, rec_len); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); + err = ext2_handle_dirsync(dir); /* OFFSET_CACHE */ out_put: - ext2_put_page(page); -out: + folio_release_kmap(folio, de); return err; out_unlock: - unlock_page(page); + folio_unlock(folio); goto out_put; } /* * ext2_delete_entry deletes a directory entry by merging it with the - * previous entry. Page is up-to-date. Releases the page. + * previous entry. Page is up-to-date. */ -int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) +int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct folio *folio) { - struct inode *inode = page->mapping->host; - char *kaddr = page_address(page); - unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); - unsigned to = ((char *)dir - kaddr) + - ext2_rec_len_from_disk(dir->rec_len); + struct inode *inode = folio->mapping->host; + size_t from, to; + char *kaddr; loff_t pos; - ext2_dirent * pde = NULL; - ext2_dirent * de = (ext2_dirent *) (kaddr + from); + ext2_dirent *de, *pde = NULL; int err; + from = offset_in_folio(folio, dir); + to = from + ext2_rec_len_from_disk(dir->rec_len); + kaddr = (char *)dir - from; + from &= ~(ext2_chunk_size(inode)-1); + de = (ext2_dirent *)(kaddr + from); + while ((char*)de < (char*)dir) { if (de->rec_len == 0) { ext2_error(inode->i_sb, __func__, "zero-length directory entry"); - err = -EIO; - goto out; + return -EIO; } pde = de; de = ext2_next_entry(de); } if (pde) - from = (char*)pde - (char*)page_address(page); - pos = page_offset(page) + from; - lock_page(page); - err = ext2_prepare_chunk(page, pos, to - from); - BUG_ON(err); + from = offset_in_folio(folio, pde); + pos = folio_pos(folio) + from; + folio_lock(folio); + err = ext2_prepare_chunk(folio, pos, to - from); + if (err) { + folio_unlock(folio); + return err; + } if (pde) pde->rec_len = ext2_rec_len_to_disk(to - from); dir->inode = 0; - err = ext2_commit_chunk(page, pos, to - from); - inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + ext2_commit_chunk(folio, pos, to - from); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(inode); -out: - ext2_put_page(page); - return err; + return ext2_handle_dirsync(inode); } /* @@ -628,21 +616,21 @@ out: */ int ext2_make_empty(struct inode *inode, struct inode *parent) { - struct page *page = grab_cache_page(inode->i_mapping, 0); + struct folio *folio = filemap_grab_folio(inode->i_mapping, 0); unsigned chunk_size = ext2_chunk_size(inode); struct ext2_dir_entry_2 * de; int err; void *kaddr; - if (!page) - return -ENOMEM; + if (IS_ERR(folio)) + return PTR_ERR(folio); - err = ext2_prepare_chunk(page, 0, chunk_size); + err = ext2_prepare_chunk(folio, 0, chunk_size); if (err) { - unlock_page(page); + folio_unlock(folio); goto fail; } - kaddr = kmap_atomic(page); + kaddr = kmap_local_folio(folio, 0); memset(kaddr, 0, chunk_size); de = (struct ext2_dir_entry_2 *)kaddr; de->name_len = 1; @@ -657,33 +645,30 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) de->inode = cpu_to_le32(parent->i_ino); memcpy (de->name, "..\0", 4); ext2_set_de_type (de, inode); - kunmap_atomic(kaddr); - err = ext2_commit_chunk(page, 0, chunk_size); + kunmap_local(kaddr); + ext2_commit_chunk(folio, 0, chunk_size); + err = ext2_handle_dirsync(inode); fail: - page_cache_release(page); + folio_put(folio); return err; } /* * routine to check that the specified directory is empty (for rmdir) */ -int ext2_empty_dir (struct inode * inode) +int ext2_empty_dir(struct inode *inode) { - struct page *page = NULL; + struct folio *folio; + char *kaddr; unsigned long i, npages = dir_pages(inode); - int dir_has_error = 0; for (i = 0; i < npages; i++) { - char *kaddr; - ext2_dirent * de; - page = ext2_get_page(inode, i, dir_has_error); + ext2_dirent *de; - if (IS_ERR(page)) { - dir_has_error = 1; - continue; - } + kaddr = ext2_get_folio(inode, i, 0, &folio); + if (IS_ERR(kaddr)) + return 0; - kaddr = page_address(page); de = (ext2_dirent *)kaddr; kaddr += ext2_last_byte(inode, i) - EXT2_DIR_REC_LEN(1); @@ -709,19 +694,41 @@ int ext2_empty_dir (struct inode * inode) } de = ext2_next_entry(de); } - ext2_put_page(page); + folio_release_kmap(folio, kaddr); } return 1; not_empty: - ext2_put_page(page); + folio_release_kmap(folio, kaddr); return 0; } +static int ext2_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int ext2_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t ext2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + const struct file_operations ext2_dir_operations = { - .llseek = generic_file_llseek, + .open = ext2_dir_open, + .release = ext2_dir_release, + .llseek = ext2_dir_llseek, .read = generic_read_dir, - .iterate = ext2_readdir, + .iterate_shared = ext2_readdir, .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index d9a17d0b124d..cf97b76e9fd3 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -15,6 +16,8 @@ #include <linux/blockgroup_lock.h> #include <linux/percpu_counter.h> #include <linux/rbtree.h> +#include <linux/mm.h> +#include <linux/highmem.h> /* XXX Here for now... not interested in restructing headers JUST now */ @@ -51,8 +54,8 @@ struct ext2_block_alloc_info { /* * Was i_next_alloc_goal in ext2_inode_info * is the *physical* companion to i_next_alloc_block. - * it the the physical block number of the block which was most-recentl - * allocated to this file. This give us the goal (target) for the next + * it is the physical block number of the block which was most-recently + * allocated to this file. This gives us the goal (target) for the next * allocation when we detect linearly ascending requests. */ ext2_fsblk_t last_alloc_physical_block; @@ -61,14 +64,13 @@ struct ext2_block_alloc_info { #define rsv_start rsv_window._rsv_start #define rsv_end rsv_window._rsv_end +struct mb_cache; + /* * second extended-fs super-block data in memory */ struct ext2_sb_info { - unsigned long s_frag_size; /* Size of a fragment in bytes */ - unsigned long s_frags_per_block;/* Number of fragments per block */ unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_frags_per_group;/* Number of fragments in a group */ unsigned long s_blocks_per_group;/* Number of blocks in a group */ unsigned long s_inodes_per_group;/* Number of inodes in a group */ unsigned long s_itb_per_group; /* Number of inode table blocks per group */ @@ -111,6 +113,9 @@ struct ext2_sb_info { * of the mount options. */ spinlock_t s_lock; + struct mb_cache *s_ea_block_cache; + struct dax_device *s_daxdev; + u64 s_dax_part_off; }; static inline spinlock_t * @@ -170,8 +175,9 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb) * Macro-instructions used to manage several block sizes */ #define EXT2_MIN_BLOCK_SIZE 1024 -#define EXT2_MAX_BLOCK_SIZE 4096 +#define EXT2_MAX_BLOCK_SIZE 65536 #define EXT2_MIN_BLOCK_LOG_SIZE 10 +#define EXT2_MAX_BLOCK_LOG_SIZE 16 #define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize) #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) #define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) @@ -180,15 +186,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb) #define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino) /* - * Macro-instructions used to manage fragments - */ -#define EXT2_MIN_FRAG_SIZE 1024 -#define EXT2_MAX_FRAG_SIZE 4096 -#define EXT2_MIN_FRAG_LOG_SIZE 10 -#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size) -#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block) - -/* * Structure of a blocks group descriptor */ struct ext2_group_desc @@ -276,8 +273,6 @@ static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags) /* * ioctl commands */ -#define EXT2_IOC_GETFLAGS FS_IOC_GETFLAGS -#define EXT2_IOC_SETFLAGS FS_IOC_SETFLAGS #define EXT2_IOC_GETVERSION FS_IOC_GETVERSION #define EXT2_IOC_SETVERSION FS_IOC_SETVERSION #define EXT2_IOC_GETRSVSZ _IOR('f', 5, long) @@ -286,8 +281,6 @@ static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags) /* * ioctl commands in 32 bit emulation */ -#define EXT2_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define EXT2_IOC32_SETFLAGS FS_IOC32_SETFLAGS #define EXT2_IOC32_GETVERSION FS_IOC32_GETVERSION #define EXT2_IOC32_SETVERSION FS_IOC32_SETVERSION @@ -364,26 +357,28 @@ struct ext2_inode { */ #define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ #define EXT2_ERROR_FS 0x0002 /* Errors detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ /* * Mount flags */ -#define EXT2_MOUNT_CHECK 0x000001 /* Do mount-time checks */ #define EXT2_MOUNT_OLDALLOC 0x000002 /* Don't use the new Orlov allocator */ #define EXT2_MOUNT_GRPID 0x000004 /* Create files with directory's group */ #define EXT2_MOUNT_DEBUG 0x000008 /* Some debugging messages */ #define EXT2_MOUNT_ERRORS_CONT 0x000010 /* Continue on errors */ #define EXT2_MOUNT_ERRORS_RO 0x000020 /* Remount fs ro on errors */ #define EXT2_MOUNT_ERRORS_PANIC 0x000040 /* Panic on errors */ +#define EXT2_MOUNT_ERRORS_MASK 0x000070 #define EXT2_MOUNT_MINIX_DF 0x000080 /* Mimics the Minix statfs */ #define EXT2_MOUNT_NOBH 0x000100 /* No buffer_heads */ #define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */ #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ -#define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ +#define EXT2_MOUNT_XIP 0x010000 /* Obsolete, use DAX */ #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ #define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */ +#define EXT2_MOUNT_DAX 0x100000 /* Direct Access */ #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt @@ -405,6 +400,12 @@ struct ext2_inode { #define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE /* + * Allocation flags + */ +#define EXT2_ALLOC_NORESERVE 0x1 /* Do not use reservation + * window for allocation */ + +/* * Structure of the super block */ struct ext2_super_block { @@ -597,22 +598,6 @@ struct ext2_dir_entry_2 { }; /* - * Ext2 directory file types. Only the low 3 bits are used. The - * other bits are reserved for now. - */ -enum { - EXT2_FT_UNKNOWN = 0, - EXT2_FT_REG_FILE = 1, - EXT2_FT_DIR = 2, - EXT2_FT_CHRDEV = 3, - EXT2_FT_BLKDEV = 4, - EXT2_FT_FIFO = 5, - EXT2_FT_SOCK = 6, - EXT2_FT_SYMLINK = 7, - EXT2_FT_MAX -}; - -/* * EXT2_DIR_PAD defines the directory entries boundaries * * NOTE: It must be a multiple of 4 @@ -689,6 +674,9 @@ struct ext2_inode_info { struct mutex truncate_mutex; struct inode vfs_inode; struct list_head i_orphan; /* unlinked but open inodes */ +#ifdef CONFIG_QUOTA + struct dquot __rcu *i_dquot[MAXQUOTAS]; +#endif }; /* @@ -714,14 +702,13 @@ static inline struct ext2_inode_info *EXT2_I(struct inode *inode) /* balloc.c */ extern int ext2_bg_has_super(struct super_block *sb, int group); extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group); -extern ext2_fsblk_t ext2_new_block(struct inode *, unsigned long, int *); -extern ext2_fsblk_t ext2_new_blocks(struct inode *, unsigned long, - unsigned long *, int *); -extern void ext2_free_blocks (struct inode *, unsigned long, - unsigned long); +extern ext2_fsblk_t ext2_new_blocks(struct inode *, ext2_fsblk_t, + unsigned long *, int *, unsigned int); +extern int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, + unsigned int count); +extern void ext2_free_blocks(struct inode *, ext2_fsblk_t, unsigned long); extern unsigned long ext2_count_free_blocks (struct super_block *); extern unsigned long ext2_count_dirs (struct super_block *); -extern void ext2_check_blocks_bitmap (struct super_block *); extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh); @@ -731,34 +718,41 @@ extern void ext2_init_block_alloc_info(struct inode *); extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_window_node *rsv); /* dir.c */ -extern int ext2_add_link (struct dentry *, struct inode *); -extern ino_t ext2_inode_by_name(struct inode *, struct qstr *); -extern int ext2_make_empty(struct inode *, struct inode *); -extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **); -extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); -extern int ext2_empty_dir (struct inode *); -extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); -extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int); +int ext2_add_link(struct dentry *, struct inode *); +int ext2_inode_by_name(struct inode *dir, + const struct qstr *child, ino_t *ino); +int ext2_make_empty(struct inode *, struct inode *); +struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *, + struct folio **foliop); +int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct folio *folio); +int ext2_empty_dir(struct inode *); +struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct folio **foliop); +int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, + struct folio *folio, struct inode *inode, bool update_times); /* ialloc.c */ extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *); extern void ext2_free_inode (struct inode *); extern unsigned long ext2_count_free_inodes (struct super_block *); -extern void ext2_check_inodes_bitmap (struct super_block *); extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ extern struct inode *ext2_iget (struct super_block *, unsigned long); extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_evict_inode(struct inode *); +void ext2_write_failed(struct address_space *mapping, loff_t to); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern int ext2_setattr (struct dentry *, struct iattr *); +extern int ext2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *); +extern int ext2_getattr (struct mnt_idmap *, const struct path *, + struct kstat *, u32, unsigned int); extern void ext2_set_inode_flags(struct inode *inode); -extern void ext2_get_inode_flags(struct ext2_inode_info *); extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); /* ioctl.c */ +extern int ext2_fileattr_get(struct dentry *dentry, struct file_kattr *fa); +extern int ext2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa); extern long ext2_ioctl(struct file *, unsigned int, unsigned long); extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); @@ -771,7 +765,8 @@ void ext2_error(struct super_block *, const char *, const char *, ...); extern __printf(3, 4) void ext2_msg(struct super_block *, const char *, const char *, ...); extern void ext2_update_dynamic_rev (struct super_block *sb); -extern void ext2_write_super (struct super_block *); +extern void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait); /* * Inodes and files operations @@ -785,12 +780,11 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync); extern const struct inode_operations ext2_file_inode_operations; extern const struct file_operations ext2_file_operations; -extern const struct file_operations ext2_xip_file_operations; /* inode.c */ +extern void ext2_set_file_ops(struct inode *inode); extern const struct address_space_operations ext2_aops; -extern const struct address_space_operations ext2_aops_xip; -extern const struct address_space_operations ext2_nobh_aops; +extern const struct iomap_ops ext2_iomap_ops; /* namei.c */ extern const struct inode_operations ext2_dir_inode_operations; @@ -807,6 +801,18 @@ ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); } +static inline ext2_fsblk_t +ext2_group_last_block_no(struct super_block *sb, unsigned long group_no) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + + if (group_no == sbi->s_groups_count - 1) + return le32_to_cpu(sbi->s_es->s_blocks_count) - 1; + else + return ext2_group_first_block_no(sb, group_no) + + EXT2_BLOCKS_PER_GROUP(sb) - 1; +} + #define ext2_set_bit __test_and_set_bit_le #define ext2_clear_bit __test_and_clear_bit_le #define ext2_test_bit test_bit_le diff --git a/fs/ext2/file.c b/fs/ext2/file.c index a5b3a5db3120..76bddce462fc 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/file.c * @@ -20,10 +21,121 @@ #include <linux/time.h> #include <linux/pagemap.h> +#include <linux/dax.h> #include <linux/quotaops.h> +#include <linux/iomap.h> +#include <linux/uio.h> +#include <linux/buffer_head.h> #include "ext2.h" #include "xattr.h" #include "acl.h" +#include "trace.h" + +#ifdef CONFIG_FS_DAX +static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct inode *inode = iocb->ki_filp->f_mapping->host; + ssize_t ret; + + if (!iov_iter_count(to)) + return 0; /* skip atime */ + + inode_lock_shared(inode); + ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops); + inode_unlock_shared(inode); + + file_accessed(iocb->ki_filp); + return ret; +} + +static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + ssize_t ret; + + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out_unlock; + ret = file_remove_privs(file); + if (ret) + goto out_unlock; + ret = file_update_time(file); + if (ret) + goto out_unlock; + + ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops); + if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { + i_size_write(inode, iocb->ki_pos); + mark_inode_dirty(inode); + } + +out_unlock: + inode_unlock(inode); + if (ret > 0) + ret = generic_write_sync(iocb, ret); + return ret; +} + +/* + * The lock ordering for ext2 DAX fault paths is: + * + * mmap_lock (MM) + * sb_start_pagefault (vfs, freeze) + * address_space->invalidate_lock + * address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX) + * ext2_inode_info->truncate_mutex + * + * The default page_lock and i_size verification done by non-DAX fault paths + * is sufficient because ext2 doesn't support hole punching. + */ +static vm_fault_t ext2_dax_fault(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + vm_fault_t ret; + bool write = (vmf->flags & FAULT_FLAG_WRITE) && + (vmf->vma->vm_flags & VM_SHARED); + + if (write) { + sb_start_pagefault(inode->i_sb); + file_update_time(vmf->vma->vm_file); + } + filemap_invalidate_lock_shared(inode->i_mapping); + + ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops); + + filemap_invalidate_unlock_shared(inode->i_mapping); + if (write) + sb_end_pagefault(inode->i_sb); + return ret; +} + +static const struct vm_operations_struct ext2_dax_vm_ops = { + .fault = ext2_dax_fault, + /* + * .huge_fault is not supported for DAX because allocation in ext2 + * cannot be reliably aligned to huge page sizes and so pmd faults + * will always fail and fail back to regular faults. + */ + .page_mkwrite = ext2_dax_fault, + .pfn_mkwrite = ext2_dax_fault, +}; + +static int ext2_file_mmap_prepare(struct vm_area_desc *desc) +{ + struct file *file = desc->file; + + if (!IS_DAX(file_inode(file))) + return generic_file_mmap_prepare(desc); + + file_accessed(file); + desc->vm_ops = &ext2_dax_vm_ops; + return 0; +} +#else +#define ext2_file_mmap_prepare generic_file_mmap_prepare +#endif /* * Called when filp is released. This happens when all file descriptors @@ -44,64 +156,184 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int ret; struct super_block *sb = file->f_mapping->host->i_sb; - struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; - ret = generic_file_fsync(file, start, end, datasync); - if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) { + ret = generic_buffers_fsync(file, start, end, datasync); + if (ret == -EIO) /* We don't really know where the IO error happened... */ ext2_error(sb, __func__, "detected IO error when writing metadata buffers"); - ret = -EIO; + return ret; +} + +static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + ssize_t ret; + + trace_ext2_dio_read_begin(iocb, to, 0); + inode_lock_shared(inode); + ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0); + inode_unlock_shared(inode); + trace_ext2_dio_read_end(iocb, to, ret); + + return ret; +} + +static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + loff_t pos = iocb->ki_pos; + struct inode *inode = file_inode(iocb->ki_filp); + + if (error) + goto out; + + /* + * If we are extending the file, we have to update i_size here before + * page cache gets invalidated in iomap_dio_rw(). This prevents racing + * buffered reads from zeroing out too much from page cache pages. + * Note that all extending writes always happens synchronously with + * inode lock held by ext2_dio_write_iter(). So it is safe to update + * inode size here for extending file writes. + */ + pos += size; + if (pos > i_size_read(inode)) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } +out: + trace_ext2_dio_write_endio(iocb, size, error); + return error; +} + +static const struct iomap_dio_ops ext2_dio_write_ops = { + .end_io = ext2_dio_write_end_io, +}; + +static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + ssize_t ret; + unsigned int flags = 0; + unsigned long blocksize = inode->i_sb->s_blocksize; + loff_t offset = iocb->ki_pos; + loff_t count = iov_iter_count(from); + ssize_t status = 0; + + trace_ext2_dio_write_begin(iocb, from, 0); + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out_unlock; + + ret = kiocb_modified(iocb); + if (ret) + goto out_unlock; + + /* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */ + if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) || + (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize))) + flags |= IOMAP_DIO_FORCE_WAIT; + + ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops, + flags, NULL, 0); + + /* ENOTBLK is magic return value for fallback to buffered-io */ + if (ret == -ENOTBLK) + ret = 0; + + if (ret < 0 && ret != -EIOCBQUEUED) + ext2_write_failed(inode->i_mapping, offset + count); + + /* handle case for partial write and for fallback to buffered write */ + if (ret >= 0 && iov_iter_count(from)) { + loff_t pos, endbyte; + int ret2; + + iocb->ki_flags &= ~IOCB_DIRECT; + pos = iocb->ki_pos; + status = generic_perform_write(iocb, from); + if (unlikely(status < 0)) { + ret = status; + goto out_unlock; + } + + ret += status; + endbyte = pos + status - 1; + ret2 = filemap_write_and_wait_range(inode->i_mapping, pos, + endbyte); + if (!ret2) + invalidate_mapping_pages(inode->i_mapping, + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); + if (ret > 0) + generic_write_sync(iocb, ret); } + +out_unlock: + inode_unlock(inode); + if (status) + trace_ext2_dio_write_buff_end(iocb, from, status); + trace_ext2_dio_write_end(iocb, from, ret); return ret; } -/* - * We have mostly NULL's here: the current defaults are ok for - * the ext2 filesystem. - */ -const struct file_operations ext2_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, - .unlocked_ioctl = ext2_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext2_compat_ioctl, +static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ +#ifdef CONFIG_FS_DAX + if (IS_DAX(iocb->ki_filp->f_mapping->host)) + return ext2_dax_read_iter(iocb, to); #endif - .mmap = generic_file_mmap, - .open = dquot_file_open, - .release = ext2_release_file, - .fsync = ext2_fsync, - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, -}; + if (iocb->ki_flags & IOCB_DIRECT) + return ext2_dio_read_iter(iocb, to); + + return generic_file_read_iter(iocb, to); +} -#ifdef CONFIG_EXT2_FS_XIP -const struct file_operations ext2_xip_file_operations = { +static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ +#ifdef CONFIG_FS_DAX + if (IS_DAX(iocb->ki_filp->f_mapping->host)) + return ext2_dax_write_iter(iocb, from); +#endif + if (iocb->ki_flags & IOCB_DIRECT) + return ext2_dio_write_iter(iocb, from); + + return generic_file_write_iter(iocb, from); +} + +static int ext2_file_open(struct inode *inode, struct file *filp) +{ + filp->f_mode |= FMODE_CAN_ODIRECT; + return dquot_file_open(inode, filp); +} + +const struct file_operations ext2_file_operations = { .llseek = generic_file_llseek, - .read = xip_file_read, - .write = xip_file_write, + .read_iter = ext2_file_read_iter, + .write_iter = ext2_file_write_iter, .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif - .mmap = xip_file_mmap, - .open = dquot_file_open, + .mmap_prepare = ext2_file_mmap_prepare, + .open = ext2_file_open, .release = ext2_release_file, .fsync = ext2_fsync, + .get_unmapped_area = thp_get_unmapped_area, + .splice_read = filemap_splice_read, + .splice_write = iter_file_splice_write, }; -#endif const struct inode_operations ext2_file_inode_operations = { -#ifdef CONFIG_EXT2_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, .listxattr = ext2_listxattr, - .removexattr = generic_removexattr, -#endif + .getattr = ext2_getattr, .setattr = ext2_setattr, - .get_acl = ext2_get_acl, + .get_inode_acl = ext2_get_acl, + .set_acl = ext2_set_acl, .fiemap = ext2_fiemap, + .fileattr_get = ext2_fileattr_get, + .fileattr_set = ext2_fileattr_set, }; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 7cadd823bb31..fdf63e9c6e7c 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/ialloc.c * @@ -79,6 +80,7 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir) if (dir) le16_add_cpu(&desc->bg_used_dirs_count, -1); spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); + percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter); if (dir) percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); mark_buffer_dirty(bh); @@ -144,7 +146,7 @@ void ext2_free_inode (struct inode * inode) else ext2_release_inode(sb, block_group, is_directory); mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); brelse(bitmap_bh); @@ -168,13 +170,6 @@ static void ext2_preread_inode(struct inode *inode) unsigned long offset; unsigned long block; struct ext2_group_desc * gdp; - struct backing_dev_info *bdi; - - bdi = inode->i_mapping->backing_dev_info; - if (bdi_read_congested(bdi)) - return; - if (bdi_write_congested(bdi)) - return; block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL); @@ -221,8 +216,6 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) best_desc = desc; } } - if (!best_desc) - return -1; return best_group; } @@ -278,14 +271,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) avefreeb = free_blocks / ngroups; ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); - if ((parent == sb->s_root->d_inode) || + if ((parent == d_inode(sb->s_root)) || (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) { - struct ext2_group_desc *best_desc = NULL; int best_ndir = inodes_per_group; int best_group = -1; - get_random_bytes(&group, sizeof(group)); - parent_group = (unsigned)group % ngroups; + parent_group = get_random_u32_below(ngroups); for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; desc = ext2_get_group_desc (sb, group, NULL); @@ -299,10 +290,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) continue; best_group = group; best_ndir = le16_to_cpu(desc->bg_used_dirs_count); - best_desc = desc; } if (best_group >= 0) { - desc = best_desc; group = best_group; goto found; } @@ -465,6 +454,11 @@ struct inode *ext2_new_inode(struct inode *dir, umode_t mode, for (i = 0; i < sbi->s_groups_count; i++) { gdp = ext2_get_group_desc(sb, group, &bh2); + if (!gdp) { + if (++group == sbi->s_groups_count) + group = 0; + continue; + } brelse(bitmap_bh); bitmap_bh = read_inode_bitmap(sb, group); if (!bitmap_bh) { @@ -507,11 +501,12 @@ repeat_in_this_group: /* * Scanned all blockgroups. */ + brelse(bitmap_bh); err = -ENOSPC; goto fail; got: mark_buffer_dirty(bitmap_bh); - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); brelse(bitmap_bh); @@ -525,7 +520,7 @@ got: goto fail; } - percpu_counter_add(&sbi->s_freeinodes_counter, -1); + percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) percpu_counter_inc(&sbi->s_dirs_counter); @@ -547,11 +542,11 @@ got: inode->i_uid = current_fsuid(); inode->i_gid = dir->i_gid; } else - inode_init_owner(inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + simple_inode_init_ts(inode); memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_flags = ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED); @@ -577,7 +572,10 @@ got: goto fail; } - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) + goto fail_drop; + err = dquot_alloc_inode(inode); if (err) goto fail_drop; @@ -602,8 +600,7 @@ fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return ERR_PTR(err); fail: diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 0a87bb10998d..dbfe9098a124 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/inode.c * @@ -25,16 +26,18 @@ #include <linux/time.h> #include <linux/highuid.h> #include <linux/pagemap.h> +#include <linux/dax.h> +#include <linux/blkdev.h> #include <linux/quotaops.h> #include <linux/writeback.h> #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/fiemap.h> +#include <linux/iomap.h> #include <linux/namei.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "ext2.h" #include "acl.h" -#include "xip.h" #include "xattr.h" static int __ext2_write_inode(struct inode *inode, int do_sync); @@ -53,12 +56,12 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) static void ext2_truncate_blocks(struct inode *inode, loff_t offset); -static void ext2_write_failed(struct address_space *mapping, loff_t to) +void ext2_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); ext2_truncate_blocks(inode, inode->i_size); } } @@ -78,12 +81,12 @@ void ext2_evict_inode(struct inode * inode) dquot_drop(inode); } - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); if (want_delete) { sb_start_intwrite(inode->i_sb); /* set dtime */ - EXT2_I(inode)->i_dtime = get_seconds(); + EXT2_I(inode)->i_dtime = ktime_get_real_seconds(); mark_inode_dirty(inode); __ext2_write_inode(inode, inode_needs_sync(inode)); /* truncate to 0 */ @@ -352,8 +355,7 @@ static inline ext2_fsblk_t ext2_find_goal(struct inode *inode, long block, * @blks: number of data blocks to be mapped. * @blocks_to_boundary: the offset in the indirect block * - * return the total number of blocks to be allocate, including the - * direct and indirect blocks. + * return the number of direct blocks to allocate. */ static int ext2_blks_to_allocate(Indirect * branch, int k, unsigned long blks, @@ -383,14 +385,16 @@ ext2_blks_to_allocate(Indirect * branch, int k, unsigned long blks, } /** - * ext2_alloc_blocks: multiple allocate blocks needed for a branch - * @indirect_blks: the number of blocks need to allocate for indirect - * blocks + * ext2_alloc_blocks: Allocate multiple blocks needed for a branch. + * @inode: Owner. + * @goal: Preferred place for allocation. + * @indirect_blks: The number of blocks needed to allocate for indirect blocks. + * @blks: The number of blocks need to allocate for direct blocks. + * @new_blocks: On return it will store the new block numbers for + * the indirect blocks(if needed) and the first direct block. + * @err: Error pointer. * - * @new_blocks: on return it will store the new block numbers for - * the indirect blocks(if needed) and the first direct block, - * @blks: on return it will store the total number of allocated - * direct blocks + * Return: Number of blocks allocated. */ static int ext2_alloc_blocks(struct inode *inode, ext2_fsblk_t goal, int indirect_blks, int blks, @@ -415,7 +419,7 @@ static int ext2_alloc_blocks(struct inode *inode, while (1) { count = target; /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext2_new_blocks(inode,goal,&count,err); + current_block = ext2_new_blocks(inode, goal, &count, err, 0); if (*err) goto failed_out; @@ -448,7 +452,9 @@ failed_out: /** * ext2_alloc_branch - allocate and set up a chain of blocks. * @inode: owner - * @num: depth of the chain (number of blocks to allocate) + * @indirect_blks: depth of the chain (number of blocks to allocate) + * @blks: number of allocated direct blocks + * @goal: preferred place for allocation * @offsets: offsets (in the blocks) to store the pointers to next. * @branch: place to store the chain in. * @@ -593,7 +599,7 @@ static void ext2_splice_branch(struct inode *inode, if (where->bh) mark_buffer_dirty_inode(where->bh, inode); - inode->i_ctime = CURRENT_TIME_SEC; + inode_set_ctime_current(inode); mark_inode_dirty(inode); } @@ -617,10 +623,10 @@ static void ext2_splice_branch(struct inode *inode, */ static int ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long maxblocks, - struct buffer_head *bh_result, + u32 *bno, bool *new, bool *boundary, int create) { - int err = -EIO; + int err; int offsets[4]; Indirect chain[4]; Indirect *partial; @@ -632,16 +638,17 @@ static int ext2_get_blocks(struct inode *inode, int count = 0; ext2_fsblk_t first_block = 0; + BUG_ON(maxblocks == 0); + depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) - return (err); + return -EIO; partial = ext2_get_branch(inode, depth, offsets, chain, &err); /* Simplest case - block found, no allocation needed */ if (!partial) { first_block = le32_to_cpu(chain[depth - 1].key); - clear_buffer_new(bh_result); /* What's this do? */ count++; /*map more blocks*/ while (count < maxblocks && count <= blocks_to_boundary) { @@ -656,6 +663,7 @@ static int ext2_get_blocks(struct inode *inode, */ err = -EAGAIN; count = 0; + partial = chain + depth - 1; break; } blk = le32_to_cpu(*(chain[depth-1].p + count)); @@ -694,11 +702,13 @@ static int ext2_get_blocks(struct inode *inode, if (!partial) { count++; mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - clear_buffer_new(bh_result); goto got_it; } + + if (err) { + mutex_unlock(&ei->truncate_mutex); + goto cleanup; + } } /* @@ -713,7 +723,7 @@ static int ext2_get_blocks(struct inode *inode, /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; /* - * Next look up the indirect map to count the totoal number of + * Next look up the indirect map to count the total number of * direct blocks to allocate for this branch. */ count = ext2_blks_to_allocate(partial, indirect_blks, @@ -729,25 +739,34 @@ static int ext2_get_blocks(struct inode *inode, goto cleanup; } - if (ext2_use_xip(inode->i_sb)) { + if (IS_DAX(inode)) { + /* + * We must unmap blocks before zeroing so that writeback cannot + * overwrite zeros with stale data from block device page cache. + */ + clean_bdev_aliases(inode->i_sb->s_bdev, + le32_to_cpu(chain[depth-1].key), + count); /* - * we need to clear the block + * block must be initialised before we put it in the tree + * so that it's not found by another thread before it's + * initialised */ - err = ext2_clear_xip_target (inode, - le32_to_cpu(chain[depth-1].key)); + err = sb_issue_zeroout(inode->i_sb, + le32_to_cpu(chain[depth-1].key), count, + GFP_KERNEL); if (err) { mutex_unlock(&ei->truncate_mutex); goto cleanup; } } + *new = true; ext2_splice_branch(inode, iblock, partial, indirect_blks, count); mutex_unlock(&ei->truncate_mutex); - set_buffer_new(bh_result); got_it: - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); if (count > blocks_to_boundary) - set_buffer_boundary(bh_result); + *boundary = true; err = count; /* Clean up and exit */ partial = chain + depth - 1; /* the whole chain */ @@ -756,90 +775,178 @@ cleanup: brelse(partial->bh); partial--; } + if (err > 0) + *bno = le32_to_cpu(chain[depth-1].key); return err; } -int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) +int ext2_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) { unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - int ret = ext2_get_blocks(inode, iblock, max_blocks, - bh_result, create); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } - return ret; + bool new = false, boundary = false; + u32 bno; + int ret; -} + ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary, + create); + if (ret <= 0) + return ret; -int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len) -{ - return generic_block_fiemap(inode, fieinfo, start, len, - ext2_get_block); -} + map_bh(bh_result, inode->i_sb, bno); + bh_result->b_size = (ret << inode->i_blkbits); + if (new) + set_buffer_new(bh_result); + if (boundary) + set_buffer_boundary(bh_result); + return 0; -static int ext2_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page, ext2_get_block, wbc); } -static int ext2_readpage(struct file *file, struct page *page) +static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned flags, struct iomap *iomap, struct iomap *srcmap) { - return mpage_readpage(page, ext2_get_block); + unsigned int blkbits = inode->i_blkbits; + unsigned long first_block = offset >> blkbits; + unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits; + struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb); + bool new = false, boundary = false; + u32 bno; + int ret; + bool create = flags & IOMAP_WRITE; + + /* + * For writes that could fill holes inside i_size on a + * DIO_SKIP_HOLES filesystem we forbid block creations: only + * overwrites are permitted. + */ + if ((flags & IOMAP_DIRECT) && + (first_block << blkbits) < i_size_read(inode)) + create = 0; + + /* + * Writes that span EOF might trigger an IO size update on completion, + * so consider them to be dirty for the purposes of O_DSYNC even if + * there is no other metadata changes pending or have been made here. + */ + if ((flags & IOMAP_WRITE) && offset + length > i_size_read(inode)) + iomap->flags |= IOMAP_F_DIRTY; + + ret = ext2_get_blocks(inode, first_block, max_blocks, + &bno, &new, &boundary, create); + if (ret < 0) + return ret; + + iomap->flags = 0; + iomap->offset = (u64)first_block << blkbits; + if (flags & IOMAP_DAX) + iomap->dax_dev = sbi->s_daxdev; + else + iomap->bdev = inode->i_sb->s_bdev; + + if (ret == 0) { + /* + * Switch to buffered-io for writing to holes in a non-extent + * based filesystem to avoid stale data exposure problem. + */ + if (!create && (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) + return -ENOTBLK; + iomap->type = IOMAP_HOLE; + iomap->addr = IOMAP_NULL_ADDR; + iomap->length = 1 << blkbits; + } else { + iomap->type = IOMAP_MAPPED; + iomap->addr = (u64)bno << blkbits; + if (flags & IOMAP_DAX) + iomap->addr += sbi->s_dax_part_off; + iomap->length = (u64)ret << blkbits; + iomap->flags |= IOMAP_F_MERGED; + } + + if (new) + iomap->flags |= IOMAP_F_NEW; + return 0; } static int -ext2_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, + ssize_t written, unsigned flags, struct iomap *iomap) { - return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); + /* + * Switch to buffered-io in case of any error. + * Blocks allocated can be used by the buffered-io path. + */ + if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE) && written == 0) + return -ENOTBLK; + + if (iomap->type == IOMAP_MAPPED && + written < length && + (flags & IOMAP_WRITE)) + ext2_write_failed(inode->i_mapping, offset + length); + return 0; } -static int -ext2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +const struct iomap_ops ext2_iomap_ops = { + .iomap_begin = ext2_iomap_begin, + .iomap_end = ext2_iomap_end, +}; + +int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) { int ret; + loff_t i_size; + + inode_lock(inode); + i_size = i_size_read(inode); + /* + * iomap_fiemap() returns EINVAL for 0 length. Make sure we don't trim + * length to 0 but still trim the range as much as possible since + * ext2_get_blocks() iterates unmapped space block by block which is + * slow. + */ + if (i_size == 0) + i_size = 1; + len = min_t(u64, len, i_size); + ret = iomap_fiemap(inode, fieinfo, start, len, &ext2_iomap_ops); + inode_unlock(inode); - ret = block_write_begin(mapping, pos, len, flags, pagep, - ext2_get_block); - if (ret < 0) - ext2_write_failed(mapping, pos + len); return ret; } -static int ext2_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) +static int ext2_read_folio(struct file *file, struct folio *folio) { - int ret; + return mpage_read_folio(folio, ext2_get_block); +} - ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (ret < len) - ext2_write_failed(mapping, pos + len); - return ret; +static void ext2_readahead(struct readahead_control *rac) +{ + mpage_readahead(rac, ext2_get_block); } static int -ext2_nobh_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +ext2_write_begin(const struct kiocb *iocb, struct address_space *mapping, + loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { int ret; - ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, - ext2_get_block); + ret = block_write_begin(mapping, pos, len, foliop, ext2_get_block); if (ret < 0) ext2_write_failed(mapping, pos + len); return ret; } -static int ext2_nobh_writepage(struct page *page, - struct writeback_control *wbc) +static int ext2_write_end(const struct kiocb *iocb, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct folio *folio, void *fsdata) { - return nobh_writepage(page, ext2_get_block, wbc); + int ret; + + ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); + if (ret < len) + ext2_write_failed(mapping, pos + len); + return ret; } static sector_t ext2_bmap(struct address_space *mapping, sector_t block) @@ -847,58 +954,37 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping,block,ext2_get_block); } -static ssize_t -ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +static int +ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - ssize_t ret; - - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - ext2_get_block); - if (ret < 0 && (rw & WRITE)) - ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); - return ret; + return mpage_writepages(mapping, wbc, ext2_get_block); } static int -ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) +ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc) { - return mpage_writepages(mapping, wbc, ext2_get_block); + struct ext2_sb_info *sbi = EXT2_SB(mapping->host->i_sb); + + return dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); } const struct address_space_operations ext2_aops = { - .readpage = ext2_readpage, - .readpages = ext2_readpages, - .writepage = ext2_writepage, + .dirty_folio = block_dirty_folio, + .invalidate_folio = block_invalidate_folio, + .read_folio = ext2_read_folio, + .readahead = ext2_readahead, .write_begin = ext2_write_begin, .write_end = ext2_write_end, .bmap = ext2_bmap, - .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, }; -const struct address_space_operations ext2_aops_xip = { - .bmap = ext2_bmap, - .get_xip_mem = ext2_get_xip_mem, -}; - -const struct address_space_operations ext2_nobh_aops = { - .readpage = ext2_readpage, - .readpages = ext2_readpages, - .writepage = ext2_nobh_writepage, - .write_begin = ext2_nobh_write_begin, - .write_end = nobh_write_end, - .bmap = ext2_bmap, - .direct_IO = ext2_direct_IO, - .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, - .error_remove_page = generic_error_remove_page, +static const struct address_space_operations ext2_dax_aops = { + .writepages = ext2_dax_writepages, + .dirty_folio = noop_dirty_folio, }; /* @@ -1009,8 +1095,8 @@ no_top: */ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) { - unsigned long block_to_free = 0, count = 0; - unsigned long nr; + ext2_fsblk_t block_to_free = 0, count = 0; + ext2_fsblk_t nr; for ( ; p < q ; p++) { nr = le32_to_cpu(*p); @@ -1050,7 +1136,7 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth) { struct buffer_head * bh; - unsigned long nr; + ext2_fsblk_t nr; if (depth--) { int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); @@ -1082,6 +1168,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de ext2_free_data(inode, p, q); } +/* mapping->invalidate_lock must be held when calling this function */ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) { __le32 *i_data = EXT2_I(inode)->i_data; @@ -1097,6 +1184,10 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) blocksize = inode->i_sb->s_blocksize; iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); +#ifdef CONFIG_FS_DAX + WARN_ON(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)); +#endif + n = ext2_block_to_path(inode, iblock, offsets, NULL); if (n == 0) return; @@ -1142,6 +1233,7 @@ do_indirects: mark_inode_dirty(inode); ext2_free_branches(inode, &nr, &nr+1, 1); } + fallthrough; case EXT2_IND_BLOCK: nr = i_data[EXT2_DIND_BLOCK]; if (nr) { @@ -1149,6 +1241,7 @@ do_indirects: mark_inode_dirty(inode); ext2_free_branches(inode, &nr, &nr+1, 2); } + fallthrough; case EXT2_DIND_BLOCK: nr = i_data[EXT2_TIND_BLOCK]; if (nr) { @@ -1156,6 +1249,7 @@ do_indirects: mark_inode_dirty(inode); ext2_free_branches(inode, &nr, &nr+1, 3); } + break; case EXT2_TIND_BLOCK: ; } @@ -1167,22 +1261,15 @@ do_indirects: static void ext2_truncate_blocks(struct inode *inode, loff_t offset) { - /* - * XXX: it seems like a bug here that we don't allow - * IS_APPEND inode to have blocks-past-i_size trimmed off. - * review and fix this. - * - * Also would be nice to be able to handle IO errors and such, - * but that's probably too much to ask. - */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; + + filemap_invalidate_lock(inode->i_mapping); __ext2_truncate_blocks(inode, offset); + filemap_invalidate_unlock(inode->i_mapping); } static int ext2_setsize(struct inode *inode, loff_t newsize) @@ -1199,21 +1286,21 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) inode_dio_wait(inode); - if (mapping_is_xip(inode->i_mapping)) - error = xip_truncate_page(inode->i_mapping, newsize); - else if (test_opt(inode->i_sb, NOBH)) - error = nobh_truncate_page(inode->i_mapping, - newsize, ext2_get_block); + if (IS_DAX(inode)) + error = dax_truncate_page(inode, newsize, NULL, + &ext2_iomap_ops); else error = block_truncate_page(inode->i_mapping, newsize, ext2_get_block); if (error) return error; + filemap_invalidate_lock(inode->i_mapping); truncate_setsize(inode, newsize); __ext2_truncate_blocks(inode, newsize); + filemap_invalidate_unlock(inode->i_mapping); - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (inode_needs_sync(inode)) { sync_mapping_buffers(inode->i_mapping); sync_inode_metadata(inode, 1); @@ -1271,7 +1358,8 @@ void ext2_set_inode_flags(struct inode *inode) { unsigned int flags = EXT2_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | + S_DIRSYNC | S_DAX); if (flags & EXT2_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT2_APPEND_FL) @@ -1282,31 +1370,24 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_NOATIME; if (flags & EXT2_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; + if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) + inode->i_flags |= S_DAX; } -/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ -void ext2_get_inode_flags(struct ext2_inode_info *ei) +void ext2_set_file_ops(struct inode *inode) { - unsigned int flags = ei->vfs_inode.i_flags; - - ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| - EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); - if (flags & S_SYNC) - ei->i_flags |= EXT2_SYNC_FL; - if (flags & S_APPEND) - ei->i_flags |= EXT2_APPEND_FL; - if (flags & S_IMMUTABLE) - ei->i_flags |= EXT2_IMMUTABLE_FL; - if (flags & S_NOATIME) - ei->i_flags |= EXT2_NOATIME_FL; - if (flags & S_DIRSYNC) - ei->i_flags |= EXT2_DIRSYNC_FL; + inode->i_op = &ext2_file_inode_operations; + inode->i_fop = &ext2_file_operations; + if (IS_DAX(inode)) + inode->i_mapping->a_ops = &ext2_dax_aops; + else + inode->i_mapping->a_ops = &ext2_aops; } struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; - struct buffer_head * bh; + struct buffer_head * bh = NULL; struct ext2_inode *raw_inode; struct inode *inode; long ret = -EIO; @@ -1317,7 +1398,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + if (!(inode_state_read_once(inode) & I_NEW)) return inode; ei = EXT2_I(inode); @@ -1340,10 +1421,9 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) i_gid_write(inode, i_gid); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; + inode_set_atime(inode, (signed)le32_to_cpu(raw_inode->i_atime), 0); + inode_set_ctime(inode, (signed)le32_to_cpu(raw_inode->i_ctime), 0); + inode_set_mtime(inode, (signed)le32_to_cpu(raw_inode->i_mtime), 0); ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. * This is needed because nfsd might try to access dead inodes @@ -1352,21 +1432,34 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) */ if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { /* this inode is deleted */ - brelse (bh); ret = -ESTALE; goto bad_inode; } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); + ext2_set_inode_flags(inode); ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); ei->i_frag_no = raw_inode->i_frag; ei->i_frag_size = raw_inode->i_fsize; ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); ei->i_dir_acl = 0; + + if (ei->i_file_acl && + !ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) { + ext2_error(sb, "ext2_iget", "bad extended attribute block %u", + ei->i_file_acl); + ret = -EFSCORRUPTED; + goto bad_inode; + } + if (S_ISREG(inode->i_mode)) inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; else ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + if (i_size_read(inode) < 0) { + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_dtime = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); ei->i_state = 0; @@ -1381,35 +1474,21 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) ei->i_data[n] = raw_inode->i_block[n]; if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - if (ext2_use_xip(inode->i_sb)) { - inode->i_mapping->a_ops = &ext2_aops_xip; - inode->i_fop = &ext2_xip_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISLNK(inode->i_mode)) { if (ext2_inode_is_fast_symlink(inode)) { + inode->i_link = (char *)ei->i_data; inode->i_op = &ext2_fast_symlink_inode_operations; nd_terminate_link(ei->i_data, inode->i_size, sizeof(ei->i_data) - 1); } else { inode->i_op = &ext2_symlink_inode_operations; - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode_nohighmem(inode); + inode->i_mapping->a_ops = &ext2_aops; } } else { inode->i_op = &ext2_special_inode_operations; @@ -1421,11 +1500,11 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } brelse (bh); - ext2_set_inode_flags(inode); unlock_new_inode(inode); return inode; bad_inode: + brelse(bh); iget_failed(inode); return ERR_PTR(ret); } @@ -1445,12 +1524,11 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) if (IS_ERR(raw_inode)) return -EIO; - /* For fields not not tracking in the in-memory inode, + /* For fields not tracking in the in-memory inode, * initialise them to zero for new inodes. */ if (ei->i_state & EXT2_STATE_NEW) memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); - ext2_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if (!(test_opt(sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); @@ -1474,9 +1552,9 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(inode->i_size); - raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + raw_inode->i_atime = cpu_to_le32(inode_get_atime_sec(inode)); + raw_inode->i_ctime = cpu_to_le32(inode_get_ctime_sec(inode)); + raw_inode->i_mtime = cpu_to_le32(inode_get_mtime_sec(inode)); raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); @@ -1502,7 +1580,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) EXT2_SET_RO_COMPAT_FEATURE(sb, EXT2_FEATURE_RO_COMPAT_LARGE_FILE); spin_unlock(&EXT2_SB(sb)->s_lock); - ext2_write_super(sb); + ext2_sync_super(sb, EXT2_SB(sb)->s_es, 1); } } } @@ -1540,20 +1618,50 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } -int ext2_setattr(struct dentry *dentry, struct iattr *iattr) +int ext2_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + struct ext2_inode_info *ei = EXT2_I(inode); + unsigned int flags; + + flags = ei->i_flags & EXT2_FL_USER_VISIBLE; + if (flags & EXT2_APPEND_FL) + stat->attributes |= STATX_ATTR_APPEND; + if (flags & EXT2_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (flags & EXT2_IMMUTABLE_FL) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (flags & EXT2_NODUMP_FL) + stat->attributes |= STATX_ATTR_NODUMP; + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); + + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); + return 0; +} + +int ext2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *iattr) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); int error; - error = inode_change_ok(inode, iattr); + error = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (error) return error; - if (is_quota_modification(inode, iattr)) - dquot_initialize(inode); - if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || - (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { - error = dquot_transfer(inode, iattr); + if (is_quota_modification(&nop_mnt_idmap, inode, iattr)) { + error = dquot_initialize(inode); + if (error) + return error; + } + if (i_uid_needs_update(&nop_mnt_idmap, iattr, inode) || + i_gid_needs_update(&nop_mnt_idmap, iattr, inode)) { + error = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (error) return error; } @@ -1562,9 +1670,9 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; } - setattr_copy(inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); if (iattr->ia_valid & ATTR_MODE) - error = ext2_acl_chmod(inode); + error = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); mark_inode_dirty(inode); return error; diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 5d46c09863f0..c3fea55b8efa 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/ioctl.c * @@ -14,85 +15,58 @@ #include <linux/compat.h> #include <linux/mount.h> #include <asm/current.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> +#include <linux/fileattr.h> - -long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +int ext2_fileattr_get(struct dentry *dentry, struct file_kattr *fa) { - struct inode *inode = file_inode(filp); - struct ext2_inode_info *ei = EXT2_I(inode); - unsigned int flags; - unsigned short rsv_window_size; - int ret; + struct ext2_inode_info *ei = EXT2_I(d_inode(dentry)); - ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); + fileattr_fill_flags(fa, ei->i_flags & EXT2_FL_USER_VISIBLE); - switch (cmd) { - case EXT2_IOC_GETFLAGS: - ext2_get_inode_flags(ei); - flags = ei->i_flags & EXT2_FL_USER_VISIBLE; - return put_user(flags, (int __user *) arg); - case EXT2_IOC_SETFLAGS: { - unsigned int oldflags; + return 0; +} - ret = mnt_want_write_file(filp); - if (ret) - return ret; +int ext2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa) +{ + struct inode *inode = d_inode(dentry); + struct ext2_inode_info *ei = EXT2_I(inode); - if (!inode_owner_or_capable(inode)) { - ret = -EACCES; - goto setflags_out; - } + if (fileattr_has_fsx(fa)) + return -EOPNOTSUPP; - if (get_user(flags, (int __user *) arg)) { - ret = -EFAULT; - goto setflags_out; - } + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) + return -EPERM; - flags = ext2_mask_flags(inode->i_mode, flags); + ei->i_flags = (ei->i_flags & ~EXT2_FL_USER_MODIFIABLE) | + (fa->flags & EXT2_FL_USER_MODIFIABLE); - mutex_lock(&inode->i_mutex); - /* Is it quota file? Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) { - mutex_unlock(&inode->i_mutex); - ret = -EPERM; - goto setflags_out; - } - oldflags = ei->i_flags; + ext2_set_inode_flags(inode); + inode_set_ctime_current(inode); + mark_inode_dirty(inode); - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - * - * This test looks nicer. Thanks to Pauline Middelink - */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - mutex_unlock(&inode->i_mutex); - ret = -EPERM; - goto setflags_out; - } - } + return 0; +} - flags = flags & EXT2_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; - ei->i_flags = flags; - ext2_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME_SEC; - mutex_unlock(&inode->i_mutex); +long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct ext2_inode_info *ei = EXT2_I(inode); + unsigned short rsv_window_size; + int ret; - mark_inode_dirty(inode); -setflags_out: - mnt_drop_write_file(filp); - return ret; - } + ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); + + switch (cmd) { case EXT2_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *) arg); case EXT2_IOC_SETVERSION: { __u32 generation; - if (!inode_owner_or_capable(inode)) + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EPERM; ret = mnt_want_write_file(filp); if (ret) @@ -102,10 +76,10 @@ setflags_out: goto setversion_out; } - mutex_lock(&inode->i_mutex); - inode->i_ctime = CURRENT_TIME_SEC; + inode_lock(inode); + inode_set_ctime_current(inode); inode->i_generation = generation; - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); mark_inode_dirty(inode); setversion_out: @@ -125,7 +99,7 @@ setversion_out: if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (!inode_owner_or_capable(inode)) + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EACCES; if (get_user(rsv_window_size, (int __user *)arg)) @@ -153,10 +127,13 @@ setversion_out: if (ei->i_block_alloc_info){ struct ext2_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; rsv->rsv_goal_size = rsv_window_size; + } else { + ret = -ENOMEM; } + mutex_unlock(&ei->truncate_mutex); mnt_drop_write_file(filp); - return 0; + return ret; } default: return -ENOTTY; @@ -168,12 +145,6 @@ long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { - case EXT2_IOC32_GETFLAGS: - cmd = EXT2_IOC_GETFLAGS; - break; - case EXT2_IOC32_SETFLAGS: - cmd = EXT2_IOC_SETFLAGS; - break; case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 256dd5f4c1c4..bde617a66cec 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/namei.c * @@ -35,19 +36,16 @@ #include "ext2.h" #include "xattr.h" #include "acl.h" -#include "xip.h" static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); if (!err) { - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -59,13 +57,17 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, uns { struct inode * inode; ino_t ino; + int res; if (dentry->d_name.len > EXT2_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = ext2_inode_by_name(dir, &dentry->d_name); - inode = NULL; - if (ino) { + res = ext2_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { inode = ext2_iget(dir->i_sb, ino); if (inode == ERR_PTR(-ESTALE)) { ext2_error(dir->i_sb, __func__, @@ -79,11 +81,14 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, uns struct dentry *ext2_get_parent(struct dentry *child) { - struct qstr dotdot = QSTR_INIT("..", 2); - unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot); - if (!ino) - return ERR_PTR(-ENOENT); - return d_obtain_alias(ext2_iget(child->d_inode->i_sb, ino)); + ino_t ino; + int res; + + res = ext2_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); + + return d_obtain_alias(ext2_iget(child->d_sb, ino)); } /* @@ -94,79 +99,63 @@ struct dentry *ext2_get_parent(struct dentry *child) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) +static int ext2_create (struct mnt_idmap * idmap, + struct inode * dir, struct dentry * dentry, + umode_t mode, bool excl) { struct inode *inode; + int err; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return err; inode = ext2_new_inode(dir, mode, &dentry->d_name); if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &ext2_file_inode_operations; - if (ext2_use_xip(inode->i_sb)) { - inode->i_mapping->a_ops = &ext2_aops_xip; - inode->i_fop = &ext2_xip_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); mark_inode_dirty(inode); return ext2_add_nondir(dentry, inode); } -static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +static int ext2_tmpfile(struct mnt_idmap *idmap, struct inode *dir, + struct file *file, umode_t mode) { struct inode *inode = ext2_new_inode(dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &ext2_file_inode_operations; - if (ext2_use_xip(inode->i_sb)) { - inode->i_mapping->a_ops = &ext2_aops_xip; - inode->i_fop = &ext2_xip_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); mark_inode_dirty(inode); - d_tmpfile(dentry, inode); + d_tmpfile(file, inode); unlock_new_inode(inode); - return 0; + return finish_open_simple(file, 0); } -static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) +static int ext2_mknod (struct mnt_idmap * idmap, struct inode * dir, + struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode * inode; int err; - if (!new_valid_dev(rdev)) - return -EINVAL; - - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return err; inode = ext2_new_inode (dir, mode, &dentry->d_name); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT2_FS_XATTR inode->i_op = &ext2_special_inode_operations; -#endif mark_inode_dirty(inode); err = ext2_add_nondir(dentry, inode); } return err; } -static int ext2_symlink (struct inode * dir, struct dentry * dentry, - const char * symname) +static int ext2_symlink (struct mnt_idmap * idmap, struct inode * dir, + struct dentry * dentry, const char * symname) { struct super_block * sb = dir->i_sb; int err = -ENAMETOOLONG; @@ -176,7 +165,9 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + goto out; inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO, &dentry->d_name); err = PTR_ERR(inode); @@ -186,17 +177,16 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry, if (l > sizeof (EXT2_I(inode)->i_data)) { /* slow symlink */ inode->i_op = &ext2_symlink_inode_operations; - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode_nohighmem(inode); + inode->i_mapping->a_ops = &ext2_aops; err = page_symlink(inode, symname, l); if (err) goto out_fail; } else { /* fast symlink */ inode->i_op = &ext2_fast_symlink_inode_operations; - memcpy((char*)(EXT2_I(inode)->i_data),symname,l); + inode->i_link = (char*)EXT2_I(inode)->i_data; + memcpy(inode->i_link, symname, l); inode->i_size = l-1; } mark_inode_dirty(inode); @@ -207,20 +197,21 @@ out: out_fail: inode_dec_link_count(inode); - unlock_new_inode(inode); - iput (inode); + discard_new_inode(inode); goto out; } static int ext2_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { - struct inode *inode = old_dentry->d_inode; + struct inode *inode = d_inode(old_dentry); int err; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return err; - inode->i_ctime = CURRENT_TIME_SEC; + inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); @@ -234,12 +225,16 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, return err; } -static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) +static struct dentry *ext2_mkdir(struct mnt_idmap * idmap, + struct inode * dir, struct dentry * dentry, + umode_t mode) { struct inode * inode; int err; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return ERR_PTR(err); inode_inc_link_count(dir); @@ -250,10 +245,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; inode_inc_link_count(inode); @@ -265,39 +257,42 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out: - return err; + return ERR_PTR(err); out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); out_dir: inode_dec_link_count(dir); goto out; } -static int ext2_unlink(struct inode * dir, struct dentry *dentry) +static int ext2_unlink(struct inode *dir, struct dentry *dentry) { - struct inode * inode = dentry->d_inode; - struct ext2_dir_entry_2 * de; - struct page * page; - int err = -ENOENT; + struct inode *inode = d_inode(dentry); + struct ext2_dir_entry_2 *de; + struct folio *folio; + int err; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + goto out; - de = ext2_find_entry (dir, &dentry->d_name, &page); - if (!de) + de = ext2_find_entry(dir, &dentry->d_name, &folio); + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } - err = ext2_delete_entry (de, page); + err = ext2_delete_entry(de, folio); + folio_release_kmap(folio, de); if (err) goto out; - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); err = 0; out: @@ -306,7 +301,7 @@ out: static int ext2_rmdir (struct inode * dir, struct dentry *dentry) { - struct inode * inode = dentry->d_inode; + struct inode * inode = d_inode(dentry); int err = -ENOTEMPTY; if (ext2_empty_dir(inode)) { @@ -320,53 +315,69 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry) return err; } -static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, - struct inode * new_dir, struct dentry * new_dentry ) +static int ext2_rename (struct mnt_idmap * idmap, + struct inode * old_dir, struct dentry * old_dentry, + struct inode * new_dir, struct dentry * new_dentry, + unsigned int flags) { - struct inode * old_inode = old_dentry->d_inode; - struct inode * new_inode = new_dentry->d_inode; - struct page * dir_page = NULL; + struct inode * old_inode = d_inode(old_dentry); + struct inode * new_inode = d_inode(new_dentry); + struct folio *dir_folio = NULL; struct ext2_dir_entry_2 * dir_de = NULL; - struct page * old_page; + struct folio * old_folio; struct ext2_dir_entry_2 * old_de; - int err = -ENOENT; + bool old_is_dir = S_ISDIR(old_inode->i_mode); + int err; - dquot_initialize(old_dir); - dquot_initialize(new_dir); + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; - old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page); - if (!old_de) - goto out; + err = dquot_initialize(old_dir); + if (err) + return err; - if (S_ISDIR(old_inode->i_mode)) { + err = dquot_initialize(new_dir); + if (err) + return err; + + old_de = ext2_find_entry(old_dir, &old_dentry->d_name, &old_folio); + if (IS_ERR(old_de)) + return PTR_ERR(old_de); + + if (old_is_dir && old_dir != new_dir) { err = -EIO; - dir_de = ext2_dotdot(old_inode, &dir_page); + dir_de = ext2_dotdot(old_inode, &dir_folio); if (!dir_de) goto out_old; } if (new_inode) { - struct page *new_page; + struct folio *new_folio; struct ext2_dir_entry_2 *new_de; err = -ENOTEMPTY; - if (dir_de && !ext2_empty_dir (new_inode)) + if (old_is_dir && !ext2_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); - if (!new_de) + new_de = ext2_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); + goto out_dir; + } + err = ext2_set_link(new_dir, new_de, new_folio, old_inode, true); + folio_release_kmap(new_folio, new_de); + if (err) goto out_dir; - ext2_set_link(new_dir, new_de, new_page, old_inode, 1); - new_inode->i_ctime = CURRENT_TIME_SEC; - if (dir_de) + inode_set_ctime_current(new_inode); + if (old_is_dir) drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { err = ext2_add_link(new_dentry, old_inode); if (err) goto out_dir; - if (dir_de) + if (old_is_dir) inode_inc_link_count(new_dir); } @@ -374,32 +385,22 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = CURRENT_TIME_SEC; + inode_set_ctime_current(old_inode); mark_inode_dirty(old_inode); - ext2_delete_entry (old_de, old_page); - - if (dir_de) { + err = ext2_delete_entry(old_de, old_folio); + if (!err && old_is_dir) { if (old_dir != new_dir) - ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); - else { - kunmap(dir_page); - page_cache_release(dir_page); - } + err = ext2_set_link(old_inode, dir_de, dir_folio, + new_dir, false); + inode_dec_link_count(old_dir); } - return 0; - - out_dir: - if (dir_de) { - kunmap(dir_page); - page_cache_release(dir_page); - } + if (dir_de) + folio_release_kmap(dir_folio, dir_de); out_old: - kunmap(old_page); - page_cache_release(old_page); -out: + folio_release_kmap(old_folio, old_de); return err; } @@ -413,24 +414,20 @@ const struct inode_operations ext2_dir_inode_operations = { .rmdir = ext2_rmdir, .mknod = ext2_mknod, .rename = ext2_rename, -#ifdef CONFIG_EXT2_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, .listxattr = ext2_listxattr, - .removexattr = generic_removexattr, -#endif + .getattr = ext2_getattr, .setattr = ext2_setattr, - .get_acl = ext2_get_acl, + .get_inode_acl = ext2_get_acl, + .set_acl = ext2_set_acl, .tmpfile = ext2_tmpfile, + .fileattr_get = ext2_fileattr_get, + .fileattr_set = ext2_fileattr_set, }; const struct inode_operations ext2_special_inode_operations = { -#ifdef CONFIG_EXT2_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, .listxattr = ext2_listxattr, - .removexattr = generic_removexattr, -#endif + .getattr = ext2_getattr, .setattr = ext2_setattr, - .get_acl = ext2_get_acl, + .get_inode_acl = ext2_get_acl, + .set_acl = ext2_set_acl, }; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 288534920fe5..121e634c792a 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/ext2/super.c * @@ -22,7 +23,8 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <linux/parser.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/random.h> #include <linux/buffer_head.h> #include <linux/exportfs.h> @@ -31,15 +33,14 @@ #include <linux/mount.h> #include <linux/log2.h> #include <linux/quotaops.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> +#include <linux/dax.h> +#include <linux/iversion.h> #include "ext2.h" #include "xattr.h" #include "acl.h" -#include "xip.h" -static void ext2_sync_super(struct super_block *sb, - struct ext2_super_block *es, int wait); -static int ext2_remount (struct super_block * sb, int * flags, char * data); +static void ext2_write_super(struct super_block *sb); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext2_sync_fs(struct super_block *sb, int wait); static int ext2_freeze(struct super_block *sb); @@ -53,7 +54,7 @@ void ext2_error(struct super_block *sb, const char *function, struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = sbi->s_es; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { spin_lock(&sbi->s_lock); sbi->s_mount_state |= EXT2_ERROR_FS; es->s_state |= cpu_to_le16(EXT2_ERROR_FS); @@ -73,13 +74,40 @@ void ext2_error(struct super_block *sb, const char *function, if (test_opt(sb, ERRORS_PANIC)) panic("EXT2-fs: panic from previous error\n"); - if (test_opt(sb, ERRORS_RO)) { + if (!sb_rdonly(sb) && test_opt(sb, ERRORS_RO)) { ext2_msg(sb, KERN_CRIT, "error: remounting filesystem read-only"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } } +static void ext2_msg_fc(struct fs_context *fc, const char *prefix, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + const char *s_id; + + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + s_id = fc->root->d_sb->s_id; + } else { + /* get last path component of source */ + s_id = strrchr(fc->source, '/'); + if (s_id) + s_id++; + else + s_id = fc->source; + } + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + printk("%sEXT2-fs (%s): %pV\n", prefix, s_id, &vaf); + + va_end(args); +} + void ext2_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) { @@ -124,16 +152,34 @@ void ext2_update_dynamic_rev(struct super_block *sb) */ } +#ifdef CONFIG_QUOTA +static int ext2_quota_off(struct super_block *sb, int type); + +static void ext2_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + ext2_quota_off(sb, type); +} +#else +static inline void ext2_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void ext2_put_super (struct super_block * sb) { int db_count; int i; struct ext2_sb_info *sbi = EXT2_SB(sb); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + ext2_quota_off_umount(sb); - ext2_xattr_put_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { + ext2_xattr_destroy_cache(sbi->s_ea_block_cache); + sbi->s_ea_block_cache = NULL; + + if (!sb_rdonly(sb)) { struct ext2_super_block *es = sbi->s_es; spin_lock(&sbi->s_lock); @@ -143,9 +189,8 @@ static void ext2_put_super (struct super_block * sb) } db_count = sbi->s_gdb_count; for (i = 0; i < db_count; i++) - if (sbi->s_group_desc[i]) - brelse (sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); + brelse(sbi->s_group_desc[i]); + kvfree(sbi->s_group_desc); kfree(sbi->s_debts); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -153,6 +198,7 @@ static void ext2_put_super (struct super_block * sb) brelse (sbi->s_sbh); sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); + fs_put_dax(sbi->s_daxdev, NULL); kfree(sbi); } @@ -161,25 +207,23 @@ static struct kmem_cache * ext2_inode_cachep; static struct inode *ext2_alloc_inode(struct super_block *sb) { struct ext2_inode_info *ei; - ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); + ei = alloc_inode_sb(sb, ext2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; + inode_set_iversion(&ei->vfs_inode, 1); +#ifdef CONFIG_QUOTA + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); +#endif + return &ei->vfs_inode; } -static void ext2_i_callback(struct rcu_head *head) +static void ext2_free_in_core_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } -static void ext2_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, ext2_i_callback); -} - static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; @@ -192,13 +236,14 @@ static void init_once(void *foo) inode_init_once(&ei->vfs_inode); } -static int init_inodecache(void) +static int __init init_inodecache(void) { - ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", - sizeof(struct ext2_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once); + ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache", + sizeof(struct ext2_inode_info), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, + offsetof(struct ext2_inode_info, i_data), + sizeof_field(struct ext2_inode_info, i_data), + init_once); if (ext2_inode_cachep == NULL) return -ENOMEM; return 0; @@ -277,21 +322,17 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noacl"); #endif - if (test_opt(sb, NOBH)) - seq_puts(seq, ",nobh"); - -#if defined(CONFIG_QUOTA) - if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) + if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA) + if (test_opt(sb, GRPQUOTA)) seq_puts(seq, ",grpquota"); -#endif -#if defined(CONFIG_EXT2_FS_XIP) - if (sbi->s_mount_opt & EXT2_MOUNT_XIP) + if (test_opt(sb, XIP)) seq_puts(seq, ",xip"); -#endif + + if (test_opt(sb, DAX)) + seq_puts(seq, ",dax"); if (!test_opt(sb, RESERVATION)) seq_puts(seq, ",noreservation"); @@ -303,11 +344,28 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_QUOTA static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static int ext2_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path); +static struct dquot __rcu **ext2_get_dquots(struct inode *inode) +{ + return EXT2_I(inode)->i_dquot; +} + +static const struct quotactl_ops ext2_quotactl_ops = { + .quota_on = ext2_quota_on, + .quota_off = ext2_quota_off, + .quota_sync = dquot_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; #endif static const struct super_operations ext2_sops = { .alloc_inode = ext2_alloc_inode, - .destroy_inode = ext2_destroy_inode, + .free_inode = ext2_free_in_core_inode, .write_inode = ext2_write_inode, .evict_inode = ext2_evict_inode, .put_super = ext2_put_super, @@ -315,11 +373,11 @@ static const struct super_operations ext2_sops = { .freeze_fs = ext2_freeze, .unfreeze_fs = ext2_unfreeze, .statfs = ext2_statfs, - .remount_fs = ext2_remount, .show_options = ext2_show_options, #ifdef CONFIG_QUOTA .quota_read = ext2_quota_read, .quota_write = ext2_quota_write, + .get_dquots = ext2_get_dquots, #endif }; @@ -364,232 +422,224 @@ static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid, } static const struct export_operations ext2_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ext2_fh_to_dentry, .fh_to_parent = ext2_fh_to_parent, .get_parent = ext2_get_parent, }; -static unsigned long get_sb_block(void **data) -{ - unsigned long sb_block; - char *options = (char *) *data; - - if (!options || strncmp(options, "sb=", 3) != 0) - return 1; /* Default location */ - options += 3; - sb_block = simple_strtoul(options, &options, 0); - if (*options && *options != ',') { - printk("EXT2-fs: Invalid sb specification: %s\n", - (char *) *data); - return 1; - } - if (*options == ',') - options++; - *data = (void *) options; - return sb_block; -} - enum { - Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, - Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, - Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, - Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, - Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation + Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, + Opt_sb, Opt_errors, Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, + Opt_nobh, Opt_user_xattr, Opt_acl, Opt_xip, Opt_dax, Opt_ignore, + Opt_quota, Opt_usrquota, Opt_grpquota, Opt_reservation, +}; + +static const struct constant_table ext2_param_errors[] = { + {"continue", EXT2_MOUNT_ERRORS_CONT}, + {"panic", EXT2_MOUNT_ERRORS_PANIC}, + {"remount-ro", EXT2_MOUNT_ERRORS_RO}, + {} +}; + +static const struct fs_parameter_spec ext2_param_spec[] = { + fsparam_flag ("bsddf", Opt_bsd_df), + fsparam_flag ("minixdf", Opt_minix_df), + fsparam_flag ("grpid", Opt_grpid), + fsparam_flag ("bsdgroups", Opt_grpid), + fsparam_flag ("nogrpid", Opt_nogrpid), + fsparam_flag ("sysvgroups", Opt_nogrpid), + fsparam_gid ("resgid", Opt_resgid), + fsparam_uid ("resuid", Opt_resuid), + fsparam_u32 ("sb", Opt_sb), + fsparam_enum ("errors", Opt_errors, ext2_param_errors), + fsparam_flag ("nouid32", Opt_nouid32), + fsparam_flag ("debug", Opt_debug), + fsparam_flag ("oldalloc", Opt_oldalloc), + fsparam_flag ("orlov", Opt_orlov), + fsparam_flag ("nobh", Opt_nobh), + fsparam_flag_no ("user_xattr", Opt_user_xattr), + fsparam_flag_no ("acl", Opt_acl), + fsparam_flag ("xip", Opt_xip), + fsparam_flag ("dax", Opt_dax), + fsparam_flag ("grpquota", Opt_grpquota), + fsparam_flag ("noquota", Opt_ignore), + fsparam_flag ("quota", Opt_quota), + fsparam_flag ("usrquota", Opt_usrquota), + fsparam_flag_no ("reservation", Opt_reservation), + {} }; -static const match_table_t tokens = { - {Opt_bsd_df, "bsddf"}, - {Opt_minix_df, "minixdf"}, - {Opt_grpid, "grpid"}, - {Opt_grpid, "bsdgroups"}, - {Opt_nogrpid, "nogrpid"}, - {Opt_nogrpid, "sysvgroups"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_sb, "sb=%u"}, - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_nouid32, "nouid32"}, - {Opt_nocheck, "check=none"}, - {Opt_nocheck, "nocheck"}, - {Opt_debug, "debug"}, - {Opt_oldalloc, "oldalloc"}, - {Opt_orlov, "orlov"}, - {Opt_nobh, "nobh"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_xip, "xip"}, - {Opt_grpquota, "grpquota"}, - {Opt_ignore, "noquota"}, - {Opt_quota, "quota"}, - {Opt_usrquota, "usrquota"}, - {Opt_reservation, "reservation"}, - {Opt_noreservation, "noreservation"}, - {Opt_err, NULL} +#define EXT2_SPEC_s_resuid (1 << 0) +#define EXT2_SPEC_s_resgid (1 << 1) + +struct ext2_fs_context { + unsigned long vals_s_flags; /* Bits to set in s_flags */ + unsigned long mask_s_flags; /* Bits changed in s_flags */ + unsigned int vals_s_mount_opt; + unsigned int mask_s_mount_opt; + kuid_t s_resuid; + kgid_t s_resgid; + unsigned long s_sb_block; + unsigned int spec; + }; -static int parse_options(char *options, struct super_block *sb) +static inline void ctx_set_mount_opt(struct ext2_fs_context *ctx, + unsigned long flag) { - char *p; - struct ext2_sb_info *sbi = EXT2_SB(sb); - substring_t args[MAX_OPT_ARGS]; - int option; - kuid_t uid; - kgid_t gid; - - if (!options) - return 1; - - while ((p = strsep (&options, ",")) != NULL) { - int token; - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_bsd_df: - clear_opt (sbi->s_mount_opt, MINIX_DF); - break; - case Opt_minix_df: - set_opt (sbi->s_mount_opt, MINIX_DF); - break; - case Opt_grpid: - set_opt (sbi->s_mount_opt, GRPID); - break; - case Opt_nogrpid: - clear_opt (sbi->s_mount_opt, GRPID); - break; - case Opt_resuid: - if (match_int(&args[0], &option)) - return 0; - uid = make_kuid(current_user_ns(), option); - if (!uid_valid(uid)) { - ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option); - return 0; - - } - sbi->s_resuid = uid; - break; - case Opt_resgid: - if (match_int(&args[0], &option)) - return 0; - gid = make_kgid(current_user_ns(), option); - if (!gid_valid(gid)) { - ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option); - return 0; - } - sbi->s_resgid = gid; - break; - case Opt_sb: - /* handled by get_sb_block() instead of here */ - /* *sb_block = match_int(&args[0]); */ - break; - case Opt_err_panic: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_RO); - set_opt (sbi->s_mount_opt, ERRORS_PANIC); - break; - case Opt_err_ro: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_RO); - break; - case Opt_err_cont: - clear_opt (sbi->s_mount_opt, ERRORS_RO); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_CONT); - break; - case Opt_nouid32: - set_opt (sbi->s_mount_opt, NO_UID32); - break; - case Opt_nocheck: - clear_opt (sbi->s_mount_opt, CHECK); - break; - case Opt_debug: - set_opt (sbi->s_mount_opt, DEBUG); - break; - case Opt_oldalloc: - set_opt (sbi->s_mount_opt, OLDALLOC); - break; - case Opt_orlov: - clear_opt (sbi->s_mount_opt, OLDALLOC); - break; - case Opt_nobh: - set_opt (sbi->s_mount_opt, NOBH); - break; + ctx->mask_s_mount_opt |= flag; + ctx->vals_s_mount_opt |= flag; +} + +static inline void ctx_clear_mount_opt(struct ext2_fs_context *ctx, + unsigned long flag) +{ + ctx->mask_s_mount_opt |= flag; + ctx->vals_s_mount_opt &= ~flag; +} + +static inline unsigned long +ctx_test_mount_opt(struct ext2_fs_context *ctx, unsigned long flag) +{ + return (ctx->vals_s_mount_opt & flag); +} + +static inline bool +ctx_parsed_mount_opt(struct ext2_fs_context *ctx, unsigned long flag) +{ + return (ctx->mask_s_mount_opt & flag); +} + +static void ext2_free_fc(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static int ext2_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct ext2_fs_context *ctx = fc->fs_private; + int opt; + struct fs_parse_result result; + + opt = fs_parse(fc, ext2_param_spec, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_bsd_df: + ctx_clear_mount_opt(ctx, EXT2_MOUNT_MINIX_DF); + break; + case Opt_minix_df: + ctx_set_mount_opt(ctx, EXT2_MOUNT_MINIX_DF); + break; + case Opt_grpid: + ctx_set_mount_opt(ctx, EXT2_MOUNT_GRPID); + break; + case Opt_nogrpid: + ctx_clear_mount_opt(ctx, EXT2_MOUNT_GRPID); + break; + case Opt_resuid: + ctx->s_resuid = result.uid; + ctx->spec |= EXT2_SPEC_s_resuid; + break; + case Opt_resgid: + ctx->s_resgid = result.gid; + ctx->spec |= EXT2_SPEC_s_resgid; + break; + case Opt_sb: + /* Note that this is silently ignored on remount */ + ctx->s_sb_block = result.uint_32; + break; + case Opt_errors: + ctx_clear_mount_opt(ctx, EXT2_MOUNT_ERRORS_MASK); + ctx_set_mount_opt(ctx, result.uint_32); + break; + case Opt_nouid32: + ctx_set_mount_opt(ctx, EXT2_MOUNT_NO_UID32); + break; + case Opt_debug: + ctx_set_mount_opt(ctx, EXT2_MOUNT_DEBUG); + break; + case Opt_oldalloc: + ctx_set_mount_opt(ctx, EXT2_MOUNT_OLDALLOC); + break; + case Opt_orlov: + ctx_clear_mount_opt(ctx, EXT2_MOUNT_OLDALLOC); + break; + case Opt_nobh: + ext2_msg_fc(fc, KERN_INFO, "nobh option not supported\n"); + break; #ifdef CONFIG_EXT2_FS_XATTR - case Opt_user_xattr: - set_opt (sbi->s_mount_opt, XATTR_USER); - break; - case Opt_nouser_xattr: - clear_opt (sbi->s_mount_opt, XATTR_USER); - break; + case Opt_user_xattr: + if (!result.negated) + ctx_set_mount_opt(ctx, EXT2_MOUNT_XATTR_USER); + else + ctx_clear_mount_opt(ctx, EXT2_MOUNT_XATTR_USER); + break; #else - case Opt_user_xattr: - case Opt_nouser_xattr: - ext2_msg(sb, KERN_INFO, "(no)user_xattr options" - "not supported"); - break; + case Opt_user_xattr: + ext2_msg_fc(fc, KERN_INFO, "(no)user_xattr options not supported"); + break; #endif #ifdef CONFIG_EXT2_FS_POSIX_ACL - case Opt_acl: - set_opt(sbi->s_mount_opt, POSIX_ACL); - break; - case Opt_noacl: - clear_opt(sbi->s_mount_opt, POSIX_ACL); - break; + case Opt_acl: + if (!result.negated) + ctx_set_mount_opt(ctx, EXT2_MOUNT_POSIX_ACL); + else + ctx_clear_mount_opt(ctx, EXT2_MOUNT_POSIX_ACL); + break; #else - case Opt_acl: - case Opt_noacl: - ext2_msg(sb, KERN_INFO, - "(no)acl options not supported"); - break; + case Opt_acl: + ext2_msg_fc(fc, KERN_INFO, "(no)acl options not supported"); + break; #endif - case Opt_xip: -#ifdef CONFIG_EXT2_FS_XIP - set_opt (sbi->s_mount_opt, XIP); + case Opt_xip: + ext2_msg_fc(fc, KERN_INFO, "use dax instead of xip"); + ctx_set_mount_opt(ctx, EXT2_MOUNT_XIP); + fallthrough; + case Opt_dax: +#ifdef CONFIG_FS_DAX + ext2_msg_fc(fc, KERN_WARNING, + "DAX enabled. Warning: DAX support in ext2 driver is deprecated" + " and will be removed at the end of 2025. Please use ext4 driver instead."); + ctx_set_mount_opt(ctx, EXT2_MOUNT_DAX); #else - ext2_msg(sb, KERN_INFO, "xip option not supported"); + ext2_msg_fc(fc, KERN_INFO, "dax option not supported"); #endif - break; + break; #if defined(CONFIG_QUOTA) - case Opt_quota: - case Opt_usrquota: - set_opt(sbi->s_mount_opt, USRQUOTA); - break; - - case Opt_grpquota: - set_opt(sbi->s_mount_opt, GRPQUOTA); - break; + case Opt_quota: + case Opt_usrquota: + ctx_set_mount_opt(ctx, EXT2_MOUNT_USRQUOTA); + break; + + case Opt_grpquota: + ctx_set_mount_opt(ctx, EXT2_MOUNT_GRPQUOTA); + break; #else - case Opt_quota: - case Opt_usrquota: - case Opt_grpquota: - ext2_msg(sb, KERN_INFO, - "quota operations not supported"); - break; + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + ext2_msg_fc(fc, KERN_INFO, "quota operations not supported"); + break; #endif - - case Opt_reservation: - set_opt(sbi->s_mount_opt, RESERVATION); - ext2_msg(sb, KERN_INFO, "reservations ON"); - break; - case Opt_noreservation: - clear_opt(sbi->s_mount_opt, RESERVATION); - ext2_msg(sb, KERN_INFO, "reservations OFF"); - break; - case Opt_ignore: - break; - default: - return 0; + case Opt_reservation: + if (!result.negated) { + ctx_set_mount_opt(ctx, EXT2_MOUNT_RESERVATION); + ext2_msg_fc(fc, KERN_INFO, "reservations ON"); + } else { + ctx_clear_mount_opt(ctx, EXT2_MOUNT_RESERVATION); + ext2_msg_fc(fc, KERN_INFO, "reservations OFF"); } + break; + case Opt_ignore: + break; + default: + return -EINVAL; } - return 1; + return 0; } static int ext2_setup_super (struct super_block * sb, @@ -603,7 +653,7 @@ static int ext2_setup_super (struct super_block * sb, ext2_msg(sb, KERN_ERR, "error: revision level too high, " "forcing read-only mode"); - res = MS_RDONLY; + res = SB_RDONLY; } if (read_only) return res; @@ -623,7 +673,8 @@ static int ext2_setup_super (struct super_block * sb, "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && (le32_to_cpu(es->s_lastcheck) + - le32_to_cpu(es->s_checkinterval) <= get_seconds())) + le32_to_cpu(es->s_checkinterval) <= + ktime_get_real_seconds())) ext2_msg(sb, KERN_WARNING, "warning: checktime reached, " "running e2fsck is recommended"); @@ -631,10 +682,9 @@ static int ext2_setup_super (struct super_block * sb, es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); if (test_opt (sb, DEBUG)) - ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " + ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, " "bpg=%lu, ipg=%lu, mo=%04lx]", EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, - sbi->s_frag_size, sbi->s_groups_count, EXT2_BLOCKS_PER_GROUP(sb), EXT2_INODES_PER_GROUP(sb), @@ -652,13 +702,7 @@ static int ext2_check_descriptors(struct super_block *sb) for (i = 0; i < sbi->s_groups_count; i++) { struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL); ext2_fsblk_t first_block = ext2_group_first_block_no(sb, i); - ext2_fsblk_t last_block; - - if (i == sbi->s_groups_count - 1) - last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; - else - last_block = first_block + - (EXT2_BLOCKS_PER_GROUP(sb) - 1); + ext2_fsblk_t last_block = ext2_group_last_block_no(sb, i); if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || le32_to_cpu(gdp->bg_block_bitmap) > last_block) @@ -701,7 +745,8 @@ static loff_t ext2_max_size(int bits) { loff_t res = EXT2_NDIR_BLOCKS; int meta_blocks; - loff_t upper_limit; + unsigned int upper_limit; + unsigned int ppb = 1 << (bits-2); /* This is calculated to be the largest file size for a * dense, file such that the total number of @@ -715,24 +760,38 @@ static loff_t ext2_max_size(int bits) /* total blocks in file system block size */ upper_limit >>= (bits - 9); - - /* indirect blocks */ - meta_blocks = 1; - /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; - + /* Compute how many blocks we can address by block tree */ res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); res += 1LL << (3*(bits-2)); + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; + /* Does block tree limit file size? */ + if (res + meta_blocks <= upper_limit) + goto check_lfs; + + res = upper_limit; + /* How many metadata blocks are needed for addressing upper_limit? */ + upper_limit -= EXT2_NDIR_BLOCKS; + /* indirect blocks */ + meta_blocks = 1; + upper_limit -= ppb; + /* double indirect blocks */ + if (upper_limit < ppb * ppb) { + meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb); + res -= meta_blocks; + goto check_lfs; + } + meta_blocks += 1 + ppb; + upper_limit -= ppb * ppb; + /* tripple indirect blocks for the rest */ + meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb) + + DIV_ROUND_UP(upper_limit, ppb*ppb); + res -= meta_blocks; +check_lfs: res <<= bits; - if (res > upper_limit) - res = upper_limit; - if (res > MAX_LFS_FILESIZE) res = MAX_LFS_FILESIZE; @@ -745,7 +804,6 @@ static unsigned long descriptor_loc(struct super_block *sb, { struct ext2_sb_info *sbi = EXT2_SB(sb); unsigned long bg, first_meta_bg; - int has_super = 0; first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); @@ -753,45 +811,105 @@ static unsigned long descriptor_loc(struct super_block *sb, nr < first_meta_bg) return (logic_sb_block + nr + 1); bg = sbi->s_desc_per_block * nr; - if (ext2_bg_has_super(sb, bg)) - has_super = 1; - return ext2_group_first_block_no(sb, bg) + has_super; + return ext2_group_first_block_no(sb, bg) + ext2_bg_has_super(sb, bg); +} + +/* + * Set all mount options either from defaults on disk, or from parsed + * options. Parsed/specified options override on-disk defaults. + */ +static void ext2_set_options(struct fs_context *fc, struct ext2_sb_info *sbi) +{ + struct ext2_fs_context *ctx = fc->fs_private; + struct ext2_super_block *es = sbi->s_es; + unsigned long def_mount_opts = le32_to_cpu(es->s_default_mount_opts); + + /* Copy parsed mount options to sbi */ + sbi->s_mount_opt = ctx->vals_s_mount_opt; + + /* Use in-superblock defaults only if not specified during parsing */ + if (!ctx_parsed_mount_opt(ctx, EXT2_MOUNT_DEBUG) && + def_mount_opts & EXT2_DEFM_DEBUG) + set_opt(sbi->s_mount_opt, DEBUG); + + if (!ctx_parsed_mount_opt(ctx, EXT2_MOUNT_GRPID) && + def_mount_opts & EXT2_DEFM_BSDGROUPS) + set_opt(sbi->s_mount_opt, GRPID); + + if (!ctx_parsed_mount_opt(ctx, EXT2_MOUNT_NO_UID32) && + def_mount_opts & EXT2_DEFM_UID16) + set_opt(sbi->s_mount_opt, NO_UID32); + +#ifdef CONFIG_EXT2_FS_XATTR + if (!ctx_parsed_mount_opt(ctx, EXT2_MOUNT_XATTR_USER) && + def_mount_opts & EXT2_DEFM_XATTR_USER) + set_opt(sbi->s_mount_opt, XATTR_USER); +#endif +#ifdef CONFIG_EXT2_FS_POSIX_ACL + if (!ctx_parsed_mount_opt(ctx, EXT2_MOUNT_POSIX_ACL) && + def_mount_opts & EXT2_DEFM_ACL) + set_opt(sbi->s_mount_opt, POSIX_ACL); +#endif + + if (!ctx_parsed_mount_opt(ctx, EXT2_MOUNT_ERRORS_MASK)) { + if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC) + set_opt(sbi->s_mount_opt, ERRORS_PANIC); + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_CONTINUE) + set_opt(sbi->s_mount_opt, ERRORS_CONT); + else + set_opt(sbi->s_mount_opt, ERRORS_RO); + } + + if (ctx->spec & EXT2_SPEC_s_resuid) + sbi->s_resuid = ctx->s_resuid; + else + sbi->s_resuid = make_kuid(&init_user_ns, + le16_to_cpu(es->s_def_resuid)); + + if (ctx->spec & EXT2_SPEC_s_resgid) + sbi->s_resgid = ctx->s_resgid; + else + sbi->s_resgid = make_kgid(&init_user_ns, + le16_to_cpu(es->s_def_resgid)); } -static int ext2_fill_super(struct super_block *sb, void *data, int silent) +static int ext2_fill_super(struct super_block *sb, struct fs_context *fc) { + struct ext2_fs_context *ctx = fc->fs_private; + int silent = fc->sb_flags & SB_SILENT; struct buffer_head * bh; struct ext2_sb_info * sbi; struct ext2_super_block * es; struct inode *root; unsigned long block; - unsigned long sb_block = get_sb_block(&data); + unsigned long sb_block = ctx->s_sb_block; unsigned long logic_sb_block; unsigned long offset = 0; - unsigned long def_mount_opts; - long ret = -EINVAL; + long ret = -ENOMEM; int blocksize = BLOCK_SIZE; int db_count; int i, j; __le32 features; int err; - err = -ENOMEM; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) - goto failed; + return -ENOMEM; sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); if (!sbi->s_blockgroup_lock) { kfree(sbi); - goto failed; + return -ENOMEM; } sb->s_fs_info = sbi; sbi->s_sb_block = sb_block; + sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off, + NULL, NULL); spin_lock_init(&sbi->s_lock); + ret = -EINVAL; /* * See what the current blocksize for the device is, and @@ -832,44 +950,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_magic != EXT2_SUPER_MAGIC) goto cantfind_ext2; - /* Set defaults before we parse the mount options */ - def_mount_opts = le32_to_cpu(es->s_default_mount_opts); - if (def_mount_opts & EXT2_DEFM_DEBUG) - set_opt(sbi->s_mount_opt, DEBUG); - if (def_mount_opts & EXT2_DEFM_BSDGROUPS) - set_opt(sbi->s_mount_opt, GRPID); - if (def_mount_opts & EXT2_DEFM_UID16) - set_opt(sbi->s_mount_opt, NO_UID32); -#ifdef CONFIG_EXT2_FS_XATTR - if (def_mount_opts & EXT2_DEFM_XATTR_USER) - set_opt(sbi->s_mount_opt, XATTR_USER); -#endif -#ifdef CONFIG_EXT2_FS_POSIX_ACL - if (def_mount_opts & EXT2_DEFM_ACL) - set_opt(sbi->s_mount_opt, POSIX_ACL); -#endif - - if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC) - set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_CONTINUE) - set_opt(sbi->s_mount_opt, ERRORS_CONT); - else - set_opt(sbi->s_mount_opt, ERRORS_RO); + ext2_set_options(fc, sbi); - sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); - sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); - - set_opt(sbi->s_mount_opt, RESERVATION); - - if (!parse_options((char *) data, sb)) - goto failed_mount; - - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? - MS_POSIXACL : 0); - - ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset - EXT2_MOUNT_XIP if not */ + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); + sb->s_iflags |= SB_I_CGROUPWB; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || @@ -890,21 +975,31 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) le32_to_cpu(features)); goto failed_mount; } - if (!(sb->s_flags & MS_RDONLY) && - (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ + if (!sb_rdonly(sb) && (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of " "unsupported optional features (%x)", le32_to_cpu(features)); goto failed_mount; } + if (le32_to_cpu(es->s_log_block_size) > + (EXT2_MAX_BLOCK_LOG_SIZE - BLOCK_SIZE_BITS)) { + ext2_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) { - if (!silent) + if (test_opt(sb, DAX)) { + if (!sbi->s_daxdev) { ext2_msg(sb, KERN_ERR, - "error: unsupported blocksize for xip"); - goto failed_mount; + "DAX unsupported by block device. Turning off DAX."); + clear_opt(sbi->s_mount_opt, DAX); + } else if (blocksize != PAGE_SIZE) { + ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); + clear_opt(sbi->s_mount_opt, DAX); + } } /* If the blocksize doesn't match, re-read the thing.. */ @@ -935,6 +1030,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits); sb->s_max_links = EXT2_LINK_MAX; + sb->s_time_min = S32_MIN; + sb->s_time_max = S32_MAX; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) { sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE; @@ -952,18 +1049,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } } - sbi->s_frag_size = EXT2_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); - if (sbi->s_frag_size == 0) - goto cantfind_ext2; - sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size; - sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - if (EXT2_INODE_SIZE(sb) == 0) - goto cantfind_ext2; sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0) goto cantfind_ext2; @@ -987,11 +1075,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (sb->s_blocksize != sbi->s_frag_size) { + if (es->s_log_frag_size != es->s_log_block_size) { ext2_msg(sb, KERN_ERR, - "error: fragsize %lu != blocksize %lu" - "(not supported yet)", - sbi->s_frag_size, sb->s_blocksize); + "error: fragsize log %u != blocksize log %u", + le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits); goto failed_mount; } @@ -1001,34 +1088,52 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group); goto failed_mount; } - if (sbi->s_frags_per_group > sb->s_blocksize * 8) { + /* At least inode table, bitmaps, and sb have to fit in one group */ + if (sbi->s_blocks_per_group <= sbi->s_itb_per_group + 3) { ext2_msg(sb, KERN_ERR, - "error: #fragments per group too big: %lu", - sbi->s_frags_per_group); + "error: #blocks per group smaller than metadata size: %lu <= %lu", + sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3); goto failed_mount; } - if (sbi->s_inodes_per_group > sb->s_blocksize * 8) { + if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || + sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext2_msg(sb, KERN_ERR, - "error: #inodes per group too big: %lu", + "error: invalid #inodes per group: %lu", sbi->s_inodes_per_group); goto failed_mount; } + if (sb_bdev_nr_blocks(sb) < le32_to_cpu(es->s_blocks_count)) { + ext2_msg(sb, KERN_ERR, + "bad geometry: block count %u exceeds size of device (%u blocks)", + le32_to_cpu(es->s_blocks_count), + (unsigned)sb_bdev_nr_blocks(sb)); + goto failed_mount; + } - if (EXT2_BLOCKS_PER_GROUP(sb) == 0) - goto cantfind_ext2; - sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) - 1) - / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - + le32_to_cpu(es->s_first_data_block) - 1) + / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group != + le32_to_cpu(es->s_inodes_count)) { + ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu", + le32_to_cpu(es->s_inodes_count), + (u64)sbi->s_groups_count * sbi->s_inodes_per_group); + goto failed_mount; + } db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / EXT2_DESC_PER_BLOCK(sb); - sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); + sbi->s_group_desc = kvmalloc_array(db_count, + sizeof(struct buffer_head *), + GFP_KERNEL); if (sbi->s_group_desc == NULL) { + ret = -ENOMEM; ext2_msg(sb, KERN_ERR, "error: not enough memory"); goto failed_mount; } bgl_lock_init(sbi->s_blockgroup_lock); sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL); if (!sbi->s_debts) { + ret = -ENOMEM; ext2_msg(sb, KERN_ERR, "error: not enough memory"); goto failed_mount_group_desc; } @@ -1051,7 +1156,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); - /* per fileystem reservation list head & lock */ + /* per filesystem reservation list head & lock */ spin_lock_init(&sbi->s_rsv_window_lock); sbi->s_rsv_window_root = RB_ROOT; /* @@ -1067,19 +1172,29 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext2_count_free_blocks(sb)); + ext2_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext2_count_free_inodes(sb)); + ext2_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext2_count_dirs(sb)); + ext2_count_dirs(sb), GFP_KERNEL); } if (err) { + ret = err; ext2_msg(sb, KERN_ERR, "error: insufficient memory"); goto failed_mount3; } + +#ifdef CONFIG_EXT2_FS_XATTR + sbi->s_ea_block_cache = ext2_xattr_create_cache(); + if (!sbi->s_ea_block_cache) { + ret = -ENOMEM; + ext2_msg(sb, KERN_ERR, "Failed to create ea_block_cache"); + goto failed_mount3; + } +#endif /* * set up enough so that it can read an inode */ @@ -1089,7 +1204,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; - sb->s_qcop = &dquot_quotactl_ops; + sb->s_qcop = &ext2_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif root = ext2_iget(sb, EXT2_ROOT_INO); @@ -1112,8 +1228,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) ext2_msg(sb, KERN_WARNING, "warning: mounting ext3 filesystem as ext2"); - if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY)) - sb->s_flags |= MS_RDONLY; + if (ext2_setup_super (sb, es, sb_rdonly(sb))) + sb->s_flags |= SB_RDONLY; ext2_write_super(sb); return 0; @@ -1124,6 +1240,7 @@ cantfind_ext2: sb->s_id); goto failed_mount; failed_mount3: + ext2_xattr_destroy_cache(sbi->s_ea_block_cache); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -1131,15 +1248,15 @@ failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); failed_mount_group_desc: - kfree(sbi->s_group_desc); + kvfree(sbi->s_group_desc); kfree(sbi->s_debts); failed_mount: brelse(bh); failed_sbi: + fs_put_dax(sbi->s_daxdev, NULL); sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); -failed: return ret; } @@ -1157,20 +1274,20 @@ static void ext2_clear_super_error(struct super_block *sb) * write and hope for the best. */ ext2_msg(sb, KERN_ERR, - "previous I/O error to superblock detected\n"); + "previous I/O error to superblock detected"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } } -static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, - int wait) +void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait) { ext2_clear_super_error(sb); spin_lock(&EXT2_SB(sb)->s_lock); es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); - es->s_wtime = cpu_to_le32(get_seconds()); + es->s_wtime = cpu_to_le32(ktime_get_real_seconds()); /* unlock before we do IO */ spin_unlock(&EXT2_SB(sb)->s_lock); mark_buffer_dirty(EXT2_SB(sb)->s_sbh); @@ -1239,93 +1356,65 @@ static int ext2_unfreeze(struct super_block *sb) return 0; } -void ext2_write_super(struct super_block *sb) +static void ext2_write_super(struct super_block *sb) { - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) ext2_sync_fs(sb, 1); } -static int ext2_remount (struct super_block * sb, int * flags, char * data) +static int ext2_reconfigure(struct fs_context *fc) { + struct ext2_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; struct ext2_sb_info * sbi = EXT2_SB(sb); struct ext2_super_block * es; - unsigned long old_mount_opt = sbi->s_mount_opt; - struct ext2_mount_options old_opts; - unsigned long old_sb_flags; + struct ext2_mount_options new_opts; + int flags = fc->sb_flags; int err; - spin_lock(&sbi->s_lock); + sync_filesystem(sb); - /* Store the old options */ - old_sb_flags = sb->s_flags; - old_opts.s_mount_opt = sbi->s_mount_opt; - old_opts.s_resuid = sbi->s_resuid; - old_opts.s_resgid = sbi->s_resgid; - - /* - * Allow the "check" option to be passed as a remount option. - */ - if (!parse_options(data, sb)) { - err = -EINVAL; - goto restore_opts; - } - - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); - - ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset - EXT2_MOUNT_XIP if not */ - - if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) { - ext2_msg(sb, KERN_WARNING, - "warning: unsupported blocksize for xip"); - err = -EINVAL; - goto restore_opts; - } + new_opts.s_mount_opt = ctx->vals_s_mount_opt; + new_opts.s_resuid = ctx->s_resuid; + new_opts.s_resgid = ctx->s_resgid; + spin_lock(&sbi->s_lock); es = sbi->s_es; - if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) { + if ((sbi->s_mount_opt ^ new_opts.s_mount_opt) & EXT2_MOUNT_DAX) { ext2_msg(sb, KERN_WARNING, "warning: refusing change of " - "xip flag with busy inodes while remounting"); - sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; - sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; + "dax flag with busy inodes while remounting"); + new_opts.s_mount_opt ^= EXT2_MOUNT_DAX; } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { - spin_unlock(&sbi->s_lock); - return 0; - } - if (*flags & MS_RDONLY) { + if ((bool)(flags & SB_RDONLY) == sb_rdonly(sb)) + goto out_set; + if (flags & SB_RDONLY) { if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || - !(sbi->s_mount_state & EXT2_VALID_FS)) { - spin_unlock(&sbi->s_lock); - return 0; - } + !(sbi->s_mount_state & EXT2_VALID_FS)) + goto out_set; /* * OK, we are remounting a valid rw partition rdonly, so set * the rdonly flag and then mark the partition as valid again. */ es->s_state = cpu_to_le16(sbi->s_mount_state); - es->s_mtime = cpu_to_le32(get_seconds()); + es->s_mtime = cpu_to_le32(ktime_get_real_seconds()); spin_unlock(&sbi->s_lock); err = dquot_suspend(sb, -1); - if (err < 0) { - spin_lock(&sbi->s_lock); - goto restore_opts; - } + if (err < 0) + return err; ext2_sync_super(sb, es, 1); } else { __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP); if (ret) { + spin_unlock(&sbi->s_lock); ext2_msg(sb, KERN_WARNING, "warning: couldn't remount RDWR because of " "unsupported optional features (%x).", le32_to_cpu(ret)); - err = -EROFS; - goto restore_opts; + return -EROFS; } /* * Mounting a RDONLY partition read-write, so reread and @@ -1334,7 +1423,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) */ sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext2_setup_super (sb, es, 0)) - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; spin_unlock(&sbi->s_lock); ext2_write_super(sb); @@ -1342,14 +1431,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) dquot_resume(sb, -1); } - return 0; -restore_opts: - sbi->s_mount_opt = old_opts.s_mount_opt; - sbi->s_resuid = old_opts.s_resuid; - sbi->s_resgid = old_opts.s_resgid; - sb->s_flags = old_sb_flags; + spin_lock(&sbi->s_lock); +out_set: + sbi->s_mount_opt = new_opts.s_mount_opt; + sbi->s_resuid = new_opts.s_resuid; + sbi->s_resgid = new_opts.s_resgid; + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); spin_unlock(&sbi->s_lock); - return err; + + return 0; } static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) @@ -1357,7 +1448,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) struct super_block *sb = dentry->d_sb; struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = sbi->s_es; - u64 fsid; spin_lock(&sbi->s_lock); @@ -1411,18 +1501,14 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_ffree = ext2_count_free_inodes(sb); es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); buf->f_namelen = EXT2_NAME_LEN; - fsid = le64_to_cpup((void *)es->s_uuid) ^ - le64_to_cpup((void *)es->s_uuid + sizeof(u64)); - buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; - buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; + buf->f_fsid = uuid_to_fsid(es->s_uuid); spin_unlock(&sbi->s_lock); return 0; } -static struct dentry *ext2_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ext2_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super); + return get_tree_bdev(fc, ext2_fill_super); } #ifdef CONFIG_QUOTA @@ -1450,8 +1536,7 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, len = i_size-off; toread = len; while (toread > 0) { - tocopy = sb->s_blocksize - offset < toread ? - sb->s_blocksize - offset : toread; + tocopy = min_t(size_t, sb->s_blocksize - offset, toread); tmp_bh.b_state = 0; tmp_bh.b_size = sb->s_blocksize; @@ -1489,10 +1574,10 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, struct buffer_head *bh; while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; + tocopy = min_t(size_t, sb->s_blocksize - offset, towrite); tmp_bh.b_state = 0; + tmp_bh.b_size = sb->s_blocksize; err = ext2_get_block(inode, blk, &tmp_bh, 1); if (err < 0) goto out; @@ -1506,7 +1591,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, } lock_buffer(bh); memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); + flush_dcache_folio(bh->b_folio); set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); @@ -1521,39 +1606,118 @@ out: return err; if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); - inode->i_version++; - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode_inc_iversion(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); return len - towrite; } +static int ext2_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + int err; + struct inode *inode; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + inode_lock(inode); + EXT2_I(inode)->i_flags |= EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); + + return 0; +} + +static int ext2_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + EXT2_I(inode)->i_flags &= ~(EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: + iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} + #endif +static const struct fs_context_operations ext2_context_ops = { + .parse_param = ext2_parse_param, + .get_tree = ext2_get_tree, + .reconfigure = ext2_reconfigure, + .free = ext2_free_fc, +}; + +static int ext2_init_fs_context(struct fs_context *fc) +{ + struct ext2_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + struct super_block *sb = fc->root->d_sb; + struct ext2_sb_info *sbi = EXT2_SB(sb); + + spin_lock(&sbi->s_lock); + ctx->vals_s_mount_opt = sbi->s_mount_opt; + ctx->vals_s_flags = sb->s_flags; + ctx->s_resuid = sbi->s_resuid; + ctx->s_resgid = sbi->s_resgid; + spin_unlock(&sbi->s_lock); + } else { + ctx->s_sb_block = 1; + ctx_set_mount_opt(ctx, EXT2_MOUNT_RESERVATION); + } + + fc->fs_private = ctx; + fc->ops = &ext2_context_ops; + + return 0; +} + static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", - .mount = ext2_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = ext2_init_fs_context, + .parameters = ext2_param_spec, }; MODULE_ALIAS_FS("ext2"); static int __init init_ext2_fs(void) { - int err = init_ext2_xattr(); - if (err) - return err; + int err; + err = init_inodecache(); if (err) - goto out1; - err = register_filesystem(&ext2_fs_type); + return err; + err = register_filesystem(&ext2_fs_type); if (err) goto out; return 0; out: destroy_inodecache(); -out1: - exit_ext2_xattr(); return err; } @@ -1561,7 +1725,6 @@ static void __exit exit_ext2_fs(void) { unregister_filesystem(&ext2_fs_type); destroy_inodecache(); - exit_ext2_xattr(); } MODULE_AUTHOR("Remy Card and others"); diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 565cf817bbf1..948d3a441403 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/symlink.c * @@ -19,36 +20,17 @@ #include "ext2.h" #include "xattr.h" -#include <linux/namei.h> - -static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ext2_inode_info *ei = EXT2_I(dentry->d_inode); - nd_set_link(nd, (char *)ei->i_data); - return NULL; -} const struct inode_operations ext2_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, + .get_link = page_get_link, + .getattr = ext2_getattr, .setattr = ext2_setattr, -#ifdef CONFIG_EXT2_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, .listxattr = ext2_listxattr, - .removexattr = generic_removexattr, -#endif }; const struct inode_operations ext2_fast_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = ext2_follow_link, + .get_link = simple_get_link, + .getattr = ext2_getattr, .setattr = ext2_setattr, -#ifdef CONFIG_EXT2_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, .listxattr = ext2_listxattr, - .removexattr = generic_removexattr, -#endif }; diff --git a/fs/ext2/trace.c b/fs/ext2/trace.c new file mode 100644 index 000000000000..b01cdf6526fd --- /dev/null +++ b/fs/ext2/trace.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "ext2.h" +#include <linux/uio.h> + +#define CREATE_TRACE_POINTS +#include "trace.h" diff --git a/fs/ext2/trace.h b/fs/ext2/trace.h new file mode 100644 index 000000000000..7d230e13576e --- /dev/null +++ b/fs/ext2/trace.h @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ext2 + +#if !defined(_EXT2_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _EXT2_TRACE_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(ext2_dio_class, + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, ssize_t ret), + TP_ARGS(iocb, iter, ret), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, isize) + __field(loff_t, pos) + __field(size_t, count) + __field(int, ki_flags) + __field(bool, aio) + __field(ssize_t, ret) + ), + TP_fast_assign( + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; + __entry->ino = file_inode(iocb->ki_filp)->i_ino; + __entry->isize = file_inode(iocb->ki_filp)->i_size; + __entry->pos = iocb->ki_pos; + __entry->count = iov_iter_count(iter); + __entry->ki_flags = iocb->ki_flags; + __entry->aio = !is_sync_kiocb(iocb); + __entry->ret = ret; + ), + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx len %zu flags %s aio %d ret %zd", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->pos, + __entry->count, + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), + __entry->aio, + __entry->ret) +); + +#define DEFINE_DIO_RW_EVENT(name) \ +DEFINE_EVENT(ext2_dio_class, name, \ + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, ssize_t ret), \ + TP_ARGS(iocb, iter, ret)) +DEFINE_DIO_RW_EVENT(ext2_dio_write_begin); +DEFINE_DIO_RW_EVENT(ext2_dio_write_end); +DEFINE_DIO_RW_EVENT(ext2_dio_write_buff_end); +DEFINE_DIO_RW_EVENT(ext2_dio_read_begin); +DEFINE_DIO_RW_EVENT(ext2_dio_read_end); + +TRACE_EVENT(ext2_dio_write_endio, + TP_PROTO(struct kiocb *iocb, ssize_t size, int ret), + TP_ARGS(iocb, size, ret), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, isize) + __field(loff_t, pos) + __field(ssize_t, size) + __field(int, ki_flags) + __field(bool, aio) + __field(int, ret) + ), + TP_fast_assign( + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; + __entry->ino = file_inode(iocb->ki_filp)->i_ino; + __entry->isize = file_inode(iocb->ki_filp)->i_size; + __entry->pos = iocb->ki_pos; + __entry->size = size; + __entry->ki_flags = iocb->ki_flags; + __entry->aio = !is_sync_kiocb(iocb); + __entry->ret = ret; + ), + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx len %zd flags %s aio %d ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->pos, + __entry->size, + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), + __entry->aio, + __entry->ret) +); + +#endif /* _EXT2_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include <trace/define_trace.h> diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 2d7557db3ae8..c885dcc3bd0d 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/xattr.c * @@ -55,6 +56,7 @@ #include <linux/buffer_head.h> #include <linux/init.h> +#include <linux/printk.h> #include <linux/slab.h> #include <linux/mbcache.h> #include <linux/quotaops.h> @@ -77,34 +79,30 @@ printk("\n"); \ } while (0) # define ea_bdebug(bh, f...) do { \ - char b[BDEVNAME_SIZE]; \ - printk(KERN_DEBUG "block %s:%lu: ", \ - bdevname(bh->b_bdev, b), \ - (unsigned long) bh->b_blocknr); \ + printk(KERN_DEBUG "block %pg:%lu: ", \ + bh->b_bdev, (unsigned long) bh->b_blocknr); \ printk(f); \ printk("\n"); \ } while (0) #else -# define ea_idebug(f...) -# define ea_bdebug(f...) +# define ea_idebug(inode, f...) no_printk(f) +# define ea_bdebug(bh, f...) no_printk(f) #endif static int ext2_xattr_set2(struct inode *, struct buffer_head *, struct ext2_xattr_header *); -static int ext2_xattr_cache_insert(struct buffer_head *); +static int ext2_xattr_cache_insert(struct mb_cache *, struct buffer_head *); static struct buffer_head *ext2_xattr_cache_find(struct inode *, struct ext2_xattr_header *); static void ext2_xattr_rehash(struct ext2_xattr_header *, struct ext2_xattr_entry *); -static struct mb_cache *ext2_xattr_cache; - -static const struct xattr_handler *ext2_xattr_handler_map[] = { +static const struct xattr_handler * const ext2_xattr_handler_map[] = { [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, #ifdef CONFIG_EXT2_FS_POSIX_ACL - [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, - [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext2_xattr_acl_default_handler, + [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, + [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default, #endif [EXT2_XATTR_INDEX_TRUSTED] = &ext2_xattr_trusted_handler, #ifdef CONFIG_EXT2_FS_SECURITY @@ -112,27 +110,76 @@ static const struct xattr_handler *ext2_xattr_handler_map[] = { #endif }; -const struct xattr_handler *ext2_xattr_handlers[] = { +const struct xattr_handler * const ext2_xattr_handlers[] = { &ext2_xattr_user_handler, &ext2_xattr_trusted_handler, -#ifdef CONFIG_EXT2_FS_POSIX_ACL - &ext2_xattr_acl_access_handler, - &ext2_xattr_acl_default_handler, -#endif #ifdef CONFIG_EXT2_FS_SECURITY &ext2_xattr_security_handler, #endif NULL }; -static inline const struct xattr_handler * -ext2_xattr_handler(int name_index) +#define EA_BLOCK_CACHE(inode) (EXT2_SB(inode->i_sb)->s_ea_block_cache) + +static inline const char *ext2_xattr_prefix(int name_index, + struct dentry *dentry) { const struct xattr_handler *handler = NULL; if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) handler = ext2_xattr_handler_map[name_index]; - return handler; + + if (!xattr_handler_can_list(handler, dentry)) + return NULL; + + return xattr_prefix(handler); +} + +static bool +ext2_xattr_header_valid(struct ext2_xattr_header *header) +{ + if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || + header->h_blocks != cpu_to_le32(1)) + return false; + + return true; +} + +static bool +ext2_xattr_entry_valid(struct ext2_xattr_entry *entry, + char *end, size_t end_offs) +{ + struct ext2_xattr_entry *next; + size_t size; + + next = EXT2_XATTR_NEXT(entry); + if ((char *)next >= end) + return false; + + if (entry->e_value_block != 0) + return false; + + size = le32_to_cpu(entry->e_value_size); + if (size > end_offs || + le16_to_cpu(entry->e_value_offs) + size > end_offs) + return false; + + return true; +} + +static int +ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name, + struct ext2_xattr_entry *entry) +{ + int cmp; + + cmp = name_index - entry->e_name_index; + if (!cmp) + cmp = name_len - entry->e_name_len; + if (!cmp) + cmp = memcmp(name, entry->e_name, name_len); + + return cmp; } /* @@ -153,7 +200,8 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, struct ext2_xattr_entry *entry; size_t name_len, size; char *end; - int error; + int error, not_found; + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); @@ -176,9 +224,9 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", + if (!ext2_xattr_header_valid(HDR(bh))) { +bad_block: + ext2_error(inode->i_sb, "ext2_xattr_get", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; @@ -188,30 +236,26 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", /* find named attribute */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = - EXT2_XATTR_NEXT(entry); - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(entry, end, + inode->i_sb->s_blocksize)) goto bad_block; - if (name_index == entry->e_name_index && - name_len == entry->e_name_len && - memcmp(name, entry->e_name, name_len) == 0) + + not_found = ext2_xattr_cmp_entry(name_index, name_len, name, + entry); + if (!not_found) goto found; - entry = next; + if (not_found < 0) + break; + + entry = EXT2_XATTR_NEXT(entry); } - if (ext2_xattr_cache_insert(bh)) + if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); error = -ENODATA; goto cleanup; found: - /* check the buffer size */ - if (entry->e_value_block != 0) - goto bad_block; size = le32_to_cpu(entry->e_value_size); - if (size > inode->i_sb->s_blocksize || - le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) - goto bad_block; - - if (ext2_xattr_cache_insert(bh)) + if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); if (buffer) { error = -ERANGE; @@ -243,12 +287,13 @@ cleanup: static int ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct buffer_head *bh = NULL; struct ext2_xattr_entry *entry; char *end; size_t rest = buffer_size; int error; + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); @@ -265,9 +310,9 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", + if (!ext2_xattr_header_valid(HDR(bh))) { +bad_block: + ext2_error(inode->i_sb, "ext2_xattr_list", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; @@ -277,32 +322,34 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", /* check the on-disk data structure */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry); - - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(entry, end, + inode->i_sb->s_blocksize)) goto bad_block; - entry = next; + entry = EXT2_XATTR_NEXT(entry); } - if (ext2_xattr_cache_insert(bh)) + if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); /* list the attribute names */ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); entry = EXT2_XATTR_NEXT(entry)) { - const struct xattr_handler *handler = - ext2_xattr_handler(entry->e_name_index); - - if (handler) { - size_t size = handler->list(dentry, buffer, rest, - entry->e_name, - entry->e_name_len, - handler->flags); + const char *prefix; + + prefix = ext2_xattr_prefix(entry->e_name_index, dentry); + if (prefix) { + size_t prefix_len = strlen(prefix); + size_t size = prefix_len + entry->e_name_len + 1; + if (buffer) { if (size > rest) { error = -ERANGE; goto cleanup; } - buffer += size; + memcpy(buffer, prefix, prefix_len); + buffer += prefix_len; + memcpy(buffer, entry->e_name, entry->e_name_len); + buffer += entry->e_name_len; + *buffer++ = 0; } rest -= size; } @@ -319,7 +366,7 @@ cleanup: /* * Inode operation listxattr() * - * dentry->d_inode->i_mutex: don't care + * d_inode(dentry)->i_mutex: don't care */ ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) @@ -337,6 +384,7 @@ static void ext2_xattr_update_super_block(struct super_block *sb) return; spin_lock(&EXT2_SB(sb)->s_lock); + ext2_update_dynamic_rev(sb); EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); spin_unlock(&EXT2_SB(sb)->s_lock); mark_buffer_dirty(EXT2_SB(sb)->s_sbh); @@ -361,7 +409,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; struct ext2_xattr_header *header = NULL; - struct ext2_xattr_entry *here, *last; + struct ext2_xattr_entry *here = NULL, *last = NULL; size_t name_len, free, min_offs = sb->s_blocksize; int not_found = 1, error; char *end; @@ -388,6 +436,9 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, name_len = strlen(name); if (name_len > 255 || value_len > sb->s_blocksize) return -ERANGE; + error = dquot_initialize(inode); + if (error) + return error; down_write(&EXT2_I(inode)->xattr_sem); if (EXT2_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. */ @@ -400,47 +451,39 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, le32_to_cpu(HDR(bh)->h_refcount)); header = HDR(bh); end = bh->b_data + bh->b_size; - if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - header->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(sb, "ext2_xattr_set", + if (!ext2_xattr_header_valid(header)) { +bad_block: + ext2_error(sb, "ext2_xattr_set", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; goto cleanup; } - /* Find the named attribute. */ - here = FIRST_ENTRY(bh); - while (!IS_LAST_ENTRY(here)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); - if ((char *)next >= end) - goto bad_block; - if (!here->e_value_block && here->e_value_size) { - size_t offs = le16_to_cpu(here->e_value_offs); - if (offs < min_offs) - min_offs = offs; - } - not_found = name_index - here->e_name_index; - if (!not_found) - not_found = name_len - here->e_name_len; - if (!not_found) - not_found = memcmp(name, here->e_name,name_len); - if (not_found <= 0) - break; - here = next; - } - last = here; - /* We still need to compute min_offs and last. */ + /* + * Find the named attribute. If not found, 'here' will point + * to entry where the new attribute should be inserted to + * maintain sorting. + */ + last = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(last)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize)) goto bad_block; - if (!last->e_value_block && last->e_value_size) { + if (last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) min_offs = offs; } - last = next; + if (not_found > 0) { + not_found = ext2_xattr_cmp_entry(name_index, + name_len, + name, last); + if (not_found <= 0) + here = last; + } + last = EXT2_XATTR_NEXT(last); } + if (not_found > 0) + here = last; /* Check whether we have enough space left. */ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); @@ -448,7 +491,6 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* We will use a new extended attribute block. */ free = sb->s_blocksize - sizeof(struct ext2_xattr_header) - sizeof(__u32); - here = last = NULL; /* avoid gcc uninitialized warning. */ } if (not_found) { @@ -464,14 +506,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", error = -EEXIST; if (flags & XATTR_CREATE) goto cleanup; - if (!here->e_value_block && here->e_value_size) { - size_t size = le32_to_cpu(here->e_value_size); - - if (le16_to_cpu(here->e_value_offs) + size > - sb->s_blocksize || size > sb->s_blocksize) - goto bad_block; - free += EXT2_XATTR_SIZE(size); - } + free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size)); free += EXT2_XATTR_LEN(name_len); } error = -ENOSPC; @@ -481,48 +516,48 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* Here we know that we can set the new attribute. */ if (header) { - struct mb_cache_entry *ce; + int offset; - /* assert(header == HDR(bh)); */ - ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, - bh->b_blocknr); lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { - ea_bdebug(bh, "modifying in-place"); - if (ce) - mb_cache_entry_free(ce); - /* keep the buffer locked while modifying it. */ - } else { - int offset; - - if (ce) - mb_cache_entry_release(ce); - unlock_buffer(bh); - ea_bdebug(bh, "cloning"); - header = kmalloc(bh->b_size, GFP_KERNEL); - error = -ENOMEM; - if (header == NULL) - goto cleanup; - memcpy(header, HDR(bh), bh->b_size); - header->h_refcount = cpu_to_le32(1); - - offset = (char *)here - bh->b_data; - here = ENTRY((char *)header + offset); - offset = (char *)last - bh->b_data; - last = ENTRY((char *)header + offset); + __u32 hash = le32_to_cpu(header->h_hash); + struct mb_cache_entry *oe; + + oe = mb_cache_entry_delete_or_get(EA_BLOCK_CACHE(inode), + hash, bh->b_blocknr); + if (!oe) { + ea_bdebug(bh, "modifying in-place"); + goto update_block; + } + /* + * Someone is trying to reuse the block, leave it alone + */ + mb_cache_entry_put(EA_BLOCK_CACHE(inode), oe); } + unlock_buffer(bh); + ea_bdebug(bh, "cloning"); + header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL); + error = -ENOMEM; + if (header == NULL) + goto cleanup; + header->h_refcount = cpu_to_le32(1); + + offset = (char *)here - bh->b_data; + here = ENTRY((char *)header + offset); + offset = (char *)last - bh->b_data; + last = ENTRY((char *)header + offset); } else { /* Allocate a buffer where we construct the new block. */ header = kzalloc(sb->s_blocksize, GFP_KERNEL); error = -ENOMEM; if (header == NULL) goto cleanup; - end = (char *)header + sb->s_blocksize; header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); header->h_blocks = header->h_refcount = cpu_to_le32(1); last = here = ENTRY(header+1); } +update_block: /* Iff we are modifying the block in-place, bh is locked here. */ if (not_found) { @@ -535,7 +570,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", here->e_name_len = name_len; memcpy(here->e_name, name, name_len); } else { - if (!here->e_value_block && here->e_value_size) { + if (here->e_value_size) { char *first_val = (char *)header + min_offs; size_t offs = le16_to_cpu(here->e_value_offs); char *val = (char *)header + offs; @@ -555,18 +590,19 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* Remove the old value. */ memmove(first_val + size, first_val, val - first_val); memset(first_val, 0, size); - here->e_value_offs = 0; min_offs += size; /* Adjust all value offsets. */ last = ENTRY(header+1); while (!IS_LAST_ENTRY(last)) { size_t o = le16_to_cpu(last->e_value_offs); - if (!last->e_value_block && o < offs) + if (o < offs) last->e_value_offs = cpu_to_le16(o + size); last = EXT2_XATTR_NEXT(last); } + + here->e_value_offs = 0; } if (value == NULL) { /* Remove the old name. */ @@ -606,14 +642,63 @@ skip_replace: } cleanup: - brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); + brelse(bh); up_write(&EXT2_I(inode)->xattr_sem); return error; } +static void ext2_xattr_release_block(struct inode *inode, + struct buffer_head *bh) +{ + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); + +retry_ref: + lock_buffer(bh); + if (HDR(bh)->h_refcount == cpu_to_le32(1)) { + __u32 hash = le32_to_cpu(HDR(bh)->h_hash); + struct mb_cache_entry *oe; + + /* + * This must happen under buffer lock to properly + * serialize with ext2_xattr_set() reusing the block. + */ + oe = mb_cache_entry_delete_or_get(ea_block_cache, hash, + bh->b_blocknr); + if (oe) { + /* + * Someone is trying to reuse the block. Wait + * and retry. + */ + unlock_buffer(bh); + mb_cache_entry_wait_unused(oe); + mb_cache_entry_put(ea_block_cache, oe); + goto retry_ref; + } + + /* Free the old block. */ + ea_bdebug(bh, "freeing"); + ext2_free_blocks(inode, bh->b_blocknr, 1); + /* We let our caller release bh, so we + * need to duplicate the buffer before. */ + get_bh(bh); + bforget(bh); + unlock_buffer(bh); + } else { + /* Decrement the refcount only. */ + le32_add_cpu(&HDR(bh)->h_refcount, -1); + dquot_free_block(inode, 1); + mark_buffer_dirty(bh); + unlock_buffer(bh); + ea_bdebug(bh, "refcount now=%d", + le32_to_cpu(HDR(bh)->h_refcount)); + if (IS_SYNC(inode)) + sync_dirty_buffer(bh); + } +} + /* * Second half of ext2_xattr_set(): Update the file system. */ @@ -624,6 +709,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; int error; + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); if (header) { new_bh = ext2_xattr_cache_find(inode, header); @@ -651,15 +737,18 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); - ext2_xattr_cache_insert(new_bh); + ext2_xattr_cache_insert(ea_block_cache, new_bh); } else { /* We need to allocate a new block */ ext2_fsblk_t goal = ext2_group_first_block_no(sb, EXT2_I(inode)->i_block_group); - int block = ext2_new_block(inode, goal, &error); + unsigned long count = 1; + ext2_fsblk_t block = ext2_new_blocks(inode, goal, + &count, &error, + EXT2_ALLOC_NORESERVE); if (error) goto cleanup; - ea_idebug(inode, "creating block %d", block); + ea_idebug(inode, "creating block %lu", block); new_bh = sb_getblk(sb, block); if (unlikely(!new_bh)) { @@ -672,7 +761,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, memcpy(new_bh->b_data, header, new_bh->b_size); set_buffer_uptodate(new_bh); unlock_buffer(new_bh); - ext2_xattr_cache_insert(new_bh); + ext2_xattr_cache_insert(ea_block_cache, new_bh); ext2_xattr_update_super_block(sb); } @@ -687,7 +776,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, /* Update the inode. */ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; - inode->i_ctime = CURRENT_TIME_SEC; + inode_set_ctime_current(inode); if (IS_SYNC(inode)) { error = sync_inode_metadata(inode, 1); /* In case sync failed due to ENOSPC the inode was actually @@ -705,38 +794,11 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, error = 0; if (old_bh && old_bh != new_bh) { - struct mb_cache_entry *ce; - /* * If there was an old block and we are no longer using it, * release the old block. */ - ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev, - old_bh->b_blocknr); - lock_buffer(old_bh); - if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { - /* Free the old block. */ - if (ce) - mb_cache_entry_free(ce); - ea_bdebug(old_bh, "freeing"); - ext2_free_blocks(inode, old_bh->b_blocknr, 1); - mark_inode_dirty(inode); - /* We let our caller release old_bh, so we - * need to duplicate the buffer before. */ - get_bh(old_bh); - bforget(old_bh); - } else { - /* Decrement the refcount only. */ - le32_add_cpu(&HDR(old_bh)->h_refcount, -1); - if (ce) - mb_cache_entry_release(ce); - dquot_free_block_nodirty(inode, 1); - mark_inode_dirty(inode); - mark_buffer_dirty(old_bh); - ea_bdebug(old_bh, "refcount now=%d", - le32_to_cpu(HDR(old_bh)->h_refcount)); - } - unlock_buffer(old_bh); + ext2_xattr_release_block(inode, old_bh); } cleanup: @@ -755,11 +817,27 @@ void ext2_xattr_delete_inode(struct inode *inode) { struct buffer_head *bh = NULL; - struct mb_cache_entry *ce; + struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb); - down_write(&EXT2_I(inode)->xattr_sem); + /* + * We are the only ones holding inode reference. The xattr_sem should + * better be unlocked! We could as well just not acquire xattr_sem at + * all but this makes the code more futureproof. OTOH we need trylock + * here to avoid false-positive warning from lockdep about reclaim + * circular dependency. + */ + if (WARN_ON_ONCE(!down_write_trylock(&EXT2_I(inode)->xattr_sem))) + return; if (!EXT2_I(inode)->i_file_acl) goto cleanup; + + if (!ext2_data_block_valid(sbi, EXT2_I(inode)->i_file_acl, 1)) { + ext2_error(inode->i_sb, "ext2_xattr_delete_inode", + "inode %ld: xattr block %d is out of data blocks range", + inode->i_ino, EXT2_I(inode)->i_file_acl); + goto cleanup; + } + bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); if (!bh) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", @@ -768,34 +846,13 @@ ext2_xattr_delete_inode(struct inode *inode) goto cleanup; } ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { + if (!ext2_xattr_header_valid(HDR(bh))) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); goto cleanup; } - ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr); - lock_buffer(bh); - if (HDR(bh)->h_refcount == cpu_to_le32(1)) { - if (ce) - mb_cache_entry_free(ce); - ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); - get_bh(bh); - bforget(bh); - unlock_buffer(bh); - } else { - le32_add_cpu(&HDR(bh)->h_refcount, -1); - if (ce) - mb_cache_entry_release(ce); - ea_bdebug(bh, "refcount now=%d", - le32_to_cpu(HDR(bh)->h_refcount)); - unlock_buffer(bh); - mark_buffer_dirty(bh); - if (IS_SYNC(inode)) - sync_dirty_buffer(bh); - dquot_free_block_nodirty(inode, 1); - } + ext2_xattr_release_block(inode, bh); EXT2_I(inode)->i_file_acl = 0; cleanup: @@ -804,18 +861,6 @@ cleanup: } /* - * ext2_xattr_put_super() - * - * This is called when a file system is unmounted. - */ -void -ext2_xattr_put_super(struct super_block *sb) -{ - mb_cache_shrink(sb->s_bdev); -} - - -/* * ext2_xattr_cache_insert() * * Create a new entry in the extended attribute cache, and insert @@ -824,28 +869,20 @@ ext2_xattr_put_super(struct super_block *sb) * Returns 0, or a negative error number on failure. */ static int -ext2_xattr_cache_insert(struct buffer_head *bh) +ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh) { __u32 hash = le32_to_cpu(HDR(bh)->h_hash); - struct mb_cache_entry *ce; int error; - ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); - if (!ce) - return -ENOMEM; - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); + error = mb_cache_entry_create(cache, GFP_KERNEL, hash, bh->b_blocknr, + true); if (error) { - mb_cache_entry_free(ce); if (error == -EBUSY) { - ea_bdebug(bh, "already in cache (%d cache entries)", - atomic_read(&ext2_xattr_cache->c_entry_count)); + ea_bdebug(bh, "already in cache"); error = 0; } - } else { - ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, - atomic_read(&ext2_xattr_cache->c_entry_count)); - mb_cache_entry_release(ce); - } + } else + ea_bdebug(bh, "inserting [%x]", (int)hash); return error; } @@ -902,45 +939,40 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) { __u32 hash = le32_to_cpu(header->h_hash); struct mb_cache_entry *ce; + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); if (!header->h_hash) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -again: - ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev, - hash); + + ce = mb_cache_entry_find_first(ea_block_cache, hash); while (ce) { struct buffer_head *bh; - if (IS_ERR(ce)) { - if (PTR_ERR(ce) == -EAGAIN) - goto again; - break; - } - - bh = sb_bread(inode->i_sb, ce->e_block); + bh = sb_bread(inode->i_sb, ce->e_value); if (!bh) { ext2_error(inode->i_sb, "ext2_xattr_cache_find", "inode %ld: block %ld read error", - inode->i_ino, (unsigned long) ce->e_block); + inode->i_ino, (unsigned long) ce->e_value); } else { lock_buffer(bh); if (le32_to_cpu(HDR(bh)->h_refcount) > - EXT2_XATTR_REFCOUNT_MAX) { + EXT2_XATTR_REFCOUNT_MAX) { ea_idebug(inode, "block %ld refcount %d>%d", - (unsigned long) ce->e_block, + (unsigned long) ce->e_value, le32_to_cpu(HDR(bh)->h_refcount), EXT2_XATTR_REFCOUNT_MAX); } else if (!ext2_xattr_cmp(header, HDR(bh))) { ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); - mb_cache_entry_release(ce); + mb_cache_entry_touch(ea_block_cache, ce); + mb_cache_entry_put(ea_block_cache, ce); return bh; } unlock_buffer(bh); brelse(bh); } - ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_next(ea_block_cache, ce); } return NULL; } @@ -1013,17 +1045,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, #undef BLOCK_HASH_SHIFT -int __init -init_ext2_xattr(void) +#define HASH_BUCKET_BITS 10 + +struct mb_cache *ext2_xattr_create_cache(void) { - ext2_xattr_cache = mb_cache_create("ext2_xattr", 6); - if (!ext2_xattr_cache) - return -ENOMEM; - return 0; + return mb_cache_create(HASH_BUCKET_BITS); } -void -exit_ext2_xattr(void) +void ext2_xattr_destroy_cache(struct mb_cache *cache) { - mb_cache_destroy(ext2_xattr_cache); + if (cache) + mb_cache_destroy(cache); } diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index 5e41cccff762..6a4966949047 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/ext2_xattr.h @@ -38,7 +39,7 @@ struct ext2_xattr_entry { __le32 e_value_block; /* disk block attribute is stored on (n/i) */ __le32 e_value_size; /* size of attribute value */ __le32 e_hash; /* hash value of name and value */ - char e_name[0]; /* attribute name */ + char e_name[]; /* attribute name */ }; #define EXT2_XATTR_PAD_BITS 2 @@ -53,12 +54,12 @@ struct ext2_xattr_entry { #define EXT2_XATTR_SIZE(size) \ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) +struct mb_cache; + # ifdef CONFIG_EXT2_FS_XATTR extern const struct xattr_handler ext2_xattr_user_handler; extern const struct xattr_handler ext2_xattr_trusted_handler; -extern const struct xattr_handler ext2_xattr_acl_access_handler; -extern const struct xattr_handler ext2_xattr_acl_default_handler; extern const struct xattr_handler ext2_xattr_security_handler; extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); @@ -67,12 +68,11 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); extern void ext2_xattr_delete_inode(struct inode *); -extern void ext2_xattr_put_super(struct super_block *); -extern int init_ext2_xattr(void); -extern void exit_ext2_xattr(void); +extern struct mb_cache *ext2_xattr_create_cache(void); +extern void ext2_xattr_destroy_cache(struct mb_cache *cache); -extern const struct xattr_handler *ext2_xattr_handlers[]; +extern const struct xattr_handler * const ext2_xattr_handlers[]; # else /* CONFIG_EXT2_FS_XATTR */ @@ -95,23 +95,12 @@ ext2_xattr_delete_inode(struct inode *inode) { } -static inline void -ext2_xattr_put_super(struct super_block *sb) -{ -} - -static inline int -init_ext2_xattr(void) -{ - return 0; -} - -static inline void -exit_ext2_xattr(void) +static inline void ext2_xattr_destroy_cache(struct mb_cache *cache) { } #define ext2_xattr_handlers NULL +#define ext2_listxattr NULL # endif /* CONFIG_EXT2_FS_XATTR */ diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index cfedb2cb0d8c..db47b8ab153e 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/xattr_security.c * Handler for storing security labels as extended attributes. @@ -7,43 +8,28 @@ #include <linux/security.h> #include "xattr.h" -static size_t -ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) -{ - const int prefix_len = XATTR_SECURITY_PREFIX_LEN; - const size_t total_len = prefix_len + name_len + 1; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; -} - static int -ext2_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_security_get(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - if (strcmp(name, "") == 0) - return -EINVAL; - return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, buffer, size); } static int -ext2_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_security_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - if (strcmp(name, "") == 0) - return -EINVAL; - return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, value, size, flags); } -int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *fs_info) +static int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) { const struct xattr *xattr; int err = 0; @@ -68,7 +54,6 @@ ext2_init_security(struct inode *inode, struct inode *dir, const struct xattr_handler ext2_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .list = ext2_xattr_security_list, .get = ext2_xattr_security_get, .set = ext2_xattr_security_set, }; diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 7e192574c001..995f931228ce 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/xattr_trusted.c * Handler for trusted extended attributes. @@ -8,41 +9,29 @@ #include "ext2.h" #include "xattr.h" -static size_t -ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +static bool +ext2_xattr_trusted_list(struct dentry *dentry) { - const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; - const size_t total_len = prefix_len + name_len + 1; - - if (!capable(CAP_SYS_ADMIN)) - return 0; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; + return capable(CAP_SYS_ADMIN); } static int -ext2_xattr_trusted_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_trusted_get(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - if (strcmp(name, "") == 0) - return -EINVAL; - return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, buffer, size); } static int -ext2_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_trusted_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - if (strcmp(name, "") == 0) - return -EINVAL; - return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, value, size, flags); } diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index f470e44c4b8d..dd1507231081 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext2/xattr_user.c * Handler for extended user attributes. @@ -10,46 +11,34 @@ #include "ext2.h" #include "xattr.h" -static size_t -ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +static bool +ext2_xattr_user_list(struct dentry *dentry) { - const size_t prefix_len = XATTR_USER_PREFIX_LEN; - const size_t total_len = prefix_len + name_len + 1; - - if (!test_opt(dentry->d_sb, XATTR_USER)) - return 0; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_USER_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; + return test_opt(dentry->d_sb, XATTR_USER); } static int -ext2_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_user_get(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - if (strcmp(name, "") == 0) - return -EINVAL; - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); } static int -ext2_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_user_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - if (strcmp(name, "") == 0) - return -EINVAL; - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER, + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, value, size, flags); } diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c deleted file mode 100644 index 1c3312858fcf..000000000000 --- a/fs/ext2/xip.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * linux/fs/ext2/xip.c - * - * Copyright (C) 2005 IBM Corporation - * Author: Carsten Otte (cotte@de.ibm.com) - */ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/genhd.h> -#include <linux/buffer_head.h> -#include <linux/blkdev.h> -#include "ext2.h" -#include "xip.h" - -static inline int -__inode_direct_access(struct inode *inode, sector_t block, - void **kaddr, unsigned long *pfn) -{ - struct block_device *bdev = inode->i_sb->s_bdev; - const struct block_device_operations *ops = bdev->bd_disk->fops; - sector_t sector; - - sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */ - - BUG_ON(!ops->direct_access); - return ops->direct_access(bdev, sector, kaddr, pfn); -} - -static inline int -__ext2_get_block(struct inode *inode, pgoff_t pgoff, int create, - sector_t *result) -{ - struct buffer_head tmp; - int rc; - - memset(&tmp, 0, sizeof(struct buffer_head)); - rc = ext2_get_block(inode, pgoff, &tmp, create); - *result = tmp.b_blocknr; - - /* did we get a sparse block (hole in the file)? */ - if (!tmp.b_blocknr && !rc) { - BUG_ON(create); - rc = -ENODATA; - } - - return rc; -} - -int -ext2_clear_xip_target(struct inode *inode, sector_t block) -{ - void *kaddr; - unsigned long pfn; - int rc; - - rc = __inode_direct_access(inode, block, &kaddr, &pfn); - if (!rc) - clear_page(kaddr); - return rc; -} - -void ext2_xip_verify_sb(struct super_block *sb) -{ - struct ext2_sb_info *sbi = EXT2_SB(sb); - - if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) && - !sb->s_bdev->bd_disk->fops->direct_access) { - sbi->s_mount_opt &= (~EXT2_MOUNT_XIP); - ext2_msg(sb, KERN_WARNING, - "warning: ignoring xip option - " - "not supported by bdev"); - } -} - -int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create, - void **kmem, unsigned long *pfn) -{ - int rc; - sector_t block; - - /* first, retrieve the sector number */ - rc = __ext2_get_block(mapping->host, pgoff, create, &block); - if (rc) - return rc; - - /* retrieve address of the target data */ - rc = __inode_direct_access(mapping->host, block, kmem, pfn); - return rc; -} diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h deleted file mode 100644 index 18b34d2f31b3..000000000000 --- a/fs/ext2/xip.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * linux/fs/ext2/xip.h - * - * Copyright (C) 2005 IBM Corporation - * Author: Carsten Otte (cotte@de.ibm.com) - */ - -#ifdef CONFIG_EXT2_FS_XIP -extern void ext2_xip_verify_sb (struct super_block *); -extern int ext2_clear_xip_target (struct inode *, sector_t); - -static inline int ext2_use_xip (struct super_block *sb) -{ - struct ext2_sb_info *sbi = EXT2_SB(sb); - return (sbi->s_mount_opt & EXT2_MOUNT_XIP); -} -int ext2_get_xip_mem(struct address_space *, pgoff_t, int, - void **, unsigned long *); -#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem) -#else -#define mapping_is_xip(map) 0 -#define ext2_xip_verify_sb(sb) do { } while (0) -#define ext2_use_xip(sb) 0 -#define ext2_clear_xip_target(inode, chain) 0 -#define ext2_get_xip_mem NULL -#endif |
