diff options
Diffstat (limited to 'fs/ext4/namei.c')
| -rw-r--r-- | fs/ext4/namei.c | 2006 |
1 files changed, 1191 insertions, 815 deletions
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2b928eb07fa2..c4b5e252af0e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -35,6 +35,7 @@ #include <linux/buffer_head.h> #include <linux/bio.h> #include <linux/iversion.h> +#include <linux/unicode.h> #include "ext4.h" #include "ext4_jbd2.h" @@ -53,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle, struct inode *inode, ext4_lblk_t *block) { + struct ext4_map_blocks map; struct buffer_head *bh; int err; @@ -62,27 +64,58 @@ static struct buffer_head *ext4_append(handle_t *handle, return ERR_PTR(-ENOSPC); *block = inode->i_size >> inode->i_sb->s_blocksize_bits; + map.m_lblk = *block; + map.m_len = 1; + + /* + * We're appending new directory block. Make sure the block is not + * allocated yet, otherwise we will end up corrupting the + * directory. + */ + err = ext4_map_blocks(NULL, inode, &map, 0); + if (err < 0) + return ERR_PTR(err); + if (err) { + EXT4_ERROR_INODE(inode, "Logical block already allocated"); + return ERR_PTR(-EFSCORRUPTED); + } bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE); if (IS_ERR(bh)) return bh; inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; + err = ext4_mark_inode_dirty(handle, inode); + if (err) + goto out; BUFFER_TRACE(bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh); - if (err) { - brelse(bh); - ext4_std_error(inode->i_sb, err); - return ERR_PTR(err); - } + err = ext4_journal_get_write_access(handle, inode->i_sb, bh, + EXT4_JTR_NONE); + if (err) + goto out; return bh; + +out: + brelse(bh); + ext4_std_error(inode->i_sb, err); + return ERR_PTR(err); } static int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent); +/* + * Hints to ext4_read_dirblock regarding whether we expect a directory + * block being read to be an index block, or a block containing + * directory entries (and if the latter, whether it was found via a + * logical block in an htree index block). This is used to control + * what sort of sanity checkinig ext4_read_dirblock() will do on the + * directory block read from the storage device. EITHER will means + * the caller doesn't know what kind of directory block will be read, + * so no specific verification will be done. + */ typedef enum { - EITHER, INDEX, DIRENT + EITHER, INDEX, DIRENT, DIRENT_HTREE } dirblock_type_t; #define ext4_read_dirblock(inode, block, type) \ @@ -98,7 +131,17 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, struct ext4_dir_entry *dirent; int is_dx_block = 0; - bh = ext4_bread(NULL, inode, block, 0); + if (block >= inode->i_size >> inode->i_blkbits) { + ext4_error_inode(inode, func, line, block, + "Attempting to read directory block (%u) that is past i_size (%llu)", + block, inode->i_size); + return ERR_PTR(-EFSCORRUPTED); + } + + if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO)) + bh = ERR_PTR(-EIO); + else + bh = ext4_bread(NULL, inode, block, 0); if (IS_ERR(bh)) { __ext4_warning(inode->i_sb, func, line, "inode #%lu: lblock %lu: comm %s: " @@ -108,11 +151,15 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } - if (!bh) { + /* The first directory block must not be a hole. */ + if (!bh && (type == INDEX || type == DIRENT_HTREE || block == 0)) { ext4_error_inode(inode, func, line, block, - "Directory hole found"); + "Directory hole found for htree %s block %u", + (type == INDEX) ? "index" : "leaf", block); return ERR_PTR(-EFSCORRUPTED); } + if (!bh) + return NULL; dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ if (is_dx(inode)) { @@ -129,7 +176,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, brelse(bh); return ERR_PTR(-EFSCORRUPTED); } - if (!ext4_has_metadata_csum(inode->i_sb) || + if (!ext4_has_feature_metadata_csum(inode->i_sb) || buffer_verified(bh)) return bh; @@ -139,21 +186,25 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, * caller is sure it should be an index block. */ if (is_dx_block && type == INDEX) { - if (ext4_dx_csum_verify(inode, dirent)) + if (ext4_dx_csum_verify(inode, dirent) && + !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC)) set_buffer_verified(bh); else { - ext4_error_inode(inode, func, line, block, - "Directory index failed checksum"); + ext4_error_inode_err(inode, func, line, block, + EFSBADCRC, + "Directory index failed checksum"); brelse(bh); return ERR_PTR(-EFSBADCRC); } } if (!is_dx_block) { - if (ext4_dirent_csum_verify(inode, dirent)) + if (ext4_dirblock_csum_verify(inode, bh) && + !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC)) set_buffer_verified(bh); else { - ext4_error_inode(inode, func, line, block, - "Directory block failed checksum"); + ext4_error_inode_err(inode, func, line, block, + EFSBADCRC, + "Directory block failed checksum"); brelse(bh); return ERR_PTR(-EFSBADCRC); } @@ -161,10 +212,6 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } -#ifndef assert -#define assert(test) J_ASSERT(test) -#endif - #ifdef DX_DEBUG #define dxtrace(command) command #else @@ -212,13 +259,13 @@ struct dx_root u8 unused_flags; } info; - struct dx_entry entries[0]; + struct dx_entry entries[]; }; struct dx_node { struct fake_dirent fake; - struct dx_entry entries[0]; + struct dx_entry entries[]; }; @@ -244,34 +291,6 @@ struct dx_tail { __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */ }; -static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); -static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); -static inline unsigned dx_get_hash(struct dx_entry *entry); -static void dx_set_hash(struct dx_entry *entry, unsigned value); -static unsigned dx_get_count(struct dx_entry *entries); -static unsigned dx_get_limit(struct dx_entry *entries); -static void dx_set_count(struct dx_entry *entries, unsigned value); -static void dx_set_limit(struct dx_entry *entries, unsigned value); -static unsigned dx_root_limit(struct inode *dir, unsigned infosize); -static unsigned dx_node_limit(struct inode *dir); -static struct dx_frame *dx_probe(struct ext4_filename *fname, - struct inode *dir, - struct dx_hash_info *hinfo, - struct dx_frame *frame); -static void dx_release(struct dx_frame *frames); -static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, - unsigned blocksize, struct dx_hash_info *hinfo, - struct dx_map_entry map[]); -static void dx_sort_map(struct dx_map_entry *map, unsigned count); -static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, - struct dx_map_entry *offsets, int count, unsigned blocksize); -static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize); -static void dx_insert_block(struct dx_frame *frame, - u32 hash, ext4_lblk_t block); -static int ext4_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, - __u32 *start_hash); static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir); @@ -279,9 +298,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode); /* checksumming functions */ -void initialize_dirent_tail(struct ext4_dir_entry_tail *t, - unsigned int blocksize) +void ext4_initialize_dirent_tail(struct buffer_head *bh, + unsigned int blocksize) { + struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); + memset(t, 0, sizeof(struct ext4_dir_entry_tail)); t->det_rec_len = ext4_rec_len_to_disk( sizeof(struct ext4_dir_entry_tail), blocksize); @@ -290,31 +311,32 @@ void initialize_dirent_tail(struct ext4_dir_entry_tail *t, /* Walk through a dirent block to find a checksum "dirent" at the tail */ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, - struct ext4_dir_entry *de) + struct buffer_head *bh) { struct ext4_dir_entry_tail *t; + int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); #ifdef PARANOID struct ext4_dir_entry *d, *top; - d = de; - top = (struct ext4_dir_entry *)(((void *)de) + - (EXT4_BLOCK_SIZE(inode->i_sb) - - sizeof(struct ext4_dir_entry_tail))); - while (d < top && d->rec_len) + d = (struct ext4_dir_entry *)bh->b_data; + top = (struct ext4_dir_entry *)(bh->b_data + + (blocksize - sizeof(struct ext4_dir_entry_tail))); + while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize)) d = (struct ext4_dir_entry *)(((void *)d) + - le16_to_cpu(d->rec_len)); + ext4_rec_len_from_disk(d->rec_len, blocksize)); if (d != top) return NULL; t = (struct ext4_dir_entry_tail *)d; #else - t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb)); + t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb)); #endif if (t->det_reserved_zero1 || - le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) || + (ext4_rec_len_from_disk(t->det_rec_len, blocksize) != + sizeof(struct ext4_dir_entry_tail)) || t->det_reserved_zero2 || t->det_reserved_ft != EXT4_FT_DIR_CSUM) return NULL; @@ -322,14 +344,12 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, return t; } -static __le32 ext4_dirent_csum(struct inode *inode, - struct ext4_dir_entry *dirent, int size) +static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); __u32 csum; - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); + csum = ext4_chksum(ei->i_csum_seed, (__u8 *)dirent, size); return cpu_to_le32(csum); } @@ -343,49 +363,49 @@ static void __warn_no_space_for_csum(struct inode *inode, const char *func, "No space for directory leaf checksum. Please run e2fsck -D."); } -int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) +int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh) { struct ext4_dir_entry_tail *t; - if (!ext4_has_metadata_csum(inode->i_sb)) + if (!ext4_has_feature_metadata_csum(inode->i_sb)) return 1; - t = get_dirent_tail(inode, dirent); + t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return 0; } - if (t->det_checksum != ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent)) + if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data, + (char *)t - bh->b_data)) return 0; return 1; } -static void ext4_dirent_csum_set(struct inode *inode, - struct ext4_dir_entry *dirent) +static void ext4_dirblock_csum_set(struct inode *inode, + struct buffer_head *bh) { struct ext4_dir_entry_tail *t; - if (!ext4_has_metadata_csum(inode->i_sb)) + if (!ext4_has_feature_metadata_csum(inode->i_sb)) return; - t = get_dirent_tail(inode, dirent); + t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return; } - t->det_checksum = ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent); + t->det_checksum = ext4_dirblock_csum(inode, bh->b_data, + (char *)t - bh->b_data); } -int ext4_handle_dirty_dirent_node(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) +int ext4_handle_dirty_dirblock(handle_t *handle, + struct inode *inode, + struct buffer_head *bh) { - ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); + ext4_dirblock_csum_set(inode, bh); return ext4_handle_dirty_metadata(handle, inode, bh); } @@ -396,13 +416,14 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode, struct ext4_dir_entry *dp; struct dx_root_info *root; int count_offset; + int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); + unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize); - if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb)) + if (rlen == blocksize) count_offset = 8; - else if (le16_to_cpu(dirent->rec_len) == 12) { + else if (rlen == 12) { dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); - if (le16_to_cpu(dp->rec_len) != - EXT4_BLOCK_SIZE(inode->i_sb) - 12) + if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12) return NULL; root = (struct dx_root_info *)(((void *)dp + 12)); if (root->reserved_zero || @@ -420,7 +441,6 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode, static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, int count_offset, int count, struct dx_tail *t) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); __u32 csum; int size; @@ -428,9 +448,9 @@ static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, int offset = offsetof(struct dx_tail, dt_checksum); size = count_offset + (count * sizeof(struct dx_entry)); - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); - csum = ext4_chksum(sbi, csum, (__u8 *)t, offset); - csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum)); + csum = ext4_chksum(ei->i_csum_seed, (__u8 *)dirent, size); + csum = ext4_chksum(csum, (__u8 *)t, offset); + csum = ext4_chksum(csum, (__u8 *)&dummy_csum, sizeof(dummy_csum)); return cpu_to_le32(csum); } @@ -442,7 +462,7 @@ static int ext4_dx_csum_verify(struct inode *inode, struct dx_tail *t; int count_offset, limit, count; - if (!ext4_has_metadata_csum(inode->i_sb)) + if (!ext4_has_feature_metadata_csum(inode->i_sb)) return 1; c = get_dx_countlimit(inode, dirent, &count_offset); @@ -471,7 +491,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) struct dx_tail *t; int count_offset, limit, count; - if (!ext4_has_metadata_csum(inode->i_sb)) + if (!ext4_has_feature_metadata_csum(inode->i_sb)) return; c = get_dx_countlimit(inode, dirent, &count_offset); @@ -556,19 +576,21 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value) static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) { - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - - EXT4_DIR_REC_LEN(2) - infosize; + unsigned int entry_space = dir->i_sb->s_blocksize - + ext4_dir_rec_len(1, NULL) - + ext4_dir_rec_len(2, NULL) - infosize; - if (ext4_has_metadata_csum(dir->i_sb)) + if (ext4_has_feature_metadata_csum(dir->i_sb)) entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit(struct inode *dir) { - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); + unsigned int entry_space = dir->i_sb->s_blocksize - + ext4_dir_rec_len(0, dir); - if (ext4_has_metadata_csum(dir->i_sb)) + if (ext4_has_feature_metadata_csum(dir->i_sb)) entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } @@ -612,7 +634,7 @@ static struct stats dx_show_leaf(struct inode *dir, { if (show_names) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION int len; char *name; struct fscrypt_str fname_crypto_str = @@ -621,15 +643,9 @@ static struct stats dx_show_leaf(struct inode *dir, name = de->name; len = de->name_len; - if (ext4_encrypted_inode(dir)) - res = fscrypt_get_encryption_info(dir); - if (res) { - printk(KERN_WARNING "Error setting up" - " fname crypto: %d\n", res); - } - if (!fscrypt_has_encryption_key(dir)) { + if (!IS_ENCRYPTED(dir)) { /* Directory is not encrypted */ - ext4fs_dirhash(de->name, + (void) ext4fs_dirhash(dir, de->name, de->name_len, &h); printk("%*.s:(U)%x.%u ", len, name, h.hash, @@ -641,8 +657,7 @@ static struct stats dx_show_leaf(struct inode *dir, /* Directory is encrypted */ res = fscrypt_fname_alloc_buffer( - dir, len, - &fname_crypto_str); + len, &fname_crypto_str); if (res) printk(KERN_WARNING "Error " "allocating crypto " @@ -662,8 +677,12 @@ static struct stats dx_show_leaf(struct inode *dir, name = fname_crypto_str.name; len = fname_crypto_str.len; } - ext4fs_dirhash(de->name, de->name_len, - &h); + if (IS_CASEFOLDED(dir)) + h.hash = EXT4_DIRENT_HASH(de); + else + (void) ext4fs_dirhash(dir, + de->name, + de->name_len, &h); printk("%*.s:(E)%x.%u ", len, name, h.hash, (unsigned) ((char *) de - base)); @@ -673,12 +692,13 @@ static struct stats dx_show_leaf(struct inode *dir, #else int len = de->name_len; char *name = de->name; - ext4fs_dirhash(de->name, de->name_len, &h); + (void) ext4fs_dirhash(dir, de->name, + de->name_len, &h); printk("%*.s:%x.%u ", len, name, h.hash, (unsigned) ((char *) de - base)); #endif } - space += EXT4_DIR_REC_LEN(de->name_len); + space += ext4_dir_rec_len(de->name_len, dir); names++; } de = ext4_next_entry(de, size); @@ -720,6 +740,29 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, (space/bcount)*100/blocksize); return (struct stats) { names, space, bcount}; } + +/* + * Linear search cross check + */ +static inline void htree_rep_invariant_check(struct dx_entry *at, + struct dx_entry *target, + u32 hash, unsigned int n) +{ + while (n--) { + dxtrace(printk(KERN_CONT ",")); + if (dx_get_hash(++at) > hash) { + at--; + break; + } + } + ASSERT(at == target - 1); +} +#else /* DX_DEBUG */ +static inline void htree_rep_invariant_check(struct dx_entry *at, + struct dx_entry *target, + u32 hash, unsigned int n) +{ +} #endif /* DX_DEBUG */ /* @@ -735,12 +778,14 @@ static struct dx_frame * dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in) { - unsigned count, indirect; + unsigned count, indirect, level, i; struct dx_entry *at, *entries, *p, *q, *m; struct dx_root *root; struct dx_frame *frame = frame_in; struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); u32 hash; + ext4_lblk_t block; + ext4_lblk_t blocks[EXT4_HTREE_LEVEL]; memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); frame->bh = ext4_read_dirblock(dir, 0, INDEX); @@ -750,19 +795,41 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, root = (struct dx_root *) frame->bh->b_data; if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && - root->info.hash_version != DX_HASH_LEGACY) { + root->info.hash_version != DX_HASH_LEGACY && + root->info.hash_version != DX_HASH_SIPHASH) { ext4_warning_inode(dir, "Unrecognised inode hash code %u", root->info.hash_version); goto fail; } + if (ext4_hash_in_dirent(dir)) { + if (root->info.hash_version != DX_HASH_SIPHASH) { + ext4_warning_inode(dir, + "Hash in dirent, but hash is not SIPHASH"); + goto fail; + } + } else { + if (root->info.hash_version == DX_HASH_SIPHASH) { + ext4_warning_inode(dir, + "Hash code is SIPHASH, but hash not in dirent"); + goto fail; + } + } if (fname) hinfo = &fname->hinfo; hinfo->hash_version = root->info.hash_version; if (hinfo->hash_version <= DX_HASH_TEA) hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; - if (fname && fname_name(fname)) - ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo); + /* hash is already computed for encrypted casefolded directory */ + if (fname && fname_name(fname) && + !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir))) { + int ret = ext4fs_dirhash(dir, fname_name(fname), + fname_len(fname), hinfo); + if (ret < 0) { + ret_err = ERR_PTR(ret); + goto fail; + } + } hash = hinfo->hash; if (root->info.unused_flags & 1) { @@ -796,6 +863,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, } dxtrace(printk("Look up %x", hash)); + level = 0; + blocks[0] = 0; while (1) { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { @@ -816,20 +885,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, p = m + 1; } - if (0) { // linear search cross check - unsigned n = count - 1; - at = entries; - while (n--) - { - dxtrace(printk(KERN_CONT ",")); - if (dx_get_hash(++at) > hash) - { - at--; - break; - } - } - assert (at == p - 1); - } + htree_rep_invariant_check(entries, p, hash, count - 1); at = p - 1; dxtrace(printk(KERN_CONT " %x->%u\n", @@ -837,15 +893,27 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, dx_get_block(at))); frame->entries = entries; frame->at = at; - if (!indirect--) + + block = dx_get_block(at); + for (i = 0; i <= level; i++) { + if (blocks[i] == block) { + ext4_warning_inode(dir, + "dx entry: tree cycle block %u points back to block %u", + blocks[level], block); + goto fail; + } + } + if (++level > indirect) return frame; + blocks[level] = block; frame++; - frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); + frame->bh = ext4_read_dirblock(dir, block, INDEX); if (IS_ERR(frame->bh)) { ret_err = (struct dx_frame *) frame->bh; frame->bh = NULL; goto fail; } + entries = ((struct dx_node *) frame->bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit(dir)) { @@ -871,12 +939,15 @@ static void dx_release(struct dx_frame *frames) { struct dx_root_info *info; int i; + unsigned int indirect_levels; if (frames[0].bh == NULL) return; info = &((struct dx_root *)frames[0].bh->b_data)->info; - for (i = 0; i <= info->indirect_levels; i++) { + /* save local copy, "info" may be freed after brelse() */ + indirect_levels = info->indirect_levels; + for (i = 0; i <= indirect_levels; i++) { if (frames[i].bh == NULL) break; brelse(frames[i].bh); @@ -932,7 +1003,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, * If the hash is 1, then continue only if the next page has a * continuation hash of any value. This is used for readdir * handling. Otherwise, check to see if the hash matches the - * desired contiuation hash. If it doesn't, return since + * desired continuation hash. If it doesn't, return since * there's no point to read in the successive index pages. */ bhash = dx_get_hash(p->at); @@ -973,49 +1044,66 @@ static int htree_dirblock_to_tree(struct file *dir_file, struct ext4_dir_entry_2 *de, *top; int err = 0, count = 0; struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str; + int csum = ext4_has_feature_metadata_csum(dir->i_sb); dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) return PTR_ERR(bh); de = (struct ext4_dir_entry_2 *) bh->b_data; + /* csum entries are not larger in the casefolded encrypted case */ top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - - EXT4_DIR_REC_LEN(0)); -#ifdef CONFIG_EXT4_FS_ENCRYPTION + ext4_dir_rec_len(0, + csum ? NULL : dir)); /* Check if the directory is encrypted */ - if (ext4_encrypted_inode(dir)) { - err = fscrypt_get_encryption_info(dir); + if (IS_ENCRYPTED(dir)) { + err = fscrypt_prepare_readdir(dir); if (err < 0) { brelse(bh); return err; } - err = fscrypt_fname_alloc_buffer(dir, EXT4_NAME_LEN, - &fname_crypto_str); + err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN, + &fname_crypto_str); if (err < 0) { brelse(bh); return err; } } -#endif + for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, bh->b_size, - (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) + EXT4_LBLK_TO_B(dir, block) + ((char *)de - bh->b_data))) { /* silently ignore the rest of the block */ break; } - ext4fs_dirhash(de->name, de->name_len, hinfo); + if (ext4_hash_in_dirent(dir)) { + if (de->name_len && de->inode) { + hinfo->hash = EXT4_DIRENT_HASH(de); + hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); + } else { + hinfo->hash = 0; + hinfo->minor_hash = 0; + } + } else { + err = ext4fs_dirhash(dir, de->name, + de->name_len, hinfo); + if (err < 0) { + count = err; + goto errout; + } + } if ((hinfo->hash < start_hash) || ((hinfo->hash == start_hash) && (hinfo->minor_hash < start_minor_hash))) continue; if (de->inode == 0) continue; - if (!ext4_encrypted_inode(dir)) { + if (!IS_ENCRYPTED(dir)) { tmp_str.name = de->name; tmp_str.len = de->name_len; err = ext4_htree_store_dirent(dir_file, @@ -1047,9 +1135,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, } errout: brelse(bh); -#ifdef CONFIG_EXT4_FS_ENCRYPTION fscrypt_fname_free_buffer(&fname_crypto_str); -#endif return count; } @@ -1079,17 +1165,21 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, start_hash, start_minor_hash)); dir = file_inode(dir_file); if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { - hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; + if (ext4_hash_in_dirent(dir)) + hinfo.hash_version = DX_HASH_SIPHASH; + else + hinfo.hash_version = + EXT4_SB(dir->i_sb)->s_def_hash_version; if (hinfo.hash_version <= DX_HASH_TEA) hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - count = htree_inlinedir_to_tree(dir_file, dir, 0, - &hinfo, start_hash, - start_minor_hash, - &has_inline_data); + count = ext4_inlinedir_to_tree(dir_file, dir, 0, + &hinfo, start_hash, + start_minor_hash, + &has_inline_data); if (has_inline_data) { *next_hash = ~0; return count; @@ -1187,25 +1277,41 @@ static inline int search_dirblock(struct buffer_head *bh, * Create map of hash values, offsets, and sizes, stored at end of block. * Returns number of entries mapped. */ -static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, - unsigned blocksize, struct dx_hash_info *hinfo, +static int dx_make_map(struct inode *dir, struct buffer_head *bh, + struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { int count = 0; - char *base = (char *) de; + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data; + unsigned int buflen = bh->b_size; + char *base = bh->b_data; struct dx_hash_info h = *hinfo; + int blocksize = EXT4_BLOCK_SIZE(dir->i_sb); + + if (ext4_has_feature_metadata_csum(dir->i_sb)) + buflen -= sizeof(struct ext4_dir_entry_tail); - while ((char *) de < base + blocksize) { + while ((char *) de < base + buflen) { + if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen, + ((char *)de) - base)) + return -EFSCORRUPTED; if (de->name_len && de->inode) { - ext4fs_dirhash(de->name, de->name_len, &h); + if (ext4_hash_in_dirent(dir)) + h.hash = EXT4_DIRENT_HASH(de); + else { + int err = ext4fs_dirhash(dir, de->name, + de->name_len, &h); + if (err < 0) + return err; + } map_tail--; map_tail->hash = h.hash; map_tail->offs = ((char *) de - base)>>2; - map_tail->size = le16_to_cpu(de->rec_len); + map_tail->size = ext4_rec_len_from_disk(de->rec_len, + blocksize); count++; cond_resched(); } - /* XXX: do we need to check rec_len == 0 case? -Chris */ de = ext4_next_entry(de, blocksize); } return count; @@ -1244,21 +1350,61 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) struct dx_entry *old = frame->at, *new = old + 1; int count = dx_get_count(entries); - assert(count < dx_get_limit(entries)); - assert(old < entries + count); + ASSERT(count < dx_get_limit(entries)); + ASSERT(old < entries + count); memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); dx_set_hash(new, hash); dx_set_block(new, block); dx_set_count(entries, count + 1); } +#if IS_ENABLED(CONFIG_UNICODE) +int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, + struct ext4_filename *name) +{ + struct qstr *cf_name = &name->cf_name; + unsigned char *buf; + struct dx_hash_info *hinfo = &name->hinfo; + int len; + + if (!IS_CASEFOLDED(dir) || + (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) { + cf_name->name = NULL; + return 0; + } + + buf = kmalloc(EXT4_NAME_LEN, GFP_NOFS); + if (!buf) + return -ENOMEM; + + len = utf8_casefold(dir->i_sb->s_encoding, iname, buf, EXT4_NAME_LEN); + if (len <= 0) { + kfree(buf); + buf = NULL; + } + cf_name->name = buf; + cf_name->len = (unsigned) len; + + if (!IS_ENCRYPTED(dir)) + return 0; + + hinfo->hash_version = DX_HASH_SIPHASH; + hinfo->seed = NULL; + if (cf_name->name) + return ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo); + else + return ext4fs_dirhash(dir, iname->name, iname->len, hinfo); +} +#endif + /* * Test whether a directory entry matches the filename being searched for. * * Return: %true if the directory entry matches, otherwise %false. */ -static inline bool ext4_match(const struct ext4_filename *fname, - const struct ext4_dir_entry_2 *de) +static bool ext4_match(struct inode *parent, + const struct ext4_filename *fname, + struct ext4_dir_entry_2 *de) { struct fscrypt_name f; @@ -1267,14 +1413,45 @@ static inline bool ext4_match(const struct ext4_filename *fname, f.usr_fname = fname->usr_fname; f.disk_name = fname->disk_name; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION f.crypto_buf = fname->crypto_buf; #endif + +#if IS_ENABLED(CONFIG_UNICODE) + if (IS_CASEFOLDED(parent) && + (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { + /* + * Just checking IS_ENCRYPTED(parent) below is not + * sufficient to decide whether one can use the hash for + * skipping the string comparison, because the key might + * have been added right after + * ext4_fname_setup_ci_filename(). In this case, a hash + * mismatch will be a false negative. Therefore, make + * sure cf_name was properly initialized before + * considering the calculated hash. + */ + if (sb_no_casefold_compat_fallback(parent->i_sb) && + IS_ENCRYPTED(parent) && fname->cf_name.name && + (fname->hinfo.hash != EXT4_DIRENT_HASH(de) || + fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de))) + return false; + /* + * Treat comparison errors as not a match. The + * only case where it happens is on a disk + * corruption or ENOMEM. + */ + + return generic_ci_match(parent, fname->usr_fname, + &fname->cf_name, de->name, + de->name_len) > 0; + } +#endif + return fscrypt_match_name(&f, de->name, de->name_len); } /* - * Returns 0 if not found, -1 on failure, and 1 on success + * Returns 0 if not found, -EFSCORRUPTED on failure, and 1 on success */ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, struct inode *dir, struct ext4_filename *fname, @@ -1286,16 +1463,16 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; - while ((char *) de < dlimit) { + while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ - if ((char *) de + de->name_len <= dlimit && - ext4_match(fname, de)) { + if (de->name + de->name_len <= dlimit && + ext4_match(dir, fname, de)) { /* found a match - just to be sure, do * a full check */ - if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, - bh->b_size, offset)) - return -1; + if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf, + buf_size, offset)) + return -EFSCORRUPTED; *res_dir = de; return 1; } @@ -1303,7 +1480,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, de_len = ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize); if (de_len <= 0) - return -1; + return -EFSCORRUPTED; offset += de_len; de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); } @@ -1327,7 +1504,7 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, } /* - * ext4_find_entry() + * __ext4_find_entry() * * finds an entry in the specified directory with the wanted name. It * returns the cache buffer in which the entry was found, and the entry @@ -1337,45 +1514,37 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, * The returned buffer_head has ->b_count elevated. The caller is expected * to brelse() it when appropriate. */ -static struct buffer_head * ext4_find_entry (struct inode *dir, - const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, - int *inlined) +static struct buffer_head *__ext4_find_entry(struct inode *dir, + struct ext4_filename *fname, + struct ext4_dir_entry_2 **res_dir, + int *inlined) { struct super_block *sb; struct buffer_head *bh_use[NAMEI_RA_SIZE]; struct buffer_head *bh, *ret = NULL; ext4_lblk_t start, block; - const u8 *name = d_name->name; + const u8 *name = fname->usr_fname->name; size_t ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ size_t ra_ptr = 0; /* Current index into readahead buffer */ ext4_lblk_t nblocks; int i, namelen, retval; - struct ext4_filename fname; *res_dir = NULL; sb = dir->i_sb; - namelen = d_name->len; + namelen = fname->usr_fname->len; if (namelen > EXT4_NAME_LEN) return NULL; - retval = ext4_fname_setup_filename(dir, d_name, 1, &fname); - if (retval == -ENOENT) - return NULL; - if (retval) - return ERR_PTR(retval); - if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - ret = ext4_find_inline_entry(dir, &fname, res_dir, + ret = ext4_find_inline_entry(dir, fname, res_dir, &has_inline_data); - if (has_inline_data) { - if (inlined) - *inlined = 1; + if (inlined) + *inlined = has_inline_data; + if (has_inline_data || IS_ERR(ret)) goto cleanup_and_exit; - } } if ((namelen <= 2) && (name[0] == '.') && @@ -1389,16 +1558,21 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto restart; } if (is_dx(dir)) { - ret = ext4_dx_find_entry(dir, &fname, res_dir); + ret = ext4_dx_find_entry(dir, fname, res_dir); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the * old fashioned way. */ - if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR) + if (IS_ERR(ret) && PTR_ERR(ret) == ERR_BAD_DX_DIR) + dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " + "falling back\n")); + else if (!sb_no_casefold_compat_fallback(dir->i_sb) && + *res_dir == NULL && IS_CASEFOLDED(dir)) + dxtrace(printk(KERN_DEBUG "ext4_find_entry: casefold " + "failed, falling back\n")); + else goto cleanup_and_exit; - dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " - "falling back\n")); ret = NULL; } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); @@ -1415,6 +1589,7 @@ restart: /* * We deal with the read-ahead logic here. */ + cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; @@ -1435,8 +1610,9 @@ restart: goto next; wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - EXT4_ERROR_INODE(dir, "reading directory lblock %lu", - (unsigned long) block); + EXT4_ERROR_INODE_ERR(dir, EIO, + "reading directory lblock %lu", + (unsigned long) block); brelse(bh); ret = ERR_PTR(-EIO); goto cleanup_and_exit; @@ -1444,25 +1620,27 @@ restart: if (!buffer_verified(bh) && !is_dx_internal_node(dir, block, (struct ext4_dir_entry *)bh->b_data) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_INODE(dir, "checksumming directory " - "block %lu", (unsigned long)block); + !ext4_dirblock_csum_verify(dir, bh)) { + EXT4_ERROR_INODE_ERR(dir, EFSBADCRC, + "checksumming directory " + "block %lu", (unsigned long)block); brelse(bh); ret = ERR_PTR(-EFSBADCRC); goto cleanup_and_exit; } set_buffer_verified(bh); - i = search_dirblock(bh, dir, &fname, - block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); + i = search_dirblock(bh, dir, fname, + EXT4_LBLK_TO_B(dir, block), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; ret = bh; goto cleanup_and_exit; } else { brelse(bh); - if (i < 0) + if (i < 0) { + ret = ERR_PTR(i); goto cleanup_and_exit; + } } next: if (++block >= nblocks) @@ -1484,39 +1662,77 @@ cleanup_and_exit: /* Clean up the read-ahead blocks */ for (; ra_ptr < ra_max; ra_ptr++) brelse(bh_use[ra_ptr]); - ext4_fname_free_filename(&fname); return ret; } +static struct buffer_head *ext4_find_entry(struct inode *dir, + const struct qstr *d_name, + struct ext4_dir_entry_2 **res_dir, + int *inlined) +{ + int err; + struct ext4_filename fname; + struct buffer_head *bh; + + err = ext4_fname_setup_filename(dir, d_name, 1, &fname); + if (err == -ENOENT) + return NULL; + if (err) + return ERR_PTR(err); + + bh = __ext4_find_entry(dir, &fname, res_dir, inlined); + + ext4_fname_free_filename(&fname); + return bh; +} + +static struct buffer_head *ext4_lookup_entry(struct inode *dir, + struct dentry *dentry, + struct ext4_dir_entry_2 **res_dir) +{ + int err; + struct ext4_filename fname; + struct buffer_head *bh; + + err = ext4_fname_prepare_lookup(dir, dentry, &fname); + if (err == -ENOENT) + return NULL; + if (err) + return ERR_PTR(err); + + bh = __ext4_find_entry(dir, &fname, res_dir, NULL); + + ext4_fname_free_filename(&fname); + return bh; +} + static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir) { - struct super_block * sb = dir->i_sb; struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct buffer_head *bh; ext4_lblk_t block; int retval; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION *res_dir = NULL; #endif frame = dx_probe(fname, dir, NULL, frames); if (IS_ERR(frame)) - return (struct buffer_head *) frame; + return ERR_CAST(frame); do { block = dx_get_block(frame->at); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) goto errout; retval = search_dirblock(bh, dir, fname, - block << EXT4_BLOCK_SIZE_BITS(sb), - res_dir); + EXT4_LBLK_TO_B(dir, block), res_dir); if (retval == 1) goto success; brelse(bh); - if (retval == -1) { + if (retval < 0) { bh = ERR_PTR(ERR_BAD_DX_DIR); goto errout; } @@ -1544,18 +1760,13 @@ success: static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; - struct ext4_dir_entry_2 *de; + struct ext4_dir_entry_2 *de = NULL; struct buffer_head *bh; - int err; - - err = fscrypt_prepare_lookup(dir, dentry, flags); - if (err) - return ERR_PTR(err); if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); + bh = ext4_lookup_entry(dir, dentry, &de); if (IS_ERR(bh)) return ERR_CAST(bh); inode = NULL; @@ -1578,7 +1789,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi ino); return ERR_PTR(-EFSCORRUPTED); } - if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && + if (!IS_ERR(inode) && IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { ext4_warning(inode->i_sb, @@ -1588,6 +1799,16 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi return ERR_PTR(-EPERM); } } + + if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) { + /* Eventually we want to call d_add_ci(dentry, NULL) + * for negative dentries in the encoding case as + * well. For now, prevent the negative dentry + * from being cached. + */ + return NULL; + } + return d_splice_alias(inode, dentry); } @@ -1595,11 +1816,10 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi struct dentry *ext4_get_parent(struct dentry *child) { __u32 ino; - static const struct qstr dotdot = QSTR_INIT("..", 2); - struct ext4_dir_entry_2 * de; + struct ext4_dir_entry_2 * de = NULL; struct buffer_head *bh; - bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL); + bh = ext4_find_entry(d_inode(child), &dotdot_name, &de, NULL); if (IS_ERR(bh)) return ERR_CAST(bh); if (!bh) @@ -1621,7 +1841,8 @@ struct dentry *ext4_get_parent(struct dentry *child) * Returns pointer to last entry moved. */ static struct ext4_dir_entry_2 * -dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, +dx_move_dirents(struct inode *dir, char *from, char *to, + struct dx_map_entry *map, int count, unsigned blocksize) { unsigned rec_len = 0; @@ -1629,11 +1850,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, while (count--) { struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + (map->offs<<2)); - rec_len = EXT4_DIR_REC_LEN(de->name_len); + rec_len = ext4_dir_rec_len(de->name_len, dir); + memcpy (to, de, rec_len); ((struct ext4_dir_entry_2 *) to)->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); + + /* wipe dir_entry excluding the rec_len field */ de->inode = 0; + memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len, + blocksize) - + offsetof(struct ext4_dir_entry_2, + name_len)); + map++; to += rec_len; } @@ -1644,7 +1873,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, * Compact each dir entry in the range to the minimal rec_len. * Returns pointer to last entry in range. */ -static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize) +static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base, + unsigned int blocksize) { struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; unsigned rec_len = 0; @@ -1653,7 +1883,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize) while ((char*)de < base + blocksize) { next = ext4_next_entry(de, blocksize); if (de->inode && de->name_len) { - rec_len = EXT4_DIR_REC_LEN(de->name_len); + rec_len = ext4_dir_rec_len(de->name_len, dir); if (de > to) memmove(to, de, rec_len); to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); @@ -1675,7 +1905,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct dx_hash_info *hinfo) { unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; + unsigned continued; + int count; struct buffer_head *bh2; ext4_lblk_t newblock; u32 hash2; @@ -1683,27 +1914,28 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, char *data1 = (*bh)->b_data, *data2; unsigned split, move, size; struct ext4_dir_entry_2 *de = NULL, *de2; - struct ext4_dir_entry_tail *t; int csum_size = 0; int err = 0, i; - if (ext4_has_metadata_csum(dir->i_sb)) + if (ext4_has_feature_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); bh2 = ext4_append(handle, dir, &newblock); if (IS_ERR(bh2)) { brelse(*bh); *bh = NULL; - return (struct ext4_dir_entry_2 *) bh2; + return ERR_CAST(bh2); } BUFFER_TRACE(*bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, *bh); + err = ext4_journal_get_write_access(handle, dir->i_sb, *bh, + EXT4_JTR_NONE); if (err) goto journal_error; BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, frame->bh); + err = ext4_journal_get_write_access(handle, dir->i_sb, frame->bh, + EXT4_JTR_NONE); if (err) goto journal_error; @@ -1711,11 +1943,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* create map in the end of data2 block */ map = (struct dx_map_entry *) (data2 + blocksize); - count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1, - blocksize, hinfo, map); + count = dx_make_map(dir, *bh, hinfo, map); + if (count < 0) { + err = count; + goto journal_error; + } map -= count; dx_sort_map(map, count); - /* Split the existing block in the middle, size-wise */ + /* Ensure that neither split block is over half full */ size = 0; move = 0; for (i = count-1; i >= 0; i--) { @@ -1725,8 +1960,27 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, size += map[i].size; move++; } - /* map index at which we will split */ - split = count - move; + /* + * map index at which we will split + * + * If the sum of active entries didn't exceed half the block size, just + * split it in half by count; each resulting block will have at least + * half the space free. + */ + if (i >= 0) + split = count - move; + else + split = count/2; + + if (WARN_ON_ONCE(split == 0)) { + /* Should never happen, but avoid out-of-bounds access below */ + ext4_error_inode_block(dir, (*bh)->b_blocknr, 0, + "bad indexed directory? hash=%08x:%08x count=%d move=%u", + hinfo->hash, hinfo->minor_hash, count, move); + err = -EFSCORRUPTED; + goto out; + } + hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n", @@ -1734,9 +1988,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, hash2, split, count-split)); /* Fancy dance to stay within two buffers */ - de2 = dx_move_dirents(data1, data2, map + split, count - split, + de2 = dx_move_dirents(dir, data1, data2, map + split, count - split, blocksize); - de = dx_pack_dirents(data1, blocksize); + de = dx_pack_dirents(dir, data1, blocksize); de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - (char *) de, blocksize); @@ -1744,11 +1998,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, (char *) de2, blocksize); if (csum_size) { - t = EXT4_DIRENT_TAIL(data2, blocksize); - initialize_dirent_tail(t, blocksize); - - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); + ext4_initialize_dirent_tail(*bh, blocksize); + ext4_initialize_dirent_tail(bh2, blocksize); } dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1, @@ -1762,7 +2013,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_handle_dirty_dirent_node(handle, dir, bh2); + err = ext4_handle_dirty_dirblock(handle, dir, bh2); if (err) goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); @@ -1773,34 +2024,34 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, return de; journal_error: + ext4_std_error(dir->i_sb, err); +out: brelse(*bh); brelse(bh2); *bh = NULL; - ext4_std_error(dir->i_sb, err); return ERR_PTR(err); } -int ext4_find_dest_de(struct inode *dir, struct inode *inode, - struct buffer_head *bh, +int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh, void *buf, int buf_size, struct ext4_filename *fname, struct ext4_dir_entry_2 **dest_de) { struct ext4_dir_entry_2 *de; - unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname)); + unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir); int nlen, rlen; unsigned int offset = 0; char *top; - de = (struct ext4_dir_entry_2 *)buf; + de = buf; top = buf + buf_size - reclen; while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) return -EFSCORRUPTED; - if (ext4_match(fname, de)) + if (ext4_match(dir, fname, de)) return -EEXIST; - nlen = EXT4_DIR_REC_LEN(de->name_len); + nlen = ext4_dir_rec_len(de->name_len, dir); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); if ((de->inode ? rlen - nlen : rlen) >= reclen) break; @@ -1814,7 +2065,8 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, return 0; } -void ext4_insert_dentry(struct inode *inode, +void ext4_insert_dentry(struct inode *dir, + struct inode *inode, struct ext4_dir_entry_2 *de, int buf_size, struct ext4_filename *fname) @@ -1822,7 +2074,7 @@ void ext4_insert_dentry(struct inode *inode, int nlen, rlen; - nlen = EXT4_DIR_REC_LEN(de->name_len); + nlen = ext4_dir_rec_len(de->name_len, dir); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); if (de->inode) { struct ext4_dir_entry_2 *de1 = @@ -1836,6 +2088,13 @@ void ext4_insert_dentry(struct inode *inode, ext4_set_de_type(inode->i_sb, de, inode->i_mode); de->name_len = fname_len(fname); memcpy(de->name, fname_name(fname), fname_len(fname)); + if (ext4_hash_in_dirent(dir)) { + struct dx_hash_info *hinfo = &fname->hinfo; + + EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash); + EXT4_DIRENT_HASHES(de)->minor_hash = + cpu_to_le32(hinfo->minor_hash); + } } /* @@ -1853,26 +2112,27 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, { unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; - int err; + int err, err2; - if (ext4_has_metadata_csum(inode->i_sb)) + if (ext4_has_feature_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); if (!de) { - err = ext4_find_dest_de(dir, inode, bh, bh->b_data, + err = ext4_find_dest_de(dir, bh, bh->b_data, blocksize - csum_size, fname, &de); if (err) return err; } BUFFER_TRACE(bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh); + err = ext4_journal_get_write_access(handle, dir->i_sb, bh, + EXT4_JTR_NONE); if (err) { ext4_std_error(dir->i_sb, err); return err; } /* By now the buffer is marked for journaling */ - ext4_insert_dentry(inode, de, blocksize, fname); + ext4_insert_dentry(dir, inode, de, blocksize, fname); /* * XXX shouldn't update any times until successful @@ -1885,15 +2145,61 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, * happen is that the times are slightly out of date * and/or different from the directory change time. */ - dir->i_mtime = dir->i_ctime = current_time(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); ext4_update_dx_flag(dir); inode_inc_iversion(dir); - ext4_mark_inode_dirty(handle, dir); + err2 = ext4_mark_inode_dirty(handle, dir); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_dirblock(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); - return 0; + return err ? err : err2; +} + +static bool ext4_check_dx_root(struct inode *dir, struct dx_root *root) +{ + struct fake_dirent *fde; + const char *error_msg; + unsigned int rlen; + unsigned int blocksize = dir->i_sb->s_blocksize; + char *blockend = (char *)root + dir->i_sb->s_blocksize; + + fde = &root->dot; + if (unlikely(fde->name_len != 1)) { + error_msg = "invalid name_len for '.'"; + goto corrupted; + } + if (unlikely(strncmp(root->dot_name, ".", fde->name_len))) { + error_msg = "invalid name for '.'"; + goto corrupted; + } + rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize); + if (unlikely((char *)fde + rlen >= blockend)) { + error_msg = "invalid rec_len for '.'"; + goto corrupted; + } + + fde = &root->dotdot; + if (unlikely(fde->name_len != 2)) { + error_msg = "invalid name_len for '..'"; + goto corrupted; + } + if (unlikely(strncmp(root->dotdot_name, "..", fde->name_len))) { + error_msg = "invalid name for '..'"; + goto corrupted; + } + rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize); + if (unlikely((char *)fde + rlen >= blockend)) { + error_msg = "invalid rec_len for '..'"; + goto corrupted; + } + + return true; + +corrupted: + EXT4_ERROR_INODE(dir, "Corrupt dir, %s, running e2fsck is recommended", + error_msg); + return false; } /* @@ -1909,8 +2215,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct dx_entry *entries; struct ext4_dir_entry_2 *de, *de2; - struct ext4_dir_entry_tail *t; - char *data1, *top; + char *data2, *top; unsigned len; int retval; unsigned blocksize; @@ -1918,29 +2223,30 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, struct fake_dirent *fde; int csum_size = 0; - if (ext4_has_metadata_csum(inode->i_sb)) + if (ext4_has_feature_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); blocksize = dir->i_sb->s_blocksize; dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); BUFFER_TRACE(bh, "get_write_access"); - retval = ext4_journal_get_write_access(handle, bh); + retval = ext4_journal_get_write_access(handle, dir->i_sb, bh, + EXT4_JTR_NONE); if (retval) { ext4_std_error(dir->i_sb, retval); brelse(bh); return retval; } + root = (struct dx_root *) bh->b_data; + if (!ext4_check_dx_root(dir, root)) { + brelse(bh); + return -EFSCORRUPTED; + } /* The 0th block becomes the root, move the dirents out */ fde = &root->dotdot; de = (struct ext4_dir_entry_2 *)((char *)fde + ext4_rec_len_from_disk(fde->rec_len, blocksize)); - if ((char *) de >= (((char *) root) + blocksize)) { - EXT4_ERROR_INODE(dir, "invalid rec_len for '..'"); - brelse(bh); - return -EFSCORRUPTED; - } len = ((char *) root) + (blocksize - csum_size) - (char *) de; /* Allocate new block for the 0th block's dirents */ @@ -1950,29 +2256,39 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, return PTR_ERR(bh2); } ext4_set_inode_flag(dir, EXT4_INODE_INDEX); - data1 = bh2->b_data; + data2 = bh2->b_data; - memcpy (data1, de, len); - de = (struct ext4_dir_entry_2 *) data1; - top = data1 + len; - while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) + memcpy(data2, de, len); + memset(de, 0, len); /* wipe old data */ + de = (struct ext4_dir_entry_2 *) data2; + top = data2 + len; + while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { + if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, + (char *)de - data2)) { + brelse(bh2); + brelse(bh); + return -EFSCORRUPTED; + } de = de2; - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - - (char *) de, - blocksize); - - if (csum_size) { - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); } + de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - + (char *) de, blocksize); + + if (csum_size) + ext4_initialize_dirent_tail(bh2, blocksize); /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); - de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2), - blocksize); + de->rec_len = ext4_rec_len_to_disk( + blocksize - ext4_dir_rec_len(2, NULL), blocksize); memset (&root->info, 0, sizeof(root->info)); root->info.info_length = sizeof(root->info); - root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; + if (ext4_hash_in_dirent(dir)) + root->info.hash_version = DX_HASH_SIPHASH; + else + root->info.hash_version = + EXT4_SB(dir->i_sb)->s_def_hash_version; + entries = root->entries; dx_set_block(entries, 1); dx_set_count(entries, 1); @@ -1983,8 +2299,17 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, if (fname->hinfo.hash_version <= DX_HASH_TEA) fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; - ext4fs_dirhash(fname_name(fname), fname_len(fname), &fname->hinfo); + /* casefolded encrypted hashes are computed on fname setup */ + if (!ext4_hash_in_dirent(dir)) { + int err = ext4fs_dirhash(dir, fname_name(fname), + fname_len(fname), &fname->hinfo); + if (err < 0) { + brelse(bh2); + brelse(bh); + return err; + } + } memset(frames, 0, sizeof(frames)); frame = frames; frame->entries = entries; @@ -1993,10 +2318,10 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (retval) - goto out_frames; - retval = ext4_handle_dirty_dirent_node(handle, dir, bh2); + goto out_frames; + retval = ext4_handle_dirty_dirblock(handle, dir, bh2); if (retval) - goto out_frames; + goto out_frames; de = do_split(handle,dir, &bh2, frame, &fname->hinfo); if (IS_ERR(de)) { @@ -2034,7 +2359,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct inode *dir = d_inode(dentry->d_parent); struct buffer_head *bh = NULL; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; struct super_block *sb; struct ext4_filename fname; int retval; @@ -2043,12 +2367,16 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, ext4_lblk_t block, blocks; int csum_size = 0; - if (ext4_has_metadata_csum(inode->i_sb)) + if (ext4_has_feature_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; blocksize = sb->s_blocksize; - if (!dentry->d_name.len) + + if (fscrypt_is_nokey_name(dentry)) + return -ENOKEY; + + if (!generic_ci_validate_strict_name(dir, &dentry->d_name)) return -EINVAL; retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname); @@ -2069,13 +2397,27 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; + /* Can we just ignore htree data? */ + if (ext4_has_feature_metadata_csum(sb)) { + EXT4_ERROR_INODE(dir, + "Directory has corrupted htree index."); + retval = -EFSCORRUPTED; + goto out; + } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; - ext4_mark_inode_dirty(handle, dir); + retval = ext4_mark_inode_dirty(handle, dir); + if (unlikely(retval)) + goto out; } blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { bh = ext4_read_dirblock(dir, block, DIRENT); + if (bh == NULL) { + bh = ext4_bread(handle, dir, block, + EXT4_GET_BLOCKS_CREATE); + goto add_to_new_block; + } if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2096,6 +2438,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, brelse(bh); } bh = ext4_append(handle, dir, &block); +add_to_new_block: if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2105,10 +2448,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); - if (csum_size) { - t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(bh, blocksize); retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh); out: @@ -2140,7 +2481,7 @@ again: return PTR_ERR(frame); entries = frame->entries; at = frame->at; - bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); + bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); if (IS_ERR(bh)) { err = PTR_ERR(bh); bh = NULL; @@ -2148,7 +2489,7 @@ again: } BUFFER_TRACE(bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh); + err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); if (err) goto journal_error; @@ -2205,9 +2546,12 @@ again: node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, sb->s_blocksize); BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, frame->bh); - if (err) + err = ext4_journal_get_write_access(handle, sb, frame->bh, + EXT4_JTR_NONE); + if (err) { + brelse(bh2); goto journal_error; + } if (!add_level) { unsigned icount1 = icount/2, icount2 = icount - icount1; unsigned hash2 = dx_get_hash(entries + icount1); @@ -2215,10 +2559,13 @@ again: icount1, icount2)); BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ - err = ext4_journal_get_write_access(handle, - (frame - 1)->bh); - if (err) + err = ext4_journal_get_write_access(handle, sb, + (frame - 1)->bh, + EXT4_JTR_NONE); + if (err) { + brelse(bh2); goto journal_error; + } memcpy((char *) entries2, (char *) (entries + icount1), icount2 * sizeof(struct dx_entry)); @@ -2228,7 +2575,7 @@ again: /* Which index block gets the new entry? */ if (at - entries >= icount1) { - frame->at = at = at - entries - icount1 + entries2; + frame->at = at - entries - icount1 + entries2; frame->entries = entries = entries2; swap(frame->bh, bh2); } @@ -2237,18 +2584,19 @@ again: dxtrace(dx_show_index("node", ((struct dx_node *) bh2->b_data)->entries)); err = ext4_handle_dirty_dx_node(handle, dir, bh2); - if (err) + if (err) { + brelse(bh2); goto journal_error; + } brelse (bh2); err = ext4_handle_dirty_dx_node(handle, dir, (frame - 1)->bh); if (err) goto journal_error; - if (restart) { - err = ext4_handle_dirty_dx_node(handle, dir, - frame->bh); + err = ext4_handle_dirty_dx_node(handle, dir, + frame->bh); + if (restart || err) goto journal_error; - } } else { struct dx_root *dxroot; memcpy((char *) entries2, (char *) entries, @@ -2264,8 +2612,10 @@ again: "Creating %d level index...\n", dxroot->info.indirect_levels)); err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); - if (err) + if (err) { + brelse(bh2); goto journal_error; + } err = ext4_handle_dirty_dx_node(handle, dir, bh2); brelse(bh2); restart = 1; @@ -2297,8 +2647,7 @@ cleanup: * ext4_generic_delete_entry deletes a directory entry by merging it * with the previous entry */ -int ext4_generic_delete_entry(handle_t *handle, - struct inode *dir, +int ext4_generic_delete_entry(struct inode *dir, struct ext4_dir_entry_2 *de_del, struct buffer_head *bh, void *entry_buf, @@ -2311,21 +2660,33 @@ int ext4_generic_delete_entry(handle_t *handle, i = 0; pde = NULL; - de = (struct ext4_dir_entry_2 *)entry_buf; + de = entry_buf; while (i < buf_size - csum_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, - bh->b_data, bh->b_size, i)) + entry_buf, buf_size, i)) return -EFSCORRUPTED; if (de == de_del) { - if (pde) + if (pde) { pde->rec_len = ext4_rec_len_to_disk( ext4_rec_len_from_disk(pde->rec_len, blocksize) + ext4_rec_len_from_disk(de->rec_len, blocksize), blocksize); - else + + /* wipe entire dir_entry */ + memset(de, 0, ext4_rec_len_from_disk(de->rec_len, + blocksize)); + } else { + /* wipe dir_entry excluding the rec_len field */ de->inode = 0; + memset(&de->name_len, 0, + ext4_rec_len_from_disk(de->rec_len, + blocksize) - + offsetof(struct ext4_dir_entry_2, + name_len)); + } + inode_inc_iversion(dir); return 0; } @@ -2351,22 +2712,22 @@ static int ext4_delete_entry(handle_t *handle, return err; } - if (ext4_has_metadata_csum(dir->i_sb)) + if (ext4_has_feature_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); BUFFER_TRACE(bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh); + err = ext4_journal_get_write_access(handle, dir->i_sb, bh, + EXT4_JTR_NONE); if (unlikely(err)) goto out; - err = ext4_generic_delete_entry(handle, dir, de_del, - bh, bh->b_data, + err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data, dir->i_sb->s_blocksize, csum_size); if (err) goto out; BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_dirblock(handle, dir, bh); if (unlikely(err)) goto out; @@ -2388,7 +2749,7 @@ out: * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set * on regular files) and to avoid creating huge/slow non-HTREE directories. */ -static void ext4_inc_count(handle_t *handle, struct inode *inode) +static void ext4_inc_count(struct inode *inode) { inc_nlink(inode); if (is_dx(inode) && @@ -2400,25 +2761,37 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode) * If a directory had nlink == 1, then we should let it be 1. This indicates * directory has >EXT4_LINK_MAX subdirs. */ -static void ext4_dec_count(handle_t *handle, struct inode *inode) +static void ext4_dec_count(struct inode *inode) { if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) drop_nlink(inode); } +/* + * Add non-directory inode to a directory. On success, the inode reference is + * consumed by dentry is instantiation. This is also indicated by clearing of + * *inodep pointer. On failure, the caller is responsible for dropping the + * inode reference in the safe context. + */ static int ext4_add_nondir(handle_t *handle, - struct dentry *dentry, struct inode *inode) + struct dentry *dentry, struct inode **inodep) { + struct inode *dir = d_inode(dentry->d_parent); + struct inode *inode = *inodep; int err = ext4_add_entry(handle, dentry, inode); if (!err) { - ext4_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); + if (IS_DIRSYNC(dir)) + ext4_handle_sync(handle); d_instantiate_new(dentry, inode); - return 0; + *inodep = NULL; + return err; } drop_nlink(inode); + ext4_mark_inode_dirty(handle, inode); + ext4_orphan_add(handle, inode); unlock_new_inode(inode); - iput(inode); return err; } @@ -2430,8 +2803,8 @@ static int ext4_add_nondir(handle_t *handle, * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl) +static int ext4_create(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) { handle_t *handle; struct inode *inode; @@ -2444,27 +2817,29 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0, - NULL, EXT4_HT_DIR, credits); + inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name, + 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); - err = ext4_add_nondir(handle, dentry, inode); - if (!err && IS_DIRSYNC(dir)) - ext4_handle_sync(handle); + err = ext4_add_nondir(handle, dentry, &inode); + if (!err) + ext4_fc_track_create(handle, dentry); } if (handle) ext4_journal_stop(handle); + if (!IS_ERR_OR_NULL(inode)) + iput(inode); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } -static int ext4_mknod(struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t rdev) +static int ext4_mknod(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t rdev) { handle_t *handle; struct inode *inode; @@ -2477,25 +2852,28 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0, - NULL, EXT4_HT_DIR, credits); + inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name, + 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &ext4_special_inode_operations; - err = ext4_add_nondir(handle, dentry, inode); - if (!err && IS_DIRSYNC(dir)) - ext4_handle_sync(handle); + err = ext4_add_nondir(handle, dentry, &inode); + if (!err) + ext4_fc_track_create(handle, dentry); } if (handle) ext4_journal_stop(handle); + if (!IS_ERR_OR_NULL(inode)) + iput(inode); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } -static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +static int ext4_tmpfile(struct mnt_idmap *idmap, struct inode *dir, + struct file *file, umode_t mode) { handle_t *handle; struct inode *inode; @@ -2506,10 +2884,10 @@ static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) return err; retry: - inode = ext4_new_inode_start_handle(dir, mode, + inode = ext4_new_inode_start_handle(idmap, dir, mode, NULL, 0, NULL, EXT4_HT_DIR, - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + EXT4_MAXQUOTAS_TRANS_BLOCKS(dir->i_sb) + 4 + EXT4_XATTR_TRANS_BLOCKS); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); @@ -2517,7 +2895,7 @@ retry: inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); - d_tmpfile(dentry, inode); + d_tmpfile(file, inode); err = ext4_orphan_add(handle, inode); if (err) goto err_unlock_inode; @@ -2528,55 +2906,66 @@ retry: ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; - return err; + return finish_open_simple(file, err); err_unlock_inode: ext4_journal_stop(handle); unlock_new_inode(inode); return err; } -struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, - struct ext4_dir_entry_2 *de, - int blocksize, int csum_size, - unsigned int parent_ino, int dotdot_real_len) +int ext4_init_dirblock(handle_t *handle, struct inode *inode, + struct buffer_head *bh, unsigned int parent_ino, + void *inline_buf, int inline_size) { + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) bh->b_data; + size_t blocksize = bh->b_size; + int csum_size = 0, header_size; + + if (ext4_has_feature_metadata_csum(inode->i_sb)) + csum_size = sizeof(struct ext4_dir_entry_tail); + de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; - de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len), + de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL), blocksize); - strcpy(de->name, "."); + memcpy(de->name, ".", 2); ext4_set_de_type(inode->i_sb, de, S_IFDIR); de = ext4_next_entry(de, blocksize); de->inode = cpu_to_le32(parent_ino); de->name_len = 2; - if (!dotdot_real_len) + memcpy(de->name, "..", 3); + ext4_set_de_type(inode->i_sb, de, S_IFDIR); + if (inline_buf) { + de->rec_len = ext4_rec_len_to_disk( + ext4_dir_rec_len(de->name_len, NULL), + blocksize); + de = ext4_next_entry(de, blocksize); + header_size = (char *)de - bh->b_data; + memcpy((void *)de, inline_buf, inline_size); + ext4_update_final_de(bh->b_data, inline_size + header_size, + blocksize - csum_size); + } else { de->rec_len = ext4_rec_len_to_disk(blocksize - - (csum_size + EXT4_DIR_REC_LEN(1)), + (csum_size + ext4_dir_rec_len(1, NULL)), blocksize); - else - de->rec_len = ext4_rec_len_to_disk( - EXT4_DIR_REC_LEN(de->name_len), blocksize); - strcpy(de->name, ".."); - ext4_set_de_type(inode->i_sb, de, S_IFDIR); + } - return ext4_next_entry(de, blocksize); + if (csum_size) + ext4_initialize_dirent_tail(bh, blocksize); + BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); + set_buffer_uptodate(bh); + set_buffer_verified(bh); + return ext4_handle_dirty_dirblock(handle, inode, bh); } -static int ext4_init_new_dir(handle_t *handle, struct inode *dir, +int ext4_init_new_dir(handle_t *handle, struct inode *dir, struct inode *inode) { struct buffer_head *dir_block = NULL; - struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; ext4_lblk_t block = 0; - unsigned int blocksize = dir->i_sb->s_blocksize; - int csum_size = 0; int err; - if (ext4_has_metadata_csum(dir->i_sb)) - csum_size = sizeof(struct ext4_dir_entry_tail); - if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { err = ext4_try_create_inline_dir(handle, dir, inode); if (err < 0 && err != -ENOSPC) @@ -2585,45 +2974,35 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, goto out; } + set_nlink(inode, 2); inode->i_size = 0; dir_block = ext4_append(handle, inode, &block); if (IS_ERR(dir_block)) return PTR_ERR(dir_block); - de = (struct ext4_dir_entry_2 *)dir_block->b_data; - ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); - set_nlink(inode, 2); - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } - - BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); - if (err) - goto out; - set_buffer_verified(dir_block); + err = ext4_init_dirblock(handle, inode, dir_block, dir->i_ino, NULL, 0); out: brelse(dir_block); return err; } -static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static struct dentry *ext4_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { handle_t *handle; struct inode *inode; - int err, credits, retries = 0; + int err, err2 = 0, credits, retries = 0; if (EXT4_DIR_LINK_MAX(dir)) - return -EMLINK; + return ERR_PTR(-EMLINK); err = dquot_initialize(dir); if (err) - return err; + return ERR_PTR(err); credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode, + inode = ext4_new_inode_start_handle(idmap, dir, S_IFDIR | mode, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); @@ -2642,26 +3021,33 @@ retry: if (err) { out_clear_inode: clear_nlink(inode); + ext4_orphan_add(handle, inode); unlock_new_inode(inode); - ext4_mark_inode_dirty(handle, inode); + err2 = ext4_mark_inode_dirty(handle, inode); + if (unlikely(err2)) + err = err2; + ext4_journal_stop(handle); iput(inode); - goto out_stop; + goto out_retry; } - ext4_inc_count(handle, dir); + ext4_inc_count(dir); + ext4_update_dx_flag(dir); err = ext4_mark_inode_dirty(handle, dir); if (err) goto out_clear_inode; d_instantiate_new(dentry, inode); + ext4_fc_track_create(handle, dentry); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); out_stop: if (handle) ext4_journal_stop(handle); +out_retry: if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; - return err; + return ERR_PTR(err); } /* @@ -2671,7 +3057,7 @@ bool ext4_empty_dir(struct inode *inode) { unsigned int offset; struct buffer_head *bh; - struct ext4_dir_entry_2 *de, *de1; + struct ext4_dir_entry_2 *de; struct super_block *sb; if (ext4_has_inline_data(inode)) { @@ -2684,238 +3070,73 @@ bool ext4_empty_dir(struct inode *inode) } sb = inode->i_sb; - if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) { + if (inode->i_size < ext4_dir_rec_len(1, NULL) + + ext4_dir_rec_len(2, NULL)) { EXT4_ERROR_INODE(inode, "invalid size"); - return true; + return false; } bh = ext4_read_dirblock(inode, 0, EITHER); if (IS_ERR(bh)) - return true; + return false; de = (struct ext4_dir_entry_2 *) bh->b_data; - de1 = ext4_next_entry(de, sb->s_blocksize); - if (le32_to_cpu(de->inode) != inode->i_ino || - le32_to_cpu(de1->inode) == 0 || - strcmp(".", de->name) || strcmp("..", de1->name)) { - ext4_warning_inode(inode, "directory missing '.' and/or '..'"); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, + 0) || + le32_to_cpu(de->inode) != inode->i_ino || de->name_len != 1 || + de->name[0] != '.') { + ext4_warning_inode(inode, "directory missing '.'"); + brelse(bh); + return false; + } + offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); + de = ext4_next_entry(de, sb->s_blocksize); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, + offset) || + le32_to_cpu(de->inode) == 0 || de->name_len != 2 || + de->name[0] != '.' || de->name[1] != '.') { + ext4_warning_inode(inode, "directory missing '..'"); brelse(bh); - return true; + return false; } - offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) + - ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); - de = ext4_next_entry(de1, sb->s_blocksize); + offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { - if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + if (!(offset & (sb->s_blocksize - 1))) { unsigned int lblock; brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); bh = ext4_read_dirblock(inode, lblock, EITHER); + if (bh == NULL) { + offset += sb->s_blocksize; + continue; + } if (IS_ERR(bh)) - return true; - de = (struct ext4_dir_entry_2 *) bh->b_data; + return false; } + de = (struct ext4_dir_entry_2 *) (bh->b_data + + (offset & (sb->s_blocksize - 1))); if (ext4_check_dir_entry(inode, NULL, de, bh, - bh->b_data, bh->b_size, offset)) { - de = (struct ext4_dir_entry_2 *)(bh->b_data + - sb->s_blocksize); - offset = (offset | (sb->s_blocksize - 1)) + 1; - continue; - } - if (le32_to_cpu(de->inode)) { + bh->b_data, bh->b_size, offset) || + le32_to_cpu(de->inode)) { brelse(bh); return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); - de = ext4_next_entry(de, sb->s_blocksize); } brelse(bh); return true; } -/* - * ext4_orphan_add() links an unlinked or truncated inode into a list of - * such inodes, starting at the superblock, in case we crash before the - * file is closed/deleted, or in case the inode truncate spans multiple - * transactions and the last transaction is not recovered after a crash. - * - * At filesystem recovery time, we walk this list deleting unlinked - * inodes and truncating linked inodes in ext4_orphan_cleanup(). - * - * Orphan list manipulation functions must be called under i_mutex unless - * we are just creating the inode or deleting it. - */ -int ext4_orphan_add(handle_t *handle, struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_iloc iloc; - int err = 0, rc; - bool dirty = false; - - if (!sbi->s_journal || is_bad_inode(inode)) - return 0; - - WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && - !inode_is_locked(inode)); - /* - * Exit early if inode already is on orphan list. This is a big speedup - * since we don't have to contend on the global s_orphan_lock. - */ - if (!list_empty(&EXT4_I(inode)->i_orphan)) - return 0; - - /* - * Orphan handling is only valid for files with data blocks - * being truncated, or files being unlinked. Note that we either - * hold i_mutex, or the inode can not be referenced from outside, - * so i_nlink should not be bumped due to race - */ - J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - - BUFFER_TRACE(sbi->s_sbh, "get_write_access"); - err = ext4_journal_get_write_access(handle, sbi->s_sbh); - if (err) - goto out; - - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out; - - mutex_lock(&sbi->s_orphan_lock); - /* - * Due to previous errors inode may be already a part of on-disk - * orphan list. If so skip on-disk list modification. - */ - if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) > - (le32_to_cpu(sbi->s_es->s_inodes_count))) { - /* Insert this inode at the head of the on-disk orphan list */ - NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); - sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - dirty = true; - } - list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); - mutex_unlock(&sbi->s_orphan_lock); - - if (dirty) { - err = ext4_handle_dirty_super(handle, sb); - rc = ext4_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; - if (err) { - /* - * We have to remove inode from in-memory list if - * addition to on disk orphan list failed. Stray orphan - * list entries can cause panics at unmount time. - */ - mutex_lock(&sbi->s_orphan_lock); - list_del_init(&EXT4_I(inode)->i_orphan); - mutex_unlock(&sbi->s_orphan_lock); - } - } else - brelse(iloc.bh); - - jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); - jbd_debug(4, "orphan inode %lu will point to %d\n", - inode->i_ino, NEXT_ORPHAN(inode)); -out: - ext4_std_error(sb, err); - return err; -} - -/* - * ext4_orphan_del() removes an unlinked or truncated inode from the list - * of such inodes stored on disk, because it is finally being cleaned up. - */ -int ext4_orphan_del(handle_t *handle, struct inode *inode) -{ - struct list_head *prev; - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - __u32 ino_next; - struct ext4_iloc iloc; - int err = 0; - - if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS)) - return 0; - - WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && - !inode_is_locked(inode)); - /* Do this quick check before taking global s_orphan_lock. */ - if (list_empty(&ei->i_orphan)) - return 0; - - if (handle) { - /* Grab inode buffer early before taking global s_orphan_lock */ - err = ext4_reserve_inode_write(handle, inode, &iloc); - } - - mutex_lock(&sbi->s_orphan_lock); - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - - prev = ei->i_orphan.prev; - list_del_init(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on - * disk, but we still need to remove the inode from the linked - * list in memory. */ - if (!handle || err) { - mutex_unlock(&sbi->s_orphan_lock); - goto out_err; - } - - ino_next = NEXT_ORPHAN(inode); - if (prev == &sbi->s_orphan) { - jbd_debug(4, "superblock will point to %u\n", ino_next); - BUFFER_TRACE(sbi->s_sbh, "get_write_access"); - err = ext4_journal_get_write_access(handle, sbi->s_sbh); - if (err) { - mutex_unlock(&sbi->s_orphan_lock); - goto out_brelse; - } - sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - mutex_unlock(&sbi->s_orphan_lock); - err = ext4_handle_dirty_super(handle, inode->i_sb); - } else { - struct ext4_iloc iloc2; - struct inode *i_prev = - &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode; - - jbd_debug(4, "orphan inode %lu will point to %u\n", - i_prev->i_ino, ino_next); - err = ext4_reserve_inode_write(handle, i_prev, &iloc2); - if (err) { - mutex_unlock(&sbi->s_orphan_lock); - goto out_brelse; - } - NEXT_ORPHAN(i_prev) = ino_next; - err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); - mutex_unlock(&sbi->s_orphan_lock); - } - if (err) - goto out_brelse; - NEXT_ORPHAN(inode) = 0; - err = ext4_mark_iloc_dirty(handle, inode, &iloc); -out_err: - ext4_std_error(inode->i_sb, err); - return err; - -out_brelse: - brelse(iloc.bh); - goto out_err; -} - static int ext4_rmdir(struct inode *dir, struct dentry *dentry) { int retval; struct inode *inode; struct buffer_head *bh; - struct ext4_dir_entry_2 *de; + struct ext4_dir_entry_2 *de = NULL; handle_t *handle = NULL; - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) - return -EIO; + retval = ext4_emergency_state(dir->i_sb); + if (unlikely(retval)) + return retval; /* Initialize quotas before so that eventual writes go in * separate transaction */ @@ -2969,11 +3190,24 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) * recovery. */ inode->i_size = 0; ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); - ext4_dec_count(handle, dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); + inode_set_ctime_current(inode); + retval = ext4_mark_inode_dirty(handle, inode); + if (retval) + goto end_rmdir; + ext4_dec_count(dir); ext4_update_dx_flag(dir); - ext4_mark_inode_dirty(handle, dir); + ext4_fc_track_unlink(handle, dentry); + retval = ext4_mark_inode_dirty(handle, dir); + + /* VFS negative dentries are incompatible with Encoding and + * Case-insensitiveness. Eventually we'll want avoid + * invalidating the dentries here, alongside with returning the + * negative dentries at ext4_lookup(), when it is better + * supported by the VFS for the CI case. + */ + if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) + d_invalidate(dentry); end_rmdir: brelse(bh); @@ -2982,77 +3216,142 @@ end_rmdir: return retval; } -static int ext4_unlink(struct inode *dir, struct dentry *dentry) +int __ext4_unlink(struct inode *dir, const struct qstr *d_name, + struct inode *inode, + struct dentry *dentry /* NULL during fast_commit recovery */) { - int retval; - struct inode *inode; + int retval = -ENOENT; struct buffer_head *bh; - struct ext4_dir_entry_2 *de; - handle_t *handle = NULL; - - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) - return -EIO; - - trace_ext4_unlink_enter(dir, dentry); - /* Initialize quotas before so that eventual writes go - * in separate transaction */ - retval = dquot_initialize(dir); - if (retval) - return retval; - retval = dquot_initialize(d_inode(dentry)); - if (retval) - return retval; + struct ext4_dir_entry_2 *de = NULL; + handle_t *handle; + int skip_remove_dentry = 0; - retval = -ENOENT; - bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); + /* + * Keep this outside the transaction; it may have to set up the + * directory's encryption key, which isn't GFP_NOFS-safe. + */ + bh = ext4_find_entry(dir, d_name, &de, NULL); if (IS_ERR(bh)) return PTR_ERR(bh); - if (!bh) - goto end_unlink; - inode = d_inode(dentry); + if (!bh) + return -ENOENT; - retval = -EFSCORRUPTED; - if (le32_to_cpu(de->inode) != inode->i_ino) - goto end_unlink; + if (le32_to_cpu(de->inode) != inode->i_ino) { + /* + * It's okay if we find dont find dentry which matches + * the inode. That's because it might have gotten + * renamed to a different inode number + */ + if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + skip_remove_dentry = 1; + else + goto out_bh; + } handle = ext4_journal_start(dir, EXT4_HT_DIR, EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); - handle = NULL; - goto end_unlink; + goto out_bh; } if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - if (inode->i_nlink == 0) { - ext4_warning_inode(inode, "Deleting file '%.*s' with no links", - dentry->d_name.len, dentry->d_name.name); - set_nlink(inode, 1); + if (!skip_remove_dentry) { + retval = ext4_delete_entry(handle, dir, de, bh); + if (retval) + goto out_handle; + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); + ext4_update_dx_flag(dir); + retval = ext4_mark_inode_dirty(handle, dir); + if (retval) + goto out_handle; + } else { + retval = 0; } - retval = ext4_delete_entry(handle, dir, de, bh); - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = current_time(dir); - ext4_update_dx_flag(dir); - ext4_mark_inode_dirty(handle, dir); - drop_nlink(inode); + if (inode->i_nlink == 0) + ext4_warning_inode(inode, "Deleting file '%.*s' with no links", + d_name->len, d_name->name); + else + drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); - inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); - -end_unlink: + inode_set_ctime_current(inode); + retval = ext4_mark_inode_dirty(handle, inode); + if (dentry && !retval) + ext4_fc_track_unlink(handle, dentry); +out_handle: + ext4_journal_stop(handle); +out_bh: brelse(bh); - if (handle) - ext4_journal_stop(handle); + return retval; +} + +static int ext4_unlink(struct inode *dir, struct dentry *dentry) +{ + int retval; + + retval = ext4_emergency_state(dir->i_sb); + if (unlikely(retval)) + return retval; + + trace_ext4_unlink_enter(dir, dentry); + /* + * Initialize quotas before so that eventual writes go + * in separate transaction + */ + retval = dquot_initialize(dir); + if (retval) + goto out_trace; + retval = dquot_initialize(d_inode(dentry)); + if (retval) + goto out_trace; + + retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry); + + /* VFS negative dentries are incompatible with Encoding and + * Case-insensitiveness. Eventually we'll want avoid + * invalidating the dentries here, alongside with returning the + * negative dentries at ext4_lookup(), when it is better + * supported by the VFS for the CI case. + */ + if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) + d_invalidate(dentry); + +out_trace: trace_ext4_unlink_exit(dentry, retval); return retval; } -static int ext4_symlink(struct inode *dir, +static int ext4_init_symlink_block(handle_t *handle, struct inode *inode, + struct fscrypt_str *disk_link) +{ + struct buffer_head *bh; + char *kaddr; + int err = 0; + + bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE); + if (IS_ERR(bh)) + return PTR_ERR(bh); + + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); + if (err) + goto out; + + kaddr = (char *)bh->b_data; + memcpy(kaddr, disk_link->name, disk_link->len); + inode->i_size = disk_link->len - 1; + EXT4_I(inode)->i_disksize = inode->i_size; + err = ext4_handle_dirty_metadata(handle, inode, bh); +out: + brelse(bh); + return err; +} + +static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { handle_t *handle; @@ -3060,9 +3359,11 @@ static int ext4_symlink(struct inode *dir, int err, len = strlen(symname); int credits; struct fscrypt_str disk_link; + int retries = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) - return -EIO; + err = ext4_emergency_state(dir->i_sb); + if (unlikely(err)) + return err; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, &disk_link); @@ -3073,34 +3374,24 @@ static int ext4_symlink(struct inode *dir, if (err) return err; - if ((disk_link.len > EXT4_N_BLOCKS * 4)) { - /* - * For non-fast symlinks, we just allocate inode and put it on - * orphan list in the first transaction => we need bitmap, - * group descriptor, sb, inode block, quota blocks, and - * possibly selinux xattr blocks. - */ - credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + - EXT4_XATTR_TRANS_BLOCKS; - } else { - /* - * Fast symlink. We have to add entry to directory - * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS), - * allocate new inode (bitmap, group descriptor, inode block, - * quota blocks, sb is already counted in previous macros). - */ - credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; - } - - inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO, + /* + * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the + * directory. +3 for inode, inode bitmap, group descriptor allocation. + * EXT4_DATA_TRANS_BLOCKS for the data block allocation and + * modification. + */ + credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; +retry: + inode = ext4_new_inode_start_handle(idmap, dir, S_IFLNK|S_IRWXUGO, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); if (IS_ERR(inode)) { if (handle) ext4_journal_stop(handle); - return PTR_ERR(inode); + err = PTR_ERR(inode); + goto out_retry; } if (IS_ENCRYPTED(inode)) { @@ -3108,103 +3399,56 @@ static int ext4_symlink(struct inode *dir, if (err) goto err_drop_inode; inode->i_op = &ext4_encrypted_symlink_inode_operations; + } else { + if ((disk_link.len > EXT4_N_BLOCKS * 4)) { + inode->i_op = &ext4_symlink_inode_operations; + } else { + inode->i_op = &ext4_fast_symlink_inode_operations; + } } if ((disk_link.len > EXT4_N_BLOCKS * 4)) { - if (!IS_ENCRYPTED(inode)) - inode->i_op = &ext4_symlink_inode_operations; - inode_nohighmem(inode); - ext4_set_aops(inode); - /* - * We cannot call page_symlink() with transaction started - * because it calls into ext4_write_begin() which can wait - * for transaction commit if we are running out of space - * and thus we deadlock. So we have to stop transaction now - * and restart it when symlink contents is written. - * - * To keep fs consistent in case of crash, we have to put inode - * to orphan list in the mean time. - */ - drop_nlink(inode); - err = ext4_orphan_add(handle, inode); - ext4_journal_stop(handle); - handle = NULL; - if (err) - goto err_drop_inode; - err = __page_symlink(inode, disk_link.name, disk_link.len, 1); - if (err) - goto err_drop_inode; - /* - * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS - * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified - */ - handle = ext4_journal_start(dir, EXT4_HT_DIR, - EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - handle = NULL; - goto err_drop_inode; - } - set_nlink(inode, 1); - err = ext4_orphan_del(handle, inode); + /* alloc symlink block and fill it */ + err = ext4_init_symlink_block(handle, inode, &disk_link); if (err) goto err_drop_inode; } else { /* clear the extent format for fast symlink */ ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); - if (!IS_ENCRYPTED(inode)) { - inode->i_op = &ext4_fast_symlink_inode_operations; - inode->i_link = (char *)&EXT4_I(inode)->i_data; - } memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name, disk_link.len); inode->i_size = disk_link.len - 1; + EXT4_I(inode)->i_disksize = inode->i_size; + if (!IS_ENCRYPTED(inode)) + inode_set_cached_link(inode, (char *)&EXT4_I(inode)->i_data, + inode->i_size); } - EXT4_I(inode)->i_disksize = inode->i_size; - err = ext4_add_nondir(handle, dentry, inode); - if (!err && IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - + err = ext4_add_nondir(handle, dentry, &inode); if (handle) ext4_journal_stop(handle); - goto out_free_encrypted_link; + iput(inode); + goto out_retry; err_drop_inode: - if (handle) - ext4_journal_stop(handle); clear_nlink(inode); + ext4_mark_inode_dirty(handle, inode); + ext4_orphan_add(handle, inode); unlock_new_inode(inode); + if (handle) + ext4_journal_stop(handle); iput(inode); -out_free_encrypted_link: +out_retry: + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; if (disk_link.name != (unsigned char *)symname) kfree(disk_link.name); return err; } -static int ext4_link(struct dentry *old_dentry, - struct inode *dir, struct dentry *dentry) +int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry) { handle_t *handle; - struct inode *inode = d_inode(old_dentry); int err, retries = 0; - - if (inode->i_nlink >= EXT4_LINK_MAX) - return -EMLINK; - - err = fscrypt_prepare_link(old_dentry, dir, dentry); - if (err) - return err; - - if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && - (!projid_eq(EXT4_I(dir)->i_projid, - EXT4_I(old_dentry->d_inode)->i_projid))) - return -EXDEV; - - err = dquot_initialize(dir); - if (err) - return err; - retry: handle = ext4_journal_start(dir, EXT4_HT_DIR, (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -3215,19 +3459,20 @@ retry: if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - inode->i_ctime = current_time(inode); - ext4_inc_count(handle, inode); + inode_set_ctime_current(inode); + ext4_inc_count(inode); ihold(inode); err = ext4_add_entry(handle, dentry, inode); if (!err) { - ext4_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); /* this can happen only for tmpfile being * linked the first time */ if (inode->i_nlink == 1) ext4_orphan_del(handle, inode); d_instantiate(dentry, inode); + ext4_fc_track_link(handle, dentry); } else { drop_nlink(inode); iput(inode); @@ -3238,6 +3483,29 @@ retry: return err; } +static int ext4_link(struct dentry *old_dentry, + struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = d_inode(old_dentry); + int err; + + if (inode->i_nlink >= EXT4_LINK_MAX) + return -EMLINK; + + err = fscrypt_prepare_link(old_dentry, dir, dentry); + if (err) + return err; + + if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && + (!projid_eq(EXT4_I(dir)->i_projid, + EXT4_I(old_dentry->d_inode)->i_projid))) + return -EXDEV; + + err = dquot_initialize(dir); + if (err) + return err; + return __ext4_link(dir, inode, dentry); +} /* * Try to find buffer head where contains the parent block. @@ -3253,14 +3521,39 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { + struct ext4_dir_entry_2 *de; + unsigned int offset; + bh = ext4_read_dirblock(inode, 0, EITHER); if (IS_ERR(bh)) { *retval = PTR_ERR(bh); return NULL; } - *parent_de = ext4_next_entry( - (struct ext4_dir_entry_2 *)bh->b_data, - inode->i_sb->s_blocksize); + + de = (struct ext4_dir_entry_2 *) bh->b_data; + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, + bh->b_size, 0) || + le32_to_cpu(de->inode) != inode->i_ino || + de->name_len != 1 || de->name[0] != '.') { + EXT4_ERROR_INODE(inode, "directory missing '.'"); + brelse(bh); + *retval = -EFSCORRUPTED; + return NULL; + } + offset = ext4_rec_len_from_disk(de->rec_len, + inode->i_sb->s_blocksize); + de = ext4_next_entry(de, inode->i_sb->s_blocksize); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, + bh->b_size, offset) || + le32_to_cpu(de->inode) == 0 || de->name_len != 2 || + de->name[0] != '.' || de->name[1] != '.') { + EXT4_ERROR_INODE(inode, "directory missing '..'"); + brelse(bh); + *retval = -EFSCORRUPTED; + return NULL; + } + *parent_de = de; + return bh; } @@ -3286,10 +3579,14 @@ struct ext4_renament { int dir_inlined; }; -static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) +static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent, bool is_cross) { int retval; + ent->is_dir = true; + if (!is_cross) + return 0; + ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode, &retval, &ent->parent_de, &ent->dir_inlined); @@ -3298,7 +3595,8 @@ static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino) return -EFSCORRUPTED; BUFFER_TRACE(ent->dir_bh, "get_write_access"); - return ext4_journal_get_write_access(handle, ent->dir_bh); + return ext4_journal_get_write_access(handle, ent->dir->i_sb, + ent->dir_bh, EXT4_JTR_NONE); } static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, @@ -3306,6 +3604,9 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, { int retval; + if (!ent->dir_bh) + return 0; + ent->parent_de->inode = cpu_to_le32(dir_ino); BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata"); if (!ent->dir_inlined) { @@ -3314,9 +3615,8 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, ent->inode, ent->dir_bh); } else { - retval = ext4_handle_dirty_dirent_node(handle, - ent->inode, - ent->dir_bh); + retval = ext4_handle_dirty_dirblock(handle, ent->inode, + ent->dir_bh); } } else { retval = ext4_mark_inode_dirty(handle, ent->inode); @@ -3331,32 +3631,54 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, static int ext4_setent(handle_t *handle, struct ext4_renament *ent, unsigned ino, unsigned file_type) { - int retval; + int retval, retval2; BUFFER_TRACE(ent->bh, "get write access"); - retval = ext4_journal_get_write_access(handle, ent->bh); + retval = ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->bh, + EXT4_JTR_NONE); if (retval) return retval; ent->de->inode = cpu_to_le32(ino); if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; inode_inc_iversion(ent->dir); - ent->dir->i_ctime = ent->dir->i_mtime = - current_time(ent->dir); - ext4_mark_inode_dirty(handle, ent->dir); + inode_set_mtime_to_ts(ent->dir, inode_set_ctime_current(ent->dir)); + retval = ext4_mark_inode_dirty(handle, ent->dir); BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); if (!ent->inlined) { - retval = ext4_handle_dirty_dirent_node(handle, - ent->dir, ent->bh); - if (unlikely(retval)) { - ext4_std_error(ent->dir->i_sb, retval); - return retval; + retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh); + if (unlikely(retval2)) { + ext4_std_error(ent->dir->i_sb, retval2); + return retval2; } } - brelse(ent->bh); - ent->bh = NULL; + return retval; +} - return 0; +static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, + unsigned ino, unsigned file_type) +{ + struct ext4_renament old = *ent; + int retval = 0; + + /* + * old->de could have moved from under us during make indexed dir, + * so the old->de may no longer valid and need to find it again + * before reset old inode info. + */ + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); + if (IS_ERR(old.bh)) + retval = PTR_ERR(old.bh); + if (!old.bh) + retval = -ENOENT; + if (retval) { + ext4_std_error(old.dir->i_sb, retval); + return; + } + + ext4_setent(handle, &old, ino, file_type); + brelse(old.bh); } static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, @@ -3364,7 +3686,7 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, { int retval = -ENOENT; struct buffer_head *bh; - struct ext4_dir_entry_2 *de; + struct ext4_dir_entry_2 *de = NULL; bh = ext4_find_entry(dir, d_name, &de, NULL); if (IS_ERR(bh)) @@ -3412,14 +3734,15 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) { if (ent->dir_nlink_delta) { if (ent->dir_nlink_delta == -1) - ext4_dec_count(handle, ent->dir); + ext4_dec_count(ent->dir); else - ext4_inc_count(handle, ent->dir); + ext4_inc_count(ent->dir); ext4_mark_inode_dirty(handle, ent->dir); } } -static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent, +static struct inode *ext4_whiteout_for_rename(struct mnt_idmap *idmap, + struct ext4_renament *ent, int credits, handle_t **h) { struct inode *wh; @@ -3433,7 +3756,8 @@ static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent, credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) + EXT4_XATTR_TRANS_BLOCKS + 4); retry: - wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE, + wh = ext4_new_inode_start_handle(idmap, ent->dir, + S_IFCHR | WHITEOUT_MODE, &ent->dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); @@ -3460,9 +3784,9 @@ retry: * while new_{dentry,inode) refers to the destination dentry/inode * This comes from rename(const char *oldpath, const char *newpath) */ -static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) +static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags) { handle_t *handle = NULL; struct ext4_renament old = { @@ -3495,6 +3819,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, retval = dquot_initialize(old.dir); if (retval) return retval; + retval = dquot_initialize(old.inode); + if (retval) + return retval; retval = dquot_initialize(new.dir); if (retval) return retval; @@ -3507,9 +3834,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, return retval; } - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); if (IS_ERR(old.bh)) return PTR_ERR(old.bh); + /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -3518,14 +3847,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, */ retval = -ENOENT; if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) - goto end_rename; + goto release_bh; new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, &new.de, &new.inlined); if (IS_ERR(new.bh)) { retval = PTR_ERR(new.bh); new.bh = NULL; - goto end_rename; + goto release_bh; } if (new.bh) { if (!new.inode) { @@ -3542,18 +3871,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); if (IS_ERR(handle)) { retval = PTR_ERR(handle); - handle = NULL; - goto end_rename; + goto release_bh; } } else { - whiteout = ext4_whiteout_for_rename(&old, credits, &handle); + whiteout = ext4_whiteout_for_rename(idmap, &old, credits, &handle); if (IS_ERR(whiteout)) { retval = PTR_ERR(whiteout); - whiteout = NULL; - goto end_rename; + goto release_bh; } } + old_file_type = old.de->file_type; if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) ext4_handle_sync(handle); @@ -3567,7 +3895,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) goto end_rename; } - retval = ext4_rename_dir_prepare(handle, &old); + retval = ext4_rename_dir_prepare(handle, &old, new.dir != old.dir); if (retval) goto end_rename; } @@ -3581,7 +3909,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, force_reread = (new.dir->i_ino == old.dir->i_ino && ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); - old_file_type = old.de->file_type; if (whiteout) { /* * Do this before adding a new entry, so the old entry is sure @@ -3591,7 +3918,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, EXT4_FT_CHRDEV); if (retval) goto end_rename; - ext4_mark_inode_dirty(handle, whiteout); + retval = ext4_mark_inode_dirty(handle, whiteout); + if (unlikely(retval)) + goto end_rename; + } if (!new.bh) { retval = ext4_add_entry(handle, new.dentry, old.inode); @@ -3611,8 +3941,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old.inode->i_ctime = current_time(old.inode); - ext4_mark_inode_dirty(handle, old.inode); + inode_set_ctime_current(old.inode); + retval = ext4_mark_inode_dirty(handle, old.inode); + if (unlikely(retval)) + goto end_rename; if (!whiteout) { /* @@ -3622,48 +3954,87 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } if (new.inode) { - ext4_dec_count(handle, new.inode); - new.inode->i_ctime = current_time(new.inode); + ext4_dec_count(new.inode); + inode_set_ctime_current(new.inode); } - old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir); + inode_set_mtime_to_ts(old.dir, inode_set_ctime_current(old.dir)); ext4_update_dx_flag(old.dir); - if (old.dir_bh) { + if (old.is_dir) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) goto end_rename; - ext4_dec_count(handle, old.dir); + ext4_dec_count(old.dir); if (new.inode) { /* checked ext4_empty_dir above, can't have another * parent, ext4_dec_count() won't work for many-linked * dirs */ clear_nlink(new.inode); } else { - ext4_inc_count(handle, new.dir); + ext4_inc_count(new.dir); ext4_update_dx_flag(new.dir); - ext4_mark_inode_dirty(handle, new.dir); + retval = ext4_mark_inode_dirty(handle, new.dir); + if (unlikely(retval)) + goto end_rename; + } + } + retval = ext4_mark_inode_dirty(handle, old.dir); + if (unlikely(retval)) + goto end_rename; + + if (old.is_dir) { + /* + * We disable fast commits here that's because the + * replay code is not yet capable of changing dot dot + * dirents in directories. + */ + ext4_fc_mark_ineligible(old.inode->i_sb, + EXT4_FC_REASON_RENAME_DIR, handle); + } else { + struct super_block *sb = old.inode->i_sb; + + if (new.inode) + ext4_fc_track_unlink(handle, new.dentry); + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && + !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) && + !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) { + __ext4_fc_track_link(handle, old.inode, new.dentry); + __ext4_fc_track_unlink(handle, old.inode, old.dentry); + if (whiteout) + __ext4_fc_track_create(handle, whiteout, + old.dentry); } } - ext4_mark_inode_dirty(handle, old.dir); + if (new.inode) { - ext4_mark_inode_dirty(handle, new.inode); + retval = ext4_mark_inode_dirty(handle, new.inode); + if (unlikely(retval)) + goto end_rename; if (!new.inode->i_nlink) ext4_orphan_add(handle, new.inode); } retval = 0; end_rename: - brelse(old.dir_bh); - brelse(old.bh); - brelse(new.bh); if (whiteout) { - if (retval) + if (retval) { + ext4_resetent(handle, &old, + old.inode->i_ino, old_file_type); drop_nlink(whiteout); + ext4_mark_inode_dirty(handle, whiteout); + ext4_orphan_add(handle, whiteout); + } unlock_new_inode(whiteout); + ext4_journal_stop(handle); iput(whiteout); - } - if (handle) + } else { ext4_journal_stop(handle); + } +release_bh: + brelse(old.dir_bh); + brelse(old.bh); + brelse(new.bh); + return retval; } @@ -3683,7 +4054,6 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, }; u8 new_file_type; int retval; - struct timespec64 ctime; if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) && !projid_eq(EXT4_I(new_dir)->i_projid, @@ -3739,14 +4109,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, ext4_handle_sync(handle); if (S_ISDIR(old.inode->i_mode)) { - old.is_dir = true; - retval = ext4_rename_dir_prepare(handle, &old); + retval = ext4_rename_dir_prepare(handle, &old, new.dir != old.dir); if (retval) goto end_rename; } if (S_ISDIR(new.inode->i_mode)) { - new.is_dir = true; - retval = ext4_rename_dir_prepare(handle, &new); + retval = ext4_rename_dir_prepare(handle, &new, new.dir != old.dir); if (retval) goto end_rename; } @@ -3777,12 +4145,16 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - ctime = current_time(old.inode); - old.inode->i_ctime = ctime; - new.inode->i_ctime = ctime; - ext4_mark_inode_dirty(handle, old.inode); - ext4_mark_inode_dirty(handle, new.inode); - + inode_set_ctime_current(old.inode); + inode_set_ctime_current(new.inode); + retval = ext4_mark_inode_dirty(handle, old.inode); + if (unlikely(retval)) + goto end_rename; + retval = ext4_mark_inode_dirty(handle, new.inode); + if (unlikely(retval)) + goto end_rename; + ext4_fc_mark_ineligible(new.inode->i_sb, + EXT4_FC_REASON_CROSS_RENAME, handle); if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) @@ -3807,14 +4179,16 @@ end_rename: return retval; } -static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, +static int ext4_rename2(struct mnt_idmap *idmap, + struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { int err; - if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb)))) - return -EIO; + err = ext4_emergency_state(old_dir->i_sb); + if (unlikely(err)) + return err; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; @@ -3829,7 +4203,7 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, new_dir, new_dentry); } - return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags); + return ext4_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); } /* @@ -3849,15 +4223,17 @@ const struct inode_operations ext4_dir_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_getattr, .listxattr = ext4_listxattr, - .get_acl = ext4_get_acl, + .get_inode_acl = ext4_get_acl, .set_acl = ext4_set_acl, .fiemap = ext4_fiemap, + .fileattr_get = ext4_fileattr_get, + .fileattr_set = ext4_fileattr_set, }; const struct inode_operations ext4_special_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_getattr, .listxattr = ext4_listxattr, - .get_acl = ext4_get_acl, + .get_inode_acl = ext4_get_acl, .set_acl = ext4_set_acl, }; |
