summaryrefslogtreecommitdiff
path: root/fs/ext4/dir.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/dir.c')
-rw-r--r--fs/ext4/dir.c353
1 files changed, 218 insertions, 135 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3c7d288ae94c..256fe2c1d4c1 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/dir.c
*
@@ -22,18 +23,21 @@
*/
#include <linux/fs.h>
-#include <linux/jbd2.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
-#include <linux/rbtree.h>
+#include <linux/iversion.h>
+#include <linux/unicode.h>
#include "ext4.h"
#include "xattr.h"
static int ext4_dx_readdir(struct file *, struct dir_context *);
/**
+ * is_dx_dir() - check if a directory is using htree indexing
+ * @inode: directory inode
+ *
* Check if the given dir-inode refers to an htree-indexed directory
- * (or a directory which chould potentially get coverted to use htree
+ * (or a directory which could potentially get converted to use htree
* indexing).
*
* Return 1 if it is a dx dir, 0 if not
@@ -42,8 +46,7 @@ static int is_dx_dir(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
- if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_COMPAT_DIR_INDEX) &&
+ if (ext4_has_feature_dir_index(inode->i_sb) &&
((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
((inode->i_size >> sb->s_blocksize_bits) == 1) ||
ext4_has_inline_data(inode)))
@@ -52,6 +55,18 @@ static int is_dx_dir(struct inode *inode)
return 0;
}
+static bool is_fake_dir_entry(struct ext4_dir_entry_2 *de)
+{
+ /* Check if . or .. , or skip if namelen is 0 */
+ if ((de->name_len > 0) && (de->name_len <= 2) && (de->name[0] == '.') &&
+ (de->name[1] == '.' || de->name[1] == '\0'))
+ return true;
+ /* Check if this is a csum entry */
+ if (de->file_type == EXT4_FT_DIR_CSUM)
+ return true;
+ return false;
+}
+
/*
* Return 0 if the directory entry is OK, and 1 if there is a problem
*
@@ -69,35 +84,44 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
const char *error_msg = NULL;
const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
+ const int next_offset = ((char *) de - buf) + rlen;
+ bool fake = is_fake_dir_entry(de);
+ bool has_csum = ext4_has_feature_metadata_csum(dir->i_sb);
- if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
+ if (unlikely(rlen < ext4_dir_rec_len(1, fake ? NULL : dir)))
error_msg = "rec_len is smaller than minimal";
else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0";
- else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
+ else if (unlikely(rlen < ext4_dir_rec_len(de->name_len,
+ fake ? NULL : dir)))
error_msg = "rec_len is too small for name_len";
- else if (unlikely(((char *) de - buf) + rlen > size))
- error_msg = "directory entry across range";
+ else if (unlikely(next_offset > size))
+ error_msg = "directory entry overrun";
+ else if (unlikely(next_offset > size - ext4_dir_rec_len(1,
+ has_csum ? NULL : dir) &&
+ next_offset != size))
+ error_msg = "directory entry too close to block end";
else if (unlikely(le32_to_cpu(de->inode) >
le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
+ else if (unlikely(next_offset == size && de->name_len == 1 &&
+ de->name[0] == '.'))
+ error_msg = "'.' directory cannot be the last in data block";
else
return 0;
if (filp)
ext4_error_file(filp, function, line, bh->b_blocknr,
- "bad entry in directory: %s - offset=%u(%u), "
- "inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset % size),
- offset, le32_to_cpu(de->inode),
- rlen, de->name_len);
+ "bad entry in directory: %s - offset=%u, "
+ "inode=%u, rec_len=%d, size=%d fake=%d",
+ error_msg, offset, le32_to_cpu(de->inode),
+ rlen, size, fake);
else
ext4_error_inode(dir, function, line, bh->b_blocknr,
- "bad entry in directory: %s - offset=%u(%u), "
- "inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset % size),
- offset, le32_to_cpu(de->inode),
- rlen, de->name_len);
+ "bad entry in directory: %s - offset=%u, "
+ "inode=%u, rec_len=%d, size=%d fake=%d",
+ error_msg, offset, le32_to_cpu(de->inode),
+ rlen, size, fake);
return 1;
}
@@ -105,68 +129,86 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
static int ext4_readdir(struct file *file, struct dir_context *ctx)
{
unsigned int offset;
- int i, stored;
+ int i;
struct ext4_dir_entry_2 *de;
int err;
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
- int dir_has_error = 0;
+ struct buffer_head *bh = NULL;
+ struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
+ struct dir_private_info *info = file->private_data;
+
+ err = fscrypt_prepare_readdir(inode);
+ if (err)
+ return err;
if (is_dx_dir(inode)) {
err = ext4_dx_readdir(file, ctx);
- if (err != ERR_BAD_DX_DIR) {
+ if (err != ERR_BAD_DX_DIR)
return err;
+
+ /* Can we just clear INDEX flag to ignore htree information? */
+ if (!ext4_has_feature_metadata_csum(sb)) {
+ /*
+ * We don't set the inode dirty flag since it's not
+ * critical that it gets flushed back to the disk.
+ */
+ ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
}
- /*
- * We don't set the inode dirty flag since it's not
- * critical that it get flushed back to the disk.
- */
- ext4_clear_inode_flag(file_inode(file),
- EXT4_INODE_INDEX);
}
if (ext4_has_inline_data(inode)) {
int has_inline_data = 1;
- int ret = ext4_read_inline_dir(file, ctx,
+ err = ext4_read_inline_dir(file, ctx,
&has_inline_data);
if (has_inline_data)
- return ret;
+ return err;
}
- stored = 0;
- offset = ctx->pos & (sb->s_blocksize - 1);
+ if (IS_ENCRYPTED(inode)) {
+ err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN, &fstr);
+ if (err < 0)
+ return err;
+ }
while (ctx->pos < inode->i_size) {
struct ext4_map_blocks map;
- struct buffer_head *bh = NULL;
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto errout;
+ }
+ cond_resched();
+ offset = ctx->pos & (sb->s_blocksize - 1);
map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
map.m_len = 1;
err = ext4_map_blocks(NULL, inode, &map, 0);
+ if (err == 0) {
+ /* m_len should never be zero but let's avoid
+ * an infinite loop if it somehow is */
+ if (map.m_len == 0)
+ map.m_len = 1;
+ ctx->pos += map.m_len * sb->s_blocksize;
+ continue;
+ }
if (err > 0) {
- pgoff_t index = map.m_pblk >>
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ pgoff_t index = map.m_pblk << inode->i_blkbits >>
+ PAGE_SHIFT;
if (!ra_has_index(&file->f_ra, index))
page_cache_sync_readahead(
- sb->s_bdev->bd_inode->i_mapping,
- &file->f_ra, file,
- index, 1);
- file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
- bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
+ sb->s_bdev->bd_mapping,
+ &file->f_ra, file, index,
+ 1 << EXT4_SB(sb)->s_min_folio_order);
+ file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
+ bh = ext4_bread(NULL, inode, map.m_lblk, 0);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ bh = NULL;
+ goto errout;
+ }
}
- /*
- * We ignore I/O errors on directories so users have a chance
- * of recovering data when there's a bad sector
- */
if (!bh) {
- if (!dir_has_error) {
- EXT4_ERROR_FILE(file, 0,
- "directory contains a "
- "hole at offset %llu",
- (unsigned long long) ctx->pos);
- dir_has_error = 1;
- }
/* corrupt size? Maybe no more blocks to read */
if (ctx->pos > inode->i_blocks << 9)
break;
@@ -176,13 +218,13 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
/* Check the checksum */
if (!buffer_verified(bh) &&
- !ext4_dirent_csum_verify(inode,
- (struct ext4_dir_entry *)bh->b_data)) {
+ !ext4_dirblock_csum_verify(inode, bh)) {
EXT4_ERROR_FILE(file, 0, "directory fails checksum "
"at offset %llu",
(unsigned long long)ctx->pos);
ctx->pos += sb->s_blocksize - offset;
brelse(bh);
+ bh = NULL;
continue;
}
set_buffer_verified(bh);
@@ -191,7 +233,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
* readdir(2), then we might be pointing to an invalid
* dirent right now. Scan from the start of the block
* to make sure. */
- if (file->f_version != inode->i_version) {
+ if (!inode_eq_iversion(inode, info->cookie)) {
for (i = 0; i < sb->s_blocksize && i < offset; ) {
de = (struct ext4_dir_entry_2 *)
(bh->b_data + i);
@@ -202,7 +244,8 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
* failure will be detected in the
* dirent test below. */
if (ext4_rec_len_from_disk(de->rec_len,
- sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
+ sb->s_blocksize) < ext4_dir_rec_len(1,
+ inode))
break;
i += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize);
@@ -210,7 +253,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
offset = i;
ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
| offset;
- file->f_version = inode->i_version;
+ info->cookie = inode_query_iversion(inode);
}
while (ctx->pos < inode->i_size
@@ -229,31 +272,62 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
offset += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize);
if (le32_to_cpu(de->inode)) {
- if (!dir_emit(ctx, de->name,
- de->name_len,
- le32_to_cpu(de->inode),
- get_dtype(sb, de->file_type))) {
- brelse(bh);
- return 0;
+ if (!IS_ENCRYPTED(inode)) {
+ if (!dir_emit(ctx, de->name,
+ de->name_len,
+ le32_to_cpu(de->inode),
+ get_dtype(sb, de->file_type)))
+ goto done;
+ } else {
+ int save_len = fstr.len;
+ struct fscrypt_str de_name =
+ FSTR_INIT(de->name,
+ de->name_len);
+ u32 hash;
+ u32 minor_hash;
+
+ if (IS_CASEFOLDED(inode)) {
+ hash = EXT4_DIRENT_HASH(de);
+ minor_hash = EXT4_DIRENT_MINOR_HASH(de);
+ } else {
+ hash = 0;
+ minor_hash = 0;
+ }
+
+ /* Directory is encrypted */
+ err = fscrypt_fname_disk_to_usr(inode,
+ hash, minor_hash, &de_name, &fstr);
+ de_name = fstr;
+ fstr.len = save_len;
+ if (err)
+ goto errout;
+ if (!dir_emit(ctx,
+ de_name.name, de_name.len,
+ le32_to_cpu(de->inode),
+ get_dtype(sb, de->file_type)))
+ goto done;
}
}
ctx->pos += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize);
}
- offset = 0;
+ if ((ctx->pos < inode->i_size) && !dir_relax_shared(inode))
+ goto done;
brelse(bh);
- if (ctx->pos < inode->i_size) {
- if (!dir_relax(inode))
- return 0;
- }
+ bh = NULL;
}
- return 0;
+done:
+ err = 0;
+errout:
+ fscrypt_fname_free_buffer(&fstr);
+ brelse(bh);
+ return err;
}
static inline int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
- return is_compat_task();
+ return in_compat_syscall();
#else
return (BITS_PER_LONG == 32);
#endif
@@ -322,14 +396,17 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
+ struct dir_private_info *info = file->private_data;
int dx_dir = is_dx_dir(inode);
- loff_t htree_max = ext4_get_htree_eof(file);
+ loff_t ret, htree_max = ext4_get_htree_eof(file);
if (likely(dx_dir))
- return generic_file_llseek_size(file, offset, whence,
+ ret = generic_file_llseek_size(file, offset, whence,
htree_max, htree_max);
else
- return ext4_llseek(file, offset, whence);
+ ret = ext4_llseek(file, offset, whence);
+ info->cookie = inode_peek_iversion(inode) - 1;
+ return ret;
}
/*
@@ -344,64 +421,36 @@ struct fname {
__u32 inode;
__u8 name_len;
__u8 file_type;
- char name[0];
+ char name[] __counted_by(name_len);
};
/*
- * This functoin implements a non-recursive way of freeing all of the
+ * This function implements a non-recursive way of freeing all of the
* nodes in the red-black tree.
*/
static void free_rb_tree_fname(struct rb_root *root)
{
- struct rb_node *n = root->rb_node;
- struct rb_node *parent;
- struct fname *fname;
-
- while (n) {
- /* Do the node's children first */
- if (n->rb_left) {
- n = n->rb_left;
- continue;
- }
- if (n->rb_right) {
- n = n->rb_right;
- continue;
- }
- /*
- * The node has no children; free it, and then zero
- * out parent's link to it. Finally go to the
- * beginning of the loop and try to free the parent
- * node.
- */
- parent = rb_parent(n);
- fname = rb_entry(n, struct fname, rb_hash);
+ struct fname *fname, *next;
+
+ rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
while (fname) {
struct fname *old = fname;
fname = fname->next;
kfree(old);
}
- if (!parent)
- *root = RB_ROOT;
- else if (parent->rb_left == n)
- parent->rb_left = NULL;
- else if (parent->rb_right == n)
- parent->rb_right = NULL;
- n = parent;
- }
-}
+ *root = RB_ROOT;
+}
-static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
- loff_t pos)
+static void ext4_htree_init_dir_info(struct file *filp, loff_t pos)
{
- struct dir_private_info *p;
-
- p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
- if (!p)
- return NULL;
- p->curr_hash = pos2maj_hash(filp, pos);
- p->curr_minor_hash = pos2min_hash(filp, pos);
- return p;
+ struct dir_private_info *p = filp->private_data;
+
+ if (is_dx_dir(file_inode(filp)) && !p->initialized) {
+ p->curr_hash = pos2maj_hash(filp, pos);
+ p->curr_minor_hash = pos2min_hash(filp, pos);
+ p->initialized = true;
+ }
}
void ext4_htree_free_dir_info(struct dir_private_info *p)
@@ -412,31 +461,34 @@ void ext4_htree_free_dir_info(struct dir_private_info *p)
/*
* Given a directory entry, enter it into the fname rb tree.
+ *
+ * When filename encryption is enabled, the dirent will hold the
+ * encrypted filename, while the htree will hold decrypted filename.
+ * The decrypted filename is passed in via ent_name. parameter.
*/
int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
__u32 minor_hash,
- struct ext4_dir_entry_2 *dirent)
+ struct ext4_dir_entry_2 *dirent,
+ struct fscrypt_str *ent_name)
{
struct rb_node **p, *parent = NULL;
struct fname *fname, *new_fn;
struct dir_private_info *info;
- int len;
info = dir_file->private_data;
p = &info->root.rb_node;
/* Create and allocate the fname structure */
- len = sizeof(struct fname) + dirent->name_len + 1;
- new_fn = kzalloc(len, GFP_KERNEL);
+ new_fn = kzalloc(struct_size(new_fn, name, ent_name->len + 1),
+ GFP_KERNEL);
if (!new_fn)
return -ENOMEM;
new_fn->hash = hash;
new_fn->minor_hash = minor_hash;
new_fn->inode = le32_to_cpu(dirent->inode);
- new_fn->name_len = dirent->name_len;
+ new_fn->name_len = ent_name->len;
new_fn->file_type = dirent->file_type;
- memcpy(new_fn->name, dirent->name, dirent->name_len);
- new_fn->name[dirent->name_len] = 0;
+ memcpy(new_fn->name, ent_name->name, ent_name->len);
while (*p) {
parent = *p;
@@ -472,7 +524,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
/*
* This is a helper function for ext4_dx_readdir. It calls filldir
- * for all entres on the fname linked list. (Normally there is only
+ * for all entries on the fname linked list. (Normally there is only
* one entry on the linked list, unless there are 62 bit hash collisions.)
*/
static int call_filldir(struct file *file, struct dir_context *ctx,
@@ -507,14 +559,9 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
struct dir_private_info *info = file->private_data;
struct inode *inode = file_inode(file);
struct fname *fname;
- int ret;
+ int ret = 0;
- if (!info) {
- info = ext4_htree_create_dir_info(file, ctx->pos);
- if (!info)
- return -ENOMEM;
- file->private_data = info;
- }
+ ext4_htree_init_dir_info(file, ctx->pos);
if (ctx->pos == ext4_get_htree_eof(file))
return 0; /* EOF */
@@ -547,15 +594,15 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
* cached entries.
*/
if ((!info->curr_node) ||
- (file->f_version != inode->i_version)) {
+ !inode_eq_iversion(inode, info->cookie)) {
info->curr_node = NULL;
free_rb_tree_fname(&info->root);
- file->f_version = inode->i_version;
+ info->cookie = inode_query_iversion(inode);
ret = ext4_htree_fill_tree(file, info->curr_hash,
info->curr_minor_hash,
&info->next_hash);
if (ret < 0)
- return ret;
+ goto finished;
if (ret == 0) {
ctx->pos = ext4_get_htree_eof(file);
break;
@@ -586,7 +633,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
}
finished:
info->last_pos = ctx->pos;
- return 0;
+ return ret < 0 ? ret : 0;
}
static int ext4_release_dir(struct inode *inode, struct file *filp)
@@ -597,10 +644,46 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
return 0;
}
+int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
+ int buf_size)
+{
+ struct ext4_dir_entry_2 *de;
+ int rlen;
+ unsigned int offset = 0;
+ char *top;
+
+ de = buf;
+ top = buf + buf_size;
+ while ((char *) de < top) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh,
+ buf, buf_size, offset))
+ return -EFSCORRUPTED;
+ rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+ de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
+ offset += rlen;
+ }
+ if ((char *) de > top)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+
+static int ext4_dir_open(struct inode *inode, struct file *file)
+{
+ struct dir_private_info *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ file->private_data = info;
+ return 0;
+}
+
const struct file_operations ext4_dir_operations = {
+ .open = ext4_dir_open,
.llseek = ext4_dir_llseek,
.read = generic_read_dir,
- .iterate = ext4_readdir,
+ .iterate_shared = ext4_readdir,
.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,