diff options
Diffstat (limited to 'fs/ext4/inline.c')
| -rw-r--r-- | fs/ext4/inline.c | 1102 |
1 files changed, 551 insertions, 551 deletions
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d9ecbf1113a7..1f6bc05593df 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1,28 +1,31 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (c) 2012 Taobao. * Written by Tao Ma <boyu.mt@taobao.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ + +#include <linux/iomap.h> +#include <linux/fiemap.h> +#include <linux/namei.h> +#include <linux/iversion.h> +#include <linux/sched/mm.h> + #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" #include "truncate.h" -#include <linux/fiemap.h> #define EXT4_XATTR_SYSTEM_DATA "data" #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) #define EXT4_INLINE_DOTDOT_OFFSET 2 #define EXT4_INLINE_DOTDOT_SIZE 4 -int ext4_get_inline_size(struct inode *inode) + +static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, + struct inode *inode, + void **fsdata); + +static int ext4_get_inline_size(struct inode *inode) { if (EXT4_I(inode)->i_inline_off) return EXT4_I(inode)->i_inline_size; @@ -36,8 +39,12 @@ static int get_max_inline_xattr_value_size(struct inode *inode, struct ext4_xattr_ibody_header *header; struct ext4_xattr_entry *entry; struct ext4_inode *raw_inode; + void *end; int free, min_offs; + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) + return 0; + min_offs = EXT4_SB(inode->i_sb)->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - EXT4_I(inode)->i_extra_isize - @@ -56,14 +63,23 @@ static int get_max_inline_xattr_value_size(struct inode *inode, raw_inode = ext4_raw_inode(iloc); header = IHDR(inode, raw_inode); entry = IFIRST(header); + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; /* Compute min_offs. */ - for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { - if (!entry->e_value_block && entry->e_value_size) { + while (!IS_LAST_ENTRY(entry)) { + void *next = EXT4_XATTR_NEXT(entry); + + if (next >= end) { + EXT4_ERROR_INODE(inode, + "corrupt xattr in inline inode"); + return 0; + } + if (!entry->e_value_inum && entry->e_value_size) { size_t offs = le16_to_cpu(entry->e_value_offs); if (offs < min_offs) min_offs = offs; } + entry = next; } free = min_offs - ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); @@ -102,9 +118,9 @@ int ext4_get_max_inline_size(struct inode *inode) error = ext4_get_inode_loc(inode, &iloc); if (error) { - ext4_error_inode(inode, __func__, __LINE__, 0, - "can't get inode location %lu", - inode->i_ino); + ext4_error_inode_err(inode, __func__, __LINE__, 0, -error, + "can't get inode location %lu", + inode->i_ino); return 0; } @@ -120,12 +136,6 @@ int ext4_get_max_inline_size(struct inode *inode) return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE; } -int ext4_has_inline_data(struct inode *inode) -{ - return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) && - EXT4_I(inode)->i_inline_off; -} - /* * this function does not take xattr_sem, which is OK because it is * currently only used in a code path coming form ext4_iget, before @@ -154,11 +164,16 @@ int ext4_find_inline_data_nolock(struct inode *inode) goto out; if (!is.s.not_found) { + if (is.s.here->e_value_inum) { + EXT4_ERROR_INODE(inode, "inline data xattr refers " + "to an external xattr inode"); + error = -EFSCORRUPTED; + goto out; + } EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + le32_to_cpu(is.s.here->e_value_size); - ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } out: brelse(is.iloc.bh); @@ -179,8 +194,7 @@ static int ext4_read_inline_data(struct inode *inode, void *buffer, BUG_ON(len > EXT4_I(inode)->i_inline_size); - cp_len = len < EXT4_MIN_INLINE_DATA_SIZE ? - len : EXT4_MIN_INLINE_DATA_SIZE; + cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE); raw_inode = ext4_raw_inode(iloc); memcpy(buffer, (void *)(raw_inode->i_block), cp_len); @@ -208,17 +222,20 @@ out: /* * write the buffer to the inline inode. * If 'create' is set, we don't need to do the extra copy in the xattr - * value since it is already handled by ext4_xattr_ibody_inline_set. + * value since it is already handled by ext4_xattr_ibody_set. * That saves us one memcpy. */ -void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, - void *buffer, loff_t pos, unsigned int len) +static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, + void *buffer, loff_t pos, unsigned int len) { struct ext4_xattr_entry *entry; struct ext4_xattr_ibody_header *header; struct ext4_inode *raw_inode; int cp_len = 0; + if (unlikely(ext4_emergency_state(inode->i_sb))) + return; + BUG_ON(!EXT4_I(inode)->i_inline_off); BUG_ON(pos + len > EXT4_I(inode)->i_inline_size); @@ -264,7 +281,9 @@ static int ext4_create_inline_data(handle_t *handle, if (error) return error; - error = ext4_journal_get_write_access(handle, is.iloc.bh); + BUFFER_TRACE(is.iloc.bh, "get_write_access"); + error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, + EXT4_JTR_NONE); if (error) goto out; @@ -276,7 +295,7 @@ static int ext4_create_inline_data(handle_t *handle, len = 0; } - /* Insert the the xttr entry. */ + /* Insert the xttr entry. */ i.value = value; i.value_len = len; @@ -284,9 +303,13 @@ static int ext4_create_inline_data(handle_t *handle, if (error) goto out; - BUG_ON(!is.s.not_found); + if (!is.s.not_found) { + EXT4_ERROR_INODE(inode, "unexpected inline data xattr"); + error = -EFSCORRUPTED; + goto out; + } - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) { if (error == -ENOSPC) ext4_clear_inode_state(inode, @@ -335,27 +358,35 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, if (error) goto out; - BUG_ON(is.s.not_found); + if (is.s.not_found) { + EXT4_ERROR_INODE(inode, "missing inline data xattr"); + error = -EFSCORRUPTED; + goto out; + } len -= EXT4_MIN_INLINE_DATA_SIZE; value = kzalloc(len, GFP_NOFS); - if (!value) + if (!value) { + error = -ENOMEM; goto out; + } error = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, len); - if (error == -ENODATA) + if (error < 0) goto out; - error = ext4_journal_get_write_access(handle, is.iloc.bh); + BUFFER_TRACE(is.iloc.bh, "get_write_access"); + error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, + EXT4_JTR_NONE); if (error) goto out; - /* Update the xttr entry. */ + /* Update the xattr entry. */ i.value = value; i.value_len = len; - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) goto out; @@ -373,10 +404,10 @@ out: return error; } -int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, - unsigned int len) +static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, + loff_t len) { - int ret, size; + int ret, size, no_expand; struct ext4_inode_info *ei = EXT4_I(inode); if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) @@ -386,15 +417,19 @@ int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, if (size < len) return -ENOSPC; - down_write(&EXT4_I(inode)->xattr_sem); - + ext4_write_lock_xattr(inode, &no_expand); + /* + * ei->i_inline_size may have changed since the initial check + * if other xattrs were added. Recalculate to ensure + * ext4_update_inline_data() validates against current capacity. + */ + (void) ext4_find_inline_data_nolock(inode); if (ei->i_inline_off) ret = ext4_update_inline_data(handle, inode, len); else ret = ext4_create_inline_data(handle, inode, len); - up_write(&EXT4_I(inode)->xattr_sem); - + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -416,27 +451,33 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, if (!ei->i_inline_off) return 0; + down_write(&ei->i_data_sem); + error = ext4_get_inode_loc(inode, &is.iloc); - if (error) + if (error) { + up_write(&ei->i_data_sem); return error; + } error = ext4_xattr_ibody_find(inode, &i, &is); if (error) goto out; - error = ext4_journal_get_write_access(handle, is.iloc.bh); + BUFFER_TRACE(is.iloc.bh, "get_write_access"); + error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, + EXT4_JTR_NONE); if (error) goto out; - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) goto out; memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); + memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE); - if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(inode->i_sb)) { if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); @@ -455,19 +496,20 @@ out: brelse(is.iloc.bh); if (error == -ENODATA) error = 0; + up_write(&ei->i_data_sem); return error; } -static int ext4_read_inline_page(struct inode *inode, struct page *page) +static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) { void *kaddr; int ret = 0; size_t len; struct ext4_iloc iloc; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); BUG_ON(!ext4_has_inline_data(inode)); - BUG_ON(page->index); + BUG_ON(folio->index); if (!EXT4_I(inode)->i_inline_off) { ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.", @@ -480,19 +522,19 @@ static int ext4_read_inline_page(struct inode *inode, struct page *page) goto out; len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); - kaddr = kmap_atomic(page); + BUG_ON(len > PAGE_SIZE); + kaddr = kmap_local_folio(folio, 0); ret = ext4_read_inline_data(inode, kaddr, len, &iloc); - flush_dcache_page(page); - kunmap_atomic(kaddr); - zero_user_segment(page, len, PAGE_CACHE_SIZE); - SetPageUptodate(page); + kaddr = folio_zero_tail(folio, len, kaddr + len); + kunmap_local(kaddr); + folio_mark_uptodate(folio); brelse(iloc.bh); out: return ret; } -int ext4_readpage_inline(struct inode *inode, struct page *page) +int ext4_readpage_inline(struct inode *inode, struct folio *folio) { int ret = 0; @@ -506,27 +548,26 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) * Current inline data can only exist in the 1st page, * So for all the other pages, just set them uptodate. */ - if (!page->index) - ret = ext4_read_inline_page(inode, page); - else if (!PageUptodate(page)) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - SetPageUptodate(page); + if (!folio->index) + ret = ext4_read_inline_folio(inode, folio); + else if (!folio_test_uptodate(folio)) { + folio_zero_segment(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); } up_read(&EXT4_I(inode)->xattr_sem); - unlock_page(page); + folio_unlock(folio); return ret >= 0 ? 0 : ret; } static int ext4_convert_inline_data_to_extent(struct address_space *mapping, - struct inode *inode, - unsigned flags) + struct inode *inode) { - int ret, needed_blocks; + int ret, needed_blocks, no_expand; handle_t *handle = NULL; int retries = 0, sem_held = 0; - struct page *page = NULL; + struct folio *folio = NULL; unsigned from, to; struct ext4_iloc iloc; @@ -539,7 +580,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, return 0; } - needed_blocks = ext4_writepage_trans_blocks(inode); + needed_blocks = ext4_chunk_trans_extent(inode, 1); ret = ext4_get_inode_loc(inode, &iloc); if (ret) @@ -555,15 +596,14 @@ retry: /* We cannot recurse into the filesystem as the transaction is already * started */ - flags |= AOP_FLAG_NOFS; - - page = grab_cache_page_write_begin(mapping, 0, flags); - if (!page) { - ret = -ENOMEM; - goto out; + folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); + goto out_nofolio; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); sem_held = 1; /* If some one has already done this for us, just exit. */ if (!ext4_has_inline_data(inode)) { @@ -573,32 +613,37 @@ retry: from = 0; to = ext4_get_inline_size(inode); - if (!PageUptodate(page)) { - ret = ext4_read_inline_page(inode, page); + if (!folio_test_uptodate(folio)) { + ret = ext4_read_inline_folio(inode, folio); if (ret < 0) goto out; } + ext4_fc_track_inode(handle, inode); ret = ext4_destroy_inline_data_nolock(handle, inode); if (ret) goto out; - if (ext4_should_dioread_nolock(inode)) - ret = __block_write_begin(page, from, to, ext4_get_block_write); - else - ret = __block_write_begin(page, from, to, ext4_get_block); + if (ext4_should_dioread_nolock(inode)) { + ret = ext4_block_write_begin(handle, folio, from, to, + ext4_get_block_unwritten); + } else + ret = ext4_block_write_begin(handle, folio, from, to, + ext4_get_block); + clear_buffer_new(folio_buffers(folio)); if (!ret && ext4_should_journal_data(inode)) { - ret = ext4_walk_page_buffers(handle, page_buffers(page), - from, to, NULL, - do_journal_get_write_access); + ret = ext4_walk_page_buffers(handle, inode, + folio_buffers(folio), from, to, + NULL, do_journal_get_write_access); } if (ret) { - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); + folio = NULL; ext4_orphan_add(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); sem_held = 0; ext4_journal_stop(handle); handle = NULL; @@ -616,14 +661,16 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - block_commit_write(page, from, to); + if (folio) + block_commit_write(folio, from, to); out: - if (page) { - unlock_page(page); - page_cache_release(page); + if (folio) { + folio_unlock(folio); + folio_put(folio); } +out_nofolio: if (sem_held) - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); if (handle) ext4_journal_stop(handle); brelse(iloc.bh); @@ -631,147 +678,191 @@ out: } /* - * Try to write data in the inode. - * If the inode has inline data, check whether the new write can be - * in the inode also. If not, create the page the handle, move the data - * to the page make it update and let the later codes create extent for it. + * Prepare the write for the inline data. + * If the data can be written into the inode, we just read + * the page and make it uptodate, and start the journal. + * Otherwise read the page, makes it dirty so that it can be + * handle in writepages(the i_disksize update is left to the + * normal ext4_da_write_end). */ -int ext4_try_to_write_inline_data(struct address_space *mapping, - struct inode *inode, - loff_t pos, unsigned len, - unsigned flags, - struct page **pagep) +int ext4_generic_write_inline_data(struct address_space *mapping, + struct inode *inode, + loff_t pos, unsigned len, + struct folio **foliop, + void **fsdata, bool da) { int ret; handle_t *handle; - struct page *page; + struct folio *folio; struct ext4_iloc iloc; - - if (pos + len > ext4_get_max_inline_size(inode)) - goto convert; + int retries = 0; ret = ext4_get_inode_loc(inode, &iloc); if (ret) return ret; - /* - * The possible write could happen in the inode, - * so try to reserve the space in inode first. - */ +retry_journal: handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - handle = NULL; - goto out; + goto out_release_bh; } ret = ext4_prepare_inline_data(handle, inode, pos + len); if (ret && ret != -ENOSPC) - goto out; + goto out_stop_journal; - /* We don't have space in inline inode, so convert it to extent. */ if (ret == -ENOSPC) { ext4_journal_stop(handle); - brelse(iloc.bh); - goto convert; - } + if (!da) { + brelse(iloc.bh); + /* Retry inside */ + return ext4_convert_inline_data_to_extent(mapping, inode); + } - flags |= AOP_FLAG_NOFS; + ret = ext4_da_convert_inline_data_to_extent(mapping, inode, fsdata); + if (ret == -ENOSPC && + ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry_journal; + goto out_release_bh; + } - page = grab_cache_page_write_begin(mapping, 0, flags); - if (!page) { - ret = -ENOMEM; - goto out; + folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); + goto out_stop_journal; } - *pagep = page; down_read(&EXT4_I(inode)->xattr_sem); + /* Someone else had converted it to extent */ if (!ext4_has_inline_data(inode)) { ret = 0; - unlock_page(page); - page_cache_release(page); - goto out_up_read; + goto out_release_folio; } - if (!PageUptodate(page)) { - ret = ext4_read_inline_page(inode, page); + if (!folio_test_uptodate(folio)) { + ret = ext4_read_inline_folio(inode, folio); if (ret < 0) - goto out_up_read; + goto out_release_folio; } - ret = 1; - handle = NULL; -out_up_read: + ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE); + if (ret) + goto out_release_folio; + *foliop = folio; up_read(&EXT4_I(inode)->xattr_sem); -out: - if (handle) - ext4_journal_stop(handle); + brelse(iloc.bh); + return 1; + +out_release_folio: + up_read(&EXT4_I(inode)->xattr_sem); + folio_unlock(folio); + folio_put(folio); +out_stop_journal: + ext4_journal_stop(handle); +out_release_bh: brelse(iloc.bh); return ret; -convert: - return ext4_convert_inline_data_to_extent(mapping, - inode, flags); +} + +/* + * Try to write data in the inode. + * If the inode has inline data, check whether the new write can be + * in the inode also. If not, create the page the handle, move the data + * to the page make it update and let the later codes create extent for it. + */ +int ext4_try_to_write_inline_data(struct address_space *mapping, + struct inode *inode, + loff_t pos, unsigned len, + struct folio **foliop) +{ + if (pos + len > ext4_get_max_inline_size(inode)) + return ext4_convert_inline_data_to_extent(mapping, inode); + return ext4_generic_write_inline_data(mapping, inode, pos, len, + foliop, NULL, false); } int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, - unsigned copied, struct page *page) + unsigned copied, struct folio *folio) { - int ret; + handle_t *handle = ext4_journal_current_handle(); + int no_expand; void *kaddr; struct ext4_iloc iloc; + int ret = 0, ret2; - if (unlikely(copied < len)) { - if (!PageUptodate(page)) { - copied = 0; - goto out; - } - } - - ret = ext4_get_inode_loc(inode, &iloc); - if (ret) { - ext4_std_error(inode->i_sb, ret); + if (unlikely(copied < len) && !folio_test_uptodate(folio)) copied = 0; - goto out; - } - down_write(&EXT4_I(inode)->xattr_sem); - BUG_ON(!ext4_has_inline_data(inode)); + if (likely(copied)) { + ret = ext4_get_inode_loc(inode, &iloc); + if (ret) { + folio_unlock(folio); + folio_put(folio); + ext4_std_error(inode->i_sb, ret); + goto out; + } + ext4_write_lock_xattr(inode, &no_expand); + BUG_ON(!ext4_has_inline_data(inode)); - kaddr = kmap_atomic(page); - ext4_write_inline_data(inode, &iloc, kaddr, pos, len); - kunmap_atomic(kaddr); - SetPageUptodate(page); - /* clear page dirty so that writepages wouldn't work for us. */ - ClearPageDirty(page); + /* + * ei->i_inline_off may have changed since + * ext4_write_begin() called + * ext4_try_to_write_inline_data() + */ + (void) ext4_find_inline_data_nolock(inode); - up_write(&EXT4_I(inode)->xattr_sem); - brelse(iloc.bh); -out: - return copied; -} + kaddr = kmap_local_folio(folio, 0); + ext4_write_inline_data(inode, &iloc, kaddr, pos, copied); + kunmap_local(kaddr); + folio_mark_uptodate(folio); + /* clear dirty flag so that writepages wouldn't work for us. */ + folio_clear_dirty(folio); -struct buffer_head * -ext4_journalled_write_inline_data(struct inode *inode, - unsigned len, - struct page *page) -{ - int ret; - void *kaddr; - struct ext4_iloc iloc; + ext4_write_unlock_xattr(inode, &no_expand); + brelse(iloc.bh); - ret = ext4_get_inode_loc(inode, &iloc); - if (ret) { - ext4_std_error(inode->i_sb, ret); - return NULL; + /* + * It's important to update i_size while still holding folio + * lock: page writeout could otherwise come in and zero + * beyond i_size. + */ + ext4_update_inode_size(inode, pos + copied); } + folio_unlock(folio); + folio_put(folio); - down_write(&EXT4_I(inode)->xattr_sem); - kaddr = kmap_atomic(page); - ext4_write_inline_data(inode, &iloc, kaddr, 0, len); - kunmap_atomic(kaddr); - up_write(&EXT4_I(inode)->xattr_sem); + /* + * Don't mark the inode dirty under folio lock. First, it unnecessarily + * makes the holding time of folio lock longer. Second, it forces lock + * ordering of folio lock and transaction start for journaling + * filesystems. + */ + if (likely(copied)) + mark_inode_dirty(inode); +out: + /* + * If we didn't copy as much data as expected, we need to trim back + * size of xattr containing inline data. + */ + if (pos + len > inode->i_size && ext4_can_truncate(inode)) + ext4_orphan_add(handle, inode); - return iloc.bh; + ret2 = ext4_journal_stop(handle); + if (!ret) + ret = ret2; + if (pos + len > inode->i_size) { + ext4_truncate_failed_write(inode); + /* + * If truncate failed early the inode might still be + * on the orphan list; we need to make sure the inode + * is removed from the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } + return ret ? ret : copied; } /* @@ -781,19 +872,19 @@ ext4_journalled_write_inline_data(struct inode *inode, * clear the inode state safely. * 2. The inode has inline data, then we need to read the data, make it * update and dirty so that ext4_da_writepages can handle it. We don't - * need to start the journal since the file's metatdata isn't changed now. + * need to start the journal since the file's metadata isn't changed now. */ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, struct inode *inode, - unsigned flags, void **fsdata) { int ret = 0, inline_size; - struct page *page; + struct folio *folio; - page = grab_cache_page_write_begin(mapping, 0, flags); - if (!page) - return -ENOMEM; + folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) + return PTR_ERR(folio); down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) { @@ -803,155 +894,37 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, inline_size = ext4_get_inline_size(inode); - if (!PageUptodate(page)) { - ret = ext4_read_inline_page(inode, page); + if (!folio_test_uptodate(folio)) { + ret = ext4_read_inline_folio(inode, folio); if (ret < 0) goto out; } - ret = __block_write_begin(page, 0, inline_size, - ext4_da_get_block_prep); + ret = ext4_block_write_begin(NULL, folio, 0, inline_size, + ext4_da_get_block_prep); if (ret) { + up_read(&EXT4_I(inode)->xattr_sem); + folio_unlock(folio); + folio_put(folio); ext4_truncate_failed_write(inode); - goto out; + return ret; } - SetPageDirty(page); - SetPageUptodate(page); + clear_buffer_new(folio_buffers(folio)); + folio_mark_dirty(folio); + folio_mark_uptodate(folio); ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); *fsdata = (void *)CONVERT_INLINE_DATA; out: up_read(&EXT4_I(inode)->xattr_sem); - if (page) { - unlock_page(page); - page_cache_release(page); + if (folio) { + folio_unlock(folio); + folio_put(folio); } return ret; } -/* - * Prepare the write for the inline data. - * If the the data can be written into the inode, we just read - * the page and make it uptodate, and start the journal. - * Otherwise read the page, makes it dirty so that it can be - * handle in writepages(the i_disksize update is left to the - * normal ext4_da_write_end). - */ -int ext4_da_write_inline_data_begin(struct address_space *mapping, - struct inode *inode, - loff_t pos, unsigned len, - unsigned flags, - struct page **pagep, - void **fsdata) -{ - int ret, inline_size; - handle_t *handle; - struct page *page; - struct ext4_iloc iloc; - - ret = ext4_get_inode_loc(inode, &iloc); - if (ret) - return ret; - - handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - goto out; - } - - inline_size = ext4_get_max_inline_size(inode); - - ret = -ENOSPC; - if (inline_size >= pos + len) { - ret = ext4_prepare_inline_data(handle, inode, pos + len); - if (ret && ret != -ENOSPC) - goto out; - } - - if (ret == -ENOSPC) { - ret = ext4_da_convert_inline_data_to_extent(mapping, - inode, - flags, - fsdata); - goto out; - } - - /* - * We cannot recurse into the filesystem as the transaction - * is already started. - */ - flags |= AOP_FLAG_NOFS; - - page = grab_cache_page_write_begin(mapping, 0, flags); - if (!page) { - ret = -ENOMEM; - goto out; - } - - down_read(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - ret = 0; - goto out_release_page; - } - - if (!PageUptodate(page)) { - ret = ext4_read_inline_page(inode, page); - if (ret < 0) - goto out_release_page; - } - - up_read(&EXT4_I(inode)->xattr_sem); - *pagep = page; - handle = NULL; - brelse(iloc.bh); - return 1; -out_release_page: - up_read(&EXT4_I(inode)->xattr_sem); - unlock_page(page); - page_cache_release(page); -out: - if (handle) - ext4_journal_stop(handle); - brelse(iloc.bh); - return ret; -} - -int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, - unsigned len, unsigned copied, - struct page *page) -{ - int i_size_changed = 0; - - copied = ext4_write_inline_data_end(inode, pos, len, copied, page); - - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_mutex. - * - * But it's important to update i_size while still holding page lock: - * page writeout could otherwise come in and zero beyond i_size. - */ - if (pos+copied > inode->i_size) { - i_size_write(inode, pos+copied); - i_size_changed = 1; - } - unlock_page(page); - page_cache_release(page); - - /* - * Don't mark the inode dirty under page lock. First, it unnecessarily - * makes the holding time of page lock longer. Second, it forces lock - * ordering of page lock and transaction start for journaling - * filesystems. - */ - if (i_size_changed) - mark_inode_dirty(inode); - - return copied; -} - #ifdef INLINE_DIR_DEBUG void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, void *inline_start, int inline_size) @@ -965,7 +938,7 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, offset = 0; while ((void *)de < dlimit) { de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); - trace_printk("de: off %u rlen %u name %*.s nlen %u ino %u\n", + trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n", offset, de_len, de->name_len, de->name, de->name_len, le32_to_cpu(de->inode)); if (ext4_check_dir_entry(dir, NULL, de, bh, @@ -986,29 +959,26 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, * and -EEXIST if directory entry already exists. */ static int ext4_add_dirent_to_inline(handle_t *handle, - struct dentry *dentry, + struct ext4_filename *fname, + struct inode *dir, struct inode *inode, struct ext4_iloc *iloc, void *inline_start, int inline_size) { - struct inode *dir = dentry->d_parent->d_inode; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - unsigned short reclen; int err; struct ext4_dir_entry_2 *de; - reclen = EXT4_DIR_REC_LEN(namelen); - err = ext4_find_dest_de(dir, inode, iloc->bh, - inline_start, inline_size, - name, namelen, &de); + err = ext4_find_dest_de(dir, iloc->bh, inline_start, + inline_size, fname, &de); if (err) return err; - err = ext4_journal_get_write_access(handle, iloc->bh); + BUFFER_TRACE(iloc->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh, + EXT4_JTR_NONE); if (err) return err; - ext4_insert_dentry(inode, de, inline_size, name, namelen); + ext4_insert_dentry(dir, inode, de, inline_size, fname); ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); @@ -1023,10 +993,9 @@ static int ext4_add_dirent_to_inline(handle_t *handle, * happen is that the times are slightly out of date * and/or different from the directory change time. */ - dir->i_mtime = dir->i_ctime = ext4_current_time(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); ext4_update_dx_flag(dir); - dir->i_version++; - ext4_mark_inode_dirty(handle, dir); + inode_inc_iversion(dir); return 1; } @@ -1046,20 +1015,20 @@ static void *ext4_get_inline_xattr_pos(struct inode *inode, } /* Set the final de to cover the whole block. */ -static void ext4_update_final_de(void *de_buf, int old_size, int new_size) +void ext4_update_final_de(void *de_buf, int old_size, int new_size) { struct ext4_dir_entry_2 *de, *prev_de; void *limit; int de_len; - de = (struct ext4_dir_entry_2 *)de_buf; + de = de_buf; if (old_size) { limit = de_buf + old_size; do { prev_de = de; de_len = ext4_rec_len_from_disk(de->rec_len, old_size); de_buf += de_len; - de = (struct ext4_dir_entry_2 *)de_buf; + de = de_buf; } while (de_buf < limit); prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size - @@ -1078,7 +1047,7 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir, int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; int new_size = get_max_inline_xattr_value_size(dir, iloc); - if (new_size - old_size <= EXT4_DIR_REC_LEN(1)) + if (new_size - old_size <= ext4_dir_rec_len(1, NULL)) return -ENOSPC; ret = ext4_update_inline_data(handle, dir, @@ -1097,60 +1066,19 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc, void *buf, int inline_size) { - ext4_create_inline_data(handle, inode, inline_size); + int ret; + + ret = ext4_create_inline_data(handle, inode, inline_size); + if (ret) { + ext4_msg(inode->i_sb, KERN_EMERG, + "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", + inode->i_ino, ret); + return; + } ext4_write_inline_data(inode, iloc, buf, 0, inline_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } -static int ext4_finish_convert_inline_dir(handle_t *handle, - struct inode *inode, - struct buffer_head *dir_block, - void *buf, - int inline_size) -{ - int err, csum_size = 0, header_size = 0; - struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; - void *target = dir_block->b_data; - - /* - * First create "." and ".." and then copy the dir information - * back to the block. - */ - de = (struct ext4_dir_entry_2 *)target; - de = ext4_init_dot_dotdot(inode, de, - inode->i_sb->s_blocksize, csum_size, - le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1); - header_size = (void *)de - target; - - memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, - inline_size - EXT4_INLINE_DOTDOT_SIZE); - - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - csum_size = sizeof(struct ext4_dir_entry_tail); - - inode->i_size = inode->i_sb->s_blocksize; - i_size_write(inode, inode->i_sb->s_blocksize); - EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; - ext4_update_final_de(dir_block->b_data, - inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size, - inode->i_sb->s_blocksize - csum_size); - - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, - inode->i_sb->s_blocksize); - initialize_dirent_tail(t, inode->i_sb->s_blocksize); - } - set_buffer_uptodate(dir_block); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); - if (err) - goto out; - set_buffer_verified(dir_block); -out: - return err; -} - static int ext4_convert_inline_data_nolock(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc) @@ -1172,6 +1100,18 @@ static int ext4_convert_inline_data_nolock(handle_t *handle, if (error < 0) goto out; + /* + * Make sure the inline directory entries pass checks before we try to + * convert them, so that we avoid touching stuff that needs fsck. + */ + if (S_ISDIR(inode->i_mode)) { + error = ext4_check_all_de(inode, iloc->bh, + buf + EXT4_INLINE_DOTDOT_SIZE, + inline_size - EXT4_INLINE_DOTDOT_SIZE); + if (error) + goto out; + } + error = ext4_destroy_inline_data_nolock(handle, inode); if (error) goto out; @@ -1194,7 +1134,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle, } lock_buffer(data_bh); - error = ext4_journal_get_create_access(handle, data_bh); + error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh, + EXT4_JTR_NONE); if (error) { unlock_buffer(data_bh); error = -EIO; @@ -1205,14 +1146,23 @@ static int ext4_convert_inline_data_nolock(handle_t *handle, if (!S_ISDIR(inode->i_mode)) { memcpy(data_bh->b_data, buf, inline_size); set_buffer_uptodate(data_bh); + unlock_buffer(data_bh); error = ext4_handle_dirty_metadata(handle, inode, data_bh); } else { - error = ext4_finish_convert_inline_dir(handle, inode, data_bh, - buf, inline_size); + unlock_buffer(data_bh); + inode->i_size = inode->i_sb->s_blocksize; + i_size_write(inode, inode->i_sb->s_blocksize); + EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; + + error = ext4_init_dirblock(handle, inode, data_bh, + le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), + buf + EXT4_INLINE_DOTDOT_SIZE, + inline_size - EXT4_INLINE_DOTDOT_SIZE); + if (!error) + error = ext4_mark_inode_dirty(handle, inode); } - unlock_buffer(data_bh); out_restore: if (error) ext4_restore_inline_data(handle, inode, iloc, buf, inline_size); @@ -1228,19 +1178,18 @@ out: * If succeeds, return 0. If not, extended the inline dir and copied data to * the new created block. */ -int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode) +int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, + struct inode *dir, struct inode *inode) { - int ret, inline_size; + int ret, ret2, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; - struct inode *dir = dentry->d_parent->d_inode; ret = ext4_get_inode_loc(dir, &iloc); if (ret) return ret; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) goto out; @@ -1248,7 +1197,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; - ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc, + ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); if (ret != -ENOSPC) goto out; @@ -1269,8 +1218,9 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, if (inline_size) { inline_start = ext4_get_inline_xattr_pos(dir, &iloc); - ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc, - inline_start, inline_size); + ret = ext4_add_dirent_to_inline(handle, fname, dir, + inode, &iloc, inline_start, + inline_size); if (ret != -ENOSPC) goto out; @@ -1284,8 +1234,10 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, ret = ext4_convert_inline_data_nolock(handle, dir, &iloc); out: - ext4_mark_inode_dirty(handle, dir); - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); + ret2 = ext4_mark_inode_dirty(handle, dir); + if (unlikely(ret2 && !ret)) + ret = ret2; brelse(iloc.bh); return ret; } @@ -1295,11 +1247,11 @@ out: * inlined dir. It returns the number directory entries loaded * into the tree. If there is an error it is returned in err. */ -int htree_inlinedir_to_tree(struct file *dir_file, - struct inode *dir, ext4_lblk_t block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash, - int *has_inline_data) +int ext4_inlinedir_to_tree(struct file *dir_file, + struct inode *dir, ext4_lblk_t block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash, + int *has_inline_data) { int err = 0, count = 0; unsigned int parent_ino; @@ -1310,6 +1262,7 @@ int htree_inlinedir_to_tree(struct file *dir_file, struct ext4_iloc iloc; void *dir_buf = NULL; struct ext4_dir_entry_2 fake; + struct fscrypt_str tmp_str; ret = ext4_get_inode_loc(inode, &iloc); if (ret) @@ -1346,20 +1299,20 @@ int htree_inlinedir_to_tree(struct file *dir_file, if (pos == 0) { fake.inode = cpu_to_le32(inode->i_ino); fake.name_len = 1; - strcpy(fake.name, "."); + memcpy(fake.name, ".", 2); fake.rec_len = ext4_rec_len_to_disk( - EXT4_DIR_REC_LEN(fake.name_len), - inline_size); + ext4_dir_rec_len(fake.name_len, NULL), + inline_size); ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); de = &fake; pos = EXT4_INLINE_DOTDOT_OFFSET; } else if (pos == EXT4_INLINE_DOTDOT_OFFSET) { fake.inode = cpu_to_le32(parent_ino); fake.name_len = 2; - strcpy(fake.name, ".."); + memcpy(fake.name, "..", 3); fake.rec_len = ext4_rec_len_to_disk( - EXT4_DIR_REC_LEN(fake.name_len), - inline_size); + ext4_dir_rec_len(fake.name_len, NULL), + inline_size); ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); de = &fake; pos = EXT4_INLINE_DOTDOT_SIZE; @@ -1374,17 +1327,28 @@ int htree_inlinedir_to_tree(struct file *dir_file, } } - ext4fs_dirhash(de->name, de->name_len, hinfo); + if (ext4_hash_in_dirent(dir)) { + hinfo->hash = EXT4_DIRENT_HASH(de); + hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); + } else { + err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo); + if (err) { + ret = err; + goto out; + } + } if ((hinfo->hash < start_hash) || ((hinfo->hash == start_hash) && (hinfo->minor_hash < start_minor_hash))) continue; if (de->inode == 0) continue; - err = ext4_htree_store_dirent(dir_file, - hinfo->hash, hinfo->minor_hash, de); + tmp_str.name = de->name; + tmp_str.len = de->name_len; + err = ext4_htree_store_dirent(dir_file, hinfo->hash, + hinfo->minor_hash, de, &tmp_str); if (err) { - count = err; + ret = err; goto out; } count++; @@ -1417,6 +1381,7 @@ int ext4_read_inline_dir(struct file *file, struct ext4_iloc iloc; void *dir_buf = NULL; int dotdot_offset, dotdot_size, extra_offset, extra_size; + struct dir_private_info *info = file->private_data; ret = ext4_get_inode_loc(inode, &iloc); if (ret) @@ -1442,6 +1407,7 @@ int ext4_read_inline_dir(struct file *file, if (ret < 0) goto out; + ret = 0; sb = inode->i_sb; parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); offset = ctx->pos; @@ -1453,18 +1419,18 @@ int ext4_read_inline_dir(struct file *file, * So we will use extra_offset and extra_size to indicate them * during the inline dir iteration. */ - dotdot_offset = EXT4_DIR_REC_LEN(1); - dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2); + dotdot_offset = ext4_dir_rec_len(1, NULL); + dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL); extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; extra_size = extra_offset + inline_size; /* - * If the version has changed since the last call to + * If the cookie has changed since the last call to * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the inline * dir to make sure. */ - if (file->f_version != inode->i_version) { + if (!inode_eq_iversion(inode, info->cookie)) { for (i = 0; i < extra_size && i < offset;) { /* * "." is with offset 0 and @@ -1489,14 +1455,14 @@ int ext4_read_inline_dir(struct file *file, * failure will be detected in the * dirent test below. */ if (ext4_rec_len_from_disk(de->rec_len, extra_size) - < EXT4_DIR_REC_LEN(1)) + < ext4_dir_rec_len(1, NULL)) break; i += ext4_rec_len_from_disk(de->rec_len, extra_size); } offset = i; ctx->pos = offset; - file->f_version = inode->i_version; + info->cookie = inode_query_iversion(inode); } while (ctx->pos < extra_size) { @@ -1533,6 +1499,35 @@ out: return ret; } +void *ext4_read_inline_link(struct inode *inode) +{ + struct ext4_iloc iloc; + int ret, inline_size; + void *link; + + ret = ext4_get_inode_loc(inode, &iloc); + if (ret) + return ERR_PTR(ret); + + ret = -ENOMEM; + inline_size = ext4_get_inline_size(inode); + link = kmalloc(inline_size + 1, GFP_NOFS); + if (!link) + goto out; + + ret = ext4_read_inline_data(inode, link, inline_size, &iloc); + if (ret < 0) { + kfree(link); + goto out; + } + nd_terminate_link(link, inode->i_size, ret); +out: + if (ret < 0) + link = ERR_PTR(ret); + brelse(iloc.bh); + return link; +} + struct buffer_head *ext4_get_first_inline_block(struct inode *inode, struct ext4_dir_entry_2 **parent_de, int *retval) @@ -1587,29 +1582,41 @@ out: } struct buffer_head *ext4_find_inline_entry(struct inode *dir, - const struct qstr *d_name, + struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir, int *has_inline_data) { + struct ext4_xattr_ibody_find is = { + .s = { .not_found = -ENODATA, }, + }; + struct ext4_xattr_info i = { + .name_index = EXT4_XATTR_INDEX_SYSTEM, + .name = EXT4_XATTR_SYSTEM_DATA, + }; int ret; - struct ext4_iloc iloc; void *inline_start; int inline_size; - if (ext4_get_inode_loc(dir, &iloc)) - return NULL; + ret = ext4_get_inode_loc(dir, &is.iloc); + if (ret) + return ERR_PTR(ret); down_read(&EXT4_I(dir)->xattr_sem); + + ret = ext4_xattr_ibody_find(dir, &i, &is); + if (ret) + goto out; + if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; } - inline_start = (void *)ext4_raw_inode(&iloc)->i_block + + inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; - ret = search_dir(iloc.bh, inline_start, inline_size, - dir, d_name, 0, res_dir); + ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, + dir, fname, 0, res_dir); if (ret == 1) goto out_find; if (ret < 0) @@ -1618,20 +1625,23 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) goto out; - inline_start = ext4_get_inline_xattr_pos(dir, &iloc); + inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; - ret = search_dir(iloc.bh, inline_start, inline_size, - dir, d_name, 0, res_dir); + ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, + dir, fname, 0, res_dir); if (ret == 1) goto out_find; out: - brelse(iloc.bh); - iloc.bh = NULL; + brelse(is.iloc.bh); + if (ret < 0) + is.iloc.bh = ERR_PTR(ret); + else + is.iloc.bh = NULL; out_find: up_read(&EXT4_I(dir)->xattr_sem); - return iloc.bh; + return is.iloc.bh; } int ext4_delete_inline_entry(handle_t *handle, @@ -1640,7 +1650,7 @@ int ext4_delete_inline_entry(handle_t *handle, struct buffer_head *bh, int *has_inline_data) { - int err, inline_size; + int err, inline_size, no_expand; struct ext4_iloc iloc; void *inline_start; @@ -1648,7 +1658,7 @@ int ext4_delete_inline_entry(handle_t *handle, if (err) return err; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; @@ -1666,23 +1676,22 @@ int ext4_delete_inline_entry(handle_t *handle, EXT4_MIN_INLINE_DATA_SIZE; } - err = ext4_journal_get_write_access(handle, bh); + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, dir->i_sb, bh, + EXT4_JTR_NONE); if (err) goto out; - err = ext4_generic_delete_entry(handle, dir, de_del, bh, + err = ext4_generic_delete_entry(dir, de_del, bh, inline_start, inline_size, 0); if (err) goto out; - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_mark_inode_dirty(handle, dir); - if (unlikely(err)) - goto out; - ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); out: - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); + if (likely(err == 0)) + err = ext4_mark_inode_dirty(handle, dir); brelse(iloc.bh); if (err != -ENOENT) ext4_std_error(dir->i_sb, err); @@ -1718,25 +1727,28 @@ ext4_get_inline_entry(struct inode *inode, return (struct ext4_dir_entry_2 *)(inline_pos + offset); } -int empty_inline_dir(struct inode *dir, int *has_inline_data) +bool empty_inline_dir(struct inode *dir, int *has_inline_data) { int err, inline_size; struct ext4_iloc iloc; + size_t inline_len; void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; - int ret = 1; + bool ret = false; err = ext4_get_inode_loc(dir, &iloc); if (err) { - EXT4_ERROR_INODE(dir, "error %d getting inode %lu block", - err, dir->i_ino); - return 1; + EXT4_ERROR_INODE_ERR(dir, -err, + "error %d getting inode %lu block", + err, dir->i_ino); + return false; } down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; + ret = true; goto out; } @@ -1745,12 +1757,12 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); - ret = 1; goto out; } + inline_len = ext4_get_inline_size(dir); offset = EXT4_INLINE_DOTDOT_SIZE; - while (offset < dir->i_size) { + while (offset < inline_len) { de = ext4_get_inline_entry(dir, &iloc, offset, &inline_pos, &inline_size); if (ext4_check_dir_entry(dir, NULL, de, @@ -1759,20 +1771,19 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - " "inode %u, rec_len %u, name_len %d" - "inline size %d\n", + "inline size %d", dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); - ret = 1; goto out; } if (le32_to_cpu(de->inode)) { - ret = 0; goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); } + ret = true; out: up_read(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); @@ -1781,91 +1792,51 @@ out: int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) { - int ret; + int ret, no_expand; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); ret = ext4_destroy_inline_data_nolock(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return ret; } -int ext4_inline_data_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, - int *has_inline) +int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) { - __u64 physical = 0; - __u64 length; - __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST; - int error = 0; + __u64 addr; + int error = -EAGAIN; struct ext4_iloc iloc; down_read(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - *has_inline = 0; + if (!ext4_has_inline_data(inode)) goto out; - } error = ext4_get_inode_loc(inode, &iloc); if (error) goto out; - physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; - physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; - physical += offsetof(struct ext4_inode, i_block); - length = i_size_read(inode); + addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; + addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; + addr += offsetof(struct ext4_inode, i_block); - if (physical) - error = fiemap_fill_next_extent(fieinfo, 0, physical, - length, flags); brelse(iloc.bh); -out: - up_read(&EXT4_I(inode)->xattr_sem); - return (error < 0 ? error : 0); -} - -/* - * Called during xattr set, and if we can sparse space 'needed', - * just create the extent tree evict the data to the outer block. - * - * We use jbd2 instead of page cache to move data to the 1st block - * so that the whole transaction can be committed as a whole and - * the data isn't lost because of the delayed page cache write. - */ -int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed) -{ - int error; - struct ext4_xattr_entry *entry; - struct ext4_xattr_ibody_header *header; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - - raw_inode = ext4_raw_inode(&iloc); - header = IHDR(inode, raw_inode); - entry = (struct ext4_xattr_entry *)((void *)raw_inode + - EXT4_I(inode)->i_inline_off); - if (EXT4_XATTR_LEN(entry->e_name_len) + - EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) { - error = -ENOSPC; - goto out; - } + iomap->addr = addr; + iomap->offset = 0; + iomap->length = min_t(loff_t, ext4_get_inline_size(inode), + i_size_read(inode)); + iomap->type = IOMAP_INLINE; + iomap->flags = 0; - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); out: - brelse(iloc.bh); + up_read(&EXT4_I(inode)->xattr_sem); return error; } -void ext4_inline_data_truncate(struct inode *inode, int *has_inline) +int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; - int inline_size, value_len, needed_blocks; + int inline_size, value_len, needed_blocks, no_expand, err = 0; size_t i_size; void *value = NULL; struct ext4_xattr_ibody_find is = { @@ -1877,22 +1848,23 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) }; - needed_blocks = ext4_writepage_trans_blocks(inode); + needed_blocks = ext4_chunk_trans_extent(inode, 1); handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); if (IS_ERR(handle)) - return; + return PTR_ERR(handle); - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { + ext4_write_unlock_xattr(inode, &no_expand); *has_inline = 0; ext4_journal_stop(handle); - return; + return 0; } - if (ext4_orphan_add(handle, inode)) + if ((err = ext4_orphan_add(handle, inode)) != 0) goto out; - if (ext4_get_inode_loc(inode, &is.iloc)) + if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) goto out; down_write(&EXT4_I(inode)->i_data_sem); @@ -1901,33 +1873,53 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) EXT4_I(inode)->i_disksize = i_size; if (i_size < inline_size) { + /* + * if there's inline data to truncate and this file was + * converted to extents after that inline data was written, + * the extent status cache must be cleared to avoid leaving + * behind stale delayed allocated extent entries + */ + if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) + ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); + /* Clear the content in the xattr space. */ if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { - if (ext4_xattr_ibody_find(inode, &i, &is)) + if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) goto out_error; - BUG_ON(is.s.not_found); + if (is.s.not_found) { + EXT4_ERROR_INODE(inode, + "missing inline data xattr"); + err = -EFSCORRUPTED; + goto out_error; + } value_len = le32_to_cpu(is.s.here->e_value_size); value = kmalloc(value_len, GFP_NOFS); - if (!value) + if (!value) { + err = -ENOMEM; goto out_error; + } - if (ext4_xattr_ibody_get(inode, i.name_index, i.name, - value, value_len)) + err = ext4_xattr_ibody_get(inode, i.name_index, + i.name, value, value_len); + if (err <= 0) goto out_error; i.value = value; i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; - if (ext4_xattr_ibody_inline_set(handle, inode, &i, &is)) + err = ext4_xattr_ibody_set(handle, inode, &i, &is); + if (err) goto out_error; } /* Clear the content within i_blocks. */ - if (i_size < EXT4_MIN_INLINE_DATA_SIZE) - memset(ext4_raw_inode(&is.iloc)->i_block + i_size, 0, - EXT4_MIN_INLINE_DATA_SIZE - i_size); + if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { + void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; + memset(p + i_size, 0, + EXT4_MIN_INLINE_DATA_SIZE - i_size); + } EXT4_I(inode)->i_inline_size = i_size < EXT4_MIN_INLINE_DATA_SIZE ? @@ -1938,32 +1930,45 @@ out_error: up_write(&EXT4_I(inode)->i_data_sem); out: brelse(is.iloc.bh); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); kfree(value); if (inode->i_nlink) ext4_orphan_del(handle, inode); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - ext4_mark_inode_dirty(handle, inode); - if (IS_SYNC(inode)) - ext4_handle_sync(handle); - + if (err == 0) { + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + err = ext4_mark_inode_dirty(handle, inode); + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + } ext4_journal_stop(handle); - return; + return err; } int ext4_convert_inline_data(struct inode *inode) { - int error, needed_blocks; + int error, needed_blocks, no_expand; handle_t *handle; struct ext4_iloc iloc; if (!ext4_has_inline_data(inode)) { ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); return 0; + } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { + /* + * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is + * cleared. This means we are in the middle of moving of + * inline data to delay allocated block. Just force writeout + * here to finish conversion. + */ + error = filemap_flush(inode->i_mapping); + if (error) + return error; + if (!ext4_has_inline_data(inode)) + return 0; } - needed_blocks = ext4_writepage_trans_blocks(inode); + needed_blocks = ext4_chunk_trans_extent(inode, 1); iloc.bh = NULL; error = ext4_get_inode_loc(inode, &iloc); @@ -1976,15 +1981,10 @@ int ext4_convert_inline_data(struct inode *inode) goto out_free; } - down_write(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - up_write(&EXT4_I(inode)->xattr_sem); - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); - up_write(&EXT4_I(inode)->xattr_sem); -out: + ext4_write_lock_xattr(inode, &no_expand); + if (ext4_has_inline_data(inode)) + error = ext4_convert_inline_data_nolock(handle, inode, &iloc); + ext4_write_unlock_xattr(inode, &no_expand); ext4_journal_stop(handle); out_free: brelse(iloc.bh); |
