diff options
Diffstat (limited to 'fs')
37 files changed, 886 insertions, 404 deletions
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c27c27300d95..e474127dd255 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -451,7 +451,9 @@ failed_out: /** * ext2_alloc_branch - allocate and set up a chain of blocks. * @inode: owner - * @num: depth of the chain (number of blocks to allocate) + * @indirect_blks: depth of the chain (number of blocks to allocate) + * @blks: number of allocated direct blocks + * @goal: preferred place for allocation * @offsets: offsets (in the blocks) to store the pointers to next. * @branch: place to store the chain in. * diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 63e599524085..217b290ae3a5 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -285,7 +285,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) /* assert(atomic_read(acl->a_refcount) == 1); */ FOREACH_ACL_ENTRY(pa, acl, pe) { - switch(pa->e_tag) { + switch (pa->e_tag) { case ACL_USER_OBJ: pa->e_perm &= (mode >> 6) | ~S_IRWXO; mode &= (pa->e_perm << 6) | ~S_IRWXU; @@ -326,7 +326,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) } *mode_p = (*mode_p & ~S_IRWXUGO) | mode; - return not_equiv; + return not_equiv; } static int f2fs_acl_create(struct inode *dir, umode_t *mode, diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a98e1b02279e..ed70b68b2b38 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -66,7 +66,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_meta = is_meta, + .is_por = !is_meta, }; int err; @@ -130,6 +130,30 @@ struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, false); } +static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, + int type) +{ + struct seg_entry *se; + unsigned int segno, offset; + bool exist; + + if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ) + return true; + + segno = GET_SEGNO(sbi, blkaddr); + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + se = get_seg_entry(sbi, segno); + + exist = f2fs_test_bit(offset, se->cur_valid_map); + if (!exist && type == DATA_GENERIC_ENHANCE) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "blkaddr:%u, sit bitmap:%d", blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); + } + return exist; +} + bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { @@ -151,15 +175,22 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, return false; break; case META_POR: + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; case DATA_GENERIC: + case DATA_GENERIC_ENHANCE: + case DATA_GENERIC_ENHANCE_READ: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || - blkaddr < MAIN_BLKADDR(sbi))) { - if (type == DATA_GENERIC) { - f2fs_msg(sbi->sb, KERN_WARNING, - "access invalid blkaddr:%u", blkaddr); - WARN_ON(1); - } + blkaddr < MAIN_BLKADDR(sbi))) { + f2fs_msg(sbi->sb, KERN_WARNING, + "access invalid blkaddr:%u", blkaddr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); return false; + } else { + return __is_bitmap_valid(sbi, blkaddr, type); } break; case META_GENERIC: @@ -189,7 +220,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, - .is_meta = (type != META_POR), + .is_por = (type == META_POR), }; struct blk_plug plug; @@ -644,6 +675,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; + if (bdev_read_only(sbi->sb->s_bdev)) { + f2fs_msg(sbi->sb, KERN_INFO, "write access " + "unavailable, skipping orphan cleanup"); + return 0; + } + if (s_flags & SB_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); sbi->sb->s_flags &= ~SB_RDONLY; @@ -758,13 +795,27 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) } } +static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi, + struct f2fs_checkpoint *ckpt) +{ + unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset); + __u32 chksum; + + chksum = f2fs_crc32(sbi, ckpt, chksum_ofs); + if (chksum_ofs < CP_CHKSUM_OFFSET) { + chksum_ofs += sizeof(chksum); + chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs, + F2FS_BLKSIZE - chksum_ofs); + } + return chksum; +} + static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, struct f2fs_checkpoint **cp_block, struct page **cp_page, unsigned long long *version) { - unsigned long blk_size = sbi->blocksize; size_t crc_offset = 0; - __u32 crc = 0; + __u32 crc; *cp_page = f2fs_get_meta_page(sbi, cp_addr); if (IS_ERR(*cp_page)) @@ -773,15 +824,27 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); - if (crc_offset > (blk_size - sizeof(__le32))) { + if (crc_offset < CP_MIN_CHKSUM_OFFSET || + crc_offset > CP_CHKSUM_OFFSET) { f2fs_put_page(*cp_page, 1); f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - crc = cur_cp_crc(*cp_block); - if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { + if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) { + if (crc_offset != CP_MIN_CHKSUM_OFFSET) { + f2fs_put_page(*cp_page, 1); + f2fs_msg(sbi->sb, KERN_WARNING, + "layout of large_nat_bitmap is deprecated, " + "run fsck to repair, chksum_offset: %zu", + crc_offset); + return -EINVAL; + } + } + + crc = f2fs_checkpoint_chksum(sbi, *cp_block); + if (crc != cur_cp_crc(*cp_block)) { f2fs_put_page(*cp_page, 1); f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); return -EINVAL; @@ -1009,13 +1072,11 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; - if (is_dir) - F2FS_I(inode)->cp_task = current; + F2FS_I(inode)->cp_task = current; filemap_fdatawrite(inode->i_mapping); - if (is_dir) - F2FS_I(inode)->cp_task = NULL; + F2FS_I(inode)->cp_task = NULL; iput(inode); /* We need to give cpu to another writers. */ @@ -1391,7 +1452,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); - crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset)); + crc32 = f2fs_checkpoint_chksum(sbi, ckpt); *((__le32 *)((unsigned char *)ckpt + le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); @@ -1475,7 +1536,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + + spin_lock(&sbi->stat_lock); sbi->unusable_block_count = 0; + spin_unlock(&sbi->stat_lock); + __set_cp_next_pack(sbi); /* @@ -1500,6 +1565,9 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long long ckpt_ver; int err = 0; + if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi)) + return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (cpc->reason != CP_PAUSE) return 0; @@ -1516,10 +1584,6 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) err = -EIO; goto out; } - if (f2fs_readonly(sbi->sb)) { - err = -EROFS; - goto out; - } trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 64040e998439..eda4181d2092 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -218,12 +218,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, struct block_device *bdev = sbi->sb->s_bdev; int i; - for (i = 0; i < sbi->s_ndevs; i++) { - if (FDEV(i).start_blk <= blk_addr && - FDEV(i).end_blk >= blk_addr) { - blk_addr -= FDEV(i).start_blk; - bdev = FDEV(i).bdev; - break; + if (f2fs_is_multi_device(sbi)) { + for (i = 0; i < sbi->s_ndevs; i++) { + if (FDEV(i).start_blk <= blk_addr && + FDEV(i).end_blk >= blk_addr) { + blk_addr -= FDEV(i).start_blk; + bdev = FDEV(i).bdev; + break; + } } } if (bio) { @@ -237,6 +239,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) { int i; + if (!f2fs_is_multi_device(sbi)) + return 0; + for (i = 0; i < sbi->s_ndevs; i++) if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr) return i; @@ -420,7 +425,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { - __submit_merged_write_cond(sbi, NULL, 0, 0, type, true); + __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true); } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, @@ -448,7 +453,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, - __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + fio->is_por ? META_POR : (__is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFAULT; trace_f2fs_submit_page_bio(page, fio); @@ -498,9 +504,7 @@ next: spin_unlock(&io->io_lock); } - if (__is_valid_data_blkaddr(fio->old_blkaddr)) - verify_block_addr(fio, fio->old_blkaddr); - verify_block_addr(fio, fio->new_blkaddr); + verify_fio_blkaddr(fio); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; @@ -557,9 +561,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) - return ERR_PTR(-EFAULT); - bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) return ERR_PTR(-ENOMEM); @@ -587,8 +588,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) { - struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct bio *bio; + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -600,8 +603,8 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return -EFAULT; } ClearPageError(page); - inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - __submit_bio(F2FS_I_SB(inode), bio, DATA); + inc_page_count(sbi, F2FS_RD_DATA); + __submit_bio(sbi, bio, DATA); return 0; } @@ -729,6 +732,11 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (f2fs_lookup_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFAULT; + goto put_err; + } goto got_it; } @@ -742,6 +750,13 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, err = -ENOENT; goto put_err; } + if (dn.data_blkaddr != NEW_ADDR && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + dn.data_blkaddr, + DATA_GENERIC_ENHANCE)) { + err = -EFAULT; + goto put_err; + } got_it: if (PageUptodate(page)) { unlock_page(page); @@ -1084,12 +1099,12 @@ next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFAULT; goto sync_out; } - if (is_valid_data_blkaddr(sbi, blkaddr)) { + if (__is_valid_data_blkaddr(blkaddr)) { /* use out-place-update for driect IO under LFS mode */ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { @@ -1499,6 +1514,118 @@ out: return ret; } +static int f2fs_read_single_page(struct inode *inode, struct page *page, + unsigned nr_pages, + struct f2fs_map_blocks *map, + struct bio **bio_ret, + sector_t *last_block_in_bio, + bool is_readahead) +{ + struct bio *bio = *bio_ret; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + sector_t block_in_file; + sector_t last_block; + sector_t last_block_in_file; + sector_t block_nr; + int ret = 0; + + block_in_file = (sector_t)page->index; + last_block = block_in_file + nr_pages; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + blkbits; + if (last_block > last_block_in_file) + last_block = last_block_in_file; + + /* just zeroing out page which is beyond EOF */ + if (block_in_file >= last_block) + goto zero_out; + /* + * Map blocks using the previous result first. + */ + if ((map->m_flags & F2FS_MAP_MAPPED) && + block_in_file > map->m_lblk && + block_in_file < (map->m_lblk + map->m_len)) + goto got_it; + + /* + * Then do more f2fs_map_blocks() calls until we are + * done with this page. + */ + map->m_lblk = block_in_file; + map->m_len = last_block - block_in_file; + + ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT); + if (ret) + goto out; +got_it: + if ((map->m_flags & F2FS_MAP_MAPPED)) { + block_nr = map->m_pblk + block_in_file - map->m_lblk; + SetPageMappedToDisk(page); + + if (!PageUptodate(page) && !cleancache_get_page(page)) { + SetPageUptodate(page); + goto confused; + } + + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, + DATA_GENERIC_ENHANCE_READ)) { + ret = -EFAULT; + goto out; + } + } else { +zero_out: + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) + SetPageUptodate(page); + unlock_page(page); + goto out; + } + + /* + * This page will go to BIO. Do we need to send this + * BIO off first? + */ + if (bio && (*last_block_in_bio != block_nr - 1 || + !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { +submit_and_realloc: + __submit_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + if (bio == NULL) { + bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, + is_readahead ? REQ_RAHEAD : 0); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + bio = NULL; + goto out; + } + } + + /* + * If the page is under writeback, we need to wait for + * its completion to see the correct decrypted data. + */ + f2fs_wait_on_block_writeback(inode, block_nr); + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + ClearPageError(page); + *last_block_in_bio = block_nr; + goto out; +confused: + if (bio) { + __submit_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + unlock_page(page); +out: + *bio_ret = bio; + return ret; +} + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -1515,13 +1642,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping, struct bio *bio = NULL; sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; - sector_t block_in_file; - sector_t last_block; - sector_t last_block_in_file; - sector_t block_nr; struct f2fs_map_blocks map; + int ret = 0; map.m_pblk = 0; map.m_lblk = 0; @@ -1544,98 +1666,13 @@ static int f2fs_mpage_readpages(struct address_space *mapping, goto next_page; } - block_in_file = (sector_t)page->index; - last_block = block_in_file + nr_pages; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> - blkbits; - if (last_block > last_block_in_file) - last_block = last_block_in_file; - - /* just zeroing out page which is beyond EOF */ - if (block_in_file >= last_block) - goto zero_out; - /* - * Map blocks using the previous result first. - */ - if ((map.m_flags & F2FS_MAP_MAPPED) && - block_in_file > map.m_lblk && - block_in_file < (map.m_lblk + map.m_len)) - goto got_it; - - /* - * Then do more f2fs_map_blocks() calls until we are - * done with this page. - */ - map.m_lblk = block_in_file; - map.m_len = last_block - block_in_file; - - if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT)) - goto set_error_page; -got_it: - if ((map.m_flags & F2FS_MAP_MAPPED)) { - block_nr = map.m_pblk + block_in_file - map.m_lblk; - SetPageMappedToDisk(page); - - if (!PageUptodate(page) && !cleancache_get_page(page)) { - SetPageUptodate(page); - goto confused; - } - - if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, - DATA_GENERIC)) - goto set_error_page; - } else { -zero_out: + ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio, + &last_block_in_bio, is_readahead); + if (ret) { + SetPageError(page); zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); unlock_page(page); - goto next_page; } - - /* - * This page will go to BIO. Do we need to send this - * BIO off first? - */ - if (bio && (last_block_in_bio != block_nr - 1 || - !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { -submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; - } - if (bio == NULL) { - bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0); - if (IS_ERR(bio)) { - bio = NULL; - goto set_error_page; - } - } - - /* - * If the page is under writeback, we need to wait for - * its completion to see the correct decrypted data. - */ - f2fs_wait_on_block_writeback(inode, block_nr); - - if (bio_add_page(bio, page, blocksize, 0) < blocksize) - goto submit_and_realloc; - - inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - ClearPageError(page); - last_block_in_bio = block_nr; - goto next_page; -set_error_page: - SetPageError(page); - zero_user_segment(page, 0, PAGE_SIZE); - unlock_page(page); - goto next_page; -confused: - if (bio) { - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; - } - unlock_page(page); next_page: if (pages) put_page(page); @@ -1643,7 +1680,7 @@ next_page: BUG_ON(pages && !list_empty(pages)); if (bio) __submit_bio(F2FS_I_SB(inode), bio, DATA); - return 0; + return pages ? 0 : ret; } static int f2fs_read_data_page(struct file *file, struct page *page) @@ -1813,7 +1850,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) fio->old_blkaddr = ei.blk + page->index - ei.fofs; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC)) + DATA_GENERIC_ENHANCE)) return -EFAULT; ipu_force = true; @@ -1840,7 +1877,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) got_it: if (__is_valid_data_blkaddr(fio->old_blkaddr) && !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC)) { + DATA_GENERIC_ENHANCE)) { err = -EFAULT; goto out_writepage; } @@ -1848,7 +1885,8 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) && + if (ipu_force || + (__is_valid_data_blkaddr(fio->old_blkaddr) && need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) @@ -1866,9 +1904,10 @@ got_it: true); if (PageWriteback(page)) end_page_writeback(page); + } else { + set_inode_flag(inode, FI_UPDATE_WRITE); } trace_f2fs_do_write_data_page(fio->page, IPU); - set_inode_flag(inode, FI_UPDATE_WRITE); return err; } @@ -2030,7 +2069,8 @@ out: } unlock_page(page); - if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) + if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && + !F2FS_I(inode)->cp_task) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -2491,6 +2531,11 @@ repeat: zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } else { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFAULT; + goto fail; + } err = f2fs_submit_page_read(inode, page, blkaddr); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bacf5c2a8850..06b89a9862ab 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -210,7 +210,14 @@ enum { META_SSA, META_MAX, META_POR, - DATA_GENERIC, + DATA_GENERIC, /* check range only */ + DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */ + DATA_GENERIC_ENHANCE_READ, /* + * strong check on range and segment + * bitmap but no warning due to race + * condition of read on truncated area + * by extent_cache + */ META_GENERIC, }; @@ -1041,7 +1048,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ - bool is_meta; /* indicate borrow meta inode mapping or not */ + bool is_por; /* indicate IO is from recovery or not */ bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ @@ -1068,8 +1075,8 @@ struct f2fs_dev_info { block_t start_blk; block_t end_blk; #ifdef CONFIG_BLK_DEV_ZONED - unsigned int nr_blkz; /* Total number of zones */ - u8 *blkz_type; /* Array of zones type */ + unsigned int nr_blkz; /* Total number of zones */ + unsigned long *blkz_seq; /* Bitmap indicating sequential zones */ #endif }; @@ -1366,6 +1373,17 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) } #endif +/* + * Test if the mounted volume is a multi-device volume. + * - For a single regular disk volume, sbi->s_ndevs is 0. + * - For a single zoned disk volume, sbi->s_ndevs is 1. + * - For a multi-device volume, sbi->s_ndevs is always 2 or more. + */ +static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) +{ + return sbi->s_ndevs > 1; +} + /* For write statistics. Suppose sector size is 512 bytes, * and the return value is in kbytes. s is of struct f2fs_sb_info. */ @@ -1777,6 +1795,7 @@ enospc: return -ENOSPC; } +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) @@ -1785,13 +1804,21 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks = min(sbi->reserved_blocks, sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); + if (unlikely(inode->i_blocks < sectors)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks, + (unsigned long long)sectors); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return; + } f2fs_i_blocks_write(inode, count, false, true); } @@ -1889,7 +1916,11 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { offset = (flag == SIT_BITMAP) ? le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + /* + * if large_nat_bitmap feature is enabled, leave checksum + * protection for all nat/sit bitmaps. + */ + return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); } if (__cp_payload(sbi) > 0) { @@ -2008,7 +2039,6 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, !sbi->total_valid_block_count); f2fs_bug_on(sbi, !sbi->total_valid_node_count); - f2fs_bug_on(sbi, !is_inode && !inode->i_blocks); sbi->total_valid_node_count--; sbi->total_valid_block_count--; @@ -2018,10 +2048,19 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, spin_unlock(&sbi->stat_lock); - if (is_inode) + if (is_inode) { dquot_free_inode(inode); - else + } else { + if (unlikely(inode->i_blocks == 0)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return; + } f2fs_i_blocks_write(inode, 1, false, true); + } } static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) @@ -2545,7 +2584,14 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { - return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode); + unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - + get_inline_xattr_addrs(inode); + return ALIGN_DOWN(addrs, 1); +} + +static inline unsigned int addrs_per_block(struct inode *inode) +{ + return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1); } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -2558,7 +2604,9 @@ static inline void *inline_xattr_addr(struct inode *inode, struct page *page) static inline int inline_xattr_size(struct inode *inode) { - return get_inline_xattr_addrs(inode) * sizeof(__le32); + if (f2fs_has_inline_xattr(inode)) + return get_inline_xattr_addrs(inode) * sizeof(__le32); + return 0; } static inline int f2fs_has_inline_data(struct inode *inode) @@ -2800,12 +2848,10 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) -#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \ - (!is_read_io((fio)->op) || (fio)->is_meta)) +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); static inline void verify_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { @@ -2824,15 +2870,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr) return true; } -static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, - block_t blkaddr) -{ - if (!__is_valid_data_blkaddr(blkaddr)) - return false; - verify_blkaddr(sbi, blkaddr, DATA_GENERIC); - return true; -} - static inline void f2fs_set_page_private(struct page *page, unsigned long data) { @@ -3530,16 +3567,12 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED -static inline int get_blkz_type(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkaddr) +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, + block_t blkaddr) { unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz; - int i; - for (i = 0; i < sbi->s_ndevs; i++) - if (FDEV(i).bdev == bdev) - return FDEV(i).blkz_type[zno]; - return -EINVAL; + return test_bit(zno, FDEV(devi).blkz_seq); } #endif @@ -3548,9 +3581,23 @@ static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) return f2fs_sb_has_blkzoned(sbi); } +static inline bool f2fs_bdev_support_discard(struct block_device *bdev) +{ + return blk_queue_discard(bdev_get_queue(bdev)) || + bdev_is_zoned(bdev); +} + static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) { - return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)); + int i; + + if (!f2fs_is_multi_device(sbi)) + return f2fs_bdev_support_discard(sbi->sb->s_bdev); + + for (i = 0; i < sbi->s_ndevs; i++) + if (f2fs_bdev_support_discard(FDEV(i).bdev)) + return true; + return false; } static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) @@ -3559,6 +3606,20 @@ static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) f2fs_hw_should_discard(sbi); } +static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi) +{ + int i; + + if (!f2fs_is_multi_device(sbi)) + return bdev_read_only(sbi->sb->s_bdev); + + for (i = 0; i < sbi->s_ndevs; i++) + if (bdev_read_only(FDEV(i).bdev)) + return true; + return false; +} + + static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) { clear_opt(sbi, ADAPTIVE); @@ -3614,7 +3675,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (f2fs_post_read_required(inode)) return true; - if (sbi->s_ndevs) + if (f2fs_is_multi_device(sbi)) return true; /* * for blkzoned device, fallback direct IO to buffered IO, so @@ -3651,4 +3712,4 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) return false; } -#endif +#endif /* _LINUX_F2FS_H */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5742ab8b57dc..45b45f37d347 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -39,6 +39,8 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) ret = filemap_fault(vmf); up_read(&F2FS_I(inode)->i_mmap_sem); + trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret); + return ret; } @@ -356,7 +358,7 @@ static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - is_valid_data_blkaddr(sbi, blkaddr)) + __is_valid_data_blkaddr(blkaddr)) return true; break; case SEEK_HOLE: @@ -422,7 +424,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), - blkaddr, DATA_GENERIC)) { + blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); goto fail; } @@ -523,7 +525,8 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) f2fs_set_data_blkaddr(dn); if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) + !f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) continue; f2fs_invalidate_blocks(sbi, blkaddr); @@ -552,7 +555,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) void f2fs_truncate_data_blocks(struct dnode_of_data *dn) { - f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); + f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); } static int truncate_partial_data_page(struct inode *inode, u64 from, @@ -1006,7 +1009,8 @@ next_dnode: } else if (ret == -ENOENT) { if (dn.max_level == 0) return -ENOENT; - done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len); + done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node, + len); blkaddr += done; do_replace += done; goto next; @@ -1017,6 +1021,14 @@ next_dnode: for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + + if (__is_valid_data_blkaddr(*blkaddr) && + !f2fs_is_valid_blkaddr(sbi, *blkaddr, + DATA_GENERIC_ENHANCE)) { + f2fs_put_dnode(&dn); + return -EFAULT; + } + if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { if (test_opt(sbi, LFS)) { @@ -1157,7 +1169,7 @@ static int __exchange_data_block(struct inode *src_inode, int ret; while (len) { - olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); + olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), array_size(olen, sizeof(block_t)), @@ -2573,10 +2585,10 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) sizeof(range))) return -EFAULT; - if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || + if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || __is_large_section(sbi)) { f2fs_msg(sbi->sb, KERN_WARNING, - "Can't flush %u in %d for segs_per_sec %u != 1\n", + "Can't flush %u in %d for segs_per_sec %u != 1", range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); return -EINVAL; @@ -2858,7 +2870,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { f2fs_msg(sbi->sb, KERN_WARNING, - "%s: Enable GC = ino %lx after %x GC trials\n", + "%s: Enable GC = ino %lx after %x GC trials", __func__, inode->i_ino, fi->i_gc_failures[GC_FAILURE_PIN]); clear_inode_flag(inode, FI_PIN_FILE); @@ -3035,15 +3047,21 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(file); ssize_t ret; - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) - return -EIO; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { + ret = -EIO; + goto out; + } - if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) - return -EINVAL; + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) { + ret = -EINVAL; + goto out; + } if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) - return -EAGAIN; + if (iocb->ki_flags & IOCB_NOWAIT) { + ret = -EAGAIN; + goto out; + } inode_lock(inode); } @@ -3056,19 +3074,16 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - if ((iocb->ki_flags & IOCB_NOWAIT) && - (iocb->ki_flags & IOCB_DIRECT)) { - if (!f2fs_overwrite_io(inode, iocb->ki_pos, + if ((iocb->ki_flags & IOCB_NOWAIT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)) || - f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, - iocb, from)) { - clear_inode_flag(inode, - FI_NO_PREALLOC); - inode_unlock(inode); - return -EAGAIN; - } - + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, iocb, from)) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + ret = -EAGAIN; + goto out; + } } else { preallocated = true; target_size = iocb->ki_pos + iov_iter_count(from); @@ -3077,7 +3092,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) { clear_inode_flag(inode, FI_NO_PREALLOC); inode_unlock(inode); - return err; + ret = err; + goto out; } } ret = __generic_file_write_iter(iocb, from); @@ -3091,7 +3107,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } inode_unlock(inode); - +out: + trace_f2fs_file_write_iter(inode, iocb->ki_pos, + iov_iter_count(from), ret); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 195cf0f9d9ef..963fb4571fd9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -591,7 +591,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode) int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); bidx = node_ofs - 5 - dec; } - return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode); + return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode); } static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, @@ -656,6 +656,11 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (f2fs_lookup_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; + if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, + DATA_GENERIC_ENHANCE_READ))) { + err = -EFAULT; + goto put_page; + } goto got_it; } @@ -665,8 +670,12 @@ static int ra_data_block(struct inode *inode, pgoff_t index) goto put_page; f2fs_put_dnode(&dn); + if (!__is_valid_data_blkaddr(dn.data_blkaddr)) { + err = -ENOENT; + goto put_page; + } if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, - DATA_GENERIC))) { + DATA_GENERIC_ENHANCE))) { err = -EFAULT; goto put_page; } @@ -1175,6 +1184,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, "type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_stop_checkpoint(sbi, false); goto skip; } @@ -1346,7 +1356,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ - if (sbi->s_ndevs && !__is_large_section(sbi)) + if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi)) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index bb6a152310ef..404d2462a0fe 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -420,6 +420,14 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); + /* + * should retrieve reserved space which was used to keep + * inline_dentry's structure for backward compatibility. + */ + if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) && + !f2fs_has_inline_xattr(dir)) + F2FS_I(dir)->i_inline_xattr_size = 0; + f2fs_i_depth_write(dir, 1); if (i_size_read(dir) < PAGE_SIZE) f2fs_i_size_write(dir, PAGE_SIZE); @@ -501,6 +509,15 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); + + /* + * should retrieve reserved space which was used to keep + * inline_dentry's structure for backward compatibility. + */ + if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) && + !f2fs_has_inline_xattr(dir)) + F2FS_I(dir)->i_inline_xattr_size = 0; + kvfree(backup_dentry); return 0; recover: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e7f2e8759315..ccb02226dd2c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -73,7 +73,7 @@ static int __written_first_block(struct f2fs_sb_info *sbi, if (!__is_valid_data_blkaddr(addr)) return 1; - if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC)) + if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) return -EFAULT; return 0; } @@ -177,8 +177,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) if (provided != calculated) f2fs_msg(sbi->sb, KERN_WARNING, - "checksum invalid, ino = %x, %x vs. %x", - ino_of_node(page), provided, calculated); + "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", + page->index, ino_of_node(page), provided, calculated); return provided == calculated; } @@ -267,9 +267,10 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; if (ei->len && - (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) || + (!f2fs_is_valid_blkaddr(sbi, ei->blk, + DATA_GENERIC_ENHANCE) || !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, - DATA_GENERIC))) { + DATA_GENERIC_ENHANCE))) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, "%s: inode (ino=%lx) extent info [%u, %u, %u] " @@ -488,6 +489,7 @@ make_now: return inode; bad_inode: + f2fs_inode_synced(inode); iget_failed(inode); trace_f2fs_iget_exit(inode, ret); return ERR_PTR(ret); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c3e8a901d47a..0f77f9242751 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -143,7 +143,7 @@ fail_drop: return ERR_PTR(err); } -static int is_extension_exist(const unsigned char *s, const char *sub) +static inline int is_extension_exist(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d6e48a6487d5..18a038a2a9fa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -454,7 +454,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, new_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) && + f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) && new_blkaddr == NEW_ADDR); /* increment version no as node is removed */ @@ -465,7 +465,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* change address */ nat_set_blkaddr(e, new_blkaddr); - if (!is_valid_data_blkaddr(sbi, new_blkaddr)) + if (!__is_valid_data_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); @@ -526,6 +526,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nat_entry ne; struct nat_entry *e; pgoff_t index; + block_t blkaddr; int i; ni->nid = nid; @@ -569,6 +570,11 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: + blkaddr = le32_to_cpu(ne.block_addr); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) + return -EFAULT; + /* cache nat entry */ cache_nat_entry(sbi, nid, &ne); return 0; @@ -600,9 +606,9 @@ static void f2fs_ra_node_pages(struct page *parent, int start, int n) pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) { const long direct_index = ADDRS_PER_INODE(dn->inode); - const long direct_blks = ADDRS_PER_BLOCK; - const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; - unsigned int skipped_unit = ADDRS_PER_BLOCK; + const long direct_blks = ADDRS_PER_BLOCK(dn->inode); + const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK; + unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode); int cur_level = dn->cur_level; int max_level = dn->max_level; pgoff_t base = 0; @@ -638,9 +644,9 @@ static int get_node_path(struct inode *inode, long block, int offset[4], unsigned int noffset[4]) { const long direct_index = ADDRS_PER_INODE(inode); - const long direct_blks = ADDRS_PER_BLOCK; + const long direct_blks = ADDRS_PER_BLOCK(inode); const long dptrs_per_blk = NIDS_PER_BLOCK; - const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; + const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK; const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; int n = 0; int level = 0; @@ -1181,8 +1187,14 @@ int f2fs_remove_inode_page(struct inode *inode) f2fs_put_dnode(&dn); return -EIO; } - f2fs_bug_on(F2FS_I_SB(inode), - inode->i_blocks != 0 && inode->i_blocks != 8); + + if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { + f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, + "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + } /* will put inode & node pages */ err = truncate_node(&dn); @@ -1277,9 +1289,10 @@ static int read_node_page(struct page *page, int op_flags) int err; if (PageUptodate(page)) { -#ifdef CONFIG_F2FS_CHECK_FS - f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page)); -#endif + if (!f2fs_inode_chksum_verify(sbi, page)) { + ClearPageUptodate(page); + return -EBADMSG; + } return LOCKED_PAGE; } @@ -1543,7 +1556,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, } if (__is_valid_data_blkaddr(ni.blk_addr) && - !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, + DATA_GENERIC_ENHANCE)) { up_read(&sbi->node_write); goto redirty_out; } @@ -2078,6 +2092,9 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, if (unlikely(nid == 0)) return false; + if (unlikely(f2fs_check_nid_range(sbi, nid))) + return false; + i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; i->state = FREE_NID; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e3883db868d8..e04f82b3f4fc 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -325,8 +325,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, break; } - if (!is_recoverable_dnode(page)) + if (!is_recoverable_dnode(page)) { + f2fs_put_page(page, 1); break; + } if (!is_fsync_dnode(page)) goto next; @@ -338,8 +340,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!check_only && IS_INODE(page) && is_dent_dnode(page)) { err = f2fs_recover_inode_page(sbi, page); - if (err) + if (err) { + f2fs_put_page(page, 1); break; + } quota_inode = true; } @@ -355,6 +359,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, err = 0; goto next; } + f2fs_put_page(page, 1); break; } } @@ -370,6 +375,7 @@ next: "%s: detect looped node chain, " "blkaddr:%u, next:%u", __func__, blkaddr, next_blkaddr_of_node(page)); + f2fs_put_page(page, 1); err = -EINVAL; break; } @@ -380,7 +386,6 @@ next: f2fs_ra_meta_pages_cond(sbi, blkaddr); } - f2fs_put_page(page, 1); return err; } @@ -546,7 +551,15 @@ retry_dn: goto err; f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); - f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); + + if (ofs_of_node(dn.node_page) != ofs_of_node(page)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", + inode->i_ino, ofs_of_node(dn.node_page), + ofs_of_node(page)); + err = -EFAULT; + goto err; + } for (; start < end; start++, dn.ofs_in_node++) { block_t src, dest; @@ -554,6 +567,18 @@ retry_dn: src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); dest = datablock_addr(dn.inode, page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(src) && + !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { + err = -EFAULT; + goto err; + } + + if (__is_valid_data_blkaddr(dest) && + !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { + err = -EFAULT; + goto err; + } + /* skip recovering if dest is the same as src */ if (src == dest) continue; @@ -666,8 +691,10 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, */ if (IS_INODE(page)) { err = recover_inode(entry->inode, page); - if (err) + if (err) { + f2fs_put_page(page, 1); break; + } } if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, page, dir_list); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index aa7fe79b62b2..8dee063c833f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -580,7 +580,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) int ret = 0; int i; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return __submit_flush_wait(sbi, sbi->sb->s_bdev); for (i = 0; i < sbi->s_ndevs; i++) { @@ -648,7 +648,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return ret; } - if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) { + if (atomic_inc_return(&fcc->queued_flush) == 1 || + f2fs_is_multi_device(sbi)) { ret = submit_flush_wait(sbi, ino); atomic_dec(&fcc->queued_flush); @@ -754,7 +755,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) { int ret = 0, i; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return 0; for (i = 1; i < sbi->s_ndevs; i++) { @@ -1367,9 +1368,12 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, { block_t lblkstart = blkstart; + if (!f2fs_bdev_support_discard(bdev)) + return 0; + trace_f2fs_queue_discard(bdev, blkstart, blklen); - if (sbi->s_ndevs) { + if (f2fs_is_multi_device(sbi)) { int devi = f2fs_target_device_index(sbi, blkstart); blkstart -= FDEV(devi).start_blk; @@ -1732,42 +1736,36 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, block_t lblkstart = blkstart; int devi = 0; - if (sbi->s_ndevs) { + if (f2fs_is_multi_device(sbi)) { devi = f2fs_target_device_index(sbi, blkstart); + if (blkstart < FDEV(devi).start_blk || + blkstart > FDEV(devi).end_blk) { + f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x", + blkstart); + return -EIO; + } blkstart -= FDEV(devi).start_blk; } - /* - * We need to know the type of the zone: for conventional zones, - * use regular discard if the drive supports it. For sequential - * zones, reset the zone write pointer. - */ - switch (get_blkz_type(sbi, bdev, blkstart)) { - - case BLK_ZONE_TYPE_CONVENTIONAL: - if (!blk_queue_discard(bdev_get_queue(bdev))) - return 0; - return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); - case BLK_ZONE_TYPE_SEQWRITE_REQ: - case BLK_ZONE_TYPE_SEQWRITE_PREF: + /* For sequential zones, reset the zone write pointer */ + if (f2fs_blkz_is_seq(sbi, devi, blkstart)) { sector = SECTOR_FROM_BLOCK(blkstart); nr_sects = SECTOR_FROM_BLOCK(blklen); if (sector & (bdev_zone_sectors(bdev) - 1) || nr_sects != bdev_zone_sectors(bdev)) { - f2fs_msg(sbi->sb, KERN_INFO, - "(%d) %s: Unaligned discard attempted (block %x + %x)", + f2fs_msg(sbi->sb, KERN_ERR, + "(%d) %s: Unaligned zone reset attempted (block %x + %x)", devi, sbi->s_ndevs ? FDEV(devi).path: "", blkstart, blklen); return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_reset_zones(bdev, sector, - nr_sects, GFP_NOFS); - default: - /* Unknown zone type: broken device ? */ - return -EIO; + return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); } + + /* For conventional zones, use regular discard if supported */ + return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); } #endif @@ -1775,8 +1773,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sbi) && - bdev_zoned_model(bdev) != BLK_ZONED_NONE) + if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif return __queue_discard_cmd(sbi, bdev, blkstart, blklen); @@ -2172,8 +2169,11 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) * before, we must track that to know how much space we * really have. */ - if (f2fs_test_bit(offset, se->ckpt_valid_map)) + if (f2fs_test_bit(offset, se->ckpt_valid_map)) { + spin_lock(&sbi->stat_lock); sbi->unusable_block_count++; + spin_unlock(&sbi->stat_lock); + } } if (f2fs_test_and_clear_bit(offset, se->discard_map)) @@ -2220,7 +2220,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) struct seg_entry *se; bool is_cp = false; - if (!is_valid_data_blkaddr(sbi, blkaddr)) + if (!__is_valid_data_blkaddr(blkaddr)) return true; down_read(&sit_i->sentry_lock); @@ -3089,7 +3089,7 @@ static void update_device_state(struct f2fs_io_info *fio) struct f2fs_sb_info *sbi = fio->sbi; unsigned int devidx; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return; devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); @@ -3187,13 +3187,18 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) { int err; struct f2fs_sb_info *sbi = fio->sbi; + unsigned int segno; fio->new_blkaddr = fio->old_blkaddr; /* i/o temperature is needed for passing down write hints */ __get_segment_type(fio); - f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi, - GET_SEGNO(sbi, fio->new_blkaddr))->type)); + segno = GET_SEGNO(sbi, fio->new_blkaddr); + + if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + return -EFAULT; + } stat_inc_inplace_blocks(fio->sbi); @@ -3336,7 +3341,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) if (!f2fs_post_read_required(inode)) return; - if (!is_valid_data_blkaddr(sbi, blkaddr)) + if (!__is_valid_data_blkaddr(blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5c7ed0442d6e..429007b8036e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -82,7 +82,7 @@ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \ + ((!__is_valid_data_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ @@ -656,14 +656,15 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) +static inline void verify_fio_blkaddr(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; - if (__is_meta_io(fio)) - verify_blkaddr(sbi, blk_addr, META_GENERIC); - else - verify_blkaddr(sbi, blk_addr, DATA_GENERIC); + if (__is_valid_data_blkaddr(fio->old_blkaddr)) + verify_blkaddr(sbi, fio->old_blkaddr, __is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC); + verify_blkaddr(sbi, fio->new_blkaddr, __is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC_ENHANCE); } /* @@ -672,7 +673,6 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) static inline int check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) { -#ifdef CONFIG_F2FS_CHECK_FS bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; int valid_blocks = 0; int cur_pos = 0, next_pos; @@ -699,7 +699,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, set_sbi_flag(sbi, SBI_NEED_FSCK); return -EINVAL; } -#endif + /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg || segno > TOTAL_SEGS(sbi) - 1)) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4c55d2ea9df3..6b959bbb336a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1019,7 +1019,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { blkdev_put(FDEV(i).bdev, FMODE_EXCL); #ifdef CONFIG_BLK_DEV_ZONED - kvfree(FDEV(i).blkz_type); + kvfree(FDEV(i).blkz_seq); #endif } kvfree(sbi->devs); @@ -1221,10 +1221,13 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; + + spin_lock(&sbi->stat_lock); if (unlikely(buf->f_bfree <= sbi->unusable_block_count)) buf->f_bfree = 0; else buf->f_bfree -= sbi->unusable_block_count; + spin_unlock(&sbi->stat_lock); if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) buf->f_bavail = buf->f_bfree - @@ -1499,9 +1502,15 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) mutex_lock(&sbi->gc_mutex); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); - f2fs_write_checkpoint(sbi, &cpc); + err = f2fs_write_checkpoint(sbi, &cpc); + if (err) + goto out_unlock; + spin_lock(&sbi->stat_lock); sbi->unusable_block_count = 0; + spin_unlock(&sbi->stat_lock); + +out_unlock: mutex_unlock(&sbi->gc_mutex); restore_flag: sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -2271,7 +2280,7 @@ static const struct export_operations f2fs_export_ops = { static loff_t max_file_blocks(void) { loff_t result = 0; - loff_t leaf_count = ADDRS_PER_BLOCK; + loff_t leaf_count = DEF_ADDRS_PER_BLOCK; /* * note: previously, result is equal to (DEF_ADDRS_PER_INODE - @@ -2449,7 +2458,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, /* Currently, support only 4KB page cache size */ if (F2FS_BLKSIZE != PAGE_SIZE) { f2fs_msg(sb, KERN_INFO, - "Invalid page_cache_size (%lu), supports only 4KB\n", + "Invalid page_cache_size (%lu), supports only 4KB", PAGE_SIZE); return 1; } @@ -2458,7 +2467,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); if (blocksize != F2FS_BLKSIZE) { f2fs_msg(sb, KERN_INFO, - "Invalid blocksize (%u), supports only 4KB\n", + "Invalid blocksize (%u), supports only 4KB", blocksize); return 1; } @@ -2466,7 +2475,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, /* check log blocks per segment */ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { f2fs_msg(sb, KERN_INFO, - "Invalid log blocks per segment (%u)\n", + "Invalid log blocks per segment (%u)", le32_to_cpu(raw_super->log_blocks_per_seg)); return 1; } @@ -2587,7 +2596,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int log_blocks_per_seg; unsigned int segment_count_main; unsigned int cp_pack_start_sum, cp_payload; - block_t user_block_count; + block_t user_block_count, valid_user_blocks; + block_t avail_node_count, valid_node_count; int i, j; total = le32_to_cpu(raw_super->segment_count); @@ -2622,6 +2632,24 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + valid_user_blocks = le64_to_cpu(ckpt->valid_block_count); + if (valid_user_blocks > user_block_count) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong valid_user_blocks: %u, user_block_count: %u", + valid_user_blocks, user_block_count); + return 1; + } + + valid_node_count = le32_to_cpu(ckpt->valid_node_count); + avail_node_count = sbi->total_node_count - sbi->nquota_files - + F2FS_RESERVED_NODE_NUM; + if (valid_node_count > avail_node_count) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong valid_node_count: %u, avail_node_count: %u", + valid_node_count, avail_node_count); + return 1; + } + main_segs = le32_to_cpu(raw_super->segment_count_main); blocks_per_seg = sbi->blocks_per_seg; @@ -2793,9 +2821,11 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; - FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz, - GFP_KERNEL); - if (!FDEV(devi).blkz_type) + FDEV(devi).blkz_seq = f2fs_kzalloc(sbi, + BITS_TO_LONGS(FDEV(devi).nr_blkz) + * sizeof(unsigned long), + GFP_KERNEL); + if (!FDEV(devi).blkz_seq) return -ENOMEM; #define F2FS_REPORT_NR_ZONES 4096 @@ -2822,7 +2852,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) } for (i = 0; i < nr_zones; i++) { - FDEV(devi).blkz_type[n] = zones[i].type; + if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL) + set_bit(n, FDEV(devi).blkz_seq); sector += zones[i].len; n++; } @@ -3105,7 +3136,7 @@ try_onemore: #ifndef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_ERR, - "Zoned block device support is not enabled\n"); + "Zoned block device support is not enabled"); err = -EOPNOTSUPP; goto free_sb_buf; } @@ -3350,10 +3381,17 @@ try_onemore: * mount should be failed, when device has readonly mode, and * previous checkpoint was not done by clean system shutdown. */ - if (bdev_read_only(sb->s_bdev) && - !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { - err = -EROFS; - goto free_meta; + if (f2fs_hw_is_readonly(sbi)) { + if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + err = -EROFS; + f2fs_msg(sb, KERN_ERR, + "Need to recover fsync data, but " + "write access unavailable"); + goto free_meta; + } + f2fs_msg(sbi->sb, KERN_INFO, "write access " + "unavailable, skipping recovery"); + goto reset_checkpoint; } if (need_fsck) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 848a785abe25..e791741d193b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -202,12 +202,17 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index) return handler; } -static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, - size_t len, const char *name) +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, + void *last_base_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; list_for_each_xattr(entry, base_addr) { + if ((void *)(entry) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) + return NULL; + if (entry->e_name_index != index) continue; if (entry->e_name_len != len) @@ -297,20 +302,22 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, const char *name, struct f2fs_xattr_entry **xe, void **base_addr, int *base_size) { - void *cur_addr, *txattr_addr, *last_addr = NULL; + void *cur_addr, *txattr_addr, *last_txattr_addr; + void *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; - unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; unsigned int inline_size = inline_xattr_size(inode); int err = 0; - if (!size && !inline_size) + if (!xnid && !inline_size) return -ENODATA; - *base_size = inline_size + size + XATTR_PADDING_SIZE; + *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE; txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); if (!txattr_addr) return -ENOMEM; + last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode); + /* read from inline xattr */ if (inline_size) { err = read_inline_xattr(inode, ipage, txattr_addr); @@ -337,7 +344,11 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, else cur_addr = txattr_addr; - *xe = __find_xattr(cur_addr, index, len, name); + *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name); + if (!*xe) { + err = -EFAULT; + goto out; + } check: if (IS_XATTR_LAST_ENTRY(*xe)) { err = -ENODATA; @@ -581,7 +592,8 @@ static int __f2fs_setxattr(struct inode *inode, int index, struct page *ipage, int flags) { struct f2fs_xattr_entry *here, *last; - void *base_addr; + void *base_addr, *last_base_addr; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; int found, newsize; size_t len; __u32 new_hsize; @@ -605,8 +617,14 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (error) return error; + last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + /* find entry with wanted name. */ - here = __find_xattr(base_addr, index, len, name); + here = __find_xattr(base_addr, last_base_addr, index, len, name); + if (!here) { + error = -EFAULT; + goto exit; + } found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 9172ee082ca8..a90920e2f949 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -71,6 +71,8 @@ struct f2fs_xattr_entry { entry = XATTR_NEXT_ENTRY(entry)) #define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) #define XATTR_PADDING_SIZE (sizeof(__u32)) +#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \ + (inline_xattr_size(i))) #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ VALID_XATTR_BLOCK_SIZE) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index fe80bea4ad89..14ce1e47f980 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/module.h> +#include <linux/fs_context.h> #define FUSE_CTL_SUPER_MAGIC 0x65735543 @@ -317,7 +318,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) drop_nlink(d_inode(fuse_control_sb->s_root)); } -static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) +static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx) { static const struct tree_descr empty_descr = {""}; struct fuse_conn *fc; @@ -343,10 +344,19 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +static int fuse_ctl_get_tree(struct fs_context *fc) { - return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super); + return vfs_get_super(fc, vfs_get_single_super, fuse_ctl_fill_super); +} + +static const struct fs_context_operations fuse_ctl_context_ops = { + .get_tree = fuse_ctl_get_tree, +}; + +static int fuse_ctl_init_fs_context(struct fs_context *fc) +{ + fc->ops = &fuse_ctl_context_ops; + return 0; } static void fuse_ctl_kill_sb(struct super_block *sb) @@ -365,7 +375,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) static struct file_system_type fuse_ctl_fs_type = { .owner = THIS_MODULE, .name = "fusectl", - .mount = fuse_ctl_mount, + .init_fs_context = fuse_ctl_init_fs_context, .kill_sb = fuse_ctl_kill_sb, }; MODULE_ALIAS_FS("fusectl"); diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 55a26f351467..4b41df1d4642 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -33,6 +33,8 @@ * closed. */ +#define pr_fmt(fmt) "CUSE: " fmt + #include <linux/fuse.h> #include <linux/cdev.h> #include <linux/device.h> @@ -225,7 +227,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) return 0; if (end[-1] != '\0') { - printk(KERN_ERR "CUSE: info not properly terminated\n"); + pr_err("info not properly terminated\n"); return -EINVAL; } @@ -242,7 +244,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) key = strstrip(key); if (!strlen(key)) { - printk(KERN_ERR "CUSE: zero length info key specified\n"); + pr_err("zero length info key specified\n"); return -EINVAL; } @@ -282,12 +284,11 @@ static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) if (strcmp(key, "DEVNAME") == 0) devinfo->name = val; else - printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n", - key); + pr_warn("unknown device info \"%s\"\n", key); } if (!devinfo->name || !strlen(devinfo->name)) { - printk(KERN_ERR "CUSE: DEVNAME unspecified\n"); + pr_err("DEVNAME unspecified\n"); return -EINVAL; } @@ -341,7 +342,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else rc = register_chrdev_region(devt, 1, devinfo.name); if (rc) { - printk(KERN_ERR "CUSE: failed to register chrdev region\n"); + pr_err("failed to register chrdev region\n"); goto err; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9971a35cf1ef..24ea19cfe07e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -906,8 +906,8 @@ static int fuse_check_page(struct page *page) 1 << PG_lru | 1 << PG_active | 1 << PG_reclaim))) { - printk(KERN_WARNING "fuse: trying to steal weird page\n"); - printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping); + pr_warn("trying to steal weird page\n"); + pr_warn(" page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping); return 1; } return 0; @@ -1317,6 +1317,16 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, unsigned reqsize; unsigned int hash; + /* + * Require sane minimum read buffer - that has capacity for fixed part + * of any request header + negotated max_write room for data. If the + * requirement is not satisfied return EINVAL to the filesystem server + * to indicate that it is not following FUSE server/client contract. + * Don't dequeue / abort any request. + */ + if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, 4096 + fc->max_write)) + return -EINVAL; + restart: spin_lock(&fiq->waitq.lock); err = -EAGAIN; @@ -1749,7 +1759,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, offset = outarg->offset & ~PAGE_MASK; file_size = i_size_read(inode); - num = outarg->size; + num = min(outarg->size, fc->max_write); if (outarg->offset > file_size) num = 0; else if (outarg->offset + num > file_size) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06096b60f1df..3959f08279e6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -178,7 +178,9 @@ void fuse_finish_open(struct inode *inode, struct file *file) if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); - if (ff->open_flags & FOPEN_NONSEEKABLE) + if (ff->open_flags & FOPEN_STREAM) + stream_open(inode, file); + else if (ff->open_flags & FOPEN_NONSEEKABLE) nonseekable_open(inode, file); if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -462,7 +464,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; - inarg.fsync_flags = datasync ? 1 : 0; + inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0; args.in.h.opcode = opcode; args.in.h.nodeid = get_node_id(inode); args.in.numargs = 1; @@ -1586,7 +1588,7 @@ __acquires(fi->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - size_t crop = i_size_read(inode); + loff_t crop = i_size_read(inode); struct fuse_req *req; while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { @@ -2576,8 +2578,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, #if BITS_PER_LONG == 32 inarg.flags |= FUSE_IOCTL_32BIT; #else - if (flags & FUSE_IOCTL_COMPAT) + if (flags & FUSE_IOCTL_COMPAT) { inarg.flags |= FUSE_IOCTL_32BIT; +#ifdef CONFIG_X86_X32 + if (in_x32_syscall()) + inarg.flags |= FUSE_IOCTL_COMPAT_X32; +#endif + } #endif /* assume all the iovs returned by client always fits in a page */ @@ -3044,6 +3051,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, } } + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + length > i_size_read(inode)) { + err = inode_newsize_ok(inode, offset + length); + if (err) + return err; + } + if (!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0920c0c032a0..24dbca777775 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -9,6 +9,10 @@ #ifndef _FS_FUSE_I_H #define _FS_FUSE_I_H +#ifndef pr_fmt +# define pr_fmt(fmt) "fuse: " fmt +#endif + #include <linux/fuse.h> #include <linux/fs.h> #include <linux/mount.h> @@ -690,6 +694,9 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; + /** Filesystem is fully reponsible for page cache invalidation. */ + unsigned explicit_inval_data:1; + /** Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f485d09d14df..4bb885b0f032 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -81,14 +81,12 @@ struct fuse_forget_link *fuse_alloc_forget(void) static struct inode *fuse_alloc_inode(struct super_block *sb) { - struct inode *inode; struct fuse_inode *fi; - inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); - if (!inode) + fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); + if (!fi) return NULL; - fi = get_fuse_inode(inode); fi->i_time = 0; fi->inval_mask = 0; fi->nodeid = 0; @@ -100,11 +98,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); if (!fi->forget) { - kmem_cache_free(fuse_inode_cachep, inode); + kmem_cache_free(fuse_inode_cachep, fi); return NULL; } - return inode; + return &fi->inode; } static void fuse_free_inode(struct inode *inode) @@ -233,7 +231,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, if (oldsize != attr->size) { truncate_pagecache(inode, attr->size); - inval = true; + if (!fc->explicit_inval_data) + inval = true; } else if (fc->auto_inval_data) { struct timespec64 new_mtime = { .tv_sec = attr->mtime, @@ -908,6 +907,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->dont_mask = 1; if (arg->flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; + else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA) + fc->explicit_inval_data = 1; if (arg->flags & FUSE_DO_READDIRPLUS) { fc->do_readdirplus = 1; if (arg->flags & FUSE_READDIRPLUS_AUTO) @@ -969,7 +970,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | - FUSE_NO_OPENDIR_SUPPORT; + FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -1393,8 +1394,8 @@ static int __init fuse_init(void) { int res; - printk(KERN_INFO "fuse init (API version %i.%i)\n", - FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); + pr_info("init (API version %i.%i)\n", + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); INIT_LIST_HEAD(&fuse_conn_list); res = fuse_fs_init(); @@ -1430,7 +1431,7 @@ static int __init fuse_init(void) static void __exit fuse_exit(void) { - printk(KERN_DEBUG "fuse exit\n"); + pr_debug("exit\n"); fuse_ctl_cleanup(); fuse_sysfs_cleanup(); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 1787d295834e..08e4996adc23 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -650,7 +650,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) char ro[20]; char spectator[20]; char *envp[] = { ro, spectator, NULL }; - int sysfs_frees_sdp = 0; sprintf(ro, "RDONLY=%d", sb_rdonly(sb)); sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0); @@ -661,8 +660,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) if (error) goto fail_reg; - sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling - function gfs2_sbd_release. */ error = sysfs_create_group(&sdp->sd_kobj, &tune_group); if (error) goto fail_reg; @@ -687,10 +684,7 @@ fail_tune: fail_reg: free_percpu(sdp->sd_lkstats); fs_err(sdp, "error %d adding sysfs files\n", error); - if (sysfs_frees_sdp) - kobject_put(&sdp->sd_kobj); - else - kfree(sdp); + kobject_put(&sdp->sd_kobj); sb->s_fs_info = NULL; return error; } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 5433e37fb0c5..8c7cbac7183c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -108,6 +108,47 @@ void fsnotify_sb_delete(struct super_block *sb) } /* + * fsnotify_nameremove - a filename was removed from a directory + * + * This is mostly called under parent vfs inode lock so name and + * dentry->d_parent should be stable. However there are some corner cases where + * inode lock is not held. So to be on the safe side and be reselient to future + * callers and out of tree users of d_delete(), we do not assume that d_parent + * and d_name are stable and we use dget_parent() and + * take_dentry_name_snapshot() to grab stable references. + */ +void fsnotify_nameremove(struct dentry *dentry, int isdir) +{ + struct dentry *parent; + struct name_snapshot name; + __u32 mask = FS_DELETE; + + /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */ + if (IS_ROOT(dentry)) + return; + + if (isdir) + mask |= FS_ISDIR; + + parent = dget_parent(dentry); + /* Avoid unneeded take_dentry_name_snapshot() */ + if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) && + !(dentry->d_sb->s_fsnotify_mask & FS_DELETE)) + goto out_dput; + + take_dentry_name_snapshot(&name, dentry); + + fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + &name.name, 0); + + release_dentry_name_snapshot(&name); + +out_dput: + dput(parent); +} +EXPORT_SYMBOL(fsnotify_nameremove); + +/* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 22acb0a79b53..b251105f646f 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -619,6 +619,11 @@ restart: /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: + /* + * Since connector is attached to object using cmpxchg() we are + * guaranteed that connector initialization is fully visible by anyone + * seeing mark->connector set. + */ WRITE_ONCE(mark->connector, conn); out_err: spin_unlock(&conn->lock); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 68b3303e4b46..56feaa739979 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -909,14 +909,14 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) return true; } -int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) +int ovl_maybe_copy_up(struct dentry *dentry, int flags) { int err = 0; - if (ovl_open_need_copy_up(dentry, file_flags)) { + if (ovl_open_need_copy_up(dentry, flags)) { err = ovl_want_write(dentry); if (!err) { - err = ovl_copy_up_flags(dentry, file_flags); + err = ovl_copy_up_flags(dentry, flags); ovl_drop_write(dentry); } } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 82c129bfe58d..93872bb50230 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -260,7 +260,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, * hashed directory inode aliases. */ inode = ovl_get_inode(dentry->d_sb, &oip); - if (WARN_ON(IS_ERR(inode))) + if (IS_ERR(inode)) return PTR_ERR(inode); } else { WARN_ON(ovl_inode_real(inode) != d_inode(newdentry)); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 84dd957efa24..540a8b845145 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -11,6 +11,7 @@ #include <linux/mount.h> #include <linux/xattr.h> #include <linux/uio.h> +#include <linux/uaccess.h> #include "overlayfs.h" static char ovl_whatisit(struct inode *inode, struct inode *realinode) @@ -29,10 +30,11 @@ static struct file *ovl_open_realfile(const struct file *file, struct inode *inode = file_inode(file); struct file *realfile; const struct cred *old_cred; + int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY; old_cred = ovl_override_creds(inode->i_sb); - realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME, - realinode, current_cred()); + realfile = open_with_fake_path(&file->f_path, flags, realinode, + current_cred()); revert_creds(old_cred); pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", @@ -50,7 +52,7 @@ static int ovl_change_flags(struct file *file, unsigned int flags) int err; /* No atime modificaton on underlying */ - flags |= O_NOATIME; + flags |= O_NOATIME | FMODE_NONOTIFY; /* If some flag changed that cannot be changed then something's amiss */ if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK)) @@ -116,11 +118,10 @@ static int ovl_real_fdget(const struct file *file, struct fd *real) static int ovl_open(struct inode *inode, struct file *file) { - struct dentry *dentry = file_dentry(file); struct file *realfile; int err; - err = ovl_open_maybe_copy_up(dentry, file->f_flags); + err = ovl_maybe_copy_up(file_dentry(file), file->f_flags); if (err) return err; @@ -145,11 +146,47 @@ static int ovl_release(struct inode *inode, struct file *file) static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) { - struct inode *realinode = ovl_inode_real(file_inode(file)); + struct inode *inode = file_inode(file); + struct fd real; + const struct cred *old_cred; + ssize_t ret; + + /* + * The two special cases below do not need to involve real fs, + * so we can optimizing concurrent callers. + */ + if (offset == 0) { + if (whence == SEEK_CUR) + return file->f_pos; + + if (whence == SEEK_SET) + return vfs_setpos(file, 0, 0); + } + + ret = ovl_real_fdget(file, &real); + if (ret) + return ret; + + /* + * Overlay file f_pos is the master copy that is preserved + * through copy up and modified on read/write, but only real + * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose + * limitations that are more strict than ->s_maxbytes for specific + * files, so we use the real file to perform seeks. + */ + inode_lock(inode); + real.file->f_pos = file->f_pos; + + old_cred = ovl_override_creds(inode->i_sb); + ret = vfs_llseek(real.file, offset, whence); + revert_creds(old_cred); + + file->f_pos = real.file->f_pos; + inode_unlock(inode); + + fdput(real); - return generic_file_llseek_size(file, offset, whence, - realinode->i_sb->s_maxbytes, - i_size_read(realinode)); + return ret; } static void ovl_file_accessed(struct file *file) @@ -372,10 +409,68 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd, return ret; } -static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static unsigned int ovl_get_inode_flags(struct inode *inode) +{ + unsigned int flags = READ_ONCE(inode->i_flags); + unsigned int ovl_iflags = 0; + + if (flags & S_SYNC) + ovl_iflags |= FS_SYNC_FL; + if (flags & S_APPEND) + ovl_iflags |= FS_APPEND_FL; + if (flags & S_IMMUTABLE) + ovl_iflags |= FS_IMMUTABLE_FL; + if (flags & S_NOATIME) + ovl_iflags |= FS_NOATIME_FL; + + return ovl_iflags; +} + +static long ovl_ioctl_set_flags(struct file *file, unsigned long arg) { long ret; struct inode *inode = file_inode(file); + unsigned int flags; + unsigned int old_flags; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + inode_lock(inode); + + /* Check the capability before cred override */ + ret = -EPERM; + old_flags = ovl_get_inode_flags(inode); + if (((flags ^ old_flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && + !capable(CAP_LINUX_IMMUTABLE)) + goto unlock; + + ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY); + if (ret) + goto unlock; + + ret = ovl_real_ioctl(file, FS_IOC_SETFLAGS, arg); + + ovl_copyflags(ovl_inode_real(inode), inode); +unlock: + inode_unlock(inode); + + mnt_drop_write_file(file); + + return ret; + +} + +static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long ret; switch (cmd) { case FS_IOC_GETFLAGS: @@ -383,23 +478,7 @@ static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case FS_IOC_SETFLAGS: - if (!inode_owner_or_capable(inode)) - return -EACCES; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - ret = ovl_copy_up_with_data(file_dentry(file)); - if (!ret) { - ret = ovl_real_ioctl(file, cmd, arg); - - inode_lock(inode); - ovl_copyflags(ovl_inode_real(inode), inode); - inode_unlock(inode); - } - - mnt_drop_write_file(file); + ret = ovl_ioctl_set_flags(file, arg); break; default: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 3b7ed5d2279c..b48273e846ad 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -832,7 +832,7 @@ struct inode *ovl_get_inode(struct super_block *sb, int fsid = bylower ? oip->lowerpath->layer->fsid : 0; bool is_dir, metacopy = false; unsigned long ino = 0; - int err = -ENOMEM; + int err = oip->newinode ? -EEXIST : -ENOMEM; if (!realinode) realinode = d_inode(lowerdentry); @@ -917,6 +917,7 @@ out: return inode; out_err: + pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err); inode = ERR_PTR(err); goto out; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 9c6018287d57..d26efed9f80a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -421,7 +421,7 @@ extern const struct file_operations ovl_file_operations; int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_with_data(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); -int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); +int ovl_maybe_copy_up(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index fc20e06c56ba..9ad72ea7f71f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -9,7 +9,7 @@ * on the Melbourne quota system as used on BSD derived systems. The internal * implementation is based on one of the several variants of the LINUX * inode-subsystem with added complexity of the diskquota system. - * + * * Author: Marco van Wieringen <mvw@planets.elm.net> * * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 @@ -51,7 +51,7 @@ * Added journalled quota support, fix lock inversion problems * Jan Kara, <jack@suse.cz>, 2003,2004 * - * (C) Copyright 1994 - 1997 Marco van Wieringen + * (C) Copyright 1994 - 1997 Marco van Wieringen */ #include <linux/errno.h> @@ -197,7 +197,7 @@ static struct quota_format_type *find_quota_format(int id) int qm; spin_unlock(&dq_list_lock); - + for (qm = 0; module_names[qm].qm_fmt_id && module_names[qm].qm_fmt_id != id; qm++) ; @@ -424,10 +424,11 @@ int dquot_acquire(struct dquot *dquot) struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); - if (!test_bit(DQ_READ_B, &dquot->dq_flags)) + if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); - if (ret < 0) - goto out_iolock; + if (ret < 0) + goto out_iolock; + } /* Make sure flags update is visible after dquot has been filled */ smp_mb__before_atomic(); set_bit(DQ_READ_B, &dquot->dq_flags); @@ -1049,7 +1050,9 @@ static void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) { struct inode *inode; +#ifdef CONFIG_QUOTA_DEBUG int reserved = 0; +#endif spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { @@ -1061,8 +1064,10 @@ static void remove_dquot_ref(struct super_block *sb, int type, */ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { +#ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; +#endif remove_inode_dquot_ref(inode, type, tofree_head); } spin_unlock(&dq_data_lock); @@ -1663,7 +1668,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) continue; - if (flags & DQUOT_SPACE_RESERVE) { + if (reserve) { ret = dquot_add_space(dquots[cnt], 0, number, flags, &warn[cnt]); } else { @@ -1676,13 +1681,11 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) if (!dquots[cnt]) continue; spin_lock(&dquots[cnt]->dq_dqb_lock); - if (flags & DQUOT_SPACE_RESERVE) { - dquots[cnt]->dq_dqb.dqb_rsvspace -= - number; - } else { - dquots[cnt]->dq_dqb.dqb_curspace -= - number; - } + if (reserve) + dquot_free_reserved_space(dquots[cnt], + number); + else + dquot_decr_space(dquots[cnt], number); spin_unlock(&dquots[cnt]->dq_dqb_lock); } spin_unlock(&inode->i_lock); @@ -1733,7 +1736,7 @@ int dquot_alloc_inode(struct inode *inode) continue; /* Back out changes we already did */ spin_lock(&dquots[cnt]->dq_dqb_lock); - dquots[cnt]->dq_dqb.dqb_curinodes--; + dquot_decr_inodes(dquots[cnt], 1); spin_unlock(&dquots[cnt]->dq_dqb_lock); } goto warn_put_all; @@ -2397,7 +2400,7 @@ out_file_flags: out_fmt: put_quota_format(fmt); - return error; + return error; } /* Reenable quotas on remount RW */ @@ -2775,7 +2778,7 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state) struct qc_type_state *tstate; struct quota_info *dqopt = sb_dqopt(sb); int type; - + memset(state, 0, sizeof(*state)); for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_active(sb, type)) diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 7ac5298aba70..9f2b2573b83c 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c @@ -127,7 +127,7 @@ static int v1_check_quota_file(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; ulong blocks; - size_t off; + size_t off; struct v2_disk_dqheader dqhead; ssize_t size; loff_t isize; diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index a73e5b34db41..3c30034e733f 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -78,7 +78,7 @@ static int v2_check_quota_file(struct super_block *sb, int type) struct v2_disk_dqheader dqhead; static const uint quota_magics[] = V2_INITQMAGICS; static const uint quota_versions[] = V2_INITQVERSIONS; - + if (v2_read_header(sb, type, &dqhead)) return 0; if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] || diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 8a76f9d14bc6..36346dc4cec0 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1844,7 +1844,7 @@ static int flush_used_journal_lists(struct super_block *s, * removes any nodes in table with name block and dev as bh. * only touchs the hnext and hprev pointers. */ -void remove_journal_hash(struct super_block *sb, +static void remove_journal_hash(struct super_block *sb, struct reiserfs_journal_cnode **table, struct reiserfs_journal_list *jl, unsigned long block, int remove_freed) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 58cc2414992b..77b6d89b9bcd 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -304,21 +304,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > UDF_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); -#ifdef UDF_RECOVERY - /* temporary shorthand for specifying files by inode number */ - if (!strncmp(dentry->d_name.name, ".B=", 3)) { - struct kernel_lb_addr lb = { - .logicalBlockNum = 0, - .partitionReferenceNum = - simple_strtoul(dentry->d_name.name + 3, - NULL, 0), - }; - inode = udf_iget(dir->i_sb, lb); - if (IS_ERR(inode)) - return inode; - } else -#endif /* UDF_RECOVERY */ - fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); if (IS_ERR(fi)) return ERR_CAST(fi); diff --git a/fs/udf/super.c b/fs/udf/super.c index f64691f2168a..a14346137361 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -566,6 +566,11 @@ static int udf_parse_options(char *options, struct udf_options *uopt, if (!remount) { if (uopt->nls_map) unload_nls(uopt->nls_map); + /* + * load_nls() failure is handled later in + * udf_fill_super() after all options are + * parsed. + */ uopt->nls_map = load_nls(args[0].from); uopt->flags |= (1 << UDF_FLAG_NLS_MAP); } |