diff options
Diffstat (limited to 'fs')
183 files changed, 11511 insertions, 3155 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 708ba336e689..f08fbbfafd9a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -140,9 +140,10 @@ endmenu endif # BLOCK if BLOCK -menu "DOS/FAT/NT Filesystems" +menu "DOS/FAT/EXFAT/NT Filesystems" source "fs/fat/Kconfig" +source "fs/exfat/Kconfig" source "fs/ntfs/Kconfig" endmenu diff --git a/fs/Makefile b/fs/Makefile index 505e51166973..2ce5112b02c8 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_MINIX_FS) += minix/ obj-$(CONFIG_FAT_FS) += fat/ +obj-$(CONFIG_EXFAT_FS) += exfat/ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ diff --git a/fs/block_dev.c b/fs/block_dev.c index 9501880dff5e..52b6f646cdbd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -34,6 +34,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/falloc.h> #include <linux/uaccess.h> +#include <linux/suspend.h> #include "internal.h" struct bdev_inode { @@ -2013,7 +2014,8 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM; - if (IS_SWAPFILE(bd_inode)) + /* uswsusp needs write permission to the swap */ + if (IS_SWAPFILE(bd_inode) && !hibernation_available()) return -ETXTBSY; if (!iov_iter_count(from)) diff --git a/fs/exfat/Kconfig b/fs/exfat/Kconfig new file mode 100644 index 000000000000..2d3636dc5b8c --- /dev/null +++ b/fs/exfat/Kconfig @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +config EXFAT_FS + tristate "exFAT filesystem support" + select NLS + help + This allows you to mount devices formatted with the exFAT file system. + exFAT is typically used on SD-Cards or USB sticks. + + To compile this as a module, choose M here: the module will be called + exfat. + +config EXFAT_DEFAULT_IOCHARSET + string "Default iocharset for exFAT" + default "utf8" + depends on EXFAT_FS + help + Set this to the default input/output character set to use for + converting between the encoding is used for user visible filename and + UTF-16 character that exfat filesystem use, and can be overridden with + the "iocharset" mount option for exFAT filesystems. diff --git a/fs/exfat/Makefile b/fs/exfat/Makefile new file mode 100644 index 000000000000..ed51926a4971 --- /dev/null +++ b/fs/exfat/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Makefile for the linux exFAT filesystem support. +# +obj-$(CONFIG_EXFAT_FS) += exfat.o + +exfat-y := inode.o namei.o dir.o super.o fatent.o cache.o nls.o misc.o \ + file.o balloc.o diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c new file mode 100644 index 000000000000..6a04cc02565a --- /dev/null +++ b/fs/exfat/balloc.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/blkdev.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static const unsigned char free_bit[] = { + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~ 19*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~ 39*/ + 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~ 59*/ + 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~ 79*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~ 99*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/ + 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/ + 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/ + 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/ + 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~ 254*/ +}; + +static const unsigned char used_bit[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3,/* 0 ~ 19*/ + 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4,/* 20 ~ 39*/ + 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5,/* 40 ~ 59*/ + 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,/* 60 ~ 79*/ + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,/* 80 ~ 99*/ + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,/*100 ~ 119*/ + 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4,/*120 ~ 139*/ + 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,/*140 ~ 159*/ + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,/*160 ~ 179*/ + 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,/*180 ~ 199*/ + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6,/*200 ~ 219*/ + 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,/*220 ~ 239*/ + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 /*240 ~ 255*/ +}; + +/* + * Allocation Bitmap Management Functions + */ +static int exfat_allocate_bitmap(struct super_block *sb, + struct exfat_dentry *ep) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + long long map_size; + unsigned int i, need_map_size; + sector_t sector; + + sbi->map_clu = le32_to_cpu(ep->dentry.bitmap.start_clu); + map_size = le64_to_cpu(ep->dentry.bitmap.size); + need_map_size = ((EXFAT_DATA_CLUSTER_COUNT(sbi) - 1) / BITS_PER_BYTE) + + 1; + if (need_map_size != map_size) { + exfat_msg(sb, KERN_ERR, + "bogus allocation bitmap size(need : %u, cur : %lld)", + need_map_size, map_size); + /* + * Only allowed when bogus allocation + * bitmap size is large + */ + if (need_map_size > map_size) + return -EIO; + } + sbi->map_sectors = ((need_map_size - 1) >> + (sb->s_blocksize_bits)) + 1; + sbi->vol_amap = kmalloc_array(sbi->map_sectors, + sizeof(struct buffer_head *), GFP_KERNEL); + if (!sbi->vol_amap) + return -ENOMEM; + + sector = exfat_cluster_to_sector(sbi, sbi->map_clu); + for (i = 0; i < sbi->map_sectors; i++) { + sbi->vol_amap[i] = sb_bread(sb, sector + i); + if (!sbi->vol_amap[i]) { + /* release all buffers and free vol_amap */ + int j = 0; + + while (j < i) + brelse(sbi->vol_amap[j++]); + + kfree(sbi->vol_amap); + sbi->vol_amap = NULL; + return -EIO; + } + } + + sbi->pbr_bh = NULL; + return 0; +} + +int exfat_load_bitmap(struct super_block *sb) +{ + unsigned int i, type; + struct exfat_chain clu; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + exfat_chain_set(&clu, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + while (clu.dir != EXFAT_EOF_CLUSTER) { + for (i = 0; i < sbi->dentries_per_clu; i++) { + struct exfat_dentry *ep; + struct buffer_head *bh; + + ep = exfat_get_dentry(sb, &clu, i, &bh, NULL); + if (!ep) + return -EIO; + + type = exfat_get_entry_type(ep); + if (type == TYPE_UNUSED) + break; + if (type != TYPE_BITMAP) + continue; + if (ep->dentry.bitmap.flags == 0x0) { + int err; + + err = exfat_allocate_bitmap(sb, ep); + brelse(bh); + return err; + } + brelse(bh); + } + + if (exfat_get_next_cluster(sb, &clu.dir)) + return -EIO; + } + + return -EINVAL; +} + +void exfat_free_bitmap(struct exfat_sb_info *sbi) +{ + int i; + + brelse(sbi->pbr_bh); + + for (i = 0; i < sbi->map_sectors; i++) + __brelse(sbi->vol_amap[i]); + + kfree(sbi->vol_amap); +} + +/* + * If the value of "clu" is 0, it means cluster 2 which is the first cluster of + * the cluster heap. + */ +int exfat_set_bitmap(struct inode *inode, unsigned int clu) +{ + int i, b; + unsigned int ent_idx; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + WARN_ON(clu < EXFAT_FIRST_CLUSTER); + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); + + set_bit_le(b, sbi->vol_amap[i]->b_data); + exfat_update_bh(sb, sbi->vol_amap[i], IS_DIRSYNC(inode)); + return 0; +} + +/* + * If the value of "clu" is 0, it means cluster 2 which is the first cluster of + * the cluster heap. + */ +void exfat_clear_bitmap(struct inode *inode, unsigned int clu) +{ + int i, b; + unsigned int ent_idx; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_mount_options *opts = &sbi->options; + + WARN_ON(clu < EXFAT_FIRST_CLUSTER); + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); + + clear_bit_le(b, sbi->vol_amap[i]->b_data); + exfat_update_bh(sb, sbi->vol_amap[i], IS_DIRSYNC(inode)); + + if (opts->discard) { + int ret_discard; + + ret_discard = sb_issue_discard(sb, + exfat_cluster_to_sector(sbi, clu + + EXFAT_RESERVED_CLUSTERS), + (1 << sbi->sect_per_clus_bits), GFP_NOFS, 0); + + if (ret_discard == -EOPNOTSUPP) { + exfat_msg(sb, KERN_ERR, + "discard not supported by device, disabling"); + opts->discard = 0; + } + } +} + +/* + * If the value of "clu" is 0, it means cluster 2 which is the first cluster of + * the cluster heap. + */ +unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu) +{ + unsigned int i, map_i, map_b, ent_idx; + unsigned int clu_base, clu_free; + unsigned char k, clu_mask; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + WARN_ON(clu < EXFAT_FIRST_CLUSTER); + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK)); + clu_mask = IGNORED_BITS_REMAINED(clu, clu_base); + + map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx); + + for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters; + i += BITS_PER_BYTE) { + k = *(sbi->vol_amap[map_i]->b_data + map_b); + if (clu_mask > 0) { + k |= clu_mask; + clu_mask = 0; + } + if (k < 0xFF) { + clu_free = clu_base + free_bit[k]; + if (clu_free < sbi->num_clusters) + return clu_free; + } + clu_base += BITS_PER_BYTE; + + if (++map_b >= sb->s_blocksize || + clu_base >= sbi->num_clusters) { + if (++map_i >= sbi->map_sectors) { + clu_base = EXFAT_FIRST_CLUSTER; + map_i = 0; + } + map_b = 0; + } + } + + return EXFAT_EOF_CLUSTER; +} + +int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int count = 0; + unsigned int i, map_i = 0, map_b = 0; + unsigned int total_clus = EXFAT_DATA_CLUSTER_COUNT(sbi); + unsigned int last_mask = total_clus & BITS_PER_BYTE_MASK; + unsigned char clu_bits; + const unsigned char last_bit_mask[] = {0, 0b00000001, 0b00000011, + 0b00000111, 0b00001111, 0b00011111, 0b00111111, 0b01111111}; + + total_clus &= ~last_mask; + for (i = 0; i < total_clus; i += BITS_PER_BYTE) { + clu_bits = *(sbi->vol_amap[map_i]->b_data + map_b); + count += used_bit[clu_bits]; + if (++map_b >= (unsigned int)sb->s_blocksize) { + map_i++; + map_b = 0; + } + } + + if (last_mask) { + clu_bits = *(sbi->vol_amap[map_i]->b_data + map_b); + clu_bits &= last_bit_mask[last_mask]; + count += used_bit[clu_bits]; + } + + *ret_count = count; + return 0; +} diff --git a/fs/exfat/cache.c b/fs/exfat/cache.c new file mode 100644 index 000000000000..03d0824fc368 --- /dev/null +++ b/fs/exfat/cache.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * linux/fs/fat/cache.c + * + * Written 1992,1993 by Werner Almesberger + * + * Mar 1999. AV. Changed cache, so that it uses the starting cluster instead + * of inode number. + * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers. + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/slab.h> +#include <asm/unaligned.h> +#include <linux/buffer_head.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +#define EXFAT_CACHE_VALID 0 +#define EXFAT_MAX_CACHE 16 + +struct exfat_cache { + struct list_head cache_list; + unsigned int nr_contig; /* number of contiguous clusters */ + unsigned int fcluster; /* cluster number in the file. */ + unsigned int dcluster; /* cluster number on disk. */ +}; + +struct exfat_cache_id { + unsigned int id; + unsigned int nr_contig; + unsigned int fcluster; + unsigned int dcluster; +}; + +static struct kmem_cache *exfat_cachep; + +static void exfat_cache_init_once(void *c) +{ + struct exfat_cache *cache = (struct exfat_cache *)c; + + INIT_LIST_HEAD(&cache->cache_list); +} + +int exfat_cache_init(void) +{ + exfat_cachep = kmem_cache_create("exfat_cache", + sizeof(struct exfat_cache), + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + exfat_cache_init_once); + if (!exfat_cachep) + return -ENOMEM; + return 0; +} + +void exfat_cache_shutdown(void) +{ + if (!exfat_cachep) + return; + kmem_cache_destroy(exfat_cachep); +} + +void exfat_cache_init_inode(struct inode *inode) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + + spin_lock_init(&ei->cache_lru_lock); + ei->nr_caches = 0; + ei->cache_valid_id = EXFAT_CACHE_VALID + 1; + INIT_LIST_HEAD(&ei->cache_lru); +} + +static inline struct exfat_cache *exfat_cache_alloc(void) +{ + return kmem_cache_alloc(exfat_cachep, GFP_NOFS); +} + +static inline void exfat_cache_free(struct exfat_cache *cache) +{ + WARN_ON(!list_empty(&cache->cache_list)); + kmem_cache_free(exfat_cachep, cache); +} + +static inline void exfat_cache_update_lru(struct inode *inode, + struct exfat_cache *cache) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + + if (ei->cache_lru.next != &cache->cache_list) + list_move(&cache->cache_list, &ei->cache_lru); +} + +static unsigned int exfat_cache_lookup(struct inode *inode, + unsigned int fclus, struct exfat_cache_id *cid, + unsigned int *cached_fclus, unsigned int *cached_dclus) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + static struct exfat_cache nohit = { .fcluster = 0, }; + struct exfat_cache *hit = &nohit, *p; + unsigned int offset = EXFAT_EOF_CLUSTER; + + spin_lock(&ei->cache_lru_lock); + list_for_each_entry(p, &ei->cache_lru, cache_list) { + /* Find the cache of "fclus" or nearest cache. */ + if (p->fcluster <= fclus && hit->fcluster < p->fcluster) { + hit = p; + if (hit->fcluster + hit->nr_contig < fclus) { + offset = hit->nr_contig; + } else { + offset = fclus - hit->fcluster; + break; + } + } + } + if (hit != &nohit) { + exfat_cache_update_lru(inode, hit); + + cid->id = ei->cache_valid_id; + cid->nr_contig = hit->nr_contig; + cid->fcluster = hit->fcluster; + cid->dcluster = hit->dcluster; + *cached_fclus = cid->fcluster + offset; + *cached_dclus = cid->dcluster + offset; + } + spin_unlock(&ei->cache_lru_lock); + + return offset; +} + +static struct exfat_cache *exfat_cache_merge(struct inode *inode, + struct exfat_cache_id *new) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_cache *p; + + list_for_each_entry(p, &ei->cache_lru, cache_list) { + /* Find the same part as "new" in cluster-chain. */ + if (p->fcluster == new->fcluster) { + if (new->nr_contig > p->nr_contig) + p->nr_contig = new->nr_contig; + return p; + } + } + return NULL; +} + +static void exfat_cache_add(struct inode *inode, + struct exfat_cache_id *new) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_cache *cache, *tmp; + + if (new->fcluster == EXFAT_EOF_CLUSTER) /* dummy cache */ + return; + + spin_lock(&ei->cache_lru_lock); + if (new->id != EXFAT_CACHE_VALID && + new->id != ei->cache_valid_id) + goto unlock; /* this cache was invalidated */ + + cache = exfat_cache_merge(inode, new); + if (cache == NULL) { + if (ei->nr_caches < EXFAT_MAX_CACHE) { + ei->nr_caches++; + spin_unlock(&ei->cache_lru_lock); + + tmp = exfat_cache_alloc(); + if (!tmp) { + spin_lock(&ei->cache_lru_lock); + ei->nr_caches--; + spin_unlock(&ei->cache_lru_lock); + return; + } + + spin_lock(&ei->cache_lru_lock); + cache = exfat_cache_merge(inode, new); + if (cache != NULL) { + ei->nr_caches--; + exfat_cache_free(tmp); + goto out_update_lru; + } + cache = tmp; + } else { + struct list_head *p = ei->cache_lru.prev; + + cache = list_entry(p, + struct exfat_cache, cache_list); + } + cache->fcluster = new->fcluster; + cache->dcluster = new->dcluster; + cache->nr_contig = new->nr_contig; + } +out_update_lru: + exfat_cache_update_lru(inode, cache); +unlock: + spin_unlock(&ei->cache_lru_lock); +} + +/* + * Cache invalidation occurs rarely, thus the LRU chain is not updated. It + * fixes itself after a while. + */ +static void __exfat_cache_inval_inode(struct inode *inode) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_cache *cache; + + while (!list_empty(&ei->cache_lru)) { + cache = list_entry(ei->cache_lru.next, + struct exfat_cache, cache_list); + list_del_init(&cache->cache_list); + ei->nr_caches--; + exfat_cache_free(cache); + } + /* Update. The copy of caches before this id is discarded. */ + ei->cache_valid_id++; + if (ei->cache_valid_id == EXFAT_CACHE_VALID) + ei->cache_valid_id++; +} + +void exfat_cache_inval_inode(struct inode *inode) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + + spin_lock(&ei->cache_lru_lock); + __exfat_cache_inval_inode(inode); + spin_unlock(&ei->cache_lru_lock); +} + +static inline int cache_contiguous(struct exfat_cache_id *cid, + unsigned int dclus) +{ + cid->nr_contig++; + return cid->dcluster + cid->nr_contig == dclus; +} + +static inline void cache_init(struct exfat_cache_id *cid, + unsigned int fclus, unsigned int dclus) +{ + cid->id = EXFAT_CACHE_VALID; + cid->fcluster = fclus; + cid->dcluster = dclus; + cid->nr_contig = 0; +} + +int exfat_get_cluster(struct inode *inode, unsigned int cluster, + unsigned int *fclus, unsigned int *dclus, + unsigned int *last_dclus, int allow_eof) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int limit = sbi->num_clusters; + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_cache_id cid; + unsigned int content; + + if (ei->start_clu == EXFAT_FREE_CLUSTER) { + exfat_fs_error(sb, + "invalid access to exfat cache (entry 0x%08x)", + ei->start_clu); + return -EIO; + } + + *fclus = 0; + *dclus = ei->start_clu; + *last_dclus = *dclus; + + /* + * Don`t use exfat_cache if zero offset or non-cluster allocation + */ + if (cluster == 0 || *dclus == EXFAT_EOF_CLUSTER) + return 0; + + cache_init(&cid, EXFAT_EOF_CLUSTER, EXFAT_EOF_CLUSTER); + + if (exfat_cache_lookup(inode, cluster, &cid, fclus, dclus) == + EXFAT_EOF_CLUSTER) { + /* + * dummy, always not contiguous + * This is reinitialized by cache_init(), later. + */ + WARN_ON(cid.id != EXFAT_CACHE_VALID || + cid.fcluster != EXFAT_EOF_CLUSTER || + cid.dcluster != EXFAT_EOF_CLUSTER || + cid.nr_contig != 0); + } + + if (*fclus == cluster) + return 0; + + while (*fclus < cluster) { + /* prevent the infinite loop of cluster chain */ + if (*fclus > limit) { + exfat_fs_error(sb, + "detected the cluster chain loop (i_pos %u)", + (*fclus)); + return -EIO; + } + + if (exfat_ent_get(sb, *dclus, &content)) + return -EIO; + + *last_dclus = *dclus; + *dclus = content; + (*fclus)++; + + if (content == EXFAT_EOF_CLUSTER) { + if (!allow_eof) { + exfat_fs_error(sb, + "invalid cluster chain (i_pos %u, last_clus 0x%08x is EOF)", + *fclus, (*last_dclus)); + return -EIO; + } + + break; + } + + if (!cache_contiguous(&cid, *dclus)) + cache_init(&cid, *fclus, *dclus); + } + + exfat_cache_add(inode, &cid); + return 0; +} diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c new file mode 100644 index 000000000000..4b91afb0f051 --- /dev/null +++ b/fs/exfat/dir.c @@ -0,0 +1,1238 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/slab.h> +#include <linux/bio.h> +#include <linux/buffer_head.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static int exfat_extract_uni_name(struct exfat_dentry *ep, + unsigned short *uniname) +{ + int i, len = 0; + + for (i = 0; i < EXFAT_FILE_NAME_LEN; i++) { + *uniname = le16_to_cpu(ep->dentry.name.unicode_0_14[i]); + if (*uniname == 0x0) + return len; + uniname++; + len++; + } + + *uniname = 0x0; + return len; + +} + +static void exfat_get_uniname_from_ext_entry(struct super_block *sb, + struct exfat_chain *p_dir, int entry, unsigned short *uniname) +{ + int i; + struct exfat_dentry *ep; + struct exfat_entry_set_cache *es; + + es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES, &ep); + if (!es) + return; + + if (es->num_entries < 3) + goto free_es; + + ep += 2; + + /* + * First entry : file entry + * Second entry : stream-extension entry + * Third entry : first file-name entry + * So, the index of first file-name dentry should start from 2. + */ + for (i = 2; i < es->num_entries; i++, ep++) { + /* end of name entry */ + if (exfat_get_entry_type(ep) != TYPE_EXTEND) + goto free_es; + + exfat_extract_uni_name(ep, uniname); + uniname += EXFAT_FILE_NAME_LEN; + } + +free_es: + kfree(es); +} + +/* read a directory entry from the opened directory */ +static int exfat_readdir(struct inode *inode, struct exfat_dir_entry *dir_entry) +{ + int i, dentries_per_clu, dentries_per_clu_bits = 0; + unsigned int type, clu_offset; + sector_t sector; + struct exfat_chain dir, clu; + struct exfat_uni_name uni_name; + struct exfat_dentry *ep; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + unsigned int dentry = ei->rwoffset & 0xFFFFFFFF; + struct buffer_head *bh; + + /* check if the given file ID is opened */ + if (ei->type != TYPE_DIR) + return -EPERM; + + if (ei->entry == -1) + exfat_chain_set(&dir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + else + exfat_chain_set(&dir, ei->start_clu, + EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags); + + dentries_per_clu = sbi->dentries_per_clu; + dentries_per_clu_bits = ilog2(dentries_per_clu); + + clu_offset = dentry >> dentries_per_clu_bits; + exfat_chain_dup(&clu, &dir); + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + clu.dir += clu_offset; + clu.size -= clu_offset; + } else { + /* hint_information */ + if (clu_offset > 0 && ei->hint_bmap.off != EXFAT_EOF_CLUSTER && + ei->hint_bmap.off > 0 && clu_offset >= ei->hint_bmap.off) { + clu_offset -= ei->hint_bmap.off; + clu.dir = ei->hint_bmap.clu; + } + + while (clu_offset > 0) { + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + + clu_offset--; + } + } + + while (clu.dir != EXFAT_EOF_CLUSTER) { + i = dentry & (dentries_per_clu - 1); + + for ( ; i < dentries_per_clu; i++, dentry++) { + ep = exfat_get_dentry(sb, &clu, i, &bh, §or); + if (!ep) + return -EIO; + + type = exfat_get_entry_type(ep); + if (type == TYPE_UNUSED) { + brelse(bh); + break; + } + + if (type != TYPE_FILE && type != TYPE_DIR) { + brelse(bh); + continue; + } + + dir_entry->attr = le16_to_cpu(ep->dentry.file.attr); + exfat_get_entry_time(sbi, &dir_entry->crtime, + ep->dentry.file.create_tz, + ep->dentry.file.create_time, + ep->dentry.file.create_date, + ep->dentry.file.create_time_ms); + exfat_get_entry_time(sbi, &dir_entry->mtime, + ep->dentry.file.modify_tz, + ep->dentry.file.modify_time, + ep->dentry.file.modify_date, + ep->dentry.file.modify_time_ms); + exfat_get_entry_time(sbi, &dir_entry->atime, + ep->dentry.file.access_tz, + ep->dentry.file.access_time, + ep->dentry.file.access_date, + 0); + + *uni_name.name = 0x0; + exfat_get_uniname_from_ext_entry(sb, &dir, dentry, + uni_name.name); + exfat_utf16_to_nls(sb, &uni_name, + dir_entry->namebuf.lfn, + dir_entry->namebuf.lfnbuf_len); + brelse(bh); + + ep = exfat_get_dentry(sb, &clu, i + 1, &bh, NULL); + if (!ep) + return -EIO; + dir_entry->size = + le64_to_cpu(ep->dentry.stream.valid_size); + brelse(bh); + + ei->hint_bmap.off = dentry >> dentries_per_clu_bits; + ei->hint_bmap.clu = clu.dir; + + ei->rwoffset = ++dentry; + return 0; + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + } + } + + dir_entry->namebuf.lfn[0] = '\0'; + ei->rwoffset = dentry; + return 0; +} + +static void exfat_init_namebuf(struct exfat_dentry_namebuf *nb) +{ + nb->lfn = NULL; + nb->lfnbuf_len = 0; +} + +static int exfat_alloc_namebuf(struct exfat_dentry_namebuf *nb) +{ + nb->lfn = __getname(); + if (!nb->lfn) + return -ENOMEM; + nb->lfnbuf_len = MAX_VFSNAME_BUF_SIZE; + return 0; +} + +static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb) +{ + if (!nb->lfn) + return; + + __putname(nb->lfn); + exfat_init_namebuf(nb); +} + +/* skip iterating emit_dots when dir is empty */ +#define ITER_POS_FILLED_DOTS (2) +static int exfat_iterate(struct file *filp, struct dir_context *ctx) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct inode *tmp; + struct exfat_dir_entry de; + struct exfat_dentry_namebuf *nb = &(de.namebuf); + struct exfat_inode_info *ei = EXFAT_I(inode); + unsigned long inum; + loff_t cpos, i_pos; + int err = 0, fake_offset = 0; + + exfat_init_namebuf(nb); + mutex_lock(&EXFAT_SB(sb)->s_lock); + + cpos = ctx->pos; + if (!dir_emit_dots(filp, ctx)) + goto unlock; + + if (ctx->pos == ITER_POS_FILLED_DOTS) { + cpos = 0; + fake_offset = 1; + } + + if (cpos & (DENTRY_SIZE - 1)) { + err = -ENOENT; + goto unlock; + } + + /* name buffer should be allocated before use */ + err = exfat_alloc_namebuf(nb); + if (err) + goto unlock; +get_new: + ei->rwoffset = EXFAT_B_TO_DEN(cpos); + + if (cpos >= i_size_read(inode)) + goto end_of_dir; + + err = exfat_readdir(inode, &de); + if (err) { + /* + * At least we tried to read a sector. Move cpos to next sector + * position (should be aligned). + */ + if (err == -EIO) { + cpos += 1 << (sb->s_blocksize_bits); + cpos &= ~(sb->s_blocksize - 1); + } + + err = -EIO; + goto end_of_dir; + } + + cpos = EXFAT_DEN_TO_B(ei->rwoffset); + + if (!nb->lfn[0]) + goto end_of_dir; + + i_pos = ((loff_t)ei->start_clu << 32) | + ((ei->rwoffset - 1) & 0xffffffff); + tmp = exfat_iget(sb, i_pos); + if (tmp) { + inum = tmp->i_ino; + iput(tmp); + } else { + inum = iunique(sb, EXFAT_ROOT_INO); + } + + /* + * Before calling dir_emit(), sb_lock should be released. + * Because page fault can occur in dir_emit() when the size + * of buffer given from user is larger than one page size. + */ + mutex_unlock(&EXFAT_SB(sb)->s_lock); + if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum, + (de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG)) + goto out_unlocked; + mutex_lock(&EXFAT_SB(sb)->s_lock); + ctx->pos = cpos; + goto get_new; + +end_of_dir: + if (!cpos && fake_offset) + cpos = ITER_POS_FILLED_DOTS; + ctx->pos = cpos; +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); +out_unlocked: + /* + * To improve performance, free namebuf after unlock sb_lock. + * If namebuf is not allocated, this function do nothing + */ + exfat_free_namebuf(nb); + return err; +} + +const struct file_operations exfat_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate = exfat_iterate, + .fsync = generic_file_fsync, +}; + +int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu) +{ + int ret; + + exfat_chain_set(clu, EXFAT_EOF_CLUSTER, 0, ALLOC_NO_FAT_CHAIN); + + ret = exfat_alloc_cluster(inode, 1, clu); + if (ret) + return ret; + + return exfat_zeroed_cluster(inode, clu->dir); +} + +int exfat_calc_num_entries(struct exfat_uni_name *p_uniname) +{ + int len; + + len = p_uniname->name_len; + if (len == 0) + return -EINVAL; + + /* 1 file entry + 1 stream entry + name entries */ + return ((len - 1) / EXFAT_FILE_NAME_LEN + 3); +} + +unsigned int exfat_get_entry_type(struct exfat_dentry *ep) +{ + if (ep->type == EXFAT_UNUSED) + return TYPE_UNUSED; + if (IS_EXFAT_DELETED(ep->type)) + return TYPE_DELETED; + if (ep->type == EXFAT_INVAL) + return TYPE_INVALID; + if (IS_EXFAT_CRITICAL_PRI(ep->type)) { + if (ep->type == EXFAT_BITMAP) + return TYPE_BITMAP; + if (ep->type == EXFAT_UPCASE) + return TYPE_UPCASE; + if (ep->type == EXFAT_VOLUME) + return TYPE_VOLUME; + if (ep->type == EXFAT_FILE) { + if (le16_to_cpu(ep->dentry.file.attr) & ATTR_SUBDIR) + return TYPE_DIR; + return TYPE_FILE; + } + return TYPE_CRITICAL_PRI; + } + if (IS_EXFAT_BENIGN_PRI(ep->type)) { + if (ep->type == EXFAT_GUID) + return TYPE_GUID; + if (ep->type == EXFAT_PADDING) + return TYPE_PADDING; + if (ep->type == EXFAT_ACLTAB) + return TYPE_ACLTAB; + return TYPE_BENIGN_PRI; + } + if (IS_EXFAT_CRITICAL_SEC(ep->type)) { + if (ep->type == EXFAT_STREAM) + return TYPE_STREAM; + if (ep->type == EXFAT_NAME) + return TYPE_EXTEND; + if (ep->type == EXFAT_ACL) + return TYPE_ACL; + return TYPE_CRITICAL_SEC; + } + return TYPE_BENIGN_SEC; +} + +static void exfat_set_entry_type(struct exfat_dentry *ep, unsigned int type) +{ + if (type == TYPE_UNUSED) { + ep->type = EXFAT_UNUSED; + } else if (type == TYPE_DELETED) { + ep->type &= EXFAT_DELETE; + } else if (type == TYPE_STREAM) { + ep->type = EXFAT_STREAM; + } else if (type == TYPE_EXTEND) { + ep->type = EXFAT_NAME; + } else if (type == TYPE_BITMAP) { + ep->type = EXFAT_BITMAP; + } else if (type == TYPE_UPCASE) { + ep->type = EXFAT_UPCASE; + } else if (type == TYPE_VOLUME) { + ep->type = EXFAT_VOLUME; + } else if (type == TYPE_DIR) { + ep->type = EXFAT_FILE; + ep->dentry.file.attr = cpu_to_le16(ATTR_SUBDIR); + } else if (type == TYPE_FILE) { + ep->type = EXFAT_FILE; + ep->dentry.file.attr = cpu_to_le16(ATTR_ARCHIVE); + } +} + +static void exfat_init_stream_entry(struct exfat_dentry *ep, + unsigned char flags, unsigned int start_clu, + unsigned long long size) +{ + exfat_set_entry_type(ep, TYPE_STREAM); + ep->dentry.stream.flags = flags; + ep->dentry.stream.start_clu = cpu_to_le32(start_clu); + ep->dentry.stream.valid_size = cpu_to_le64(size); + ep->dentry.stream.size = cpu_to_le64(size); +} + +static void exfat_init_name_entry(struct exfat_dentry *ep, + unsigned short *uniname) +{ + int i; + + exfat_set_entry_type(ep, TYPE_EXTEND); + ep->dentry.name.flags = 0x0; + + for (i = 0; i < EXFAT_FILE_NAME_LEN; i++) { + ep->dentry.name.unicode_0_14[i] = cpu_to_le16(*uniname); + if (*uniname == 0x0) + break; + uniname++; + } +} + +int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, + int entry, unsigned int type, unsigned int start_clu, + unsigned long long size) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct timespec64 ts = current_time(inode); + sector_t sector; + struct exfat_dentry *ep; + struct buffer_head *bh; + + /* + * We cannot use exfat_get_dentry_set here because file ep is not + * initialized yet. + */ + ep = exfat_get_dentry(sb, p_dir, entry, &bh, §or); + if (!ep) + return -EIO; + + exfat_set_entry_type(ep, type); + exfat_set_entry_time(sbi, &ts, + &ep->dentry.file.create_tz, + &ep->dentry.file.create_time, + &ep->dentry.file.create_date, + &ep->dentry.file.create_time_ms); + exfat_set_entry_time(sbi, &ts, + &ep->dentry.file.modify_tz, + &ep->dentry.file.modify_time, + &ep->dentry.file.modify_date, + &ep->dentry.file.modify_time_ms); + exfat_set_entry_time(sbi, &ts, + &ep->dentry.file.access_tz, + &ep->dentry.file.access_time, + &ep->dentry.file.access_date, + NULL); + + exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + brelse(bh); + + ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, §or); + if (!ep) + return -EIO; + + exfat_init_stream_entry(ep, + (type == TYPE_FILE) ? ALLOC_FAT_CHAIN : ALLOC_NO_FAT_CHAIN, + start_clu, size); + exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + brelse(bh); + + return 0; +} + +int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir, + int entry) +{ + struct super_block *sb = inode->i_sb; + int ret = 0; + int i, num_entries; + sector_t sector; + unsigned short chksum; + struct exfat_dentry *ep, *fep; + struct buffer_head *fbh, *bh; + + fep = exfat_get_dentry(sb, p_dir, entry, &fbh, §or); + if (!fep) + return -EIO; + + num_entries = fep->dentry.file.num_ext + 1; + chksum = exfat_calc_chksum_2byte(fep, DENTRY_SIZE, 0, CS_DIR_ENTRY); + + for (i = 1; i < num_entries; i++) { + ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, NULL); + if (!ep) { + ret = -EIO; + goto release_fbh; + } + chksum = exfat_calc_chksum_2byte(ep, DENTRY_SIZE, chksum, + CS_DEFAULT); + brelse(bh); + } + + fep->dentry.file.checksum = cpu_to_le16(chksum); + exfat_update_bh(sb, fbh, IS_DIRSYNC(inode)); +release_fbh: + brelse(fbh); + return ret; +} + +int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, + int entry, int num_entries, struct exfat_uni_name *p_uniname) +{ + struct super_block *sb = inode->i_sb; + int i; + sector_t sector; + unsigned short *uniname = p_uniname->name; + struct exfat_dentry *ep; + struct buffer_head *bh; + int sync = IS_DIRSYNC(inode); + + ep = exfat_get_dentry(sb, p_dir, entry, &bh, §or); + if (!ep) + return -EIO; + + ep->dentry.file.num_ext = (unsigned char)(num_entries - 1); + exfat_update_bh(sb, bh, sync); + brelse(bh); + + ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, §or); + if (!ep) + return -EIO; + + ep->dentry.stream.name_len = p_uniname->name_len; + ep->dentry.stream.name_hash = cpu_to_le16(p_uniname->name_hash); + exfat_update_bh(sb, bh, sync); + brelse(bh); + + for (i = EXFAT_FIRST_CLUSTER; i < num_entries; i++) { + ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, §or); + if (!ep) + return -EIO; + + exfat_init_name_entry(ep, uniname); + exfat_update_bh(sb, bh, sync); + brelse(bh); + uniname += EXFAT_FILE_NAME_LEN; + } + + exfat_update_dir_chksum(inode, p_dir, entry); + return 0; +} + +int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir, + int entry, int order, int num_entries) +{ + struct super_block *sb = inode->i_sb; + int i; + sector_t sector; + struct exfat_dentry *ep; + struct buffer_head *bh; + + for (i = order; i < num_entries; i++) { + ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, §or); + if (!ep) + return -EIO; + + exfat_set_entry_type(ep, TYPE_DELETED); + exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + brelse(bh); + } + + return 0; +} + +int exfat_update_dir_chksum_with_entry_set(struct super_block *sb, + struct exfat_entry_set_cache *es, int sync) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + sector_t sec = es->sector; + unsigned int off = es->offset; + int chksum_type = CS_DIR_ENTRY, i, num_entries = es->num_entries; + unsigned int buf_off = (off - es->offset); + unsigned int remaining_byte_in_sector, copy_entries, clu; + unsigned short chksum = 0; + + for (i = 0; i < num_entries; i++) { + chksum = exfat_calc_chksum_2byte(&es->entries[i], DENTRY_SIZE, + chksum, chksum_type); + chksum_type = CS_DEFAULT; + } + + es->entries[0].dentry.file.checksum = cpu_to_le16(chksum); + + while (num_entries) { + /* write per sector base */ + remaining_byte_in_sector = (1 << sb->s_blocksize_bits) - off; + copy_entries = min_t(int, + EXFAT_B_TO_DEN(remaining_byte_in_sector), + num_entries); + bh = sb_bread(sb, sec); + if (!bh) + goto err_out; + memcpy(bh->b_data + off, + (unsigned char *)&es->entries[0] + buf_off, + EXFAT_DEN_TO_B(copy_entries)); + exfat_update_bh(sb, bh, sync); + brelse(bh); + num_entries -= copy_entries; + + if (num_entries) { + /* get next sector */ + if (exfat_is_last_sector_in_cluster(sbi, sec)) { + clu = exfat_sector_to_cluster(sbi, sec); + if (es->alloc_flag == ALLOC_NO_FAT_CHAIN) + clu++; + else if (exfat_get_next_cluster(sb, &clu)) + goto err_out; + sec = exfat_cluster_to_sector(sbi, clu); + } else { + sec++; + } + off = 0; + buf_off += EXFAT_DEN_TO_B(copy_entries); + } + } + + return 0; +err_out: + return -EIO; +} + +static int exfat_walk_fat_chain(struct super_block *sb, + struct exfat_chain *p_dir, unsigned int byte_offset, + unsigned int *clu) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int clu_offset; + unsigned int cur_clu; + + clu_offset = EXFAT_B_TO_CLU(byte_offset, sbi); + cur_clu = p_dir->dir; + + if (p_dir->flags == ALLOC_NO_FAT_CHAIN) { + cur_clu += clu_offset; + } else { + while (clu_offset > 0) { + if (exfat_get_next_cluster(sb, &cur_clu)) + return -EIO; + if (cur_clu == EXFAT_EOF_CLUSTER) { + exfat_fs_error(sb, + "invalid dentry access beyond EOF (clu : %u, eidx : %d)", + p_dir->dir, + EXFAT_B_TO_DEN(byte_offset)); + return -EIO; + } + clu_offset--; + } + } + + *clu = cur_clu; + return 0; +} + +int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir, + int entry, sector_t *sector, int *offset) +{ + int ret; + unsigned int off, clu = 0; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + off = EXFAT_DEN_TO_B(entry); + + ret = exfat_walk_fat_chain(sb, p_dir, off, &clu); + if (ret) + return ret; + + /* byte offset in cluster */ + off = EXFAT_CLU_OFFSET(off, sbi); + + /* byte offset in sector */ + *offset = EXFAT_BLK_OFFSET(off, sb); + + /* sector offset in cluster */ + *sector = EXFAT_B_TO_BLK(off, sb); + *sector += exfat_cluster_to_sector(sbi, clu); + return 0; +} + +#define EXFAT_MAX_RA_SIZE (128*1024) +static int exfat_dir_readahead(struct super_block *sb, sector_t sec) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + unsigned int max_ra_count = EXFAT_MAX_RA_SIZE >> sb->s_blocksize_bits; + unsigned int page_ra_count = PAGE_SIZE >> sb->s_blocksize_bits; + unsigned int adj_ra_count = max(sbi->sect_per_clus, page_ra_count); + unsigned int ra_count = min(adj_ra_count, max_ra_count); + + /* Read-ahead is not required */ + if (sbi->sect_per_clus == 1) + return 0; + + if (sec < sbi->data_start_sector) { + exfat_msg(sb, KERN_ERR, + "requested sector is invalid(sect:%llu, root:%llu)", + (unsigned long long)sec, sbi->data_start_sector); + return -EIO; + } + + /* Not sector aligned with ra_count, resize ra_count to page size */ + if ((sec - sbi->data_start_sector) & (ra_count - 1)) + ra_count = page_ra_count; + + bh = sb_find_get_block(sb, sec); + if (!bh || !buffer_uptodate(bh)) { + unsigned int i; + + for (i = 0; i < ra_count; i++) + sb_breadahead(sb, (sector_t)(sec + i)); + } + brelse(bh); + return 0; +} + +struct exfat_dentry *exfat_get_dentry(struct super_block *sb, + struct exfat_chain *p_dir, int entry, struct buffer_head **bh, + sector_t *sector) +{ + unsigned int dentries_per_page = EXFAT_B_TO_DEN(PAGE_SIZE); + int off; + sector_t sec; + + if (p_dir->dir == DIR_DELETED) { + exfat_msg(sb, KERN_ERR, "abnormal access to deleted dentry\n"); + return NULL; + } + + if (exfat_find_location(sb, p_dir, entry, &sec, &off)) + return NULL; + + if (p_dir->dir != EXFAT_FREE_CLUSTER && + !(entry & (dentries_per_page - 1))) + exfat_dir_readahead(sb, sec); + + *bh = sb_bread(sb, sec); + if (!*bh) + return NULL; + + if (sector) + *sector = sec; + return (struct exfat_dentry *)((*bh)->b_data + off); +} + +enum exfat_validate_dentry_mode { + ES_MODE_STARTED, + ES_MODE_GET_FILE_ENTRY, + ES_MODE_GET_STRM_ENTRY, + ES_MODE_GET_NAME_ENTRY, + ES_MODE_GET_CRITICAL_SEC_ENTRY, +}; + +static bool exfat_validate_entry(unsigned int type, + enum exfat_validate_dentry_mode *mode) +{ + if (type == TYPE_UNUSED || type == TYPE_DELETED) + return false; + + switch (*mode) { + case ES_MODE_STARTED: + if (type != TYPE_FILE && type != TYPE_DIR) + return false; + *mode = ES_MODE_GET_FILE_ENTRY; + return true; + case ES_MODE_GET_FILE_ENTRY: + if (type != TYPE_STREAM) + return false; + *mode = ES_MODE_GET_STRM_ENTRY; + return true; + case ES_MODE_GET_STRM_ENTRY: + if (type != TYPE_EXTEND) + return false; + *mode = ES_MODE_GET_NAME_ENTRY; + return true; + case ES_MODE_GET_NAME_ENTRY: + if (type == TYPE_STREAM) + return false; + if (type != TYPE_EXTEND) { + if (!(type & TYPE_CRITICAL_SEC)) + return false; + *mode = ES_MODE_GET_CRITICAL_SEC_ENTRY; + } + return true; + case ES_MODE_GET_CRITICAL_SEC_ENTRY: + if (type == TYPE_EXTEND || type == TYPE_STREAM) + return false; + if ((type & TYPE_CRITICAL_SEC) != TYPE_CRITICAL_SEC) + return false; + return true; + default: + WARN_ON_ONCE(1); + return false; + } +} + +/* + * Returns a set of dentries for a file or dir. + * + * Note that this is a copy (dump) of dentries so that user should + * call write_entry_set() to apply changes made in this entry set + * to the real device. + * + * in: + * sb+p_dir+entry: indicates a file/dir + * type: specifies how many dentries should be included. + * out: + * file_ep: will point the first dentry(= file dentry) on success + * return: + * pointer of entry set on success, + * NULL on failure. + */ +struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, + struct exfat_chain *p_dir, int entry, unsigned int type, + struct exfat_dentry **file_ep) +{ + int ret; + unsigned int off, byte_offset, clu = 0; + unsigned int entry_type; + sector_t sec; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_entry_set_cache *es; + struct exfat_dentry *ep, *pos; + unsigned char num_entries; + enum exfat_validate_dentry_mode mode = ES_MODE_STARTED; + struct buffer_head *bh; + + if (p_dir->dir == DIR_DELETED) { + exfat_msg(sb, KERN_ERR, "access to deleted dentry\n"); + return NULL; + } + + byte_offset = EXFAT_DEN_TO_B(entry); + ret = exfat_walk_fat_chain(sb, p_dir, byte_offset, &clu); + if (ret) + return NULL; + + /* byte offset in cluster */ + byte_offset = EXFAT_CLU_OFFSET(byte_offset, sbi); + + /* byte offset in sector */ + off = EXFAT_BLK_OFFSET(byte_offset, sb); + + /* sector offset in cluster */ + sec = EXFAT_B_TO_BLK(byte_offset, sb); + sec += exfat_cluster_to_sector(sbi, clu); + + bh = sb_bread(sb, sec); + if (!bh) + return NULL; + + ep = (struct exfat_dentry *)(bh->b_data + off); + entry_type = exfat_get_entry_type(ep); + + if (entry_type != TYPE_FILE && entry_type != TYPE_DIR) + goto release_bh; + + num_entries = type == ES_ALL_ENTRIES ? + ep->dentry.file.num_ext + 1 : type; + es = kmalloc(struct_size(es, entries, num_entries), GFP_KERNEL); + if (!es) + goto release_bh; + + es->num_entries = num_entries; + es->sector = sec; + es->offset = off; + es->alloc_flag = p_dir->flags; + + pos = &es->entries[0]; + + while (num_entries) { + if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) + goto free_es; + + /* copy dentry */ + memcpy(pos, ep, sizeof(struct exfat_dentry)); + + if (--num_entries == 0) + break; + + if (((off + DENTRY_SIZE) & (sb->s_blocksize - 1)) < + (off & (sb->s_blocksize - 1))) { + /* get the next sector */ + if (exfat_is_last_sector_in_cluster(sbi, sec)) { + if (es->alloc_flag == ALLOC_NO_FAT_CHAIN) + clu++; + else if (exfat_get_next_cluster(sb, &clu)) + goto free_es; + sec = exfat_cluster_to_sector(sbi, clu); + } else { + sec++; + } + + brelse(bh); + bh = sb_bread(sb, sec); + if (!bh) + goto free_es; + off = 0; + ep = (struct exfat_dentry *)bh->b_data; + } else { + ep++; + off += DENTRY_SIZE; + } + pos++; + } + + if (file_ep) + *file_ep = &es->entries[0]; + brelse(bh); + return es; + +free_es: + kfree(es); +release_bh: + brelse(bh); + return NULL; +} + +enum { + DIRENT_STEP_FILE, + DIRENT_STEP_STRM, + DIRENT_STEP_NAME, + DIRENT_STEP_SECD, +}; + +/* + * return values: + * >= 0 : return dir entiry position with the name in dir + * -EEXIST : (root dir, ".") it is the root dir itself + * -ENOENT : entry with the name does not exist + * -EIO : I/O error + */ +int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, + struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, + int num_entries, unsigned int type) +{ + int i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len; + int order, step, name_len = 0; + int dentries_per_clu, num_empty = 0; + unsigned int entry_type; + unsigned short *uniname = NULL; + struct exfat_chain clu; + struct exfat_hint *hint_stat = &ei->hint_stat; + struct exfat_hint_femp candi_empty; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + dentries_per_clu = sbi->dentries_per_clu; + + exfat_chain_dup(&clu, p_dir); + + if (hint_stat->eidx) { + clu.dir = hint_stat->clu; + dentry = hint_stat->eidx; + end_eidx = dentry; + } + + candi_empty.eidx = EXFAT_HINT_NONE; +rewind: + order = 0; + step = DIRENT_STEP_FILE; + while (clu.dir != EXFAT_EOF_CLUSTER) { + i = dentry & (dentries_per_clu - 1); + for (; i < dentries_per_clu; i++, dentry++) { + struct exfat_dentry *ep; + struct buffer_head *bh; + + if (rewind && dentry == end_eidx) + goto not_found; + + ep = exfat_get_dentry(sb, &clu, i, &bh, NULL); + if (!ep) + return -EIO; + + entry_type = exfat_get_entry_type(ep); + + if (entry_type == TYPE_UNUSED || + entry_type == TYPE_DELETED) { + step = DIRENT_STEP_FILE; + + num_empty++; + if (candi_empty.eidx == EXFAT_HINT_NONE && + num_empty == 1) { + exfat_chain_set(&candi_empty.cur, + clu.dir, clu.size, clu.flags); + } + + if (candi_empty.eidx == EXFAT_HINT_NONE && + num_empty >= num_entries) { + candi_empty.eidx = + dentry - (num_empty - 1); + WARN_ON(candi_empty.eidx < 0); + candi_empty.count = num_empty; + + if (ei->hint_femp.eidx == + EXFAT_HINT_NONE || + candi_empty.eidx <= + ei->hint_femp.eidx) { + memcpy(&ei->hint_femp, + &candi_empty, + sizeof(candi_empty)); + } + } + + brelse(bh); + if (entry_type == TYPE_UNUSED) + goto not_found; + continue; + } + + num_empty = 0; + candi_empty.eidx = EXFAT_HINT_NONE; + + if (entry_type == TYPE_FILE || entry_type == TYPE_DIR) { + step = DIRENT_STEP_FILE; + if (type == TYPE_ALL || type == entry_type) { + num_ext = ep->dentry.file.num_ext; + step = DIRENT_STEP_STRM; + } + brelse(bh); + continue; + } + + if (entry_type == TYPE_STREAM) { + unsigned short name_hash; + + if (step != DIRENT_STEP_STRM) { + step = DIRENT_STEP_FILE; + brelse(bh); + continue; + } + step = DIRENT_STEP_FILE; + name_hash = le16_to_cpu( + ep->dentry.stream.name_hash); + if (p_uniname->name_hash == name_hash && + p_uniname->name_len == + ep->dentry.stream.name_len) { + step = DIRENT_STEP_NAME; + order = 1; + name_len = 0; + } + brelse(bh); + continue; + } + + brelse(bh); + if (entry_type == TYPE_EXTEND) { + unsigned short entry_uniname[16], unichar; + + if (step != DIRENT_STEP_NAME) { + step = DIRENT_STEP_FILE; + continue; + } + + if (++order == 2) + uniname = p_uniname->name; + else + uniname += EXFAT_FILE_NAME_LEN; + + len = exfat_extract_uni_name(ep, entry_uniname); + name_len += len; + + unichar = *(uniname+len); + *(uniname+len) = 0x0; + + if (exfat_uniname_ncmp(sb, uniname, + entry_uniname, len)) { + step = DIRENT_STEP_FILE; + } else if (p_uniname->name_len == name_len) { + if (order == num_ext) + goto found; + step = DIRENT_STEP_SECD; + } + + *(uniname+len) = unichar; + continue; + } + + if (entry_type & + (TYPE_CRITICAL_SEC | TYPE_BENIGN_SEC)) { + if (step == DIRENT_STEP_SECD) { + if (++order == num_ext) + goto found; + continue; + } + } + step = DIRENT_STEP_FILE; + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &clu.dir)) + return -EIO; + } + } + +not_found: + /* + * We started at not 0 index,so we should try to find target + * from 0 index to the index we started at. + */ + if (!rewind && end_eidx) { + rewind = 1; + dentry = 0; + clu.dir = p_dir->dir; + /* reset empty hint */ + num_empty = 0; + candi_empty.eidx = EXFAT_HINT_NONE; + goto rewind; + } + + /* initialized hint_stat */ + hint_stat->clu = p_dir->dir; + hint_stat->eidx = 0; + return -ENOENT; + +found: + /* next dentry we'll find is out of this cluster */ + if (!((dentry + 1) & (dentries_per_clu - 1))) { + int ret = 0; + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + ret = exfat_get_next_cluster(sb, &clu.dir); + } + + if (ret || clu.dir != EXFAT_EOF_CLUSTER) { + /* just initialized hint_stat */ + hint_stat->clu = p_dir->dir; + hint_stat->eidx = 0; + return (dentry - num_ext); + } + } + + hint_stat->clu = clu.dir; + hint_stat->eidx = dentry + 1; + return dentry - num_ext; +} + +int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir, + int entry, struct exfat_dentry *ep) +{ + int i, count = 0; + unsigned int type; + struct exfat_dentry *ext_ep; + struct buffer_head *bh; + + for (i = 0, entry++; i < ep->dentry.file.num_ext; i++, entry++) { + ext_ep = exfat_get_dentry(sb, p_dir, entry, &bh, NULL); + if (!ext_ep) + return -EIO; + + type = exfat_get_entry_type(ext_ep); + brelse(bh); + if (type == TYPE_EXTEND || type == TYPE_STREAM) + count++; + else + break; + } + return count; +} + +int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir) +{ + int i, count = 0; + int dentries_per_clu; + unsigned int entry_type; + struct exfat_chain clu; + struct exfat_dentry *ep; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + + dentries_per_clu = sbi->dentries_per_clu; + + exfat_chain_dup(&clu, p_dir); + + while (clu.dir != EXFAT_EOF_CLUSTER) { + for (i = 0; i < dentries_per_clu; i++) { + ep = exfat_get_dentry(sb, &clu, i, &bh, NULL); + if (!ep) + return -EIO; + entry_type = exfat_get_entry_type(ep); + brelse(bh); + + if (entry_type == TYPE_UNUSED) + return count; + if (entry_type != TYPE_DIR) + continue; + count++; + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + } + } + + return count; +} diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h new file mode 100644 index 000000000000..67d4e46fb810 --- /dev/null +++ b/fs/exfat/exfat_fs.h @@ -0,0 +1,519 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#ifndef _EXFAT_FS_H +#define _EXFAT_FS_H + +#include <linux/fs.h> +#include <linux/ratelimit.h> +#include <linux/nls.h> + +#define EXFAT_SUPER_MAGIC 0x2011BAB0UL +#define EXFAT_ROOT_INO 1 + +#define EXFAT_SB_DIRTY 0 + +#define EXFAT_CLUSTERS_UNTRACKED (~0u) + +/* + * exfat error flags + */ +enum exfat_error_mode { + EXFAT_ERRORS_CONT, /* ignore error and continue */ + EXFAT_ERRORS_PANIC, /* panic on error */ + EXFAT_ERRORS_RO, /* remount r/o on error */ +}; + +/* + * exfat nls lossy flag + */ +enum { + NLS_NAME_NO_LOSSY, /* no lossy */ + NLS_NAME_LOSSY, /* just detected incorrect filename(s) */ + NLS_NAME_OVERLEN, /* the length is over than its limit */ +}; + +#define EXFAT_HASH_BITS 8 +#define EXFAT_HASH_SIZE (1UL << EXFAT_HASH_BITS) + +/* + * Type Definitions + */ +#define ES_2_ENTRIES 2 +#define ES_ALL_ENTRIES 0 + +#define DIR_DELETED 0xFFFF0321 + +/* type values */ +#define TYPE_UNUSED 0x0000 +#define TYPE_DELETED 0x0001 +#define TYPE_INVALID 0x0002 +#define TYPE_CRITICAL_PRI 0x0100 +#define TYPE_BITMAP 0x0101 +#define TYPE_UPCASE 0x0102 +#define TYPE_VOLUME 0x0103 +#define TYPE_DIR 0x0104 +#define TYPE_FILE 0x011F +#define TYPE_CRITICAL_SEC 0x0200 +#define TYPE_STREAM 0x0201 +#define TYPE_EXTEND 0x0202 +#define TYPE_ACL 0x0203 +#define TYPE_BENIGN_PRI 0x0400 +#define TYPE_GUID 0x0401 +#define TYPE_PADDING 0x0402 +#define TYPE_ACLTAB 0x0403 +#define TYPE_BENIGN_SEC 0x0800 +#define TYPE_ALL 0x0FFF + +#define MAX_CHARSET_SIZE 6 /* max size of multi-byte character */ +#define MAX_NAME_LENGTH 255 /* max len of file name excluding NULL */ +#define MAX_VFSNAME_BUF_SIZE ((MAX_NAME_LENGTH + 1) * MAX_CHARSET_SIZE) + +#define FAT_CACHE_SIZE 128 +#define FAT_CACHE_HASH_SIZE 64 +#define BUF_CACHE_SIZE 256 +#define BUF_CACHE_HASH_SIZE 64 + +#define EXFAT_HINT_NONE -1 +#define EXFAT_MIN_SUBDIR 2 + +/* + * helpers for cluster size to byte conversion. + */ +#define EXFAT_CLU_TO_B(b, sbi) ((b) << (sbi)->cluster_size_bits) +#define EXFAT_B_TO_CLU(b, sbi) ((b) >> (sbi)->cluster_size_bits) +#define EXFAT_B_TO_CLU_ROUND_UP(b, sbi) \ + (((b - 1) >> (sbi)->cluster_size_bits) + 1) +#define EXFAT_CLU_OFFSET(off, sbi) ((off) & ((sbi)->cluster_size - 1)) + +/* + * helpers for block size to byte conversion. + */ +#define EXFAT_BLK_TO_B(b, sb) ((b) << (sb)->s_blocksize_bits) +#define EXFAT_B_TO_BLK(b, sb) ((b) >> (sb)->s_blocksize_bits) +#define EXFAT_B_TO_BLK_ROUND_UP(b, sb) \ + (((b - 1) >> (sb)->s_blocksize_bits) + 1) +#define EXFAT_BLK_OFFSET(off, sb) ((off) & ((sb)->s_blocksize - 1)) + +/* + * helpers for block size to dentry size conversion. + */ +#define EXFAT_B_TO_DEN_IDX(b, sbi) \ + ((b) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) +#define EXFAT_B_TO_DEN(b) ((b) >> DENTRY_SIZE_BITS) +#define EXFAT_DEN_TO_B(b) ((b) << DENTRY_SIZE_BITS) + +/* + * helpers for fat entry. + */ +#define FAT_ENT_SIZE (4) +#define FAT_ENT_SIZE_BITS (2) +#define FAT_ENT_OFFSET_SECTOR(sb, loc) (EXFAT_SB(sb)->FAT1_start_sector + \ + (((u64)loc << FAT_ENT_SIZE_BITS) >> sb->s_blocksize_bits)) +#define FAT_ENT_OFFSET_BYTE_IN_SECTOR(sb, loc) \ + ((loc << FAT_ENT_SIZE_BITS) & (sb->s_blocksize - 1)) + +/* + * helpers for bitmap. + */ +#define CLUSTER_TO_BITMAP_ENT(clu) ((clu) - EXFAT_RESERVED_CLUSTERS) +#define BITMAP_ENT_TO_CLUSTER(ent) ((ent) + EXFAT_RESERVED_CLUSTERS) +#define BITS_PER_SECTOR(sb) ((sb)->s_blocksize * BITS_PER_BYTE) +#define BITS_PER_SECTOR_MASK(sb) (BITS_PER_SECTOR(sb) - 1) +#define BITMAP_OFFSET_SECTOR_INDEX(sb, ent) \ + ((ent / BITS_PER_BYTE) >> (sb)->s_blocksize_bits) +#define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent & BITS_PER_SECTOR_MASK(sb)) +#define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \ + ((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1)) +#define BITS_PER_BYTE_MASK 0x7 +#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1) + +struct exfat_dentry_namebuf { + char *lfn; + int lfnbuf_len; /* usally MAX_UNINAME_BUF_SIZE */ +}; + +/* unicode name structure */ +struct exfat_uni_name { + /* +3 for null and for converting */ + unsigned short name[MAX_NAME_LENGTH + 3]; + unsigned short name_hash; + unsigned char name_len; +}; + +/* directory structure */ +struct exfat_chain { + unsigned int dir; + unsigned int size; + unsigned char flags; +}; + +/* first empty entry hint information */ +struct exfat_hint_femp { + /* entry index of a directory */ + int eidx; + /* count of continuous empty entry */ + int count; + /* the cluster that first empty slot exists in */ + struct exfat_chain cur; +}; + +/* hint structure */ +struct exfat_hint { + unsigned int clu; + union { + unsigned int off; /* cluster offset */ + int eidx; /* entry index */ + }; +}; + +struct exfat_entry_set_cache { + /* sector number that contains file_entry */ + sector_t sector; + /* byte offset in the sector */ + unsigned int offset; + /* flag in stream entry. 01 for cluster chain, 03 for contig. */ + int alloc_flag; + unsigned int num_entries; + struct exfat_dentry entries[]; +}; + +struct exfat_dir_entry { + struct exfat_chain dir; + int entry; + unsigned int type; + unsigned int start_clu; + unsigned char flags; + unsigned short attr; + loff_t size; + unsigned int num_subdirs; + struct timespec64 atime; + struct timespec64 mtime; + struct timespec64 crtime; + struct exfat_dentry_namebuf namebuf; +}; + +/* + * exfat mount in-memory data + */ +struct exfat_mount_options { + kuid_t fs_uid; + kgid_t fs_gid; + unsigned short fs_fmask; + unsigned short fs_dmask; + /* permission for setting the [am]time */ + unsigned short allow_utime; + /* charset for filename input/display */ + char *iocharset; + /* on error: continue, panic, remount-ro */ + enum exfat_error_mode errors; + unsigned utf8:1, /* Use of UTF-8 character set */ + discard:1; /* Issue discard requests on deletions */ + int time_offset; /* Offset of timestamps from UTC (in minutes) */ +}; + +/* + * EXFAT file system superblock in-memory data + */ +struct exfat_sb_info { + unsigned long long num_sectors; /* num of sectors in volume */ + unsigned int num_clusters; /* num of clusters in volume */ + unsigned int cluster_size; /* cluster size in bytes */ + unsigned int cluster_size_bits; + unsigned int sect_per_clus; /* cluster size in sectors */ + unsigned int sect_per_clus_bits; + unsigned long long FAT1_start_sector; /* FAT1 start sector */ + unsigned long long FAT2_start_sector; /* FAT2 start sector */ + unsigned long long data_start_sector; /* data area start sector */ + unsigned int num_FAT_sectors; /* num of FAT sectors */ + unsigned int root_dir; /* root dir cluster */ + unsigned int dentries_per_clu; /* num of dentries per cluster */ + unsigned int vol_flag; /* volume dirty flag */ + struct buffer_head *pbr_bh; /* buffer_head of PBR sector */ + + unsigned int map_clu; /* allocation bitmap start cluster */ + unsigned int map_sectors; /* num of allocation bitmap sectors */ + struct buffer_head **vol_amap; /* allocation bitmap */ + + unsigned short *vol_utbl; /* upcase table */ + + unsigned int clu_srch_ptr; /* cluster search pointer */ + unsigned int used_clusters; /* number of used clusters */ + + unsigned long s_state; + struct mutex s_lock; /* superblock lock */ + struct exfat_mount_options options; + struct nls_table *nls_io; /* Charset used for input and display */ + struct ratelimit_state ratelimit; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[EXFAT_HASH_SIZE]; + + struct rcu_head rcu; +}; + +/* + * EXFAT file system inode in-memory data + */ +struct exfat_inode_info { + struct exfat_chain dir; + int entry; + unsigned int type; + unsigned short attr; + unsigned int start_clu; + unsigned char flags; + /* + * the copy of low 32bit of i_version to check + * the validation of hint_stat. + */ + unsigned int version; + /* file offset or dentry index for readdir */ + loff_t rwoffset; + + /* hint for cluster last accessed */ + struct exfat_hint hint_bmap; + /* hint for entry index we try to lookup next time */ + struct exfat_hint hint_stat; + /* hint for first empty entry */ + struct exfat_hint_femp hint_femp; + + spinlock_t cache_lru_lock; + struct list_head cache_lru; + int nr_caches; + /* for avoiding the race between alloc and free */ + unsigned int cache_valid_id; + + /* + * NOTE: i_size_ondisk is 64bits, so must hold ->inode_lock to access. + * physically allocated size. + */ + loff_t i_size_ondisk; + /* block-aligned i_size (used in cont_write_begin) */ + loff_t i_size_aligned; + /* on-disk position of directory entry or 0 */ + loff_t i_pos; + /* hash by i_location */ + struct hlist_node i_hash_fat; + /* protect bmap against truncate */ + struct rw_semaphore truncate_lock; + struct inode vfs_inode; + /* File creation time */ + struct timespec64 i_crtime; +}; + +static inline struct exfat_sb_info *EXFAT_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct exfat_inode_info *EXFAT_I(struct inode *inode) +{ + return container_of(inode, struct exfat_inode_info, vfs_inode); +} + +/* + * If ->i_mode can't hold 0222 (i.e. ATTR_RO), we use ->i_attrs to + * save ATTR_RO instead of ->i_mode. + * + * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only + * bit, it's just used as flag for app. + */ +static inline int exfat_mode_can_hold_ro(struct inode *inode) +{ + struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + + if (S_ISDIR(inode->i_mode)) + return 0; + + if ((~sbi->options.fs_fmask) & 0222) + return 1; + return 0; +} + +/* Convert attribute bits and a mask to the UNIX mode. */ +static inline mode_t exfat_make_mode(struct exfat_sb_info *sbi, + unsigned short attr, mode_t mode) +{ + if ((attr & ATTR_READONLY) && !(attr & ATTR_SUBDIR)) + mode &= ~0222; + + if (attr & ATTR_SUBDIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + +/* Return the FAT attribute byte for this inode */ +static inline unsigned short exfat_make_attr(struct inode *inode) +{ + unsigned short attr = EXFAT_I(inode)->attr; + + if (S_ISDIR(inode->i_mode)) + attr |= ATTR_SUBDIR; + if (exfat_mode_can_hold_ro(inode) && !(inode->i_mode & 0222)) + attr |= ATTR_READONLY; + return attr; +} + +static inline void exfat_save_attr(struct inode *inode, unsigned short attr) +{ + if (exfat_mode_can_hold_ro(inode)) + EXFAT_I(inode)->attr = attr & (ATTR_RWMASK | ATTR_READONLY); + else + EXFAT_I(inode)->attr = attr & ATTR_RWMASK; +} + +static inline bool exfat_is_last_sector_in_cluster(struct exfat_sb_info *sbi, + sector_t sec) +{ + return ((sec - sbi->data_start_sector + 1) & + ((1 << sbi->sect_per_clus_bits) - 1)) == 0; +} + +static inline sector_t exfat_cluster_to_sector(struct exfat_sb_info *sbi, + unsigned int clus) +{ + return ((clus - EXFAT_RESERVED_CLUSTERS) << sbi->sect_per_clus_bits) + + sbi->data_start_sector; +} + +static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, + sector_t sec) +{ + return ((sec - sbi->data_start_sector) >> sbi->sect_per_clus_bits) + + EXFAT_RESERVED_CLUSTERS; +} + +/* super.c */ +int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag); + +/* fatent.c */ +#define exfat_get_next_cluster(sb, pclu) exfat_ent_get(sb, *(pclu), pclu) + +int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, + struct exfat_chain *p_chain); +int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain); +int exfat_ent_get(struct super_block *sb, unsigned int loc, + unsigned int *content); +int exfat_ent_set(struct super_block *sb, unsigned int loc, + unsigned int content); +int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir, + int entry, struct exfat_dentry *p_entry); +int exfat_chain_cont_cluster(struct super_block *sb, unsigned int chain, + unsigned int len); +int exfat_zeroed_cluster(struct inode *dir, unsigned int clu); +int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain, + unsigned int *ret_clu); +int exfat_count_num_clusters(struct super_block *sb, + struct exfat_chain *p_chain, unsigned int *ret_count); + +/* balloc.c */ +int exfat_load_bitmap(struct super_block *sb); +void exfat_free_bitmap(struct exfat_sb_info *sbi); +int exfat_set_bitmap(struct inode *inode, unsigned int clu); +void exfat_clear_bitmap(struct inode *inode, unsigned int clu); +unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu); +int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count); + +/* file.c */ +extern const struct file_operations exfat_file_operations; +int __exfat_truncate(struct inode *inode, loff_t new_size); +void exfat_truncate(struct inode *inode, loff_t size); +int exfat_setattr(struct dentry *dentry, struct iattr *attr); +int exfat_getattr(const struct path *path, struct kstat *stat, + unsigned int request_mask, unsigned int query_flags); + +/* namei.c */ +extern const struct dentry_operations exfat_dentry_ops; +extern const struct dentry_operations exfat_utf8_dentry_ops; + +/* cache.c */ +int exfat_cache_init(void); +void exfat_cache_shutdown(void); +void exfat_cache_init_inode(struct inode *inode); +void exfat_cache_inval_inode(struct inode *inode); +int exfat_get_cluster(struct inode *inode, unsigned int cluster, + unsigned int *fclus, unsigned int *dclus, + unsigned int *last_dclus, int allow_eof); + +/* dir.c */ +extern const struct inode_operations exfat_dir_inode_operations; +extern const struct file_operations exfat_dir_operations; +unsigned int exfat_get_entry_type(struct exfat_dentry *p_entry); +int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, + int entry, unsigned int type, unsigned int start_clu, + unsigned long long size); +int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, + int entry, int num_entries, struct exfat_uni_name *p_uniname); +int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir, + int entry, int order, int num_entries); +int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir, + int entry); +int exfat_update_dir_chksum_with_entry_set(struct super_block *sb, + struct exfat_entry_set_cache *es, int sync); +int exfat_calc_num_entries(struct exfat_uni_name *p_uniname); +int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, + struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, + int num_entries, unsigned int type); +int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu); +int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir, + int entry, sector_t *sector, int *offset); +struct exfat_dentry *exfat_get_dentry(struct super_block *sb, + struct exfat_chain *p_dir, int entry, struct buffer_head **bh, + sector_t *sector); +struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, + struct exfat_chain *p_dir, int entry, unsigned int type, + struct exfat_dentry **file_ep); +int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); + +/* inode.c */ +extern const struct inode_operations exfat_file_inode_operations; +void exfat_sync_inode(struct inode *inode); +struct inode *exfat_build_inode(struct super_block *sb, + struct exfat_dir_entry *info, loff_t i_pos); +void exfat_hash_inode(struct inode *inode, loff_t i_pos); +void exfat_unhash_inode(struct inode *inode); +struct inode *exfat_iget(struct super_block *sb, loff_t i_pos); +int exfat_write_inode(struct inode *inode, struct writeback_control *wbc); +void exfat_evict_inode(struct inode *inode); +int exfat_block_truncate_page(struct inode *inode, loff_t from); + +/* exfat/nls.c */ +unsigned short exfat_toupper(struct super_block *sb, unsigned short a); +int exfat_uniname_ncmp(struct super_block *sb, unsigned short *a, + unsigned short *b, unsigned int len); +int exfat_utf16_to_nls(struct super_block *sb, + struct exfat_uni_name *uniname, unsigned char *p_cstring, + int len); +int exfat_nls_to_utf16(struct super_block *sb, + const unsigned char *p_cstring, const int len, + struct exfat_uni_name *uniname, int *p_lossy); +int exfat_create_upcase_table(struct super_block *sb); +void exfat_free_upcase_table(struct exfat_sb_info *sbi); +unsigned short exfat_high_surrogate(unicode_t u); +unsigned short exfat_low_surrogate(unicode_t u); + +/* exfat/misc.c */ +void __exfat_fs_error(struct super_block *sb, int report, const char *fmt, ...) + __printf(3, 4) __cold; +#define exfat_fs_error(sb, fmt, args...) \ + __exfat_fs_error(sb, 1, fmt, ## args) +#define exfat_fs_error_ratelimit(sb, fmt, args...) \ + __exfat_fs_error(sb, __ratelimit(&EXFAT_SB(sb)->ratelimit), \ + fmt, ## args) +void exfat_msg(struct super_block *sb, const char *lv, const char *fmt, ...) + __printf(3, 4) __cold; +void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, + u8 tz, __le16 time, __le16 date, u8 time_ms); +void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, + u8 *tz, __le16 *time, __le16 *date, u8 *time_ms); +unsigned short exfat_calc_chksum_2byte(void *data, int len, + unsigned short chksum, int type); +void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync); +void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, + unsigned int size, unsigned char flags); +void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec); + +#endif /* !_EXFAT_FS_H */ diff --git a/fs/exfat/exfat_raw.h b/fs/exfat/exfat_raw.h new file mode 100644 index 000000000000..2a841010e649 --- /dev/null +++ b/fs/exfat/exfat_raw.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#ifndef _EXFAT_RAW_H +#define _EXFAT_RAW_H + +#include <linux/types.h> + +#define PBR_SIGNATURE 0xAA55 + +#define EXFAT_MAX_FILE_LEN 255 + +#define VOL_CLEAN 0x0000 +#define VOL_DIRTY 0x0002 + +#define EXFAT_EOF_CLUSTER 0xFFFFFFFFu +#define EXFAT_BAD_CLUSTER 0xFFFFFFF7u +#define EXFAT_FREE_CLUSTER 0 +/* Cluster 0, 1 are reserved, the first cluster is 2 in the cluster heap. */ +#define EXFAT_RESERVED_CLUSTERS 2 +#define EXFAT_FIRST_CLUSTER 2 +#define EXFAT_DATA_CLUSTER_COUNT(sbi) \ + ((sbi)->num_clusters - EXFAT_RESERVED_CLUSTERS) + +/* AllocationPossible and NoFatChain field in GeneralSecondaryFlags Field */ +#define ALLOC_FAT_CHAIN 0x01 +#define ALLOC_NO_FAT_CHAIN 0x03 + +#define DENTRY_SIZE 32 /* directory entry size */ +#define DENTRY_SIZE_BITS 5 +/* exFAT allows 8388608(256MB) directory entries */ +#define MAX_EXFAT_DENTRIES 8388608 + +/* dentry types */ +#define EXFAT_UNUSED 0x00 /* end of directory */ +#define EXFAT_DELETE (~0x80) +#define IS_EXFAT_DELETED(x) ((x) < 0x80) /* deleted file (0x01~0x7F) */ +#define EXFAT_INVAL 0x80 /* invalid value */ +#define EXFAT_BITMAP 0x81 /* allocation bitmap */ +#define EXFAT_UPCASE 0x82 /* upcase table */ +#define EXFAT_VOLUME 0x83 /* volume label */ +#define EXFAT_FILE 0x85 /* file or dir */ +#define EXFAT_GUID 0xA0 +#define EXFAT_PADDING 0xA1 +#define EXFAT_ACLTAB 0xA2 +#define EXFAT_STREAM 0xC0 /* stream entry */ +#define EXFAT_NAME 0xC1 /* file name entry */ +#define EXFAT_ACL 0xC2 /* stream entry */ + +#define IS_EXFAT_CRITICAL_PRI(x) (x < 0xA0) +#define IS_EXFAT_BENIGN_PRI(x) (x < 0xC0) +#define IS_EXFAT_CRITICAL_SEC(x) (x < 0xE0) + +/* checksum types */ +#define CS_DIR_ENTRY 0 +#define CS_PBR_SECTOR 1 +#define CS_DEFAULT 2 + +/* file attributes */ +#define ATTR_READONLY 0x0001 +#define ATTR_HIDDEN 0x0002 +#define ATTR_SYSTEM 0x0004 +#define ATTR_VOLUME 0x0008 +#define ATTR_SUBDIR 0x0010 +#define ATTR_ARCHIVE 0x0020 + +#define ATTR_RWMASK (ATTR_HIDDEN | ATTR_SYSTEM | ATTR_VOLUME | \ + ATTR_SUBDIR | ATTR_ARCHIVE) + +#define PBR64_JUMP_BOOT_LEN 3 +#define PBR64_OEM_NAME_LEN 8 +#define PBR64_RESERVED_LEN 53 + +#define EXFAT_FILE_NAME_LEN 15 + +/* EXFAT BIOS parameter block (64 bytes) */ +struct bpb64 { + __u8 jmp_boot[PBR64_JUMP_BOOT_LEN]; + __u8 oem_name[PBR64_OEM_NAME_LEN]; + __u8 res_zero[PBR64_RESERVED_LEN]; +} __packed; + +/* EXFAT EXTEND BIOS parameter block (56 bytes) */ +struct bsx64 { + __le64 vol_offset; + __le64 vol_length; + __le32 fat_offset; + __le32 fat_length; + __le32 clu_offset; + __le32 clu_count; + __le32 root_cluster; + __le32 vol_serial; + __u8 fs_version[2]; + __le16 vol_flags; + __u8 sect_size_bits; + __u8 sect_per_clus_bits; + __u8 num_fats; + __u8 phy_drv_no; + __u8 perc_in_use; + __u8 reserved2[7]; +} __packed; + +/* EXFAT PBR[BPB+BSX] (120 bytes) */ +struct pbr64 { + struct bpb64 bpb; + struct bsx64 bsx; +} __packed; + +/* Common PBR[Partition Boot Record] (512 bytes) */ +struct pbr { + union { + __u8 raw[64]; + struct bpb64 f64; + } bpb; + union { + __u8 raw[56]; + struct bsx64 f64; + } bsx; + __u8 boot_code[390]; + __le16 signature; +} __packed; + +struct exfat_dentry { + __u8 type; + union { + struct { + __u8 num_ext; + __le16 checksum; + __le16 attr; + __le16 reserved1; + __le16 create_time; + __le16 create_date; + __le16 modify_time; + __le16 modify_date; + __le16 access_time; + __le16 access_date; + __u8 create_time_ms; + __u8 modify_time_ms; + __u8 create_tz; + __u8 modify_tz; + __u8 access_tz; + __u8 reserved2[7]; + } __packed file; /* file directory entry */ + struct { + __u8 flags; + __u8 reserved1; + __u8 name_len; + __le16 name_hash; + __le16 reserved2; + __le64 valid_size; + __le32 reserved3; + __le32 start_clu; + __le64 size; + } __packed stream; /* stream extension directory entry */ + struct { + __u8 flags; + __le16 unicode_0_14[EXFAT_FILE_NAME_LEN]; + } __packed name; /* file name directory entry */ + struct { + __u8 flags; + __u8 reserved[18]; + __le32 start_clu; + __le64 size; + } __packed bitmap; /* allocation bitmap directory entry */ + struct { + __u8 reserved1[3]; + __le32 checksum; + __u8 reserved2[12]; + __le32 start_clu; + __le64 size; + } __packed upcase; /* up-case table directory entry */ + } __packed dentry; +} __packed; + +#define EXFAT_TZ_VALID (1 << 7) + +/* Jan 1 GMT 00:00:00 1980 */ +#define EXFAT_MIN_TIMESTAMP_SECS 315532800LL +/* Dec 31 GMT 23:59:59 2107 */ +#define EXFAT_MAX_TIMESTAMP_SECS 4354819199LL + +#endif /* !_EXFAT_RAW_H */ diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c new file mode 100644 index 000000000000..a855b1769a96 --- /dev/null +++ b/fs/exfat/fatent.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/slab.h> +#include <asm/unaligned.h> +#include <linux/buffer_head.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static int exfat_mirror_bh(struct super_block *sb, sector_t sec, + struct buffer_head *bh) +{ + struct buffer_head *c_bh; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + sector_t sec2; + int err = 0; + + if (sbi->FAT2_start_sector != sbi->FAT1_start_sector) { + sec2 = sec - sbi->FAT1_start_sector + sbi->FAT2_start_sector; + c_bh = sb_getblk(sb, sec2); + if (!c_bh) + return -ENOMEM; + memcpy(c_bh->b_data, bh->b_data, sb->s_blocksize); + set_buffer_uptodate(c_bh); + mark_buffer_dirty(c_bh); + if (sb->s_flags & SB_SYNCHRONOUS) + err = sync_dirty_buffer(c_bh); + brelse(c_bh); + } + + return err; +} + +static int __exfat_ent_get(struct super_block *sb, unsigned int loc, + unsigned int *content) +{ + unsigned int off; + sector_t sec; + struct buffer_head *bh; + + sec = FAT_ENT_OFFSET_SECTOR(sb, loc); + off = FAT_ENT_OFFSET_BYTE_IN_SECTOR(sb, loc); + + bh = sb_bread(sb, sec); + if (!bh) + return -EIO; + + *content = le32_to_cpu(*(__le32 *)(&bh->b_data[off])); + + /* remap reserved clusters to simplify code */ + if (*content > EXFAT_BAD_CLUSTER) + *content = EXFAT_EOF_CLUSTER; + + brelse(bh); + return 0; +} + +int exfat_ent_set(struct super_block *sb, unsigned int loc, + unsigned int content) +{ + unsigned int off; + sector_t sec; + __le32 *fat_entry; + struct buffer_head *bh; + + sec = FAT_ENT_OFFSET_SECTOR(sb, loc); + off = FAT_ENT_OFFSET_BYTE_IN_SECTOR(sb, loc); + + bh = sb_bread(sb, sec); + if (!bh) + return -EIO; + + fat_entry = (__le32 *)&(bh->b_data[off]); + *fat_entry = cpu_to_le32(content); + exfat_update_bh(sb, bh, sb->s_flags & SB_SYNCHRONOUS); + exfat_mirror_bh(sb, sec, bh); + brelse(bh); + return 0; +} + +static inline bool is_valid_cluster(struct exfat_sb_info *sbi, + unsigned int clus) +{ + if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus) + return false; + return true; +} + +int exfat_ent_get(struct super_block *sb, unsigned int loc, + unsigned int *content) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + int err; + + if (!is_valid_cluster(sbi, loc)) { + exfat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", + loc); + return -EIO; + } + + err = __exfat_ent_get(sb, loc, content); + if (err) { + exfat_fs_error(sb, + "failed to access to FAT (entry 0x%08x, err:%d)", + loc, err); + return err; + } + + if (*content == EXFAT_FREE_CLUSTER) { + exfat_fs_error(sb, + "invalid access to FAT free cluster (entry 0x%08x)", + loc); + return -EIO; + } + + if (*content == EXFAT_BAD_CLUSTER) { + exfat_fs_error(sb, + "invalid access to FAT bad cluster (entry 0x%08x)", + loc); + return -EIO; + } + + if (*content != EXFAT_EOF_CLUSTER && !is_valid_cluster(sbi, *content)) { + exfat_fs_error(sb, + "invalid access to FAT (entry 0x%08x) bogus content (0x%08x)", + loc, *content); + return -EIO; + } + + return 0; +} + +int exfat_chain_cont_cluster(struct super_block *sb, unsigned int chain, + unsigned int len) +{ + if (!len) + return 0; + + while (len > 1) { + if (exfat_ent_set(sb, chain, chain + 1)) + return -EIO; + chain++; + len--; + } + + if (exfat_ent_set(sb, chain, EXFAT_EOF_CLUSTER)) + return -EIO; + return 0; +} + +int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain) +{ + unsigned int num_clusters = 0; + unsigned int clu; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + /* invalid cluster number */ + if (p_chain->dir == EXFAT_FREE_CLUSTER || + p_chain->dir == EXFAT_EOF_CLUSTER || + p_chain->dir < EXFAT_FIRST_CLUSTER) + return 0; + + /* no cluster to truncate */ + if (p_chain->size == 0) + return 0; + + /* check cluster validation */ + if (p_chain->dir < 2 && p_chain->dir >= sbi->num_clusters) { + exfat_msg(sb, KERN_ERR, "invalid start cluster (%u)", + p_chain->dir); + return -EIO; + } + + set_bit(EXFAT_SB_DIRTY, &sbi->s_state); + clu = p_chain->dir; + + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + do { + exfat_clear_bitmap(inode, clu); + clu++; + + num_clusters++; + } while (num_clusters < p_chain->size); + } else { + do { + exfat_clear_bitmap(inode, clu); + + if (exfat_get_next_cluster(sb, &clu)) + goto dec_used_clus; + + num_clusters++; + } while (clu != EXFAT_EOF_CLUSTER); + } + +dec_used_clus: + sbi->used_clusters -= num_clusters; + return 0; +} + +int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain, + unsigned int *ret_clu) +{ + unsigned int clu, next; + unsigned int count = 0; + + next = p_chain->dir; + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + *ret_clu = next + p_chain->size - 1; + return 0; + } + + do { + count++; + clu = next; + if (exfat_ent_get(sb, clu, &next)) + return -EIO; + } while (next != EXFAT_EOF_CLUSTER); + + if (p_chain->size != count) { + exfat_fs_error(sb, + "bogus directory size (clus : ondisk(%d) != counted(%d))", + p_chain->size, count); + return -EIO; + } + + *ret_clu = clu; + return 0; +} + +static inline int exfat_sync_bhs(struct buffer_head **bhs, int nr_bhs) +{ + int i, err = 0; + + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], 0); + + for (i = 0; i < nr_bhs; i++) { + wait_on_buffer(bhs[i]); + if (!err && !buffer_uptodate(bhs[i])) + err = -EIO; + } + return err; +} + +int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) +{ + struct super_block *sb = dir->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bhs[MAX_BUF_PER_PAGE]; + int nr_bhs = MAX_BUF_PER_PAGE; + sector_t blknr, last_blknr; + int err, i, n; + + blknr = exfat_cluster_to_sector(sbi, clu); + last_blknr = blknr + sbi->sect_per_clus; + + if (last_blknr > sbi->num_sectors && sbi->num_sectors > 0) { + exfat_fs_error_ratelimit(sb, + "%s: out of range(sect:%llu len:%u)", + __func__, (unsigned long long)blknr, + sbi->sect_per_clus); + return -EIO; + } + + /* Zeroing the unused blocks on this cluster */ + n = 0; + while (blknr < last_blknr) { + bhs[n] = sb_getblk(sb, blknr); + if (!bhs[n]) { + err = -ENOMEM; + goto release_bhs; + } + memset(bhs[n]->b_data, 0, sb->s_blocksize); + exfat_update_bh(sb, bhs[n], 0); + + n++; + blknr++; + + if (n == nr_bhs) { + if (IS_DIRSYNC(dir)) { + err = exfat_sync_bhs(bhs, n); + if (err) + goto release_bhs; + } + + for (i = 0; i < n; i++) + brelse(bhs[i]); + n = 0; + } + } + + if (IS_DIRSYNC(dir)) { + err = exfat_sync_bhs(bhs, n); + if (err) + goto release_bhs; + } + + for (i = 0; i < n; i++) + brelse(bhs[i]); + + return 0; + +release_bhs: + exfat_msg(sb, KERN_ERR, "failed zeroed sect %llu\n", + (unsigned long long)blknr); + for (i = 0; i < n; i++) + bforget(bhs[i]); + return err; +} + +int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, + struct exfat_chain *p_chain) +{ + int ret = -ENOSPC; + unsigned int num_clusters = 0, total_cnt; + unsigned int hint_clu, new_clu, last_clu = EXFAT_EOF_CLUSTER; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + total_cnt = EXFAT_DATA_CLUSTER_COUNT(sbi); + + if (unlikely(total_cnt < sbi->used_clusters)) { + exfat_fs_error_ratelimit(sb, + "%s: invalid used clusters(t:%u,u:%u)\n", + __func__, total_cnt, sbi->used_clusters); + return -EIO; + } + + if (num_alloc > total_cnt - sbi->used_clusters) + return -ENOSPC; + + hint_clu = p_chain->dir; + /* find new cluster */ + if (hint_clu == EXFAT_EOF_CLUSTER) { + if (sbi->clu_srch_ptr < EXFAT_FIRST_CLUSTER) { + exfat_msg(sb, KERN_ERR, + "sbi->clu_srch_ptr is invalid (%u)\n", + sbi->clu_srch_ptr); + sbi->clu_srch_ptr = EXFAT_FIRST_CLUSTER; + } + + hint_clu = exfat_find_free_bitmap(sb, sbi->clu_srch_ptr); + if (hint_clu == EXFAT_EOF_CLUSTER) + return -ENOSPC; + } + + /* check cluster validation */ + if (hint_clu < EXFAT_FIRST_CLUSTER && hint_clu >= sbi->num_clusters) { + exfat_msg(sb, KERN_ERR, "hint_cluster is invalid (%u)\n", + hint_clu); + hint_clu = EXFAT_FIRST_CLUSTER; + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, + num_clusters)) + return -EIO; + p_chain->flags = ALLOC_FAT_CHAIN; + } + } + + set_bit(EXFAT_SB_DIRTY, &sbi->s_state); + + p_chain->dir = EXFAT_EOF_CLUSTER; + + while ((new_clu = exfat_find_free_bitmap(sb, hint_clu)) != + EXFAT_EOF_CLUSTER) { + if (new_clu != hint_clu && + p_chain->flags == ALLOC_NO_FAT_CHAIN) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, + num_clusters)) { + ret = -EIO; + goto free_cluster; + } + p_chain->flags = ALLOC_FAT_CHAIN; + } + + /* update allocation bitmap */ + if (exfat_set_bitmap(inode, new_clu)) { + ret = -EIO; + goto free_cluster; + } + + num_clusters++; + + /* update FAT table */ + if (p_chain->flags == ALLOC_FAT_CHAIN) { + if (exfat_ent_set(sb, new_clu, EXFAT_EOF_CLUSTER)) { + ret = -EIO; + goto free_cluster; + } + } + + if (p_chain->dir == EXFAT_EOF_CLUSTER) { + p_chain->dir = new_clu; + } else if (p_chain->flags == ALLOC_FAT_CHAIN) { + if (exfat_ent_set(sb, last_clu, new_clu)) { + ret = -EIO; + goto free_cluster; + } + } + last_clu = new_clu; + + if (--num_alloc == 0) { + sbi->clu_srch_ptr = hint_clu; + sbi->used_clusters += num_clusters; + + p_chain->size += num_clusters; + return 0; + } + + hint_clu = new_clu + 1; + if (hint_clu >= sbi->num_clusters) { + hint_clu = EXFAT_FIRST_CLUSTER; + + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, + num_clusters)) { + ret = -EIO; + goto free_cluster; + } + p_chain->flags = ALLOC_FAT_CHAIN; + } + } + } +free_cluster: + if (num_clusters) + exfat_free_cluster(inode, p_chain); + return ret; +} + +int exfat_count_num_clusters(struct super_block *sb, + struct exfat_chain *p_chain, unsigned int *ret_count) +{ + unsigned int i, count; + unsigned int clu; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + if (!p_chain->dir || p_chain->dir == EXFAT_EOF_CLUSTER) { + *ret_count = 0; + return 0; + } + + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + *ret_count = p_chain->size; + return 0; + } + + clu = p_chain->dir; + count = 0; + for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters; i++) { + count++; + if (exfat_ent_get(sb, clu, &clu)) + return -EIO; + if (clu == EXFAT_EOF_CLUSTER) + break; + } + + *ret_count = count; + return 0; +} diff --git a/fs/exfat/file.c b/fs/exfat/file.c new file mode 100644 index 000000000000..483f683757aa --- /dev/null +++ b/fs/exfat/file.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/slab.h> +#include <linux/cred.h> +#include <linux/buffer_head.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static int exfat_cont_expand(struct inode *inode, loff_t size) +{ + struct address_space *mapping = inode->i_mapping; + loff_t start = i_size_read(inode), count = size - i_size_read(inode); + int err, err2; + + err = generic_cont_expand_simple(inode, size); + if (err) + return err; + + inode->i_ctime = inode->i_mtime = current_time(inode); + mark_inode_dirty(inode); + + if (!IS_SYNC(inode)) + return 0; + + err = filemap_fdatawrite_range(mapping, start, start + count - 1); + err2 = sync_mapping_buffers(mapping); + if (!err) + err = err2; + err2 = write_inode_now(inode, 1); + if (!err) + err = err2; + if (err) + return err; + + return filemap_fdatawait_range(mapping, start, start + count - 1); +} + +static bool exfat_allow_set_time(struct exfat_sb_info *sbi, struct inode *inode) +{ + mode_t allow_utime = sbi->options.allow_utime; + + if (!uid_eq(current_fsuid(), inode->i_uid)) { + if (in_group_p(inode->i_gid)) + allow_utime >>= 3; + if (allow_utime & MAY_WRITE) + return true; + } + + /* use a default check */ + return false; +} + +static int exfat_sanitize_mode(const struct exfat_sb_info *sbi, + struct inode *inode, umode_t *mode_ptr) +{ + mode_t i_mode, mask, perm; + + i_mode = inode->i_mode; + + mask = (S_ISREG(i_mode) || S_ISLNK(i_mode)) ? + sbi->options.fs_fmask : sbi->options.fs_dmask; + perm = *mode_ptr & ~(S_IFMT | mask); + + /* Of the r and x bits, all (subject to umask) must be present.*/ + if ((perm & 0555) != (i_mode & 0555)) + return -EPERM; + + if (exfat_mode_can_hold_ro(inode)) { + /* + * Of the w bits, either all (subject to umask) or none must + * be present. + */ + if ((perm & 0222) && ((perm & 0222) != (0222 & ~mask))) + return -EPERM; + } else { + /* + * If exfat_mode_can_hold_ro(inode) is false, can't change + * w bits. + */ + if ((perm & 0222) != (0222 & ~mask)) + return -EPERM; + } + + *mode_ptr &= S_IFMT | perm; + + return 0; +} + +/* resize the file length */ +int __exfat_truncate(struct inode *inode, loff_t new_size) +{ + unsigned int num_clusters_new, num_clusters_phys; + unsigned int last_clu = EXFAT_FREE_CLUSTER; + struct exfat_chain clu; + struct exfat_dentry *ep, *ep2; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_entry_set_cache *es = NULL; + int evict = (ei->dir.dir == DIR_DELETED) ? 1 : 0; + + /* check if the given file ID is opened */ + if (ei->type != TYPE_FILE && ei->type != TYPE_DIR) + return -EPERM; + + exfat_set_vol_flags(sb, VOL_DIRTY); + + num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); + num_clusters_phys = + EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi); + + exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); + + if (new_size > 0) { + /* + * Truncate FAT chain num_clusters after the first cluster + * num_clusters = min(new, phys); + */ + unsigned int num_clusters = + min(num_clusters_new, num_clusters_phys); + + /* + * Follow FAT chain + * (defensive coding - works fine even with corrupted FAT table + */ + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + clu.dir += num_clusters; + clu.size -= num_clusters; + } else { + while (num_clusters > 0) { + last_clu = clu.dir; + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + + num_clusters--; + clu.size--; + } + } + } else { + ei->flags = ALLOC_NO_FAT_CHAIN; + ei->start_clu = EXFAT_EOF_CLUSTER; + } + + i_size_write(inode, new_size); + + if (ei->type == TYPE_FILE) + ei->attr |= ATTR_ARCHIVE; + + /* update the directory entry */ + if (!evict) { + struct timespec64 ts; + + es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, + ES_ALL_ENTRIES, &ep); + if (!es) + return -EIO; + ep2 = ep + 1; + + ts = current_time(inode); + exfat_set_entry_time(sbi, &ts, + &ep->dentry.file.modify_tz, + &ep->dentry.file.modify_time, + &ep->dentry.file.modify_date, + &ep->dentry.file.modify_time_ms); + ep->dentry.file.attr = cpu_to_le16(ei->attr); + + /* File size should be zero if there is no cluster allocated */ + if (ei->start_clu == EXFAT_EOF_CLUSTER) { + ep->dentry.stream.valid_size = 0; + ep->dentry.stream.size = 0; + } else { + ep->dentry.stream.valid_size = cpu_to_le64(new_size); + ep->dentry.stream.size = ep->dentry.stream.valid_size; + } + + if (new_size == 0) { + /* Any directory can not be truncated to zero */ + WARN_ON(ei->type != TYPE_FILE); + + ep2->dentry.stream.flags = ALLOC_FAT_CHAIN; + ep2->dentry.stream.start_clu = EXFAT_FREE_CLUSTER; + } + + if (exfat_update_dir_chksum_with_entry_set(sb, es, + inode_needs_sync(inode))) + return -EIO; + kfree(es); + } + + /* cut off from the FAT chain */ + if (ei->flags == ALLOC_FAT_CHAIN && last_clu != EXFAT_FREE_CLUSTER && + last_clu != EXFAT_EOF_CLUSTER) { + if (exfat_ent_set(sb, last_clu, EXFAT_EOF_CLUSTER)) + return -EIO; + } + + /* invalidate cache and free the clusters */ + /* clear exfat cache */ + exfat_cache_inval_inode(inode); + + /* hint information */ + ei->hint_bmap.off = EXFAT_EOF_CLUSTER; + ei->hint_bmap.clu = EXFAT_EOF_CLUSTER; + if (ei->rwoffset > new_size) + ei->rwoffset = new_size; + + /* hint_stat will be used if this is directory. */ + ei->hint_stat.eidx = 0; + ei->hint_stat.clu = ei->start_clu; + ei->hint_femp.eidx = EXFAT_HINT_NONE; + + /* free the clusters */ + if (exfat_free_cluster(inode, &clu)) + return -EIO; + + exfat_set_vol_flags(sb, VOL_CLEAN); + + return 0; +} + +void exfat_truncate(struct inode *inode, loff_t size) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int blocksize = 1 << inode->i_blkbits; + loff_t aligned_size; + int err; + + mutex_lock(&sbi->s_lock); + if (EXFAT_I(inode)->start_clu == 0) { + /* + * Empty start_clu != ~0 (not allocated) + */ + exfat_fs_error(sb, "tried to truncate zeroed cluster."); + goto write_size; + } + + err = __exfat_truncate(inode, i_size_read(inode)); + if (err) + goto write_size; + + inode->i_ctime = inode->i_mtime = current_time(inode); + if (IS_DIRSYNC(inode)) + exfat_sync_inode(inode); + else + mark_inode_dirty(inode); + + inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & + ~(sbi->cluster_size - 1)) >> inode->i_blkbits; +write_size: + aligned_size = i_size_read(inode); + if (aligned_size & (blocksize - 1)) { + aligned_size |= (blocksize - 1); + aligned_size++; + } + + if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode)) + EXFAT_I(inode)->i_size_ondisk = aligned_size; + + if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode)) + EXFAT_I(inode)->i_size_aligned = aligned_size; + mutex_unlock(&sbi->s_lock); +} + +int exfat_getattr(const struct path *path, struct kstat *stat, + unsigned int request_mask, unsigned int query_flags) +{ + struct inode *inode = d_backing_inode(path->dentry); + struct exfat_inode_info *ei = EXFAT_I(inode); + + generic_fillattr(inode, stat); + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = ei->i_crtime.tv_sec; + stat->btime.tv_nsec = ei->i_crtime.tv_nsec; + stat->blksize = EXFAT_SB(inode->i_sb)->cluster_size; + return 0; +} + +int exfat_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + unsigned int ia_valid; + int error; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size > i_size_read(inode)) { + error = exfat_cont_expand(inode, attr->ia_size); + if (error || attr->ia_valid == ATTR_SIZE) + return error; + attr->ia_valid &= ~ATTR_SIZE; + } + + /* Check for setting the inode time. */ + ia_valid = attr->ia_valid; + if ((ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) && + exfat_allow_set_time(sbi, inode)) { + attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET | + ATTR_TIMES_SET); + } + + error = setattr_prepare(dentry, attr); + attr->ia_valid = ia_valid; + if (error) + goto out; + + if (((attr->ia_valid & ATTR_UID) && + !uid_eq(attr->ia_uid, sbi->options.fs_uid)) || + ((attr->ia_valid & ATTR_GID) && + !gid_eq(attr->ia_gid, sbi->options.fs_gid)) || + ((attr->ia_valid & ATTR_MODE) && + (attr->ia_mode & ~(S_IFREG | S_IFLNK | S_IFDIR | 0777)))) { + error = -EPERM; + goto out; + } + + /* + * We don't return -EPERM here. Yes, strange, but this is too + * old behavior. + */ + if (attr->ia_valid & ATTR_MODE) { + if (exfat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0) + attr->ia_valid &= ~ATTR_MODE; + } + + if (attr->ia_valid & ATTR_SIZE) { + error = exfat_block_truncate_page(inode, attr->ia_size); + if (error) + goto out; + + down_write(&EXFAT_I(inode)->truncate_lock); + truncate_setsize(inode, attr->ia_size); + exfat_truncate(inode, attr->ia_size); + up_write(&EXFAT_I(inode)->truncate_lock); + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + +out: + return error; +} + +const struct file_operations exfat_file_operations = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .mmap = generic_file_mmap, + .fsync = generic_file_fsync, + .splice_read = generic_file_splice_read, +}; + +const struct inode_operations exfat_file_inode_operations = { + .setattr = exfat_setattr, + .getattr = exfat_getattr, +}; diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c new file mode 100644 index 000000000000..06887492f54b --- /dev/null +++ b/fs/exfat/inode.c @@ -0,0 +1,671 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/init.h> +#include <linux/buffer_head.h> +#include <linux/mpage.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/time.h> +#include <linux/writeback.h> +#include <linux/uio.h> +#include <linux/random.h> +#include <linux/iversion.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static int __exfat_write_inode(struct inode *inode, int sync) +{ + int ret = -EIO; + unsigned long long on_disk_size; + struct exfat_dentry *ep, *ep2; + struct exfat_entry_set_cache *es = NULL; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + bool is_dir = (ei->type == TYPE_DIR) ? true : false; + + if (inode->i_ino == EXFAT_ROOT_INO) + return 0; + + /* + * If the indode is already unlinked, there is no need for updating it. + */ + if (ei->dir.dir == DIR_DELETED) + return 0; + + if (is_dir && ei->dir.dir == sbi->root_dir && ei->entry == -1) + return 0; + + exfat_set_vol_flags(sb, VOL_DIRTY); + + /* get the directory entry of given file or directory */ + es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES, + &ep); + if (!es) + return -EIO; + ep2 = ep + 1; + + ep->dentry.file.attr = cpu_to_le16(exfat_make_attr(inode)); + + /* set FILE_INFO structure using the acquired struct exfat_dentry */ + exfat_set_entry_time(sbi, &ei->i_crtime, + &ep->dentry.file.create_tz, + &ep->dentry.file.create_time, + &ep->dentry.file.create_date, + &ep->dentry.file.create_time_ms); + exfat_set_entry_time(sbi, &inode->i_mtime, + &ep->dentry.file.modify_tz, + &ep->dentry.file.modify_time, + &ep->dentry.file.modify_date, + &ep->dentry.file.modify_time_ms); + exfat_set_entry_time(sbi, &inode->i_atime, + &ep->dentry.file.access_tz, + &ep->dentry.file.access_time, + &ep->dentry.file.access_date, + NULL); + + /* File size should be zero if there is no cluster allocated */ + on_disk_size = i_size_read(inode); + + if (ei->start_clu == EXFAT_EOF_CLUSTER) + on_disk_size = 0; + + ep2->dentry.stream.valid_size = cpu_to_le64(on_disk_size); + ep2->dentry.stream.size = ep2->dentry.stream.valid_size; + + ret = exfat_update_dir_chksum_with_entry_set(sb, es, sync); + kfree(es); + return ret; +} + +int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int ret; + + mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); + ret = __exfat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); + mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); + + return ret; +} + +void exfat_sync_inode(struct inode *inode) +{ + lockdep_assert_held(&EXFAT_SB(inode->i_sb)->s_lock); + __exfat_write_inode(inode, 1); +} + +/* + * Input: inode, (logical) clu_offset, target allocation area + * Output: errcode, cluster number + * *clu = (~0), if it's unable to allocate a new cluster + */ +static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, + unsigned int *clu, int create) +{ + int ret, modified = false; + unsigned int last_clu; + struct exfat_chain new_clu; + struct exfat_dentry *ep; + struct exfat_entry_set_cache *es = NULL; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + unsigned int local_clu_offset = clu_offset; + unsigned int num_to_be_allocated = 0, num_clusters = 0; + + ei->rwoffset = EXFAT_CLU_TO_B(clu_offset, sbi); + + if (EXFAT_I(inode)->i_size_ondisk > 0) + num_clusters = + EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, + sbi); + + if (clu_offset >= num_clusters) + num_to_be_allocated = clu_offset - num_clusters + 1; + + if (!create && (num_to_be_allocated > 0)) { + *clu = EXFAT_EOF_CLUSTER; + return 0; + } + + *clu = last_clu = ei->start_clu; + + if (ei->flags == ALLOC_NO_FAT_CHAIN) { + if (clu_offset > 0 && *clu != EXFAT_EOF_CLUSTER) { + last_clu += clu_offset - 1; + + if (clu_offset == num_clusters) + *clu = EXFAT_EOF_CLUSTER; + else + *clu += clu_offset; + } + } else if (ei->type == TYPE_FILE) { + unsigned int fclus = 0; + int err = exfat_get_cluster(inode, clu_offset, + &fclus, clu, &last_clu, 1); + if (err) + return -EIO; + + clu_offset -= fclus; + } else { + /* hint information */ + if (clu_offset > 0 && ei->hint_bmap.off != EXFAT_EOF_CLUSTER && + ei->hint_bmap.off > 0 && clu_offset >= ei->hint_bmap.off) { + clu_offset -= ei->hint_bmap.off; + /* hint_bmap.clu should be valid */ + WARN_ON(ei->hint_bmap.clu < 2); + *clu = ei->hint_bmap.clu; + } + + while (clu_offset > 0 && *clu != EXFAT_EOF_CLUSTER) { + last_clu = *clu; + if (exfat_get_next_cluster(sb, clu)) + return -EIO; + clu_offset--; + } + } + + if (*clu == EXFAT_EOF_CLUSTER) { + exfat_set_vol_flags(sb, VOL_DIRTY); + + new_clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ? + EXFAT_EOF_CLUSTER : last_clu + 1; + new_clu.size = 0; + new_clu.flags = ei->flags; + + /* allocate a cluster */ + if (num_to_be_allocated < 1) { + /* Broken FAT (i_sze > allocated FAT) */ + exfat_fs_error(sb, "broken FAT chain."); + return -EIO; + } + + ret = exfat_alloc_cluster(inode, num_to_be_allocated, &new_clu); + if (ret) + return ret; + + if (new_clu.dir == EXFAT_EOF_CLUSTER || + new_clu.dir == EXFAT_FREE_CLUSTER) { + exfat_fs_error(sb, + "bogus cluster new allocated (last_clu : %u, new_clu : %u)", + last_clu, new_clu.dir); + return -EIO; + } + + /* append to the FAT chain */ + if (last_clu == EXFAT_EOF_CLUSTER) { + if (new_clu.flags == ALLOC_FAT_CHAIN) + ei->flags = ALLOC_FAT_CHAIN; + ei->start_clu = new_clu.dir; + modified = true; + } else { + if (new_clu.flags != ei->flags) { + /* no-fat-chain bit is disabled, + * so fat-chain should be synced with + * alloc-bitmap + */ + exfat_chain_cont_cluster(sb, ei->start_clu, + num_clusters); + ei->flags = ALLOC_FAT_CHAIN; + modified = true; + } + if (new_clu.flags == ALLOC_FAT_CHAIN) + if (exfat_ent_set(sb, last_clu, new_clu.dir)) + return -EIO; + } + + num_clusters += num_to_be_allocated; + *clu = new_clu.dir; + + if (ei->dir.dir != DIR_DELETED) { + es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, + ES_ALL_ENTRIES, &ep); + if (!es) + return -EIO; + /* get stream entry */ + ep++; + + /* update directory entry */ + if (modified) { + if (ep->dentry.stream.flags != ei->flags) + ep->dentry.stream.flags = ei->flags; + + if (le32_to_cpu(ep->dentry.stream.start_clu) != + ei->start_clu) + ep->dentry.stream.start_clu = + cpu_to_le32(ei->start_clu); + + ep->dentry.stream.valid_size = + cpu_to_le64(i_size_read(inode)); + ep->dentry.stream.size = + ep->dentry.stream.valid_size; + } + + if (exfat_update_dir_chksum_with_entry_set(sb, es, + inode_needs_sync(inode))) + return -EIO; + kfree(es); + + } /* end of if != DIR_DELETED */ + + inode->i_blocks += + num_to_be_allocated << sbi->sect_per_clus_bits; + + /* + * Move *clu pointer along FAT chains (hole care) because the + * caller of this function expect *clu to be the last cluster. + * This only works when num_to_be_allocated >= 2, + * *clu = (the first cluster of the allocated chain) => + * (the last cluster of ...) + */ + if (ei->flags == ALLOC_NO_FAT_CHAIN) { + *clu += num_to_be_allocated - 1; + } else { + while (num_to_be_allocated > 1) { + if (exfat_get_next_cluster(sb, clu)) + return -EIO; + num_to_be_allocated--; + } + } + + } + + /* hint information */ + ei->hint_bmap.off = local_clu_offset; + ei->hint_bmap.clu = *clu; + + return 0; +} + +static int exfat_map_new_buffer(struct exfat_inode_info *ei, + struct buffer_head *bh, loff_t pos) +{ + if (buffer_delay(bh) && pos > ei->i_size_aligned) + return -EIO; + set_buffer_new(bh); + + /* + * Adjust i_size_aligned if i_size_ondisk is bigger than it. + */ + if (ei->i_size_ondisk > ei->i_size_aligned) + ei->i_size_aligned = ei->i_size_ondisk; + return 0; +} + +static int exfat_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; + int err = 0; + unsigned long mapped_blocks = 0; + unsigned int cluster, sec_offset; + sector_t last_block; + sector_t phys = 0; + loff_t pos; + + mutex_lock(&sbi->s_lock); + last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size_read(inode), sb); + if (iblock >= last_block && !create) + goto done; + + /* Is this block already allocated? */ + err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits, + &cluster, create); + if (err) { + if (err != -ENOSPC) + exfat_fs_error_ratelimit(sb, + "failed to bmap (inode : %p iblock : %llu, err : %d)", + inode, (unsigned long long)iblock, err); + goto unlock_ret; + } + + if (cluster == EXFAT_EOF_CLUSTER) + goto done; + + /* sector offset in cluster */ + sec_offset = iblock & (sbi->sect_per_clus - 1); + + phys = exfat_cluster_to_sector(sbi, cluster) + sec_offset; + mapped_blocks = sbi->sect_per_clus - sec_offset; + max_blocks = min(mapped_blocks, max_blocks); + + /* Treat newly added block / cluster */ + if (iblock < last_block) + create = 0; + + if (create || buffer_delay(bh_result)) { + pos = EXFAT_BLK_TO_B((iblock + 1), sb); + if (ei->i_size_ondisk < pos) + ei->i_size_ondisk = pos; + } + + if (create) { + err = exfat_map_new_buffer(ei, bh_result, pos); + if (err) { + exfat_fs_error(sb, + "requested for bmap out of range(pos : (%llu) > i_size_aligned(%llu)\n", + pos, ei->i_size_aligned); + goto unlock_ret; + } + } + + if (buffer_delay(bh_result)) + clear_buffer_delay(bh_result); + map_bh(bh_result, sb, phys); +done: + bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb); +unlock_ret: + mutex_unlock(&sbi->s_lock); + return err; +} + +static int exfat_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, exfat_get_block); +} + +static int exfat_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, exfat_get_block); +} + +static int exfat_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, exfat_get_block, wbc); +} + +static int exfat_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, exfat_get_block); +} + +static void exfat_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > i_size_read(inode)) { + truncate_pagecache(inode, i_size_read(inode)); + exfat_truncate(inode, EXFAT_I(inode)->i_size_aligned); + } +} + +static int exfat_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int flags, + struct page **pagep, void **fsdata) +{ + int ret; + + *pagep = NULL; + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + exfat_get_block, + &EXFAT_I(mapping->host)->i_size_ondisk); + + if (ret < 0) + exfat_write_failed(mapping, pos+len); + + return ret; +} + +static int exfat_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int copied, + struct page *pagep, void *fsdata) +{ + struct inode *inode = mapping->host; + struct exfat_inode_info *ei = EXFAT_I(inode); + int err; + + err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); + + if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) { + exfat_fs_error(inode->i_sb, + "invalid size(size(%llu) > aligned(%llu)\n", + i_size_read(inode), EXFAT_I(inode)->i_size_aligned); + return -EIO; + } + + if (err < len) + exfat_write_failed(mapping, pos+len); + + if (!(err < 0) && !(ei->attr & ATTR_ARCHIVE)) { + inode->i_mtime = inode->i_ctime = current_time(inode); + ei->attr |= ATTR_ARCHIVE; + mark_inode_dirty(inode); + } + + return err; +} + +static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; + loff_t size = iocb->ki_pos + iov_iter_count(iter); + int rw = iov_iter_rw(iter); + ssize_t ret; + + if (rw == WRITE) { + /* + * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), + * so we need to update the ->i_size_aligned to block boundary. + * + * But we must fill the remaining area or hole by nul for + * updating ->i_size_aligned + * + * Return 0, and fallback to normal buffered write. + */ + if (EXFAT_I(inode)->i_size_aligned < size) + return 0; + } + + /* + * Need to use the DIO_LOCKING for avoiding the race + * condition of exfat_get_block() and ->truncate(). + */ + ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block); + if (ret < 0 && (rw & WRITE)) + exfat_write_failed(mapping, size); + return ret; +} + +static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block) +{ + sector_t blocknr; + + /* exfat_get_cluster() assumes the requested blocknr isn't truncated. */ + down_read(&EXFAT_I(mapping->host)->truncate_lock); + blocknr = generic_block_bmap(mapping, block, exfat_get_block); + up_read(&EXFAT_I(mapping->host)->truncate_lock); + return blocknr; +} + +/* + * exfat_block_truncate_page() zeroes out a mapping from file offset `from' + * up to the end of the block which corresponds to `from'. + * This is required during truncate to physically zeroout the tail end + * of that block so it doesn't yield old data if the file is later grown. + * Also, avoid causing failure from fsx for cases of "data past EOF" + */ +int exfat_block_truncate_page(struct inode *inode, loff_t from) +{ + return block_truncate_page(inode->i_mapping, from, exfat_get_block); +} + +static const struct address_space_operations exfat_aops = { + .readpage = exfat_readpage, + .readpages = exfat_readpages, + .writepage = exfat_writepage, + .writepages = exfat_writepages, + .write_begin = exfat_write_begin, + .write_end = exfat_write_end, + .direct_IO = exfat_direct_IO, + .bmap = exfat_aop_bmap +}; + +static inline unsigned long exfat_hash(loff_t i_pos) +{ + return hash_32(i_pos, EXFAT_HASH_BITS); +} + +void exfat_hash_inode(struct inode *inode, loff_t i_pos) +{ + struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos); + + spin_lock(&sbi->inode_hash_lock); + EXFAT_I(inode)->i_pos = i_pos; + hlist_add_head(&EXFAT_I(inode)->i_hash_fat, head); + spin_unlock(&sbi->inode_hash_lock); +} + +void exfat_unhash_inode(struct inode *inode) +{ + struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + + spin_lock(&sbi->inode_hash_lock); + hlist_del_init(&EXFAT_I(inode)->i_hash_fat); + EXFAT_I(inode)->i_pos = 0; + spin_unlock(&sbi->inode_hash_lock); +} + +struct inode *exfat_iget(struct super_block *sb, loff_t i_pos) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *info; + struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos); + struct inode *inode = NULL; + + spin_lock(&sbi->inode_hash_lock); + hlist_for_each_entry(info, head, i_hash_fat) { + WARN_ON(info->vfs_inode.i_sb != sb); + + if (i_pos != info->i_pos) + continue; + inode = igrab(&info->vfs_inode); + if (inode) + break; + } + spin_unlock(&sbi->inode_hash_lock); + return inode; +} + +/* doesn't deal with root inode */ +static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) +{ + struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + loff_t size = info->size; + + memcpy(&ei->dir, &info->dir, sizeof(struct exfat_chain)); + ei->entry = info->entry; + ei->attr = info->attr; + ei->start_clu = info->start_clu; + ei->flags = info->flags; + ei->type = info->type; + + ei->version = 0; + ei->hint_stat.eidx = 0; + ei->hint_stat.clu = info->start_clu; + ei->hint_femp.eidx = EXFAT_HINT_NONE; + ei->rwoffset = 0; + ei->hint_bmap.off = EXFAT_EOF_CLUSTER; + ei->i_pos = 0; + + inode->i_uid = sbi->options.fs_uid; + inode->i_gid = sbi->options.fs_gid; + inode_inc_iversion(inode); + inode->i_generation = prandom_u32(); + + if (info->attr & ATTR_SUBDIR) { /* directory */ + inode->i_generation &= ~1; + inode->i_mode = exfat_make_mode(sbi, info->attr, 0777); + inode->i_op = &exfat_dir_inode_operations; + inode->i_fop = &exfat_dir_operations; + set_nlink(inode, info->num_subdirs); + } else { /* regular file */ + inode->i_generation |= 1; + inode->i_mode = exfat_make_mode(sbi, info->attr, 0777); + inode->i_op = &exfat_file_inode_operations; + inode->i_fop = &exfat_file_operations; + inode->i_mapping->a_ops = &exfat_aops; + inode->i_mapping->nrpages = 0; + } + + i_size_write(inode, size); + + /* ondisk and aligned size should be aligned with block size */ + if (size & (inode->i_sb->s_blocksize - 1)) { + size |= (inode->i_sb->s_blocksize - 1); + size++; + } + + ei->i_size_aligned = size; + ei->i_size_ondisk = size; + + exfat_save_attr(inode, info->attr); + + inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & + ~(sbi->cluster_size - 1)) >> inode->i_blkbits; + inode->i_mtime = info->mtime; + inode->i_ctime = info->mtime; + ei->i_crtime = info->crtime; + inode->i_atime = info->atime; + + exfat_cache_init_inode(inode); + + return 0; +} + +struct inode *exfat_build_inode(struct super_block *sb, + struct exfat_dir_entry *info, loff_t i_pos) +{ + struct inode *inode; + int err; + + inode = exfat_iget(sb, i_pos); + if (inode) + goto out; + inode = new_inode(sb); + if (!inode) { + inode = ERR_PTR(-ENOMEM); + goto out; + } + inode->i_ino = iunique(sb, EXFAT_ROOT_INO); + inode_set_iversion(inode, 1); + err = exfat_fill_inode(inode, info); + if (err) { + iput(inode); + inode = ERR_PTR(err); + goto out; + } + exfat_hash_inode(inode, i_pos); + insert_inode_hash(inode); +out: + return inode; +} + +void exfat_evict_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + + if (!inode->i_nlink) { + i_size_write(inode, 0); + mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); + __exfat_truncate(inode, 0); + mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); + } + + invalidate_inode_buffers(inode); + clear_inode(inode); + exfat_cache_inval_inode(inode); + exfat_unhash_inode(inode); +} diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c new file mode 100644 index 000000000000..14a3300848f6 --- /dev/null +++ b/fs/exfat/misc.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Written 1992,1993 by Werner Almesberger + * 22/11/2000 - Fixed fat_date_unix2dos for dates earlier than 01/01/1980 + * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru) + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +/* + * exfat_fs_error reports a file system problem that might indicate fa data + * corruption/inconsistency. Depending on 'errors' mount option the + * panic() is called, or error message is printed FAT and nothing is done, + * or filesystem is remounted read-only (default behavior). + * In case the file system is remounted read-only, it can be made writable + * again by remounting it. + */ +void __exfat_fs_error(struct super_block *sb, int report, const char *fmt, ...) +{ + struct exfat_mount_options *opts = &EXFAT_SB(sb)->options; + va_list args; + struct va_format vaf; + + if (report) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + exfat_msg(sb, KERN_ERR, "error, %pV\n", &vaf); + va_end(args); + } + + if (opts->errors == EXFAT_ERRORS_PANIC) { + panic("exFAT-fs (%s): fs panic from previous error\n", + sb->s_id); + } else if (opts->errors == EXFAT_ERRORS_RO && !sb_rdonly(sb)) { + sb->s_flags |= SB_RDONLY; + exfat_msg(sb, KERN_ERR, "Filesystem has been set read-only"); + } +} + +/* + * exfat_msg() - print preformated EXFAT specific messages. + * All logs except what uses exfat_fs_error() should be written by exfat_msg() + */ +void exfat_msg(struct super_block *sb, const char *level, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + /* level means KERN_ pacility level */ + printk("%sexFAT-fs (%s): %pV\n", level, sb->s_id, &vaf); + va_end(args); +} + +#define SECS_PER_MIN (60) +#define TIMEZONE_SEC(x) ((x) * 15 * SECS_PER_MIN) + +static void exfat_adjust_tz(struct timespec64 *ts, u8 tz_off) +{ + if (tz_off <= 0x3F) + ts->tv_sec -= TIMEZONE_SEC(tz_off); + else /* 0x40 <= (tz_off & 0x7F) <=0x7F */ + ts->tv_sec += TIMEZONE_SEC(0x80 - tz_off); +} + +/* Convert a EXFAT time/date pair to a UNIX date (seconds since 1 1 70). */ +void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, + u8 tz, __le16 time, __le16 date, u8 time_ms) +{ + u16 t = le16_to_cpu(time); + u16 d = le16_to_cpu(date); + + ts->tv_sec = mktime64(1980 + (d >> 9), d >> 5 & 0x000F, d & 0x001F, + t >> 11, (t >> 5) & 0x003F, (t & 0x001F) << 1); + + + /* time_ms field represent 0 ~ 199(1990 ms) */ + if (time_ms) { + ts->tv_sec += time_ms / 100; + ts->tv_nsec = (time_ms % 100) * 10 * NSEC_PER_MSEC; + } + + if (tz & EXFAT_TZ_VALID) + /* Adjust timezone to UTC0. */ + exfat_adjust_tz(ts, tz & ~EXFAT_TZ_VALID); + else + /* Convert from local time to UTC using time_offset. */ + ts->tv_sec -= sbi->options.time_offset * SECS_PER_MIN; +} + +/* Convert linear UNIX date to a EXFAT time/date pair. */ +void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, + u8 *tz, __le16 *time, __le16 *date, u8 *time_ms) +{ + struct tm tm; + u16 t, d; + + time64_to_tm(ts->tv_sec, 0, &tm); + t = (tm.tm_hour << 11) | (tm.tm_min << 5) | (tm.tm_sec >> 1); + d = ((tm.tm_year - 80) << 9) | ((tm.tm_mon + 1) << 5) | tm.tm_mday; + + *time = cpu_to_le16(t); + *date = cpu_to_le16(d); + + /* time_ms field represent 0 ~ 199(1990 ms) */ + if (time_ms) + *time_ms = (tm.tm_sec & 1) * 100 + + ts->tv_nsec / (10 * NSEC_PER_MSEC); + + /* + * Record 00h value for OffsetFromUtc field and 1 value for OffsetValid + * to indicate that local time and UTC are the same. + */ + *tz = EXFAT_TZ_VALID; +} + +unsigned short exfat_calc_chksum_2byte(void *data, int len, + unsigned short chksum, int type) +{ + int i; + unsigned char *c = (unsigned char *)data; + + for (i = 0; i < len; i++, c++) { + if (((i == 2) || (i == 3)) && (type == CS_DIR_ENTRY)) + continue; + chksum = (((chksum & 1) << 15) | ((chksum & 0xFFFE) >> 1)) + + (unsigned short)*c; + } + return chksum; +} + +void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync) +{ + set_bit(EXFAT_SB_DIRTY, &EXFAT_SB(sb)->s_state); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + + if (sync) + sync_dirty_buffer(bh); +} + +void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, + unsigned int size, unsigned char flags) +{ + ec->dir = dir; + ec->size = size; + ec->flags = flags; +} + +void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec) +{ + return exfat_chain_set(dup, ec->dir, ec->size, ec->flags); +} diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c new file mode 100644 index 000000000000..a8681d91f569 --- /dev/null +++ b/fs/exfat/namei.c @@ -0,0 +1,1448 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/iversion.h> +#include <linux/namei.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> +#include <linux/nls.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static inline unsigned long exfat_d_version(struct dentry *dentry) +{ + return (unsigned long) dentry->d_fsdata; +} + +static inline void exfat_d_version_set(struct dentry *dentry, + unsigned long version) +{ + dentry->d_fsdata = (void *) version; +} + +/* + * If new entry was created in the parent, it could create the 8.3 alias (the + * shortname of logname). So, the parent may have the negative-dentry which + * matches the created 8.3 alias. + * + * If it happened, the negative dentry isn't actually negative anymore. So, + * drop it. + */ +static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + int ret; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + /* + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, and + * will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. + */ + if (d_really_is_positive(dentry)) + return 1; + + /* + * Drop the negative dentry, in order to make sure to use the case + * sensitive name which is specified by user if this is for creation. + */ + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + return 0; + + spin_lock(&dentry->d_lock); + ret = inode_eq_iversion(d_inode(dentry->d_parent), + exfat_d_version(dentry)); + spin_unlock(&dentry->d_lock); + return ret; +} + +/* returns the length of a struct qstr, ignoring trailing dots */ +static unsigned int exfat_striptail_len(unsigned int len, const char *name) +{ + while (len && name[len - 1] == '.') + len--; + return len; +} + +/* + * Compute the hash for the exfat name corresponding to the dentry. If the name + * is invalid, we leave the hash code unchanged so that the existing dentry can + * be used. The exfat fs routines will return ENOENT or EINVAL as appropriate. + */ +static int exfat_d_hash(const struct dentry *dentry, struct qstr *qstr) +{ + struct super_block *sb = dentry->d_sb; + struct nls_table *t = EXFAT_SB(sb)->nls_io; + const unsigned char *name = qstr->name; + unsigned int len = exfat_striptail_len(qstr->len, qstr->name); + unsigned long hash = init_name_hash(dentry); + int i, charlen; + wchar_t c; + + for (i = 0; i < len; i += charlen) { + charlen = t->char2uni(&name[i], len - i, &c); + if (charlen < 0) + return charlen; + hash = partial_name_hash(exfat_toupper(sb, c), hash); + } + + qstr->hash = end_name_hash(hash); + return 0; +} + +static int exfat_d_cmp(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + struct super_block *sb = dentry->d_sb; + struct nls_table *t = EXFAT_SB(sb)->nls_io; + unsigned int alen = exfat_striptail_len(name->len, name->name); + unsigned int blen = exfat_striptail_len(len, str); + wchar_t c1, c2; + int charlen, i; + + if (alen != blen) + return 1; + + for (i = 0; i < len; i += charlen) { + charlen = t->char2uni(&name->name[i], alen - i, &c1); + if (charlen < 0) + return 1; + if (charlen != t->char2uni(&str[i], blen - i, &c2)) + return 1; + + if (exfat_toupper(sb, c1) != exfat_toupper(sb, c2)) + return 1; + } + + return 0; +} + +const struct dentry_operations exfat_dentry_ops = { + .d_revalidate = exfat_d_revalidate, + .d_hash = exfat_d_hash, + .d_compare = exfat_d_cmp, +}; + +static int exfat_utf8_d_hash(const struct dentry *dentry, struct qstr *qstr) +{ + struct super_block *sb = dentry->d_sb; + const unsigned char *name = qstr->name; + unsigned int len = exfat_striptail_len(qstr->len, qstr->name); + unsigned long hash = init_name_hash(dentry); + int i, charlen; + unicode_t u; + + for (i = 0; i < len; i += charlen) { + charlen = utf8_to_utf32(&name[i], len - i, &u); + if (charlen < 0) + return charlen; + + /* + * Convert to UTF-16: code points above U+FFFF are encoded as + * surrogate pairs. + * exfat_toupper() works only for code points up to the U+FFFF. + */ + if (u > 0xFFFF) { + hash = partial_name_hash(exfat_high_surrogate(u), hash); + hash = partial_name_hash(exfat_low_surrogate(u), hash); + } else { + hash = partial_name_hash(exfat_toupper(sb, u), hash); + } + } + + qstr->hash = end_name_hash(hash); + return 0; +} + +static int exfat_utf8_d_cmp(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + struct super_block *sb = dentry->d_sb; + unsigned int alen = exfat_striptail_len(name->len, name->name); + unsigned int blen = exfat_striptail_len(len, str); + unicode_t u_a, u_b; + int charlen, i; + + if (alen != blen) + return 1; + + for (i = 0; i < alen; i += charlen) { + charlen = utf8_to_utf32(&name->name[i], alen - i, &u_a); + if (charlen < 0) + return 1; + if (charlen != utf8_to_utf32(&str[i], blen - i, &u_b)) + return 1; + + if (u_a <= 0xFFFF && u_b <= 0xFFFF) { + if (exfat_toupper(sb, u_a) != exfat_toupper(sb, u_b)) + return 1; + } else if (u_a > 0xFFFF && u_b > 0xFFFF) { + if (exfat_low_surrogate(u_a) != + exfat_low_surrogate(u_b) || + exfat_high_surrogate(u_a) != + exfat_high_surrogate(u_b)) + return 1; + } else { + return 1; + } + } + + return 0; +} + +const struct dentry_operations exfat_utf8_dentry_ops = { + .d_revalidate = exfat_d_revalidate, + .d_hash = exfat_utf8_d_hash, + .d_compare = exfat_utf8_d_cmp, +}; + +/* used only in search empty_slot() */ +#define CNT_UNUSED_NOHIT (-1) +#define CNT_UNUSED_HIT (-2) +/* search EMPTY CONTINUOUS "num_entries" entries */ +static int exfat_search_empty_slot(struct super_block *sb, + struct exfat_hint_femp *hint_femp, struct exfat_chain *p_dir, + int num_entries) +{ + int i, dentry, num_empty = 0; + int dentries_per_clu; + unsigned int type; + struct exfat_chain clu; + struct exfat_dentry *ep; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + + dentries_per_clu = sbi->dentries_per_clu; + + if (hint_femp->eidx != EXFAT_HINT_NONE) { + dentry = hint_femp->eidx; + if (num_entries <= hint_femp->count) { + hint_femp->eidx = EXFAT_HINT_NONE; + return dentry; + } + + exfat_chain_dup(&clu, &hint_femp->cur); + } else { + exfat_chain_dup(&clu, p_dir); + dentry = 0; + } + + while (clu.dir != EXFAT_EOF_CLUSTER) { + i = dentry & (dentries_per_clu - 1); + + for (; i < dentries_per_clu; i++, dentry++) { + ep = exfat_get_dentry(sb, &clu, i, &bh, NULL); + if (!ep) + return -EIO; + type = exfat_get_entry_type(ep); + brelse(bh); + + if (type == TYPE_UNUSED || type == TYPE_DELETED) { + num_empty++; + if (hint_femp->eidx == EXFAT_HINT_NONE) { + hint_femp->eidx = dentry; + hint_femp->count = CNT_UNUSED_NOHIT; + exfat_chain_set(&hint_femp->cur, + clu.dir, clu.size, clu.flags); + } + + if (type == TYPE_UNUSED && + hint_femp->count != CNT_UNUSED_HIT) + hint_femp->count = CNT_UNUSED_HIT; + } else { + if (hint_femp->eidx != EXFAT_HINT_NONE && + hint_femp->count == CNT_UNUSED_HIT) { + /* unused empty group means + * an empty group which includes + * unused dentry + */ + exfat_fs_error(sb, + "found bogus dentry(%d) beyond unused empty group(%d) (start_clu : %u, cur_clu : %u)", + dentry, hint_femp->eidx, + p_dir->dir, clu.dir); + return -EIO; + } + + num_empty = 0; + hint_femp->eidx = EXFAT_HINT_NONE; + } + + if (num_empty >= num_entries) { + /* found and invalidate hint_femp */ + hint_femp->eidx = EXFAT_HINT_NONE; + return (dentry - (num_entries - 1)); + } + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &clu.dir)) + return -EIO; + } + } + + return -ENOSPC; +} + +static int exfat_check_max_dentries(struct inode *inode) +{ + if (EXFAT_B_TO_DEN(i_size_read(inode)) >= MAX_EXFAT_DENTRIES) { + /* + * exFAT spec allows a dir to grow upto 8388608(256MB) + * dentries + */ + return -ENOSPC; + } + return 0; +} + +/* find empty directory entry. + * if there isn't any empty slot, expand cluster chain. + */ +static int exfat_find_empty_entry(struct inode *inode, + struct exfat_chain *p_dir, int num_entries) +{ + int dentry; + unsigned int ret, last_clu; + sector_t sector; + loff_t size = 0; + struct exfat_chain clu; + struct exfat_dentry *ep = NULL; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_hint_femp hint_femp; + + hint_femp.eidx = EXFAT_HINT_NONE; + + if (ei->hint_femp.eidx != EXFAT_HINT_NONE) { + memcpy(&hint_femp, &ei->hint_femp, + sizeof(struct exfat_hint_femp)); + ei->hint_femp.eidx = EXFAT_HINT_NONE; + } + + while ((dentry = exfat_search_empty_slot(sb, &hint_femp, p_dir, + num_entries)) < 0) { + if (dentry == -EIO) + break; + + if (exfat_check_max_dentries(inode)) + return -ENOSPC; + + /* we trust p_dir->size regardless of FAT type */ + if (exfat_find_last_cluster(sb, p_dir, &last_clu)) + return -EIO; + + /* + * Allocate new cluster to this directory + */ + exfat_chain_set(&clu, last_clu + 1, 0, p_dir->flags); + + /* allocate a cluster */ + ret = exfat_alloc_cluster(inode, 1, &clu); + if (ret) + return ret; + + if (exfat_zeroed_cluster(inode, clu.dir)) + return -EIO; + + /* append to the FAT chain */ + if (clu.flags != p_dir->flags) { + /* no-fat-chain bit is disabled, + * so fat-chain should be synced with alloc-bitmap + */ + exfat_chain_cont_cluster(sb, p_dir->dir, p_dir->size); + p_dir->flags = ALLOC_FAT_CHAIN; + hint_femp.cur.flags = ALLOC_FAT_CHAIN; + } + + if (clu.flags == ALLOC_FAT_CHAIN) + if (exfat_ent_set(sb, last_clu, clu.dir)) + return -EIO; + + if (hint_femp.eidx == EXFAT_HINT_NONE) { + /* the special case that new dentry + * should be allocated from the start of new cluster + */ + hint_femp.eidx = EXFAT_B_TO_DEN_IDX(p_dir->size, sbi); + hint_femp.count = sbi->dentries_per_clu; + + exfat_chain_set(&hint_femp.cur, clu.dir, 0, clu.flags); + } + hint_femp.cur.size++; + p_dir->size++; + size = EXFAT_CLU_TO_B(p_dir->size, sbi); + + /* update the directory entry */ + if (p_dir->dir != sbi->root_dir) { + struct buffer_head *bh; + + ep = exfat_get_dentry(sb, + &(ei->dir), ei->entry + 1, &bh, §or); + if (!ep) + return -EIO; + + ep->dentry.stream.valid_size = cpu_to_le64(size); + ep->dentry.stream.size = ep->dentry.stream.valid_size; + ep->dentry.stream.flags = p_dir->flags; + exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + brelse(bh); + if (exfat_update_dir_chksum(inode, &(ei->dir), + ei->entry)) + return -EIO; + } + + /* directory inode should be updated in here */ + i_size_write(inode, size); + EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size; + EXFAT_I(inode)->i_size_aligned += sbi->cluster_size; + EXFAT_I(inode)->flags = p_dir->flags; + inode->i_blocks += 1 << sbi->sect_per_clus_bits; + } + + return dentry; +} + +/* + * Name Resolution Functions : + * Zero if it was successful; otherwise nonzero. + */ +static int __exfat_resolve_path(struct inode *inode, const unsigned char *path, + struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, + int lookup) +{ + int namelen; + int lossy = NLS_NAME_NO_LOSSY; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + + /* strip all trailing periods */ + namelen = exfat_striptail_len(strlen(path), path); + if (!namelen) + return -ENOENT; + + if (strlen(path) > (MAX_NAME_LENGTH * MAX_CHARSET_SIZE)) + return -ENAMETOOLONG; + + /* + * strip all leading spaces : + * "MS windows 7" supports leading spaces. + * So we should skip this preprocessing for compatibility. + */ + + /* file name conversion : + * If lookup case, we allow bad-name for compatibility. + */ + namelen = exfat_nls_to_utf16(sb, path, namelen, p_uniname, + &lossy); + if (namelen < 0) + return namelen; /* return error value */ + + if ((lossy && !lookup) || !namelen) + return -EINVAL; + + exfat_chain_set(p_dir, ei->start_clu, + EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags); + + return 0; +} + +static inline int exfat_resolve_path(struct inode *inode, + const unsigned char *path, struct exfat_chain *dir, + struct exfat_uni_name *uni) +{ + return __exfat_resolve_path(inode, path, dir, uni, 0); +} + +static inline int exfat_resolve_path_for_lookup(struct inode *inode, + const unsigned char *path, struct exfat_chain *dir, + struct exfat_uni_name *uni) +{ + return __exfat_resolve_path(inode, path, dir, uni, 1); +} + +static inline loff_t exfat_make_i_pos(struct exfat_dir_entry *info) +{ + return ((loff_t) info->dir.dir << 32) | (info->entry & 0xffffffff); +} + +static int exfat_add_entry(struct inode *inode, const char *path, + struct exfat_chain *p_dir, unsigned int type, + struct exfat_dir_entry *info) +{ + int ret, dentry, num_entries; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_uni_name uniname; + struct exfat_chain clu; + int clu_size = 0; + unsigned int start_clu = EXFAT_FREE_CLUSTER; + + ret = exfat_resolve_path(inode, path, p_dir, &uniname); + if (ret) + goto out; + + num_entries = exfat_calc_num_entries(&uniname); + if (num_entries < 0) { + ret = num_entries; + goto out; + } + + /* exfat_find_empty_entry must be called before alloc_cluster() */ + dentry = exfat_find_empty_entry(inode, p_dir, num_entries); + if (dentry < 0) { + ret = dentry; /* -EIO or -ENOSPC */ + goto out; + } + + if (type == TYPE_DIR) { + ret = exfat_alloc_new_dir(inode, &clu); + if (ret) + goto out; + start_clu = clu.dir; + clu_size = sbi->cluster_size; + } + + /* update the directory entry */ + /* fill the dos name directory entry information of the created file. + * the first cluster is not determined yet. (0) + */ + ret = exfat_init_dir_entry(inode, p_dir, dentry, type, + start_clu, clu_size); + if (ret) + goto out; + + ret = exfat_init_ext_entry(inode, p_dir, dentry, num_entries, &uniname); + if (ret) + goto out; + + memcpy(&info->dir, p_dir, sizeof(struct exfat_chain)); + info->entry = dentry; + info->flags = ALLOC_NO_FAT_CHAIN; + info->type = type; + + if (type == TYPE_FILE) { + info->attr = ATTR_ARCHIVE; + info->start_clu = EXFAT_EOF_CLUSTER; + info->size = 0; + info->num_subdirs = 0; + } else { + int count; + struct exfat_chain cdir; + + info->attr = ATTR_SUBDIR; + info->start_clu = start_clu; + info->size = clu_size; + + exfat_chain_set(&cdir, info->start_clu, + EXFAT_B_TO_CLU(info->size, sbi), info->flags); + count = exfat_count_dir_entries(sb, &cdir); + if (count < 0) + return -EIO; + info->num_subdirs = count + EXFAT_MIN_SUBDIR; + } + memset(&info->crtime, 0, sizeof(info->crtime)); + memset(&info->mtime, 0, sizeof(info->mtime)); + memset(&info->atime, 0, sizeof(info->atime)); +out: + return ret; +} + +static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct exfat_chain cdir; + struct exfat_dir_entry info; + loff_t i_pos; + int err; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + exfat_set_vol_flags(sb, VOL_DIRTY); + err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_FILE, + &info); + exfat_set_vol_flags(sb, VOL_CLEAN); + if (err) + goto unlock; + + inode_inc_iversion(dir); + dir->i_ctime = dir->i_mtime = current_time(dir); + if (IS_DIRSYNC(dir)) + exfat_sync_inode(dir); + else + mark_inode_dirty(dir); + + i_pos = exfat_make_i_pos(&info); + inode = exfat_build_inode(sb, &info, i_pos); + if (IS_ERR(inode)) + goto unlock; + + inode_inc_iversion(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = + EXFAT_I(inode)->i_crtime = current_time(inode); + /* timestamp is already written, so mark_inode_dirty() is unneeded. */ + + d_instantiate(dentry, inode); +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return err; +} + +/* lookup a file */ +static int exfat_find(struct inode *dir, struct qstr *qname, + struct exfat_dir_entry *info) +{ + int ret, dentry, num_entries, count; + struct exfat_chain cdir; + struct exfat_uni_name uni_name; + struct exfat_dentry *ep, *ep2; + struct exfat_entry_set_cache *es = NULL; + struct super_block *sb = dir->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(dir); + + if (qname->len == 0) + return -ENOENT; + + /* check the validity of directory name in the given pathname */ + ret = exfat_resolve_path_for_lookup(dir, qname->name, &cdir, &uni_name); + if (ret) + return ret; + + num_entries = exfat_calc_num_entries(&uni_name); + if (num_entries < 0) + return num_entries; + + /* check the validation of hint_stat and initialize it if required */ + if (ei->version != (inode_peek_iversion_raw(dir) & 0xffffffff)) { + ei->hint_stat.clu = cdir.dir; + ei->hint_stat.eidx = 0; + ei->version = (inode_peek_iversion_raw(dir) & 0xffffffff); + ei->hint_femp.eidx = EXFAT_HINT_NONE; + } + + /* search the file name for directories */ + dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name, + num_entries, TYPE_ALL); + + if ((dentry < 0) && (dentry != -EEXIST)) + return dentry; /* -error value */ + + memcpy(&info->dir, &cdir.dir, sizeof(struct exfat_chain)); + info->entry = dentry; + info->num_subdirs = 0; + + /* root directory itself */ + if (unlikely(dentry == -EEXIST)) { + int num_clu = 0; + + info->type = TYPE_DIR; + info->attr = ATTR_SUBDIR; + info->flags = ALLOC_FAT_CHAIN; + info->start_clu = sbi->root_dir; + memset(&info->crtime, 0, sizeof(info->crtime)); + memset(&info->mtime, 0, sizeof(info->mtime)); + memset(&info->atime, 0, sizeof(info->atime)); + + exfat_chain_set(&cdir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + if (exfat_count_num_clusters(sb, &cdir, &num_clu)) + return -EIO; + info->size = num_clu << sbi->cluster_size_bits; + + count = exfat_count_dir_entries(sb, &cdir); + if (count < 0) + return -EIO; + + info->num_subdirs = count; + } else { + es = exfat_get_dentry_set(sb, &cdir, dentry, ES_2_ENTRIES, &ep); + if (!es) + return -EIO; + ep2 = ep + 1; + + info->type = exfat_get_entry_type(ep); + info->attr = le16_to_cpu(ep->dentry.file.attr); + info->size = le64_to_cpu(ep2->dentry.stream.valid_size); + if ((info->type == TYPE_FILE) && (info->size == 0)) { + info->flags = ALLOC_NO_FAT_CHAIN; + info->start_clu = EXFAT_EOF_CLUSTER; + } else { + info->flags = ep2->dentry.stream.flags; + info->start_clu = + le32_to_cpu(ep2->dentry.stream.start_clu); + } + + if (ei->start_clu == EXFAT_FREE_CLUSTER) { + exfat_fs_error(sb, + "non-zero size file starts with zero cluster (size : %llu, p_dir : %u, entry : 0x%08x)", + i_size_read(dir), ei->dir.dir, ei->entry); + return -EIO; + } + + exfat_get_entry_time(sbi, &info->crtime, + ep->dentry.file.create_tz, + ep->dentry.file.create_time, + ep->dentry.file.create_date, + ep->dentry.file.create_time_ms); + exfat_get_entry_time(sbi, &info->mtime, + ep->dentry.file.modify_tz, + ep->dentry.file.modify_time, + ep->dentry.file.modify_date, + ep->dentry.file.modify_time_ms); + exfat_get_entry_time(sbi, &info->atime, + ep->dentry.file.access_tz, + ep->dentry.file.access_time, + ep->dentry.file.access_date, + 0); + kfree(es); + + if (info->type == TYPE_DIR) { + exfat_chain_set(&cdir, info->start_clu, + EXFAT_B_TO_CLU(info->size, sbi), info->flags); + count = exfat_count_dir_entries(sb, &cdir); + if (count < 0) + return -EIO; + + info->num_subdirs = count + EXFAT_MIN_SUBDIR; + } + } + return 0; +} + +static int exfat_d_anon_disconn(struct dentry *dentry) +{ + return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED); +} + +static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct dentry *alias; + struct exfat_dir_entry info; + int err; + loff_t i_pos; + mode_t i_mode; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + err = exfat_find(dir, &dentry->d_name, &info); + if (err) { + if (err == -ENOENT) { + inode = NULL; + goto out; + } + goto unlock; + } + + i_pos = exfat_make_i_pos(&info); + inode = exfat_build_inode(sb, &info, i_pos); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto unlock; + } + + i_mode = inode->i_mode; + alias = d_find_alias(inode); + + /* + * Checking "alias->d_parent == dentry->d_parent" to make sure + * FS is not corrupted (especially double linked dir). + */ + if (alias && alias->d_parent == dentry->d_parent && + !exfat_d_anon_disconn(alias)) { + + /* + * Unhashed alias is able to exist because of revalidate() + * called by lookup_fast. You can easily make this status + * by calling create and lookup concurrently + * In such case, we reuse an alias instead of new dentry + */ + if (d_unhashed(alias)) { + WARN_ON(alias->d_name.hash_len != + dentry->d_name.hash_len); + exfat_msg(sb, KERN_INFO, + "rehashed a dentry(%p) in read lookup", alias); + d_drop(dentry); + d_rehash(alias); + } else if (!S_ISDIR(i_mode)) { + /* + * This inode has non anonymous-DCACHE_DISCONNECTED + * dentry. This means, the user did ->lookup() by an + * another name (longname vs 8.3 alias of it) in past. + * + * Switch to new one for reason of locality if possible. + */ + d_move(alias, dentry); + } + iput(inode); + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return alias; + } + dput(alias); +out: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + if (!inode) + exfat_d_version_set(dentry, inode_query_iversion(dir)); + + return d_splice_alias(inode, dentry); +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return ERR_PTR(err); +} + +/* remove an entry, BUT don't truncate */ +static int exfat_unlink(struct inode *dir, struct dentry *dentry) +{ + struct exfat_chain cdir; + struct exfat_dentry *ep; + struct super_block *sb = dir->i_sb; + struct inode *inode = dentry->d_inode; + struct exfat_inode_info *ei = EXFAT_I(inode); + struct buffer_head *bh; + sector_t sector; + int num_entries, entry, err = 0; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + exfat_chain_dup(&cdir, &ei->dir); + entry = ei->entry; + if (ei->dir.dir == DIR_DELETED) { + exfat_msg(sb, KERN_ERR, "abnormal access to deleted dentry"); + err = -ENOENT; + goto unlock; + } + + ep = exfat_get_dentry(sb, &cdir, entry, &bh, §or); + if (!ep) { + err = -EIO; + goto unlock; + } + num_entries = exfat_count_ext_entries(sb, &cdir, entry, ep); + if (num_entries < 0) { + err = -EIO; + brelse(bh); + goto unlock; + } + num_entries++; + brelse(bh); + + exfat_set_vol_flags(sb, VOL_DIRTY); + /* update the directory entry */ + if (exfat_remove_entries(dir, &cdir, entry, 0, num_entries)) { + err = -EIO; + goto unlock; + } + + /* This doesn't modify ei */ + ei->dir.dir = DIR_DELETED; + exfat_set_vol_flags(sb, VOL_CLEAN); + + inode_inc_iversion(dir); + dir->i_mtime = dir->i_atime = current_time(dir); + if (IS_DIRSYNC(dir)) + exfat_sync_inode(dir); + else + mark_inode_dirty(dir); + + clear_nlink(inode); + inode->i_mtime = inode->i_atime = current_time(inode); + exfat_unhash_inode(inode); + exfat_d_version_set(dentry, inode_query_iversion(dir)); +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return err; +} + +static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct exfat_dir_entry info; + struct exfat_chain cdir; + loff_t i_pos; + int err; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + exfat_set_vol_flags(sb, VOL_DIRTY); + err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_DIR, + &info); + exfat_set_vol_flags(sb, VOL_CLEAN); + if (err) + goto unlock; + + inode_inc_iversion(dir); + dir->i_ctime = dir->i_mtime = current_time(dir); + if (IS_DIRSYNC(dir)) + exfat_sync_inode(dir); + else + mark_inode_dirty(dir); + inc_nlink(dir); + + i_pos = exfat_make_i_pos(&info); + inode = exfat_build_inode(sb, &info, i_pos); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto unlock; + } + + inode_inc_iversion(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = + EXFAT_I(inode)->i_crtime = current_time(inode); + /* timestamp is already written, so mark_inode_dirty() is unneeded. */ + + d_instantiate(dentry, inode); + +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return err; +} + +static int exfat_check_dir_empty(struct super_block *sb, + struct exfat_chain *p_dir) +{ + int i, dentries_per_clu; + unsigned int type; + struct exfat_chain clu; + struct exfat_dentry *ep; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + + dentries_per_clu = sbi->dentries_per_clu; + + exfat_chain_dup(&clu, p_dir); + + while (clu.dir != EXFAT_EOF_CLUSTER) { + for (i = 0; i < dentries_per_clu; i++) { + ep = exfat_get_dentry(sb, &clu, i, &bh, NULL); + if (!ep) + return -EIO; + type = exfat_get_entry_type(ep); + brelse(bh); + if (type == TYPE_UNUSED) + return 0; + + if (type != TYPE_FILE && type != TYPE_DIR) + continue; + + return -ENOTEMPTY; + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + } + } + + return 0; +} + +static int exfat_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct exfat_dentry *ep; + struct exfat_chain cdir, clu_to_free; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + struct buffer_head *bh; + sector_t sector; + int num_entries, entry, err; + + mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); + + exfat_chain_dup(&cdir, &ei->dir); + entry = ei->entry; + + if (ei->dir.dir == DIR_DELETED) { + exfat_msg(sb, KERN_ERR, "abnormal access to deleted dentry"); + err = -ENOENT; + goto unlock; + } + + exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_chain_set(&clu_to_free, ei->start_clu, + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi), ei->flags); + + err = exfat_check_dir_empty(sb, &clu_to_free); + if (err) { + if (err == -EIO) + exfat_msg(sb, KERN_ERR, + "failed to exfat_check_dir_empty : err(%d)", + err); + goto unlock; + } + + ep = exfat_get_dentry(sb, &cdir, entry, &bh, §or); + if (!ep) { + err = -EIO; + goto unlock; + } + + num_entries = exfat_count_ext_entries(sb, &cdir, entry, ep); + if (num_entries < 0) { + err = -EIO; + brelse(bh); + goto unlock; + } + num_entries++; + brelse(bh); + + err = exfat_remove_entries(dir, &cdir, entry, 0, num_entries); + if (err) { + exfat_msg(sb, KERN_ERR, + "failed to exfat_remove_entries : err(%d)", + err); + goto unlock; + } + ei->dir.dir = DIR_DELETED; + exfat_set_vol_flags(sb, VOL_CLEAN); + + inode_inc_iversion(dir); + dir->i_mtime = dir->i_atime = current_time(dir); + if (IS_DIRSYNC(dir)) + exfat_sync_inode(dir); + else + mark_inode_dirty(dir); + drop_nlink(dir); + + clear_nlink(inode); + inode->i_mtime = inode->i_atime = current_time(inode); + exfat_unhash_inode(inode); + exfat_d_version_set(dentry, inode_query_iversion(dir)); +unlock: + mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); + return err; +} + +static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, + int oldentry, struct exfat_uni_name *p_uniname, + struct exfat_inode_info *ei) +{ + int ret, num_old_entries, num_new_entries; + sector_t sector_old, sector_new; + struct exfat_dentry *epold, *epnew; + struct super_block *sb = inode->i_sb; + struct buffer_head *new_bh, *old_bh; + int sync = IS_DIRSYNC(inode); + + epold = exfat_get_dentry(sb, p_dir, oldentry, &old_bh, §or_old); + if (!epold) + return -EIO; + + num_old_entries = exfat_count_ext_entries(sb, p_dir, oldentry, epold); + if (num_old_entries < 0) + return -EIO; + num_old_entries++; + + num_new_entries = exfat_calc_num_entries(p_uniname); + if (num_new_entries < 0) + return num_new_entries; + + if (num_old_entries < num_new_entries) { + int newentry; + + newentry = + exfat_find_empty_entry(inode, p_dir, num_new_entries); + if (newentry < 0) + return newentry; /* -EIO or -ENOSPC */ + + epnew = exfat_get_dentry(sb, p_dir, newentry, &new_bh, + §or_new); + if (!epnew) + return -EIO; + + memcpy(epnew, epold, DENTRY_SIZE); + if (exfat_get_entry_type(epnew) == TYPE_FILE) { + epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); + ei->attr |= ATTR_ARCHIVE; + } + exfat_update_bh(sb, new_bh, sync); + brelse(old_bh); + brelse(new_bh); + + epold = exfat_get_dentry(sb, p_dir, oldentry + 1, &old_bh, + §or_old); + epnew = exfat_get_dentry(sb, p_dir, newentry + 1, &new_bh, + §or_new); + if (!epold || !epnew) + return -EIO; + + memcpy(epnew, epold, DENTRY_SIZE); + exfat_update_bh(sb, new_bh, sync); + brelse(old_bh); + brelse(new_bh); + + ret = exfat_init_ext_entry(inode, p_dir, newentry, + num_new_entries, p_uniname); + if (ret) + return ret; + + exfat_remove_entries(inode, p_dir, oldentry, 0, + num_old_entries); + ei->entry = newentry; + } else { + if (exfat_get_entry_type(epold) == TYPE_FILE) { + epold->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); + ei->attr |= ATTR_ARCHIVE; + } + exfat_update_bh(sb, old_bh, sync); + brelse(old_bh); + ret = exfat_init_ext_entry(inode, p_dir, oldentry, + num_new_entries, p_uniname); + if (ret) + return ret; + + exfat_remove_entries(inode, p_dir, oldentry, num_new_entries, + num_old_entries); + } + return 0; +} + +static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, + int oldentry, struct exfat_chain *p_newdir, + struct exfat_uni_name *p_uniname, struct exfat_inode_info *ei) +{ + int ret, newentry, num_new_entries, num_old_entries; + sector_t sector_mov, sector_new; + struct exfat_dentry *epmov, *epnew; + struct super_block *sb = inode->i_sb; + struct buffer_head *mov_bh, *new_bh; + + epmov = exfat_get_dentry(sb, p_olddir, oldentry, &mov_bh, §or_mov); + if (!epmov) + return -EIO; + + /* check if the source and target directory is the same */ + if (exfat_get_entry_type(epmov) == TYPE_DIR && + le32_to_cpu(epmov->dentry.stream.start_clu) == p_newdir->dir) + return -EINVAL; + + num_old_entries = exfat_count_ext_entries(sb, p_olddir, oldentry, + epmov); + if (num_old_entries < 0) + return -EIO; + num_old_entries++; + + num_new_entries = exfat_calc_num_entries(p_uniname); + if (num_new_entries < 0) + return num_new_entries; + + newentry = exfat_find_empty_entry(inode, p_newdir, num_new_entries); + if (newentry < 0) + return newentry; /* -EIO or -ENOSPC */ + + epnew = exfat_get_dentry(sb, p_newdir, newentry, &new_bh, §or_new); + if (!epnew) + return -EIO; + + memcpy(epnew, epmov, DENTRY_SIZE); + if (exfat_get_entry_type(epnew) == TYPE_FILE) { + epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); + ei->attr |= ATTR_ARCHIVE; + } + exfat_update_bh(sb, new_bh, IS_DIRSYNC(inode)); + brelse(mov_bh); + brelse(new_bh); + + epmov = exfat_get_dentry(sb, p_olddir, oldentry + 1, &mov_bh, + §or_mov); + epnew = exfat_get_dentry(sb, p_newdir, newentry + 1, &new_bh, + §or_new); + if (!epmov || !epnew) + return -EIO; + + memcpy(epnew, epmov, DENTRY_SIZE); + exfat_update_bh(sb, new_bh, IS_DIRSYNC(inode)); + brelse(mov_bh); + brelse(new_bh); + + ret = exfat_init_ext_entry(inode, p_newdir, newentry, num_new_entries, + p_uniname); + if (ret) + return ret; + + exfat_remove_entries(inode, p_olddir, oldentry, 0, num_old_entries); + + exfat_chain_set(&ei->dir, p_newdir->dir, p_newdir->size, + p_newdir->flags); + + ei->entry = newentry; + return 0; +} + +static void exfat_update_parent_info(struct exfat_inode_info *ei, + struct inode *parent_inode) +{ + struct exfat_sb_info *sbi = EXFAT_SB(parent_inode->i_sb); + struct exfat_inode_info *parent_ei = EXFAT_I(parent_inode); + loff_t parent_isize = i_size_read(parent_inode); + + /* + * the problem that struct exfat_inode_info caches wrong parent info. + * + * because of flag-mismatch of ei->dir, + * there is abnormal traversing cluster chain. + */ + if (unlikely(parent_ei->flags != ei->dir.flags || + parent_isize != EXFAT_CLU_TO_B(ei->dir.size, sbi) || + parent_ei->start_clu != ei->dir.dir)) { + exfat_chain_set(&ei->dir, parent_ei->start_clu, + EXFAT_B_TO_CLU_ROUND_UP(parent_isize, sbi), + parent_ei->flags); + } +} + +/* rename or move a old file into a new file */ +static int __exfat_rename(struct inode *old_parent_inode, + struct exfat_inode_info *ei, struct inode *new_parent_inode, + struct dentry *new_dentry) +{ + int ret; + int dentry; + struct exfat_chain olddir, newdir; + struct exfat_chain *p_dir = NULL; + struct exfat_uni_name uni_name; + struct exfat_dentry *ep; + struct super_block *sb = old_parent_inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + const unsigned char *new_path = new_dentry->d_name.name; + struct inode *new_inode = new_dentry->d_inode; + int num_entries; + struct exfat_inode_info *new_ei = NULL; + unsigned int new_entry_type = TYPE_UNUSED; + int new_entry = 0; + struct buffer_head *old_bh, *new_bh = NULL; + + /* check the validity of pointer parameters */ + if (new_path == NULL || strlen(new_path) == 0) + return -EINVAL; + + if (ei->dir.dir == DIR_DELETED) { + exfat_msg(sb, KERN_ERR, + "abnormal access to deleted source dentry"); + return -ENOENT; + } + + exfat_update_parent_info(ei, old_parent_inode); + + exfat_chain_dup(&olddir, &ei->dir); + dentry = ei->entry; + + ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh, NULL); + if (!ep) { + ret = -EIO; + goto out; + } + brelse(old_bh); + + /* check whether new dir is existing directory and empty */ + if (new_inode) { + ret = -EIO; + new_ei = EXFAT_I(new_inode); + + if (new_ei->dir.dir == DIR_DELETED) { + exfat_msg(sb, KERN_ERR, + "abnormal access to deleted target dentry"); + goto out; + } + + exfat_update_parent_info(new_ei, new_parent_inode); + + p_dir = &(new_ei->dir); + new_entry = new_ei->entry; + ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh, NULL); + if (!ep) + goto out; + + new_entry_type = exfat_get_entry_type(ep); + brelse(new_bh); + + /* if new_inode exists, update ei */ + if (new_entry_type == TYPE_DIR) { + struct exfat_chain new_clu; + + new_clu.dir = new_ei->start_clu; + new_clu.size = + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(new_inode), + sbi); + new_clu.flags = new_ei->flags; + + ret = exfat_check_dir_empty(sb, &new_clu); + if (ret) + goto out; + } + } + + /* check the validity of directory name in the given new pathname */ + ret = exfat_resolve_path(new_parent_inode, new_path, &newdir, + &uni_name); + if (ret) + goto out; + + exfat_set_vol_flags(sb, VOL_DIRTY); + + if (olddir.dir == newdir.dir) + ret = exfat_rename_file(new_parent_inode, &olddir, dentry, + &uni_name, ei); + else + ret = exfat_move_file(new_parent_inode, &olddir, dentry, + &newdir, &uni_name, ei); + + if (!ret && new_inode) { + /* delete entries of new_dir */ + ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh, NULL); + if (!ep) { + ret = -EIO; + goto del_out; + } + + num_entries = exfat_count_ext_entries(sb, p_dir, new_entry, ep); + if (num_entries < 0) { + ret = -EIO; + goto del_out; + } + brelse(new_bh); + + if (exfat_remove_entries(new_inode, p_dir, new_entry, 0, + num_entries + 1)) { + ret = -EIO; + goto del_out; + } + + /* Free the clusters if new_inode is a dir(as if exfat_rmdir) */ + if (new_entry_type == TYPE_DIR) { + /* new_ei, new_clu_to_free */ + struct exfat_chain new_clu_to_free; + + exfat_chain_set(&new_clu_to_free, new_ei->start_clu, + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(new_inode), + sbi), new_ei->flags); + + if (exfat_free_cluster(new_inode, &new_clu_to_free)) { + /* just set I/O error only */ + ret = -EIO; + } + + i_size_write(new_inode, 0); + new_ei->start_clu = EXFAT_EOF_CLUSTER; + new_ei->flags = ALLOC_NO_FAT_CHAIN; + } +del_out: + /* Update new_inode ei + * Prevent syncing removed new_inode + * (new_ei is already initialized above code ("if (new_inode)") + */ + new_ei->dir.dir = DIR_DELETED; + } + exfat_set_vol_flags(sb, VOL_CLEAN); +out: + return ret; +} + +static int exfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *old_inode, *new_inode; + struct super_block *sb = old_dir->i_sb; + loff_t i_pos; + int err; + + /* + * The VFS already checks for existence, so for local filesystems + * the RENAME_NOREPLACE implementation is equivalent to plain rename. + * Don't support any other flags + */ + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + old_inode = old_dentry->d_inode; + new_inode = new_dentry->d_inode; + + err = __exfat_rename(old_dir, EXFAT_I(old_inode), new_dir, new_dentry); + if (err) + goto unlock; + + inode_inc_iversion(new_dir); + new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = + EXFAT_I(new_dir)->i_crtime = current_time(new_dir); + if (IS_DIRSYNC(new_dir)) + exfat_sync_inode(new_dir); + else + mark_inode_dirty(new_dir); + + i_pos = ((loff_t)EXFAT_I(old_inode)->dir.dir << 32) | + (EXFAT_I(old_inode)->entry & 0xffffffff); + exfat_unhash_inode(old_inode); + exfat_hash_inode(old_inode, i_pos); + if (IS_DIRSYNC(new_dir)) + exfat_sync_inode(old_inode); + else + mark_inode_dirty(old_inode); + + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { + drop_nlink(old_dir); + if (!new_inode) + inc_nlink(new_dir); + } + + inode_inc_iversion(old_dir); + old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); + if (IS_DIRSYNC(old_dir)) + exfat_sync_inode(old_dir); + else + mark_inode_dirty(old_dir); + + if (new_inode) { + exfat_unhash_inode(new_inode); + + /* skip drop_nlink if new_inode already has been dropped */ + if (new_inode->i_nlink) { + drop_nlink(new_inode); + if (S_ISDIR(new_inode->i_mode)) + drop_nlink(new_inode); + } else { + exfat_msg(sb, KERN_WARNING, + "abnormal access to an inode dropped"); + WARN_ON(new_inode->i_nlink == 0); + } + new_inode->i_ctime = EXFAT_I(new_inode)->i_crtime = + current_time(new_inode); + } + +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return err; +} + +const struct inode_operations exfat_dir_inode_operations = { + .create = exfat_create, + .lookup = exfat_lookup, + .unlink = exfat_unlink, + .mkdir = exfat_mkdir, + .rmdir = exfat_rmdir, + .rename = exfat_rename, + .setattr = exfat_setattr, + .getattr = exfat_getattr, +}; diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c new file mode 100644 index 000000000000..6d1c3ae130ff --- /dev/null +++ b/fs/exfat/nls.c @@ -0,0 +1,831 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> +#include <asm/unaligned.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +/* Upcase tabel macro */ +#define EXFAT_NUM_UPCASE (2918) +#define UTBL_COUNT (0x10000) + +/* + * Upcase table in compressed format (7.2.5.1 Recommended Up-case Table + * in exfat specification, See: + * https://docs.microsoft.com/en-us/windows/win32/fileio/exfat-specification). + */ +static const unsigned short uni_def_upcase[EXFAT_NUM_UPCASE] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00f7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178, + 0x0100, 0x0100, 0x0102, 0x0102, 0x0104, 0x0104, 0x0106, 0x0106, + 0x0108, 0x0108, 0x010a, 0x010a, 0x010c, 0x010c, 0x010e, 0x010e, + 0x0110, 0x0110, 0x0112, 0x0112, 0x0114, 0x0114, 0x0116, 0x0116, + 0x0118, 0x0118, 0x011a, 0x011a, 0x011c, 0x011c, 0x011e, 0x011e, + 0x0120, 0x0120, 0x0122, 0x0122, 0x0124, 0x0124, 0x0126, 0x0126, + 0x0128, 0x0128, 0x012a, 0x012a, 0x012c, 0x012c, 0x012e, 0x012e, + 0x0130, 0x0131, 0x0132, 0x0132, 0x0134, 0x0134, 0x0136, 0x0136, + 0x0138, 0x0139, 0x0139, 0x013b, 0x013b, 0x013d, 0x013d, 0x013f, + 0x013f, 0x0141, 0x0141, 0x0143, 0x0143, 0x0145, 0x0145, 0x0147, + 0x0147, 0x0149, 0x014a, 0x014a, 0x014c, 0x014c, 0x014e, 0x014e, + 0x0150, 0x0150, 0x0152, 0x0152, 0x0154, 0x0154, 0x0156, 0x0156, + 0x0158, 0x0158, 0x015a, 0x015a, 0x015c, 0x015c, 0x015e, 0x015e, + 0x0160, 0x0160, 0x0162, 0x0162, 0x0164, 0x0164, 0x0166, 0x0166, + 0x0168, 0x0168, 0x016a, 0x016a, 0x016c, 0x016c, 0x016e, 0x016e, + 0x0170, 0x0170, 0x0172, 0x0172, 0x0174, 0x0174, 0x0176, 0x0176, + 0x0178, 0x0179, 0x0179, 0x017b, 0x017b, 0x017d, 0x017d, 0x017f, + 0x0243, 0x0181, 0x0182, 0x0182, 0x0184, 0x0184, 0x0186, 0x0187, + 0x0187, 0x0189, 0x018a, 0x018b, 0x018b, 0x018d, 0x018e, 0x018f, + 0x0190, 0x0191, 0x0191, 0x0193, 0x0194, 0x01f6, 0x0196, 0x0197, + 0x0198, 0x0198, 0x023d, 0x019b, 0x019c, 0x019d, 0x0220, 0x019f, + 0x01a0, 0x01a0, 0x01a2, 0x01a2, 0x01a4, 0x01a4, 0x01a6, 0x01a7, + 0x01a7, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ac, 0x01ae, 0x01af, + 0x01af, 0x01b1, 0x01b2, 0x01b3, 0x01b3, 0x01b5, 0x01b5, 0x01b7, + 0x01b8, 0x01b8, 0x01ba, 0x01bb, 0x01bc, 0x01bc, 0x01be, 0x01f7, + 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x01c4, 0x01c5, 0x01c4, 0x01c7, + 0x01c8, 0x01c7, 0x01ca, 0x01cb, 0x01ca, 0x01cd, 0x01cd, 0x01cf, + 0x01cf, 0x01d1, 0x01d1, 0x01d3, 0x01d3, 0x01d5, 0x01d5, 0x01d7, + 0x01d7, 0x01d9, 0x01d9, 0x01db, 0x01db, 0x018e, 0x01de, 0x01de, + 0x01e0, 0x01e0, 0x01e2, 0x01e2, 0x01e4, 0x01e4, 0x01e6, 0x01e6, + 0x01e8, 0x01e8, 0x01ea, 0x01ea, 0x01ec, 0x01ec, 0x01ee, 0x01ee, + 0x01f0, 0x01f1, 0x01f2, 0x01f1, 0x01f4, 0x01f4, 0x01f6, 0x01f7, + 0x01f8, 0x01f8, 0x01fa, 0x01fa, 0x01fc, 0x01fc, 0x01fe, 0x01fe, + 0x0200, 0x0200, 0x0202, 0x0202, 0x0204, 0x0204, 0x0206, 0x0206, + 0x0208, 0x0208, 0x020a, 0x020a, 0x020c, 0x020c, 0x020e, 0x020e, + 0x0210, 0x0210, 0x0212, 0x0212, 0x0214, 0x0214, 0x0216, 0x0216, + 0x0218, 0x0218, 0x021a, 0x021a, 0x021c, 0x021c, 0x021e, 0x021e, + 0x0220, 0x0221, 0x0222, 0x0222, 0x0224, 0x0224, 0x0226, 0x0226, + 0x0228, 0x0228, 0x022a, 0x022a, 0x022c, 0x022c, 0x022e, 0x022e, + 0x0230, 0x0230, 0x0232, 0x0232, 0x0234, 0x0235, 0x0236, 0x0237, + 0x0238, 0x0239, 0x2c65, 0x023b, 0x023b, 0x023d, 0x2c66, 0x023f, + 0x0240, 0x0241, 0x0241, 0x0243, 0x0244, 0x0245, 0x0246, 0x0246, + 0x0248, 0x0248, 0x024a, 0x024a, 0x024c, 0x024c, 0x024e, 0x024e, + 0x0250, 0x0251, 0x0252, 0x0181, 0x0186, 0x0255, 0x0189, 0x018a, + 0x0258, 0x018f, 0x025a, 0x0190, 0x025c, 0x025d, 0x025e, 0x025f, + 0x0193, 0x0261, 0x0262, 0x0194, 0x0264, 0x0265, 0x0266, 0x0267, + 0x0197, 0x0196, 0x026a, 0x2c62, 0x026c, 0x026d, 0x026e, 0x019c, + 0x0270, 0x0271, 0x019d, 0x0273, 0x0274, 0x019f, 0x0276, 0x0277, + 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x2c64, 0x027e, 0x027f, + 0x01a6, 0x0281, 0x0282, 0x01a9, 0x0284, 0x0285, 0x0286, 0x0287, + 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x028d, 0x028e, 0x028f, + 0x0290, 0x0291, 0x01b7, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, + 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, + 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, + 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, + 0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x02b5, 0x02b6, 0x02b7, + 0x02b8, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, + 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, + 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, + 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, + 0x02d8, 0x02d9, 0x02da, 0x02db, 0x02dc, 0x02dd, 0x02de, 0x02df, + 0x02e0, 0x02e1, 0x02e2, 0x02e3, 0x02e4, 0x02e5, 0x02e6, 0x02e7, + 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, + 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, + 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, + 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, + 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, + 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, + 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, + 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, + 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, + 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, + 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, + 0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0345, 0x0346, 0x0347, + 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, + 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, + 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, + 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, + 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, + 0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377, + 0x0378, 0x0379, 0x037a, 0x03fd, 0x03fe, 0x03ff, 0x037e, 0x037f, + 0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0386, 0x0387, + 0x0388, 0x0389, 0x038a, 0x038b, 0x038c, 0x038d, 0x038e, 0x038f, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, + 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x0386, 0x0388, 0x0389, 0x038a, + 0x03b0, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, + 0x03a0, 0x03a1, 0x03a3, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f, 0x03cf, + 0x03d0, 0x03d1, 0x03d2, 0x03d3, 0x03d4, 0x03d5, 0x03d6, 0x03d7, + 0x03d8, 0x03d8, 0x03da, 0x03da, 0x03dc, 0x03dc, 0x03de, 0x03de, + 0x03e0, 0x03e0, 0x03e2, 0x03e2, 0x03e4, 0x03e4, 0x03e6, 0x03e6, + 0x03e8, 0x03e8, 0x03ea, 0x03ea, 0x03ec, 0x03ec, 0x03ee, 0x03ee, + 0x03f0, 0x03f1, 0x03f9, 0x03f3, 0x03f4, 0x03f5, 0x03f6, 0x03f7, + 0x03f7, 0x03f9, 0x03fa, 0x03fa, 0x03fc, 0x03fd, 0x03fe, 0x03ff, + 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, + 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, + 0x0460, 0x0460, 0x0462, 0x0462, 0x0464, 0x0464, 0x0466, 0x0466, + 0x0468, 0x0468, 0x046a, 0x046a, 0x046c, 0x046c, 0x046e, 0x046e, + 0x0470, 0x0470, 0x0472, 0x0472, 0x0474, 0x0474, 0x0476, 0x0476, + 0x0478, 0x0478, 0x047a, 0x047a, 0x047c, 0x047c, 0x047e, 0x047e, + 0x0480, 0x0480, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, + 0x0488, 0x0489, 0x048a, 0x048a, 0x048c, 0x048c, 0x048e, 0x048e, + 0x0490, 0x0490, 0x0492, 0x0492, 0x0494, 0x0494, 0x0496, 0x0496, + 0x0498, 0x0498, 0x049a, 0x049a, 0x049c, 0x049c, 0x049e, 0x049e, + 0x04a0, 0x04a0, 0x04a2, 0x04a2, 0x04a4, 0x04a4, 0x04a6, 0x04a6, + 0x04a8, 0x04a8, 0x04aa, 0x04aa, 0x04ac, 0x04ac, 0x04ae, 0x04ae, + 0x04b0, 0x04b0, 0x04b2, 0x04b2, 0x04b4, 0x04b4, 0x04b6, 0x04b6, + 0x04b8, 0x04b8, 0x04ba, 0x04ba, 0x04bc, 0x04bc, 0x04be, 0x04be, + 0x04c0, 0x04c1, 0x04c1, 0x04c3, 0x04c3, 0x04c5, 0x04c5, 0x04c7, + 0x04c7, 0x04c9, 0x04c9, 0x04cb, 0x04cb, 0x04cd, 0x04cd, 0x04c0, + 0x04d0, 0x04d0, 0x04d2, 0x04d2, 0x04d4, 0x04d4, 0x04d6, 0x04d6, + 0x04d8, 0x04d8, 0x04da, 0x04da, 0x04dc, 0x04dc, 0x04de, 0x04de, + 0x04e0, 0x04e0, 0x04e2, 0x04e2, 0x04e4, 0x04e4, 0x04e6, 0x04e6, + 0x04e8, 0x04e8, 0x04ea, 0x04ea, 0x04ec, 0x04ec, 0x04ee, 0x04ee, + 0x04f0, 0x04f0, 0x04f2, 0x04f2, 0x04f4, 0x04f4, 0x04f6, 0x04f6, + 0x04f8, 0x04f8, 0x04fa, 0x04fa, 0x04fc, 0x04fc, 0x04fe, 0x04fe, + 0x0500, 0x0500, 0x0502, 0x0502, 0x0504, 0x0504, 0x0506, 0x0506, + 0x0508, 0x0508, 0x050a, 0x050a, 0x050c, 0x050c, 0x050e, 0x050e, + 0x0510, 0x0510, 0x0512, 0x0512, 0x0514, 0x0515, 0x0516, 0x0517, + 0x0518, 0x0519, 0x051a, 0x051b, 0x051c, 0x051d, 0x051e, 0x051f, + 0x0520, 0x0521, 0x0522, 0x0523, 0x0524, 0x0525, 0x0526, 0x0527, + 0x0528, 0x0529, 0x052a, 0x052b, 0x052c, 0x052d, 0x052e, 0x052f, + 0x0530, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, + 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f, + 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, + 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f, + 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0557, + 0x0558, 0x0559, 0x055a, 0x055b, 0x055c, 0x055d, 0x055e, 0x055f, + 0x0560, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, + 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f, + 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, + 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f, + 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0xffff, + 0x17f6, 0x2c63, 0x1d7e, 0x1d7f, 0x1d80, 0x1d81, 0x1d82, 0x1d83, + 0x1d84, 0x1d85, 0x1d86, 0x1d87, 0x1d88, 0x1d89, 0x1d8a, 0x1d8b, + 0x1d8c, 0x1d8d, 0x1d8e, 0x1d8f, 0x1d90, 0x1d91, 0x1d92, 0x1d93, + 0x1d94, 0x1d95, 0x1d96, 0x1d97, 0x1d98, 0x1d99, 0x1d9a, 0x1d9b, + 0x1d9c, 0x1d9d, 0x1d9e, 0x1d9f, 0x1da0, 0x1da1, 0x1da2, 0x1da3, + 0x1da4, 0x1da5, 0x1da6, 0x1da7, 0x1da8, 0x1da9, 0x1daa, 0x1dab, + 0x1dac, 0x1dad, 0x1dae, 0x1daf, 0x1db0, 0x1db1, 0x1db2, 0x1db3, + 0x1db4, 0x1db5, 0x1db6, 0x1db7, 0x1db8, 0x1db9, 0x1dba, 0x1dbb, + 0x1dbc, 0x1dbd, 0x1dbe, 0x1dbf, 0x1dc0, 0x1dc1, 0x1dc2, 0x1dc3, + 0x1dc4, 0x1dc5, 0x1dc6, 0x1dc7, 0x1dc8, 0x1dc9, 0x1dca, 0x1dcb, + 0x1dcc, 0x1dcd, 0x1dce, 0x1dcf, 0x1dd0, 0x1dd1, 0x1dd2, 0x1dd3, + 0x1dd4, 0x1dd5, 0x1dd6, 0x1dd7, 0x1dd8, 0x1dd9, 0x1dda, 0x1ddb, + 0x1ddc, 0x1ddd, 0x1dde, 0x1ddf, 0x1de0, 0x1de1, 0x1de2, 0x1de3, + 0x1de4, 0x1de5, 0x1de6, 0x1de7, 0x1de8, 0x1de9, 0x1dea, 0x1deb, + 0x1dec, 0x1ded, 0x1dee, 0x1def, 0x1df0, 0x1df1, 0x1df2, 0x1df3, + 0x1df4, 0x1df5, 0x1df6, 0x1df7, 0x1df8, 0x1df9, 0x1dfa, 0x1dfb, + 0x1dfc, 0x1dfd, 0x1dfe, 0x1dff, 0x1e00, 0x1e00, 0x1e02, 0x1e02, + 0x1e04, 0x1e04, 0x1e06, 0x1e06, 0x1e08, 0x1e08, 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, 0x1e0e, 0x1e0e, 0x1e10, 0x1e10, 0x1e12, 0x1e12, + 0x1e14, 0x1e14, 0x1e16, 0x1e16, 0x1e18, 0x1e18, 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, 0x1e1e, 0x1e1e, 0x1e20, 0x1e20, 0x1e22, 0x1e22, + 0x1e24, 0x1e24, 0x1e26, 0x1e26, 0x1e28, 0x1e28, 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, 0x1e2e, 0x1e2e, 0x1e30, 0x1e30, 0x1e32, 0x1e32, + 0x1e34, 0x1e34, 0x1e36, 0x1e36, 0x1e38, 0x1e38, 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, 0x1e3e, 0x1e3e, 0x1e40, 0x1e40, 0x1e42, 0x1e42, + 0x1e44, 0x1e44, 0x1e46, 0x1e46, 0x1e48, 0x1e48, 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, 0x1e4e, 0x1e4e, 0x1e50, 0x1e50, 0x1e52, 0x1e52, + 0x1e54, 0x1e54, 0x1e56, 0x1e56, 0x1e58, 0x1e58, 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, 0x1e5e, 0x1e5e, 0x1e60, 0x1e60, 0x1e62, 0x1e62, + 0x1e64, 0x1e64, 0x1e66, 0x1e66, 0x1e68, 0x1e68, 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, 0x1e6e, 0x1e6e, 0x1e70, 0x1e70, 0x1e72, 0x1e72, + 0x1e74, 0x1e74, 0x1e76, 0x1e76, 0x1e78, 0x1e78, 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, 0x1e7e, 0x1e7e, 0x1e80, 0x1e80, 0x1e82, 0x1e82, + 0x1e84, 0x1e84, 0x1e86, 0x1e86, 0x1e88, 0x1e88, 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, 0x1e8e, 0x1e8e, 0x1e90, 0x1e90, 0x1e92, 0x1e92, + 0x1e94, 0x1e94, 0x1e96, 0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1e9b, + 0x1e9c, 0x1e9d, 0x1e9e, 0x1e9f, 0x1ea0, 0x1ea0, 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, 0x1ea6, 0x1ea6, 0x1ea8, 0x1ea8, 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, 0x1eae, 0x1eae, 0x1eb0, 0x1eb0, 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, 0x1eb6, 0x1eb6, 0x1eb8, 0x1eb8, 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, 0x1ebe, 0x1ebe, 0x1ec0, 0x1ec0, 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, 0x1ec6, 0x1ec6, 0x1ec8, 0x1ec8, 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, 0x1ece, 0x1ece, 0x1ed0, 0x1ed0, 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, 0x1ed6, 0x1ed6, 0x1ed8, 0x1ed8, 0x1eda, 0x1eda, + 0x1edc, 0x1edc, 0x1ede, 0x1ede, 0x1ee0, 0x1ee0, 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, 0x1ee6, 0x1ee6, 0x1ee8, 0x1ee8, 0x1eea, 0x1eea, + 0x1eec, 0x1eec, 0x1eee, 0x1eee, 0x1ef0, 0x1ef0, 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, 0x1ef6, 0x1ef6, 0x1ef8, 0x1ef8, 0x1efa, 0x1efb, + 0x1efc, 0x1efd, 0x1efe, 0x1eff, 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, + 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, + 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, + 0x1f1c, 0x1f1d, 0x1f16, 0x1f17, 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, + 0x1f1c, 0x1f1d, 0x1f1e, 0x1f1f, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, + 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, + 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, + 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, + 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, + 0x1f4c, 0x1f4d, 0x1f46, 0x1f47, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, + 0x1f4c, 0x1f4d, 0x1f4e, 0x1f4f, 0x1f50, 0x1f59, 0x1f52, 0x1f5b, + 0x1f54, 0x1f5d, 0x1f56, 0x1f5f, 0x1f58, 0x1f59, 0x1f5a, 0x1f5b, + 0x1f5c, 0x1f5d, 0x1f5e, 0x1f5f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, + 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, + 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, + 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, + 0x1ffa, 0x1ffb, 0x1f7e, 0x1f7f, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, + 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, + 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, + 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, + 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, + 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, + 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fb8, 0x1fb9, 0x1fb2, 0x1fbc, + 0x1fb4, 0x1fb5, 0x1fb6, 0x1fb7, 0x1fb8, 0x1fb9, 0x1fba, 0x1fbb, + 0x1fbc, 0x1fbd, 0x1fbe, 0x1fbf, 0x1fc0, 0x1fc1, 0x1fc2, 0x1fc3, + 0x1fc4, 0x1fc5, 0x1fc6, 0x1fc7, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, + 0x1fc3, 0x1fcd, 0x1fce, 0x1fcf, 0x1fd8, 0x1fd9, 0x1fd2, 0x1fd3, + 0x1fd4, 0x1fd5, 0x1fd6, 0x1fd7, 0x1fd8, 0x1fd9, 0x1fda, 0x1fdb, + 0x1fdc, 0x1fdd, 0x1fde, 0x1fdf, 0x1fe8, 0x1fe9, 0x1fe2, 0x1fe3, + 0x1fe4, 0x1fec, 0x1fe6, 0x1fe7, 0x1fe8, 0x1fe9, 0x1fea, 0x1feb, + 0x1fec, 0x1fed, 0x1fee, 0x1fef, 0x1ff0, 0x1ff1, 0x1ff2, 0x1ff3, + 0x1ff4, 0x1ff5, 0x1ff6, 0x1ff7, 0x1ff8, 0x1ff9, 0x1ffa, 0x1ffb, + 0x1ff3, 0x1ffd, 0x1ffe, 0x1fff, 0x2000, 0x2001, 0x2002, 0x2003, + 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 0x200b, + 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012, 0x2013, + 0x2014, 0x2015, 0x2016, 0x2017, 0x2018, 0x2019, 0x201a, 0x201b, + 0x201c, 0x201d, 0x201e, 0x201f, 0x2020, 0x2021, 0x2022, 0x2023, + 0x2024, 0x2025, 0x2026, 0x2027, 0x2028, 0x2029, 0x202a, 0x202b, + 0x202c, 0x202d, 0x202e, 0x202f, 0x2030, 0x2031, 0x2032, 0x2033, + 0x2034, 0x2035, 0x2036, 0x2037, 0x2038, 0x2039, 0x203a, 0x203b, + 0x203c, 0x203d, 0x203e, 0x203f, 0x2040, 0x2041, 0x2042, 0x2043, + 0x2044, 0x2045, 0x2046, 0x2047, 0x2048, 0x2049, 0x204a, 0x204b, + 0x204c, 0x204d, 0x204e, 0x204f, 0x2050, 0x2051, 0x2052, 0x2053, + 0x2054, 0x2055, 0x2056, 0x2057, 0x2058, 0x2059, 0x205a, 0x205b, + 0x205c, 0x205d, 0x205e, 0x205f, 0x2060, 0x2061, 0x2062, 0x2063, + 0x2064, 0x2065, 0x2066, 0x2067, 0x2068, 0x2069, 0x206a, 0x206b, + 0x206c, 0x206d, 0x206e, 0x206f, 0x2070, 0x2071, 0x2072, 0x2073, + 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079, 0x207a, 0x207b, + 0x207c, 0x207d, 0x207e, 0x207f, 0x2080, 0x2081, 0x2082, 0x2083, + 0x2084, 0x2085, 0x2086, 0x2087, 0x2088, 0x2089, 0x208a, 0x208b, + 0x208c, 0x208d, 0x208e, 0x208f, 0x2090, 0x2091, 0x2092, 0x2093, + 0x2094, 0x2095, 0x2096, 0x2097, 0x2098, 0x2099, 0x209a, 0x209b, + 0x209c, 0x209d, 0x209e, 0x209f, 0x20a0, 0x20a1, 0x20a2, 0x20a3, + 0x20a4, 0x20a5, 0x20a6, 0x20a7, 0x20a8, 0x20a9, 0x20aa, 0x20ab, + 0x20ac, 0x20ad, 0x20ae, 0x20af, 0x20b0, 0x20b1, 0x20b2, 0x20b3, + 0x20b4, 0x20b5, 0x20b6, 0x20b7, 0x20b8, 0x20b9, 0x20ba, 0x20bb, + 0x20bc, 0x20bd, 0x20be, 0x20bf, 0x20c0, 0x20c1, 0x20c2, 0x20c3, + 0x20c4, 0x20c5, 0x20c6, 0x20c7, 0x20c8, 0x20c9, 0x20ca, 0x20cb, + 0x20cc, 0x20cd, 0x20ce, 0x20cf, 0x20d0, 0x20d1, 0x20d2, 0x20d3, + 0x20d4, 0x20d5, 0x20d6, 0x20d7, 0x20d8, 0x20d9, 0x20da, 0x20db, + 0x20dc, 0x20dd, 0x20de, 0x20df, 0x20e0, 0x20e1, 0x20e2, 0x20e3, + 0x20e4, 0x20e5, 0x20e6, 0x20e7, 0x20e8, 0x20e9, 0x20ea, 0x20eb, + 0x20ec, 0x20ed, 0x20ee, 0x20ef, 0x20f0, 0x20f1, 0x20f2, 0x20f3, + 0x20f4, 0x20f5, 0x20f6, 0x20f7, 0x20f8, 0x20f9, 0x20fa, 0x20fb, + 0x20fc, 0x20fd, 0x20fe, 0x20ff, 0x2100, 0x2101, 0x2102, 0x2103, + 0x2104, 0x2105, 0x2106, 0x2107, 0x2108, 0x2109, 0x210a, 0x210b, + 0x210c, 0x210d, 0x210e, 0x210f, 0x2110, 0x2111, 0x2112, 0x2113, + 0x2114, 0x2115, 0x2116, 0x2117, 0x2118, 0x2119, 0x211a, 0x211b, + 0x211c, 0x211d, 0x211e, 0x211f, 0x2120, 0x2121, 0x2122, 0x2123, + 0x2124, 0x2125, 0x2126, 0x2127, 0x2128, 0x2129, 0x212a, 0x212b, + 0x212c, 0x212d, 0x212e, 0x212f, 0x2130, 0x2131, 0x2132, 0x2133, + 0x2134, 0x2135, 0x2136, 0x2137, 0x2138, 0x2139, 0x213a, 0x213b, + 0x213c, 0x213d, 0x213e, 0x213f, 0x2140, 0x2141, 0x2142, 0x2143, + 0x2144, 0x2145, 0x2146, 0x2147, 0x2148, 0x2149, 0x214a, 0x214b, + 0x214c, 0x214d, 0x2132, 0x214f, 0x2150, 0x2151, 0x2152, 0x2153, + 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215a, 0x215b, + 0x215c, 0x215d, 0x215e, 0x215f, 0x2160, 0x2161, 0x2162, 0x2163, + 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216a, 0x216b, + 0x216c, 0x216d, 0x216e, 0x216f, 0x2160, 0x2161, 0x2162, 0x2163, + 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216a, 0x216b, + 0x216c, 0x216d, 0x216e, 0x216f, 0x2180, 0x2181, 0x2182, 0x2183, + 0x2183, 0xffff, 0x034b, 0x24b6, 0x24b7, 0x24b8, 0x24b9, 0x24ba, + 0x24bb, 0x24bc, 0x24bd, 0x24be, 0x24bf, 0x24c0, 0x24c1, 0x24c2, + 0x24c3, 0x24c4, 0x24c5, 0x24c6, 0x24c7, 0x24c8, 0x24c9, 0x24ca, + 0x24cb, 0x24cc, 0x24cd, 0x24ce, 0x24cf, 0xffff, 0x0746, 0x2c00, + 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, 0x2c06, 0x2c07, 0x2c08, + 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, 0x2c0f, 0x2c10, + 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17, 0x2c18, + 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f, 0x2c20, + 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27, 0x2c28, + 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x2c5f, 0x2c60, + 0x2c60, 0x2c62, 0x2c63, 0x2c64, 0x2c65, 0x2c66, 0x2c67, 0x2c67, + 0x2c69, 0x2c69, 0x2c6b, 0x2c6b, 0x2c6d, 0x2c6e, 0x2c6f, 0x2c70, + 0x2c71, 0x2c72, 0x2c73, 0x2c74, 0x2c75, 0x2c75, 0x2c77, 0x2c78, + 0x2c79, 0x2c7a, 0x2c7b, 0x2c7c, 0x2c7d, 0x2c7e, 0x2c7f, 0x2c80, + 0x2c80, 0x2c82, 0x2c82, 0x2c84, 0x2c84, 0x2c86, 0x2c86, 0x2c88, + 0x2c88, 0x2c8a, 0x2c8a, 0x2c8c, 0x2c8c, 0x2c8e, 0x2c8e, 0x2c90, + 0x2c90, 0x2c92, 0x2c92, 0x2c94, 0x2c94, 0x2c96, 0x2c96, 0x2c98, + 0x2c98, 0x2c9a, 0x2c9a, 0x2c9c, 0x2c9c, 0x2c9e, 0x2c9e, 0x2ca0, + 0x2ca0, 0x2ca2, 0x2ca2, 0x2ca4, 0x2ca4, 0x2ca6, 0x2ca6, 0x2ca8, + 0x2ca8, 0x2caa, 0x2caa, 0x2cac, 0x2cac, 0x2cae, 0x2cae, 0x2cb0, + 0x2cb0, 0x2cb2, 0x2cb2, 0x2cb4, 0x2cb4, 0x2cb6, 0x2cb6, 0x2cb8, + 0x2cb8, 0x2cba, 0x2cba, 0x2cbc, 0x2cbc, 0x2cbe, 0x2cbe, 0x2cc0, + 0x2cc0, 0x2cc2, 0x2cc2, 0x2cc4, 0x2cc4, 0x2cc6, 0x2cc6, 0x2cc8, + 0x2cc8, 0x2cca, 0x2cca, 0x2ccc, 0x2ccc, 0x2cce, 0x2cce, 0x2cd0, + 0x2cd0, 0x2cd2, 0x2cd2, 0x2cd4, 0x2cd4, 0x2cd6, 0x2cd6, 0x2cd8, + 0x2cd8, 0x2cda, 0x2cda, 0x2cdc, 0x2cdc, 0x2cde, 0x2cde, 0x2ce0, + 0x2ce0, 0x2ce2, 0x2ce2, 0x2ce4, 0x2ce5, 0x2ce6, 0x2ce7, 0x2ce8, + 0x2ce9, 0x2cea, 0x2ceb, 0x2cec, 0x2ced, 0x2cee, 0x2cef, 0x2cf0, + 0x2cf1, 0x2cf2, 0x2cf3, 0x2cf4, 0x2cf5, 0x2cf6, 0x2cf7, 0x2cf8, + 0x2cf9, 0x2cfa, 0x2cfb, 0x2cfc, 0x2cfd, 0x2cfe, 0x2cff, 0x10a0, + 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, 0x10a8, + 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, 0x10b0, + 0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, 0x10b8, + 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, 0x10c0, + 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0xffff, 0xd21b, 0xff21, + 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, 0xff28, 0xff29, + 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, 0xff30, 0xff31, + 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, + 0xff3a, 0xff5b, 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, + 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68, 0xff69, + 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff70, 0xff71, + 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, + 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, + 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, + 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, + 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, + 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0xff9e, 0xff9f, 0xffa0, 0xffa1, + 0xffa2, 0xffa3, 0xffa4, 0xffa5, 0xffa6, 0xffa7, 0xffa8, 0xffa9, + 0xffaa, 0xffab, 0xffac, 0xffad, 0xffae, 0xffaf, 0xffb0, 0xffb1, + 0xffb2, 0xffb3, 0xffb4, 0xffb5, 0xffb6, 0xffb7, 0xffb8, 0xffb9, + 0xffba, 0xffbb, 0xffbc, 0xffbd, 0xffbe, 0xffbf, 0xffc0, 0xffc1, + 0xffc2, 0xffc3, 0xffc4, 0xffc5, 0xffc6, 0xffc7, 0xffc8, 0xffc9, + 0xffca, 0xffcb, 0xffcc, 0xffcd, 0xffce, 0xffcf, 0xffd0, 0xffd1, + 0xffd2, 0xffd3, 0xffd4, 0xffd5, 0xffd6, 0xffd7, 0xffd8, 0xffd9, + 0xffda, 0xffdb, 0xffdc, 0xffdd, 0xffde, 0xffdf, 0xffe0, 0xffe1, + 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe7, 0xffe8, 0xffe9, + 0xffea, 0xffeb, 0xffec, 0xffed, 0xffee, 0xffef, 0xfff0, 0xfff1, + 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7, 0xfff8, 0xfff9, + 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff, +}; + +/* + * Allow full-width illegal characters : + * "MS windows 7" supports full-width-invalid-name-characters. + * So we should check half-width-invalid-name-characters(ASCII) only + * for compatibility. + * + * " * / : < > ? \ | + */ +static unsigned short bad_uni_chars[] = { + 0x0022, 0x002A, 0x002F, 0x003A, + 0x003C, 0x003E, 0x003F, 0x005C, 0x007C, + 0 +}; + +static int exfat_convert_char_to_ucs2(struct nls_table *nls, + const unsigned char *ch, int ch_len, unsigned short *ucs2, + int *lossy) +{ + int len; + + *ucs2 = 0x0; + + if (ch[0] < 0x80) { + *ucs2 = ch[0]; + return 1; + } + + len = nls->char2uni(ch, ch_len, ucs2); + if (len < 0) { + /* conversion failed */ + if (lossy != NULL) + *lossy |= NLS_NAME_LOSSY; + *ucs2 = '_'; + return 1; + } + return len; +} + +static int exfat_convert_ucs2_to_char(struct nls_table *nls, + unsigned short ucs2, unsigned char *ch, int *lossy) +{ + int len; + + ch[0] = 0x0; + + if (ucs2 < 0x0080) { + ch[0] = ucs2; + return 1; + } + + len = nls->uni2char(ucs2, ch, MAX_CHARSET_SIZE); + if (len < 0) { + /* conversion failed */ + if (lossy != NULL) + *lossy |= NLS_NAME_LOSSY; + ch[0] = '_'; + return 1; + } + return len; +} + +unsigned short exfat_toupper(struct super_block *sb, unsigned short a) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return sbi->vol_utbl[a] ? sbi->vol_utbl[a] : a; +} + +static unsigned short *exfat_wstrchr(unsigned short *str, unsigned short wchar) +{ + while (*str) { + if (*(str++) == wchar) + return str; + } + return NULL; +} + +int exfat_uniname_ncmp(struct super_block *sb, unsigned short *a, + unsigned short *b, unsigned int len) +{ + int i; + + for (i = 0; i < len; i++, a++, b++) + if (exfat_toupper(sb, *a) != exfat_toupper(sb, *b)) + return 1; + return 0; +} + +static int exfat_utf16_to_utf8(struct super_block *sb, + struct exfat_uni_name *p_uniname, unsigned char *p_cstring, + int buflen) +{ + int len; + const unsigned short *uniname = p_uniname->name; + + /* always len >= 0 */ + len = utf16s_to_utf8s(uniname, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN, + p_cstring, buflen); + p_cstring[len] = '\0'; + return len; +} + +static int exfat_utf8_to_utf16(struct super_block *sb, + const unsigned char *p_cstring, const int len, + struct exfat_uni_name *p_uniname, int *p_lossy) +{ + int i, unilen, lossy = NLS_NAME_NO_LOSSY; + unsigned short upname[MAX_NAME_LENGTH + 1]; + unsigned short *uniname = p_uniname->name; + + WARN_ON(!len); + + unilen = utf8s_to_utf16s(p_cstring, len, UTF16_HOST_ENDIAN, + (wchar_t *)uniname, MAX_NAME_LENGTH + 2); + if (unilen < 0) { + exfat_msg(sb, KERN_ERR, + "failed to %s (err : %d) nls len : %d", + __func__, unilen, len); + return unilen; + } + + if (unilen > MAX_NAME_LENGTH) { + exfat_msg(sb, KERN_ERR, + "failed to %s (estr:ENAMETOOLONG) nls len : %d, unilen : %d > %d", + __func__, len, unilen, MAX_NAME_LENGTH); + return -ENAMETOOLONG; + } + + p_uniname->name_len = unilen & 0xFF; + + for (i = 0; i < unilen; i++) { + if (*uniname < 0x0020 || + exfat_wstrchr(bad_uni_chars, *uniname)) + lossy |= NLS_NAME_LOSSY; + + upname[i] = exfat_toupper(sb, *uniname); + uniname++; + } + + *uniname = '\0'; + p_uniname->name_len = unilen; + p_uniname->name_hash = exfat_calc_chksum_2byte(upname, unilen << 1, 0, + CS_DEFAULT); + + if (p_lossy) + *p_lossy = lossy; + return unilen; +} + +#define PLANE_SIZE 0x00010000 +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 +#define SURROGATE_LOW 0x00000400 +#define SURROGATE_BITS 0x000003ff + +unsigned short exfat_high_surrogate(unicode_t u) +{ + return ((u - PLANE_SIZE) >> 10) + SURROGATE_PAIR; +} + +unsigned short exfat_low_surrogate(unicode_t u) +{ + return ((u - PLANE_SIZE) & SURROGATE_BITS) | SURROGATE_PAIR | + SURROGATE_LOW; +} + +static int __exfat_utf16_to_nls(struct super_block *sb, + struct exfat_uni_name *p_uniname, unsigned char *p_cstring, + int buflen) +{ + int i, j, len, out_len = 0; + unsigned char buf[MAX_CHARSET_SIZE]; + const unsigned short *uniname = p_uniname->name; + struct nls_table *nls = EXFAT_SB(sb)->nls_io; + + i = 0; + while (i < MAX_NAME_LENGTH && out_len < (buflen - 1)) { + if (*uniname == '\0') + break; + if ((*uniname & SURROGATE_MASK) != SURROGATE_PAIR) { + len = exfat_convert_ucs2_to_char(nls, *uniname, buf, + NULL); + } else { + /* Process UTF-16 surrogate pair as one character */ + if (!(*uniname & SURROGATE_LOW) && + i+1 < MAX_NAME_LENGTH && + (*(uniname+1) & SURROGATE_MASK) == SURROGATE_PAIR && + (*(uniname+1) & SURROGATE_LOW)) { + uniname++; + i++; + } + + /* + * UTF-16 surrogate pair encodes code points above + * U+FFFF. Code points above U+FFFF are not supported + * by kernel NLS framework therefore use replacement + * character + */ + len = 1; + buf[0] = '_'; + } + + if (out_len + len >= buflen) + len = buflen - 1 - out_len; + out_len += len; + + if (len > 1) { + for (j = 0; j < len; j++) + *p_cstring++ = buf[j]; + } else { /* len == 1 */ + *p_cstring++ = *buf; + } + + uniname++; + i++; + } + + *p_cstring = '\0'; + return out_len; +} + +static int exfat_nls_to_ucs2(struct super_block *sb, + const unsigned char *p_cstring, const int len, + struct exfat_uni_name *p_uniname, int *p_lossy) +{ + int i = 0, unilen = 0, lossy = NLS_NAME_NO_LOSSY; + unsigned short upname[MAX_NAME_LENGTH + 1]; + unsigned short *uniname = p_uniname->name; + struct nls_table *nls = EXFAT_SB(sb)->nls_io; + + WARN_ON(!len); + + while (unilen < MAX_NAME_LENGTH && i < len) { + i += exfat_convert_char_to_ucs2(nls, p_cstring + i, len - i, + uniname, &lossy); + + if (*uniname < 0x0020 || + exfat_wstrchr(bad_uni_chars, *uniname)) + lossy |= NLS_NAME_LOSSY; + + upname[unilen] = exfat_toupper(sb, *uniname); + uniname++; + unilen++; + } + + if (p_cstring[i] != '\0') + lossy |= NLS_NAME_OVERLEN; + + *uniname = '\0'; + p_uniname->name_len = unilen; + p_uniname->name_hash = exfat_calc_chksum_2byte(upname, unilen << 1, 0, + CS_DEFAULT); + + if (p_lossy) + *p_lossy = lossy; + return unilen; +} + +int exfat_utf16_to_nls(struct super_block *sb, struct exfat_uni_name *uniname, + unsigned char *p_cstring, int buflen) +{ + if (EXFAT_SB(sb)->options.utf8) + return exfat_utf16_to_utf8(sb, uniname, p_cstring, + buflen); + return __exfat_utf16_to_nls(sb, uniname, p_cstring, buflen); +} + +int exfat_nls_to_utf16(struct super_block *sb, const unsigned char *p_cstring, + const int len, struct exfat_uni_name *uniname, int *p_lossy) +{ + if (EXFAT_SB(sb)->options.utf8) + return exfat_utf8_to_utf16(sb, p_cstring, len, + uniname, p_lossy); + return exfat_nls_to_ucs2(sb, p_cstring, len, uniname, p_lossy); +} + +static int exfat_load_upcase_table(struct super_block *sb, + sector_t sector, unsigned long long num_sectors, + unsigned int utbl_checksum) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int sect_size = sb->s_blocksize; + unsigned int i, index = 0, checksum = 0; + int ret; + unsigned char skip = false; + unsigned short *upcase_table; + + upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + if (!upcase_table) + return -ENOMEM; + + sbi->vol_utbl = upcase_table; + num_sectors += sector; + + while (sector < num_sectors) { + struct buffer_head *bh; + + bh = sb_bread(sb, sector); + if (!bh) { + exfat_msg(sb, KERN_ERR, + "failed to read sector(0x%llx)\n", + (unsigned long long)sector); + ret = -EIO; + goto free_table; + } + sector++; + for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) { + unsigned short uni = get_unaligned_le16(bh->b_data + i); + + checksum = ((checksum & 1) ? 0x80000000 : 0) + + (checksum >> 1) + + *(((unsigned char *)bh->b_data) + i); + checksum = ((checksum & 1) ? 0x80000000 : 0) + + (checksum >> 1) + + *(((unsigned char *)bh->b_data) + (i + 1)); + + if (skip) { + index += uni; + skip = false; + } else if (uni == index) { + index++; + } else if (uni == 0xFFFF) { + skip = true; + } else { /* uni != index , uni != 0xFFFF */ + upcase_table[index] = uni; + index++; + } + } + brelse(bh); + } + + if (index >= 0xFFFF && utbl_checksum == checksum) + return 0; + + exfat_msg(sb, KERN_ERR, + "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)\n", + index, checksum, utbl_checksum); + ret = -EINVAL; +free_table: + exfat_free_upcase_table(sbi); + return ret; +} + +static int exfat_load_default_upcase_table(struct super_block *sb) +{ + int i, ret = -EIO; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned char skip = false; + unsigned short uni = 0, *upcase_table; + unsigned int index = 0; + + upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + if (!upcase_table) + return -ENOMEM; + + sbi->vol_utbl = upcase_table; + + for (i = 0; index <= 0xFFFF && i < EXFAT_NUM_UPCASE; i++) { + uni = uni_def_upcase[i]; + if (skip) { + index += uni; + skip = false; + } else if (uni == index) { + index++; + } else if (uni == 0xFFFF) { + skip = true; + } else { + upcase_table[index] = uni; + index++; + } + } + + if (index >= 0xFFFF) + return 0; + + /* FATAL error: default upcase table has error */ + exfat_free_upcase_table(sbi); + return ret; +} + +int exfat_create_upcase_table(struct super_block *sb) +{ + int i, ret; + unsigned int tbl_clu, type; + sector_t sector; + unsigned long long tbl_size, num_sectors; + unsigned char blksize_bits = sb->s_blocksize_bits; + struct exfat_chain clu; + struct exfat_dentry *ep; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + + clu.dir = sbi->root_dir; + clu.flags = ALLOC_FAT_CHAIN; + + while (clu.dir != EXFAT_EOF_CLUSTER) { + for (i = 0; i < sbi->dentries_per_clu; i++) { + ep = exfat_get_dentry(sb, &clu, i, &bh, NULL); + if (!ep) + return -EIO; + + type = exfat_get_entry_type(ep); + if (type == TYPE_UNUSED) { + brelse(bh); + break; + } + + if (type != TYPE_UPCASE) { + brelse(bh); + continue; + } + + tbl_clu = le32_to_cpu(ep->dentry.upcase.start_clu); + tbl_size = le64_to_cpu(ep->dentry.upcase.size); + + sector = exfat_cluster_to_sector(sbi, tbl_clu); + num_sectors = ((tbl_size - 1) >> blksize_bits) + 1; + ret = exfat_load_upcase_table(sb, sector, num_sectors, + le32_to_cpu(ep->dentry.upcase.checksum)); + + brelse(bh); + if (ret && ret != -EIO) + goto load_default; + + /* load successfully */ + return ret; + } + + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + } + +load_default: + /* load default upcase table */ + return exfat_load_default_upcase_table(sb); +} + +void exfat_free_upcase_table(struct exfat_sb_info *sbi) +{ + kfree(sbi->vol_utbl); +} diff --git a/fs/exfat/super.c b/fs/exfat/super.c new file mode 100644 index 000000000000..16ed202ef527 --- /dev/null +++ b/fs/exfat/super.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/mount.h> +#include <linux/cred.h> +#include <linux/statfs.h> +#include <linux/seq_file.h> +#include <linux/blkdev.h> +#include <linux/fs_struct.h> +#include <linux/iversion.h> +#include <linux/nls.h> +#include <linux/buffer_head.h> + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static char exfat_default_iocharset[] = CONFIG_EXFAT_DEFAULT_IOCHARSET; +static struct kmem_cache *exfat_inode_cachep; + +static void exfat_free_iocharset(struct exfat_sb_info *sbi) +{ + if (sbi->options.iocharset != exfat_default_iocharset) + kfree(sbi->options.iocharset); +} + +static void exfat_delayed_free(struct rcu_head *p) +{ + struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu); + + unload_nls(sbi->nls_io); + exfat_free_iocharset(sbi); + exfat_free_upcase_table(sbi); + kfree(sbi); +} + +static void exfat_put_super(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + mutex_lock(&sbi->s_lock); + if (test_and_clear_bit(EXFAT_SB_DIRTY, &sbi->s_state)) + sync_blockdev(sb->s_bdev); + exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_free_bitmap(sbi); + mutex_unlock(&sbi->s_lock); + + call_rcu(&sbi->rcu, exfat_delayed_free); +} + +static int exfat_sync_fs(struct super_block *sb, int wait) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + int err = 0; + + /* If there are some dirty buffers in the bdev inode */ + mutex_lock(&sbi->s_lock); + if (test_and_clear_bit(EXFAT_SB_DIRTY, &sbi->s_state)) { + sync_blockdev(sb->s_bdev); + if (exfat_set_vol_flags(sb, VOL_CLEAN)) + err = -EIO; + } + mutex_unlock(&sbi->s_lock); + return err; +} + +static int exfat_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned long long id = huge_encode_dev(sb->s_bdev->bd_dev); + + if (sbi->used_clusters == EXFAT_CLUSTERS_UNTRACKED) { + mutex_lock(&sbi->s_lock); + if (exfat_count_used_clusters(sb, &sbi->used_clusters)) { + mutex_unlock(&sbi->s_lock); + return -EIO; + } + mutex_unlock(&sbi->s_lock); + } + + buf->f_type = sb->s_magic; + buf->f_bsize = sbi->cluster_size; + buf->f_blocks = sbi->num_clusters - 2; /* clu 0 & 1 */ + buf->f_bfree = buf->f_blocks - sbi->used_clusters; + buf->f_bavail = buf->f_bfree; + buf->f_fsid.val[0] = (unsigned int)id; + buf->f_fsid.val[1] = (unsigned int)(id >> 32); + /* Unicode utf16 255 characters */ + buf->f_namelen = EXFAT_MAX_FILE_LEN * NLS_MAX_CHARSET_SIZE; + return 0; +} + +int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct pbr64 *bpb; + bool sync = 0; + + /* flags are not changed */ + if (sbi->vol_flag == new_flag) + return 0; + + sbi->vol_flag = new_flag; + + /* skip updating volume dirty flag, + * if this volume has been mounted with read-only + */ + if (sb_rdonly(sb)) + return 0; + + if (!sbi->pbr_bh) { + sbi->pbr_bh = sb_bread(sb, 0); + if (!sbi->pbr_bh) { + exfat_msg(sb, KERN_ERR, "failed to read boot sector"); + return -ENOMEM; + } + } + + bpb = (struct pbr64 *)sbi->pbr_bh->b_data; + bpb->bsx.vol_flags = cpu_to_le16(new_flag); + + if (new_flag == VOL_DIRTY && !buffer_dirty(sbi->pbr_bh)) + sync = true; + else + sync = false; + + set_buffer_uptodate(sbi->pbr_bh); + mark_buffer_dirty(sbi->pbr_bh); + + if (sync) + sync_dirty_buffer(sbi->pbr_bh); + return 0; +} + +static int exfat_show_options(struct seq_file *m, struct dentry *root) +{ + struct super_block *sb = root->d_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_mount_options *opts = &sbi->options; + + /* Show partition info */ + if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->fs_uid)); + if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->fs_gid)); + seq_printf(m, ",fmask=%04o,dmask=%04o", opts->fs_fmask, opts->fs_dmask); + if (opts->allow_utime) + seq_printf(m, ",allow_utime=%04o", opts->allow_utime); + if (opts->utf8) + seq_puts(m, ",iocharset=utf8"); + else if (sbi->nls_io) + seq_printf(m, ",iocharset=%s", sbi->nls_io->charset); + seq_printf(m, ",bps=%ld", sb->s_blocksize); + if (opts->errors == EXFAT_ERRORS_CONT) + seq_puts(m, ",errors=continue"); + else if (opts->errors == EXFAT_ERRORS_PANIC) + seq_puts(m, ",errors=panic"); + else + seq_puts(m, ",errors=remount-ro"); + if (opts->discard) + seq_puts(m, ",discard"); + if (opts->time_offset) + seq_printf(m, ",time_offset=%d", opts->time_offset); + return 0; +} + +static struct inode *exfat_alloc_inode(struct super_block *sb) +{ + struct exfat_inode_info *ei; + + ei = kmem_cache_alloc(exfat_inode_cachep, GFP_NOFS); + if (!ei) + return NULL; + + init_rwsem(&ei->truncate_lock); + return &ei->vfs_inode; +} + +static void exfat_free_inode(struct inode *inode) +{ + kmem_cache_free(exfat_inode_cachep, EXFAT_I(inode)); +} + +static const struct super_operations exfat_sops = { + .alloc_inode = exfat_alloc_inode, + .free_inode = exfat_free_inode, + .write_inode = exfat_write_inode, + .evict_inode = exfat_evict_inode, + .put_super = exfat_put_super, + .sync_fs = exfat_sync_fs, + .statfs = exfat_statfs, + .show_options = exfat_show_options, +}; + +enum { + Opt_uid, + Opt_gid, + Opt_umask, + Opt_dmask, + Opt_fmask, + Opt_allow_utime, + Opt_charset, + Opt_errors, + Opt_discard, + Opt_time_offset, +}; + +static const struct constant_table exfat_param_enums[] = { + { "continue", EXFAT_ERRORS_CONT }, + { "panic", EXFAT_ERRORS_PANIC }, + { "remount-ro", EXFAT_ERRORS_RO }, + {} +}; + +static const struct fs_parameter_spec exfat_parameters[] = { + fsparam_u32("uid", Opt_uid), + fsparam_u32("gid", Opt_gid), + fsparam_u32oct("umask", Opt_umask), + fsparam_u32oct("dmask", Opt_dmask), + fsparam_u32oct("fmask", Opt_fmask), + fsparam_u32oct("allow_utime", Opt_allow_utime), + fsparam_string("iocharset", Opt_charset), + fsparam_enum("errors", Opt_errors, exfat_param_enums), + fsparam_flag("discard", Opt_discard), + fsparam_s32("time_offset", Opt_time_offset), + {} +}; + +static int exfat_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct exfat_sb_info *sbi = fc->s_fs_info; + struct exfat_mount_options *opts = &sbi->options; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, exfat_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_uid: + opts->fs_uid = make_kuid(current_user_ns(), result.uint_32); + break; + case Opt_gid: + opts->fs_gid = make_kgid(current_user_ns(), result.uint_32); + break; + case Opt_umask: + opts->fs_fmask = result.uint_32; + opts->fs_dmask = result.uint_32; + break; + case Opt_dmask: + opts->fs_dmask = result.uint_32; + break; + case Opt_fmask: + opts->fs_fmask = result.uint_32; + break; + case Opt_allow_utime: + opts->allow_utime = result.uint_32 & 0022; + break; + case Opt_charset: + exfat_free_iocharset(sbi); + opts->iocharset = kstrdup(param->string, GFP_KERNEL); + if (!opts->iocharset) + return -ENOMEM; + break; + case Opt_errors: + opts->errors = result.uint_32; + break; + case Opt_discard: + opts->discard = 1; + break; + case Opt_time_offset: + /* + * Make the limit 24 just in case someone invents something + * unusual. + */ + if (result.int_32 < -24 * 60 || result.int_32 > 24 * 60) + return -EINVAL; + opts->time_offset = result.int_32; + break; + default: + return -EINVAL; + } + + return 0; +} + +static void exfat_hash_init(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + int i; + + spin_lock_init(&sbi->inode_hash_lock); + for (i = 0; i < EXFAT_HASH_SIZE; i++) + INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); +} + +static int exfat_read_root(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_chain cdir; + int num_subdirs, num_clu = 0; + + exfat_chain_set(&ei->dir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + ei->entry = -1; + ei->start_clu = sbi->root_dir; + ei->flags = ALLOC_FAT_CHAIN; + ei->type = TYPE_DIR; + ei->version = 0; + ei->rwoffset = 0; + ei->hint_bmap.off = EXFAT_EOF_CLUSTER; + ei->hint_stat.eidx = 0; + ei->hint_stat.clu = sbi->root_dir; + ei->hint_femp.eidx = EXFAT_HINT_NONE; + + exfat_chain_set(&cdir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + if (exfat_count_num_clusters(sb, &cdir, &num_clu)) + return -EIO; + i_size_write(inode, num_clu << sbi->cluster_size_bits); + + num_subdirs = exfat_count_dir_entries(sb, &cdir); + if (num_subdirs < 0) + return -EIO; + set_nlink(inode, num_subdirs + EXFAT_MIN_SUBDIR); + + inode->i_uid = sbi->options.fs_uid; + inode->i_gid = sbi->options.fs_gid; + inode_inc_iversion(inode); + inode->i_generation = 0; + inode->i_mode = exfat_make_mode(sbi, ATTR_SUBDIR, 0777); + inode->i_op = &exfat_dir_inode_operations; + inode->i_fop = &exfat_dir_operations; + + inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) + & ~(sbi->cluster_size - 1)) >> inode->i_blkbits; + EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; + EXFAT_I(inode)->i_size_aligned = i_size_read(inode); + EXFAT_I(inode)->i_size_ondisk = i_size_read(inode); + + exfat_save_attr(inode, ATTR_SUBDIR); + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = + current_time(inode); + exfat_cache_init_inode(inode); + return 0; +} + +static struct pbr *exfat_read_pbr_with_logical_sector(struct super_block *sb, + struct buffer_head **prev_bh) +{ + struct pbr *p_pbr = (struct pbr *) (*prev_bh)->b_data; + unsigned short logical_sect = 0; + + logical_sect = 1 << p_pbr->bsx.f64.sect_size_bits; + + if (!is_power_of_2(logical_sect) || + logical_sect < 512 || logical_sect > 4096) { + exfat_msg(sb, KERN_ERR, "bogus logical sector size %u", + logical_sect); + return NULL; + } + + if (logical_sect < sb->s_blocksize) { + exfat_msg(sb, KERN_ERR, + "logical sector size too small for device (logical sector size = %u)", + logical_sect); + return NULL; + } + + if (logical_sect > sb->s_blocksize) { + struct buffer_head *bh = NULL; + + __brelse(*prev_bh); + *prev_bh = NULL; + + if (!sb_set_blocksize(sb, logical_sect)) { + exfat_msg(sb, KERN_ERR, + "unable to set blocksize %u", logical_sect); + return NULL; + } + bh = sb_bread(sb, 0); + if (!bh) { + exfat_msg(sb, KERN_ERR, + "unable to read boot sector (logical sector size = %lu)", + sb->s_blocksize); + return NULL; + } + + *prev_bh = bh; + p_pbr = (struct pbr *) bh->b_data; + } + return p_pbr; +} + +/* mount the file system volume */ +static int __exfat_fill_super(struct super_block *sb) +{ + int ret; + struct pbr *p_pbr; + struct pbr64 *p_bpb; + struct buffer_head *bh; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + /* set block size to read super block */ + sb_min_blocksize(sb, 512); + + /* read boot sector */ + bh = sb_bread(sb, 0); + if (!bh) { + exfat_msg(sb, KERN_ERR, "unable to read boot sector"); + return -EIO; + } + + /* PRB is read */ + p_pbr = (struct pbr *)bh->b_data; + + /* check the validity of PBR */ + if (le16_to_cpu((p_pbr->signature)) != PBR_SIGNATURE) { + exfat_msg(sb, KERN_ERR, "invalid boot record signature"); + ret = -EINVAL; + goto free_bh; + } + + + /* check logical sector size */ + p_pbr = exfat_read_pbr_with_logical_sector(sb, &bh); + if (!p_pbr) { + ret = -EIO; + goto free_bh; + } + + /* + * res_zero field must be filled with zero to prevent mounting + * from FAT volume. + */ + if (memchr_inv(p_pbr->bpb.f64.res_zero, 0, + sizeof(p_pbr->bpb.f64.res_zero))) { + ret = -EINVAL; + goto free_bh; + } + + p_bpb = (struct pbr64 *)p_pbr; + if (!p_bpb->bsx.num_fats) { + exfat_msg(sb, KERN_ERR, "bogus number of FAT structure"); + ret = -EINVAL; + goto free_bh; + } + + sbi->sect_per_clus = 1 << p_bpb->bsx.sect_per_clus_bits; + sbi->sect_per_clus_bits = p_bpb->bsx.sect_per_clus_bits; + sbi->cluster_size_bits = sbi->sect_per_clus_bits + sb->s_blocksize_bits; + sbi->cluster_size = 1 << sbi->cluster_size_bits; + sbi->num_FAT_sectors = le32_to_cpu(p_bpb->bsx.fat_length); + sbi->FAT1_start_sector = le32_to_cpu(p_bpb->bsx.fat_offset); + sbi->FAT2_start_sector = p_bpb->bsx.num_fats == 1 ? + sbi->FAT1_start_sector : + sbi->FAT1_start_sector + sbi->num_FAT_sectors; + sbi->data_start_sector = le32_to_cpu(p_bpb->bsx.clu_offset); + sbi->num_sectors = le64_to_cpu(p_bpb->bsx.vol_length); + /* because the cluster index starts with 2 */ + sbi->num_clusters = le32_to_cpu(p_bpb->bsx.clu_count) + + EXFAT_RESERVED_CLUSTERS; + + sbi->root_dir = le32_to_cpu(p_bpb->bsx.root_cluster); + sbi->dentries_per_clu = 1 << + (sbi->cluster_size_bits - DENTRY_SIZE_BITS); + + sbi->vol_flag = le16_to_cpu(p_bpb->bsx.vol_flags); + sbi->clu_srch_ptr = EXFAT_FIRST_CLUSTER; + sbi->used_clusters = EXFAT_CLUSTERS_UNTRACKED; + + if (le16_to_cpu(p_bpb->bsx.vol_flags) & VOL_DIRTY) { + sbi->vol_flag |= VOL_DIRTY; + exfat_msg(sb, KERN_WARNING, + "Volume was not properly unmounted. Some data may be corrupt. Please run fsck."); + } + + /* exFAT file size is limited by a disk volume size */ + sb->s_maxbytes = (u64)(sbi->num_clusters - EXFAT_RESERVED_CLUSTERS) << + sbi->cluster_size_bits; + + ret = exfat_create_upcase_table(sb); + if (ret) { + exfat_msg(sb, KERN_ERR, "failed to load upcase table"); + goto free_bh; + } + + ret = exfat_load_bitmap(sb); + if (ret) { + exfat_msg(sb, KERN_ERR, "failed to load alloc-bitmap"); + goto free_upcase_table; + } + + ret = exfat_count_used_clusters(sb, &sbi->used_clusters); + if (ret) { + exfat_msg(sb, KERN_ERR, "failed to scan clusters"); + goto free_alloc_bitmap; + } + + return 0; + +free_alloc_bitmap: + exfat_free_bitmap(sbi); +free_upcase_table: + exfat_free_upcase_table(sbi); +free_bh: + brelse(bh); + return ret; +} + +static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct exfat_sb_info *sbi = sb->s_fs_info; + struct exfat_mount_options *opts = &sbi->options; + struct inode *root_inode; + int err; + + if (opts->allow_utime == (unsigned short)-1) + opts->allow_utime = ~opts->fs_dmask & 0022; + + if (opts->discard) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + + if (!blk_queue_discard(q)) + exfat_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but the device does not support discard"); + opts->discard = 0; + } + + sb->s_flags |= SB_NODIRATIME; + sb->s_magic = EXFAT_SUPER_MAGIC; + sb->s_op = &exfat_sops; + + sb->s_time_gran = 1; + sb->s_time_min = EXFAT_MIN_TIMESTAMP_SECS; + sb->s_time_max = EXFAT_MAX_TIMESTAMP_SECS; + + err = __exfat_fill_super(sb); + if (err) { + exfat_msg(sb, KERN_ERR, "failed to recognize exfat type"); + goto check_nls_io; + } + + /* set up enough so that it can read an inode */ + exfat_hash_init(sb); + + if (!strcmp(sbi->options.iocharset, "utf8")) + opts->utf8 = 1; + else { + sbi->nls_io = load_nls(sbi->options.iocharset); + if (!sbi->nls_io) { + exfat_msg(sb, KERN_ERR, "IO charset %s not found", + sbi->options.iocharset); + err = -EINVAL; + goto free_table; + } + } + + if (sbi->options.utf8) + sb->s_d_op = &exfat_utf8_dentry_ops; + else + sb->s_d_op = &exfat_dentry_ops; + + root_inode = new_inode(sb); + if (!root_inode) { + exfat_msg(sb, KERN_ERR, "failed to allocate root inode."); + err = -ENOMEM; + goto free_table; + } + + root_inode->i_ino = EXFAT_ROOT_INO; + inode_set_iversion(root_inode, 1); + err = exfat_read_root(root_inode); + if (err) { + exfat_msg(sb, KERN_ERR, "failed to initialize root inode."); + goto put_inode; + } + + exfat_hash_inode(root_inode, EXFAT_I(root_inode)->i_pos); + insert_inode_hash(root_inode); + + sb->s_root = d_make_root(root_inode); + if (!sb->s_root) { + exfat_msg(sb, KERN_ERR, "failed to get the root dentry"); + err = -ENOMEM; + goto put_inode; + } + + return 0; + +put_inode: + iput(root_inode); + sb->s_root = NULL; + +free_table: + exfat_free_upcase_table(sbi); + exfat_free_bitmap(sbi); + +check_nls_io: + unload_nls(sbi->nls_io); + exfat_free_iocharset(sbi); + sb->s_fs_info = NULL; + kfree(sbi); + return err; +} + +static int exfat_get_tree(struct fs_context *fc) +{ + return get_tree_bdev(fc, exfat_fill_super); +} + +static void exfat_free(struct fs_context *fc) +{ + kfree(fc->s_fs_info); +} + +static const struct fs_context_operations exfat_context_ops = { + .parse_param = exfat_parse_param, + .get_tree = exfat_get_tree, + .free = exfat_free, +}; + +static int exfat_init_fs_context(struct fs_context *fc) +{ + struct exfat_sb_info *sbi; + + sbi = kzalloc(sizeof(struct exfat_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + mutex_init(&sbi->s_lock); + ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + sbi->options.fs_uid = current_uid(); + sbi->options.fs_gid = current_gid(); + sbi->options.fs_fmask = current->fs->umask; + sbi->options.fs_dmask = current->fs->umask; + sbi->options.allow_utime = -1; + sbi->options.iocharset = exfat_default_iocharset; + sbi->options.errors = EXFAT_ERRORS_RO; + + fc->s_fs_info = sbi; + fc->ops = &exfat_context_ops; + return 0; +} + +static struct file_system_type exfat_fs_type = { + .owner = THIS_MODULE, + .name = "exfat", + .init_fs_context = exfat_init_fs_context, + .parameters = exfat_parameters, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static void exfat_inode_init_once(void *foo) +{ + struct exfat_inode_info *ei = (struct exfat_inode_info *)foo; + + INIT_HLIST_NODE(&ei->i_hash_fat); + inode_init_once(&ei->vfs_inode); +} + +static int __init init_exfat_fs(void) +{ + int err; + + err = exfat_cache_init(); + if (err) + return err; + + exfat_inode_cachep = kmem_cache_create("exfat_inode_cache", + sizeof(struct exfat_inode_info), + 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + exfat_inode_init_once); + if (!exfat_inode_cachep) { + err = -ENOMEM; + goto shutdown_cache; + } + + err = register_filesystem(&exfat_fs_type); + if (err) + goto destroy_cache; + + return 0; + +destroy_cache: + kmem_cache_destroy(exfat_inode_cachep); +shutdown_cache: + exfat_cache_shutdown(); + return err; +} + +static void __exit exit_exfat_fs(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(exfat_inode_cachep); + unregister_filesystem(&exfat_fs_type); + exfat_cache_shutdown(); +} + +module_init(init_exfat_fs); +module_exit(exit_exfat_fs); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("exFAT filesystem support"); +MODULE_AUTHOR("Samsung Electronics Co., Ltd."); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 8fd0b3cdab4c..0e0a4d6209c7 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -516,10 +516,9 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, wait_on_buffer(bh); ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO); if (!buffer_uptodate(bh)) { - ext4_set_errno(sb, EIO); - ext4_error(sb, "Cannot read block bitmap - " - "block_group = %u, block_bitmap = %llu", - block_group, (unsigned long long) bh->b_blocknr); + ext4_error_err(sb, EIO, "Cannot read block bitmap - " + "block_group = %u, block_bitmap = %llu", + block_group, (unsigned long long) bh->b_blocknr); ext4_mark_group_bitmap_corrupted(sb, block_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); return -EIO; diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 0a734ffb4310..16e9b2fda03a 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -166,10 +166,8 @@ static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || (start_blk + count < start_blk) || - (start_blk + count > ext4_blocks_count(sbi->s_es))) { - sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); + (start_blk + count > ext4_blocks_count(sbi->s_es))) return 0; - } if (system_blks == NULL) return 1; @@ -181,10 +179,8 @@ static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, n = n->rb_left; else if (start_blk >= (entry->start_blk + entry->count)) n = n->rb_right; - else { - sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); + else return 0; - } } return 1; } @@ -220,10 +216,12 @@ static int ext4_protect_reserved_inode(struct super_block *sb, } else { if (!ext4_data_block_valid_rcu(sbi, system_blks, map.m_pblk, n)) { - ext4_error(sb, "blocks %llu-%llu from inode %u " - "overlap system zone", map.m_pblk, - map.m_pblk + map.m_len - 1, ino); err = -EFSCORRUPTED; + __ext4_error(sb, __func__, __LINE__, -err, + map.m_pblk, "blocks %llu-%llu " + "from inode %u overlap system zone", + map.m_pblk, + map.m_pblk + map.m_len - 1, ino); break; } err = add_system_zone(system_blks, map.m_pblk, n); @@ -365,7 +363,6 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, int ext4_check_blockref(const char *function, unsigned int line, struct inode *inode, __le32 *p, unsigned int max) { - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; __le32 *bref = p; unsigned int blk; @@ -379,7 +376,6 @@ int ext4_check_blockref(const char *function, unsigned int line, if (blk && unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), blk, 1))) { - es->s_last_error_block = cpu_to_le64(blk); ext4_error_inode(inode, function, line, blk, "invalid block"); return -EFSCORRUPTED; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 9aa1f75409b0..c654205f648d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -392,7 +392,7 @@ struct fname { __u32 inode; __u8 name_len; __u8 file_type; - char name[0]; + char name[]; }; /* diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 61b37a052052..91eb4381cae5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -414,7 +414,7 @@ struct flex_groups { #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ -#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ +/* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */ #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ @@ -487,7 +487,7 @@ enum { EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ EXT4_INODE_VERITY = 20, /* Verity protected inode */ EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ - EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ +/* 22 was formerly EXT4_INODE_EOFBLOCKS */ EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */ EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ @@ -533,7 +533,6 @@ static inline void ext4_check_flag_values(void) CHECK_FLAG_VALUE(EXTENTS); CHECK_FLAG_VALUE(VERITY); CHECK_FLAG_VALUE(EA_INODE); - CHECK_FLAG_VALUE(EOFBLOCKS); CHECK_FLAG_VALUE(INLINE_DATA); CHECK_FLAG_VALUE(PROJINHERIT); CHECK_FLAG_VALUE(RESERVED); @@ -2771,21 +2770,20 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno, extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, ext4_group_t block_group, unsigned int flags); -extern void ext4_set_errno(struct super_block *sb, int err); -extern __printf(4, 5) -void __ext4_error(struct super_block *, const char *, unsigned int, +extern __printf(6, 7) +void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64, const char *, ...); -extern __printf(5, 6) -void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, - const char *, ...); +extern __printf(6, 7) +void __ext4_error_inode(struct inode *, const char *, unsigned int, + ext4_fsblk_t, int, const char *, ...); extern __printf(5, 6) void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, const char *, ...); extern void __ext4_std_error(struct super_block *, const char *, unsigned int, int); -extern __printf(4, 5) -void __ext4_abort(struct super_block *, const char *, unsigned int, +extern __printf(5, 6) +void __ext4_abort(struct super_block *, const char *, unsigned int, int, const char *, ...); extern __printf(4, 5) void __ext4_warning(struct super_block *, const char *, unsigned int, @@ -2806,8 +2804,12 @@ void __ext4_grp_locked_error(const char *, unsigned int, #define EXT4_ERROR_INODE(inode, fmt, a...) \ ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a) -#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \ - ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a) +#define EXT4_ERROR_INODE_ERR(inode, err, fmt, a...) \ + __ext4_error_inode((inode), __func__, __LINE__, 0, (err), (fmt), ## a) + +#define ext4_error_inode_block(inode, block, err, fmt, a...) \ + __ext4_error_inode((inode), __func__, __LINE__, (block), (err), \ + (fmt), ## a) #define EXT4_ERROR_FILE(file, block, fmt, a...) \ ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a) @@ -2815,13 +2817,18 @@ void __ext4_grp_locked_error(const char *, unsigned int, #ifdef CONFIG_PRINTK #define ext4_error_inode(inode, func, line, block, fmt, ...) \ - __ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__) + __ext4_error_inode(inode, func, line, block, 0, fmt, ##__VA_ARGS__) +#define ext4_error_inode_err(inode, func, line, block, err, fmt, ...) \ + __ext4_error_inode((inode), (func), (line), (block), \ + (err), (fmt), ##__VA_ARGS__) #define ext4_error_file(file, func, line, block, fmt, ...) \ __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) #define ext4_error(sb, fmt, ...) \ - __ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) -#define ext4_abort(sb, fmt, ...) \ - __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) + __ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__) +#define ext4_error_err(sb, err, fmt, ...) \ + __ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__) +#define ext4_abort(sb, err, fmt, ...) \ + __ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__) #define ext4_warning(sb, fmt, ...) \ __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) #define ext4_warning_inode(inode, fmt, ...) \ @@ -2839,7 +2846,12 @@ void __ext4_grp_locked_error(const char *, unsigned int, #define ext4_error_inode(inode, func, line, block, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_error_inode(inode, "", 0, block, " "); \ + __ext4_error_inode(inode, "", 0, block, 0, " "); \ +} while (0) +#define ext4_error_inode_err(inode, func, line, block, err, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_error_inode(inode, "", 0, block, err, " "); \ } while (0) #define ext4_error_file(file, func, line, block, fmt, ...) \ do { \ @@ -2849,12 +2861,17 @@ do { \ #define ext4_error(sb, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_error(sb, "", 0, " "); \ + __ext4_error(sb, "", 0, 0, 0, " "); \ +} while (0) +#define ext4_error_err(sb, err, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_error(sb, "", 0, err, 0, " "); \ } while (0) -#define ext4_abort(sb, fmt, ...) \ +#define ext4_abort(sb, err, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_abort(sb, "", 0, " "); \ + __ext4_abort(sb, "", 0, err, " "); \ } while (0) #define ext4_warning(sb, fmt, ...) \ do { \ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 1f53d64e42a5..7f16e1af8d5c 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -80,8 +80,7 @@ static int ext4_journal_check_start(struct super_block *sb) * take the FS itself readonly cleanly. */ if (journal && is_journal_aborted(journal)) { - ext4_set_errno(sb, -journal->j_errno); - ext4_abort(sb, "Detected aborted journal"); + ext4_abort(sb, -journal->j_errno, "Detected aborted journal"); return -EROFS; } return 0; @@ -272,8 +271,7 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); - ext4_set_errno(inode->i_sb, -err); - __ext4_abort(inode->i_sb, where, line, + __ext4_abort(inode->i_sb, where, line, -err, "error %d when attempting revoke", err); } BUFFER_TRACE(bh, "exit"); @@ -332,6 +330,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, err); } } else { + set_buffer_uptodate(bh); if (inode) mark_buffer_dirty_inode(bh, inode); else @@ -342,11 +341,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, struct ext4_super_block *es; es = EXT4_SB(inode->i_sb)->s_es; - es->s_last_error_block = - cpu_to_le64(bh->b_blocknr); - ext4_set_errno(inode->i_sb, EIO); - ext4_error_inode(inode, where, line, - bh->b_blocknr, + ext4_error_inode_err(inode, where, line, + bh->b_blocknr, EIO, "IO error syncing itable block"); err = -EIO; } diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 7ea4f6fa173b..4b9002f0e84c 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -512,6 +512,9 @@ static inline int ext4_should_dioread_nolock(struct inode *inode) return 0; if (ext4_should_journal_data(inode)) return 0; + /* temporary fix to prevent generic/422 test failures */ + if (!test_opt(inode->i_sb, DELALLOC)) + return 0; return 1; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 954013d6076b..031752cfb6f7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -28,6 +28,7 @@ #include <linux/uaccess.h> #include <linux/fiemap.h> #include <linux/backing-dev.h> +#include <linux/iomap.h> #include "ext4_jbd2.h" #include "ext4_extents.h" #include "xattr.h" @@ -83,13 +84,6 @@ static void ext4_extent_block_csum_set(struct inode *inode, et->et_checksum = ext4_extent_block_csum(inode, eh); } -static int ext4_split_extent(handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - struct ext4_map_blocks *map, - int split_flag, - int flags); - static int ext4_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path **ppath, @@ -97,9 +91,6 @@ static int ext4_split_extent_at(handle_t *handle, int split_flag, int flags); -static int ext4_find_delayed_extent(struct inode *inode, - struct extent_status *newes); - static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) { /* @@ -358,8 +349,8 @@ static int ext4_valid_extent_idx(struct inode *inode, } static int ext4_valid_extent_entries(struct inode *inode, - struct ext4_extent_header *eh, - int depth) + struct ext4_extent_header *eh, + ext4_fsblk_t *pblk, int depth) { unsigned short entries; if (eh->eh_entries == 0) @@ -370,8 +361,6 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; - ext4_fsblk_t pblock = 0; ext4_lblk_t lblock = 0; ext4_lblk_t prev = 0; int len = 0; @@ -383,8 +372,7 @@ static int ext4_valid_extent_entries(struct inode *inode, lblock = le32_to_cpu(ext->ee_block); len = ext4_ext_get_actual_len(ext); if ((lblock <= prev) && prev) { - pblock = ext4_ext_pblock(ext); - es->s_last_error_block = cpu_to_le64(pblock); + *pblk = ext4_ext_pblock(ext); return 0; } ext++; @@ -431,7 +419,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, depth)) { + if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } @@ -449,14 +437,14 @@ static int __ext4_ext_check(const char *function, unsigned int line, return 0; corrupted: - ext4_set_errno(inode->i_sb, -err); - ext4_error_inode(inode, function, line, 0, - "pblk %llu bad header/extent: %s - magic %x, " - "entries %u, max %u(%u), depth %u(%u)", - (unsigned long long) pblk, error_msg, - le16_to_cpu(eh->eh_magic), - le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), - max, le16_to_cpu(eh->eh_depth), depth); + ext4_error_inode_err(inode, function, line, 0, -err, + "pblk %llu bad header/extent: %s - magic %x, " + "entries %u, max %u(%u), depth %u(%u)", + (unsigned long long) pblk, error_msg, + le16_to_cpu(eh->eh_magic), + le16_to_cpu(eh->eh_entries), + le16_to_cpu(eh->eh_max), + max, le16_to_cpu(eh->eh_depth), depth); return err; } @@ -556,6 +544,12 @@ int ext4_ext_precache(struct inode *inode) down_read(&ei->i_data_sem); depth = ext_depth(inode); + /* Don't cache anything if there are no external extent blocks */ + if (!depth) { + up_read(&ei->i_data_sem); + return ret; + } + path = kcalloc(depth + 1, sizeof(struct ext4_ext_path), GFP_NOFS); if (path == NULL) { @@ -563,9 +557,6 @@ int ext4_ext_precache(struct inode *inode) return -ENOMEM; } - /* Don't cache anything if there are no external extent blocks */ - if (depth == 0) - goto out; path[0].p_hdr = ext_inode_hdr(inode); ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0); if (ret) @@ -2134,155 +2125,6 @@ cleanup: return err; } -static int ext4_fill_fiemap_extents(struct inode *inode, - ext4_lblk_t block, ext4_lblk_t num, - struct fiemap_extent_info *fieinfo) -{ - struct ext4_ext_path *path = NULL; - struct ext4_extent *ex; - struct extent_status es; - ext4_lblk_t next, next_del, start = 0, end = 0; - ext4_lblk_t last = block + num; - int exists, depth = 0, err = 0; - unsigned int flags = 0; - unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; - - while (block < last && block != EXT_MAX_BLOCKS) { - num = last - block; - /* find extent for this block */ - down_read(&EXT4_I(inode)->i_data_sem); - - path = ext4_find_extent(inode, block, &path, 0); - if (IS_ERR(path)) { - up_read(&EXT4_I(inode)->i_data_sem); - err = PTR_ERR(path); - path = NULL; - break; - } - - depth = ext_depth(inode); - if (unlikely(path[depth].p_hdr == NULL)) { - up_read(&EXT4_I(inode)->i_data_sem); - EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - err = -EFSCORRUPTED; - break; - } - ex = path[depth].p_ext; - next = ext4_ext_next_allocated_block(path); - - flags = 0; - exists = 0; - if (!ex) { - /* there is no extent yet, so try to allocate - * all requested space */ - start = block; - end = block + num; - } else if (le32_to_cpu(ex->ee_block) > block) { - /* need to allocate space before found extent */ - start = block; - end = le32_to_cpu(ex->ee_block); - if (block + num < end) - end = block + num; - } else if (block >= le32_to_cpu(ex->ee_block) - + ext4_ext_get_actual_len(ex)) { - /* need to allocate space after found extent */ - start = block; - end = block + num; - if (end >= next) - end = next; - } else if (block >= le32_to_cpu(ex->ee_block)) { - /* - * some part of requested space is covered - * by found extent - */ - start = block; - end = le32_to_cpu(ex->ee_block) - + ext4_ext_get_actual_len(ex); - if (block + num < end) - end = block + num; - exists = 1; - } else { - BUG(); - } - BUG_ON(end <= start); - - if (!exists) { - es.es_lblk = start; - es.es_len = end - start; - es.es_pblk = 0; - } else { - es.es_lblk = le32_to_cpu(ex->ee_block); - es.es_len = ext4_ext_get_actual_len(ex); - es.es_pblk = ext4_ext_pblock(ex); - if (ext4_ext_is_unwritten(ex)) - flags |= FIEMAP_EXTENT_UNWRITTEN; - } - - /* - * Find delayed extent and update es accordingly. We call - * it even in !exists case to find out whether es is the - * last existing extent or not. - */ - next_del = ext4_find_delayed_extent(inode, &es); - if (!exists && next_del) { - exists = 1; - flags |= (FIEMAP_EXTENT_DELALLOC | - FIEMAP_EXTENT_UNKNOWN); - } - up_read(&EXT4_I(inode)->i_data_sem); - - if (unlikely(es.es_len == 0)) { - EXT4_ERROR_INODE(inode, "es.es_len == 0"); - err = -EFSCORRUPTED; - break; - } - - /* - * This is possible iff next == next_del == EXT_MAX_BLOCKS. - * we need to check next == EXT_MAX_BLOCKS because it is - * possible that an extent is with unwritten and delayed - * status due to when an extent is delayed allocated and - * is allocated by fallocate status tree will track both of - * them in a extent. - * - * So we could return a unwritten and delayed extent, and - * its block is equal to 'next'. - */ - if (next == next_del && next == EXT_MAX_BLOCKS) { - flags |= FIEMAP_EXTENT_LAST; - if (unlikely(next_del != EXT_MAX_BLOCKS || - next != EXT_MAX_BLOCKS)) { - EXT4_ERROR_INODE(inode, - "next extent == %u, next " - "delalloc extent = %u", - next, next_del); - err = -EFSCORRUPTED; - break; - } - } - - if (exists) { - err = fiemap_fill_next_extent(fieinfo, - (__u64)es.es_lblk << blksize_bits, - (__u64)es.es_pblk << blksize_bits, - (__u64)es.es_len << blksize_bits, - flags); - if (err < 0) - break; - if (err == 1) { - err = 0; - break; - } - } - - block = es.es_lblk + es.es_len; - } - - ext4_ext_drop_refs(path); - kfree(path); - return err; -} - static int ext4_fill_es_cache_info(struct inode *inode, ext4_lblk_t block, ext4_lblk_t num, struct fiemap_extent_info *fieinfo) @@ -3874,64 +3716,11 @@ out: return err; } -/* - * Handle EOFBLOCKS_FL flag, clearing it if necessary - */ -static int check_eofblocks_fl(handle_t *handle, struct inode *inode, - ext4_lblk_t lblk, - struct ext4_ext_path *path, - unsigned int len) -{ - int i, depth; - struct ext4_extent_header *eh; - struct ext4_extent *last_ex; - - if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) - return 0; - - depth = ext_depth(inode); - eh = path[depth].p_hdr; - - /* - * We're going to remove EOFBLOCKS_FL entirely in future so we - * do not care for this case anymore. Simply remove the flag - * if there are no extents. - */ - if (unlikely(!eh->eh_entries)) - goto out; - last_ex = EXT_LAST_EXTENT(eh); - /* - * We should clear the EOFBLOCKS_FL flag if we are writing the - * last block in the last extent in the file. We test this by - * first checking to see if the caller to - * ext4_ext_get_blocks() was interested in the last block (or - * a block beyond the last block) in the current extent. If - * this turns out to be false, we can bail out from this - * function immediately. - */ - if (lblk + len < le32_to_cpu(last_ex->ee_block) + - ext4_ext_get_actual_len(last_ex)) - return 0; - /* - * If the caller does appear to be planning to write at or - * beyond the end of the current extent, we then test to see - * if the current extent is the last extent in the file, by - * checking to make sure it was reached via the rightmost node - * at each level of the tree. - */ - for (i = depth-1; i >= 0; i--) - if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) - return 0; -out: - ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); - return ext4_mark_inode_dirty(handle, inode); -} - static int convert_initialized_extent(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path **ppath, - unsigned int allocated) + unsigned int *allocated) { struct ext4_ext_path *path = *ppath; struct ext4_extent *ex; @@ -3991,14 +3780,12 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, ext4_ext_show_leaf(inode, path); ext4_update_inode_fsync_trans(handle, inode, 1); - err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len); - if (err) - return err; + map->m_flags |= EXT4_MAP_UNWRITTEN; - if (allocated > map->m_len) - allocated = map->m_len; - map->m_len = allocated; - return allocated; + if (*allocated > map->m_len) + *allocated = map->m_len; + map->m_len = *allocated; + return 0; } static int @@ -4007,7 +3794,9 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, struct ext4_ext_path **ppath, int flags, unsigned int allocated, ext4_fsblk_t newblock) { +#ifdef EXT_DEBUG struct ext4_ext_path *path = *ppath; +#endif int ret = 0; int err = 0; @@ -4047,11 +3836,9 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, } ret = ext4_convert_unwritten_extents_endio(handle, inode, map, ppath); - if (ret >= 0) { + if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); - err = check_eofblocks_fl(handle, inode, map->m_lblk, - path, map->m_len); - } else + else err = ret; map->m_flags |= EXT4_MAP_MAPPED; map->m_pblk = newblock; @@ -4100,12 +3887,6 @@ out: map_out: map->m_flags |= EXT4_MAP_MAPPED; - if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) { - err = check_eofblocks_fl(handle, inode, map->m_lblk, path, - map->m_len); - if (err < 0) - goto out2; - } out1: if (allocated > map->m_len) allocated = map->m_len; @@ -4244,12 +4025,11 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_extent newex, *ex, *ex2; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_fsblk_t newblock = 0; - int free_on_err = 0, err = 0, depth, ret; + int err = 0, depth, ret; unsigned int allocated = 0, offset = 0; unsigned int allocated_clusters = 0; struct ext4_allocation_request ar; ext4_lblk_t cluster_offset; - bool map_from_cluster = false; ext_debug("blocks %u/%u requested for inode %lu\n", map->m_lblk, map->m_len, inode->i_ino); @@ -4308,12 +4088,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ if ((!ext4_ext_is_unwritten(ex)) && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { - allocated = convert_initialized_extent( - handle, inode, map, &path, - allocated); + err = convert_initialized_extent(handle, + inode, map, &path, &allocated); goto out2; - } else if (!ext4_ext_is_unwritten(ex)) + } else if (!ext4_ext_is_unwritten(ex)) { goto out; + } ret = ext4_ext_handle_unwritten_extents( handle, inode, map, &path, flags, @@ -4364,7 +4144,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { ar.len = allocated = map->m_len; newblock = map->m_pblk; - map_from_cluster = true; goto got_allocated_blocks; } @@ -4385,7 +4164,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { ar.len = allocated = map->m_len; newblock = map->m_pblk; - map_from_cluster = true; goto got_allocated_blocks; } @@ -4442,7 +4220,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, goto out2; ext_debug("allocate new block: goal %llu, found %llu/%u\n", ar.goal, newblock, allocated); - free_on_err = 1; allocated_clusters = ar.len; ar.len = EXT4_C2B(sbi, ar.len) - offset; if (ar.len > allocated) @@ -4453,28 +4230,28 @@ got_allocated_blocks: ext4_ext_store_pblock(&newex, newblock + offset); newex.ee_len = cpu_to_le16(ar.len); /* Mark unwritten */ - if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){ + if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) { ext4_ext_mark_unwritten(&newex); map->m_flags |= EXT4_MAP_UNWRITTEN; } - err = 0; - if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) - err = check_eofblocks_fl(handle, inode, map->m_lblk, - path, ar.len); - if (!err) - err = ext4_ext_insert_extent(handle, inode, &path, - &newex, flags); - - if (err && free_on_err) { - int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? - EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; - /* free data blocks we just allocated */ - /* not a good idea to call discard here directly, - * but otherwise we'd need to call it every free() */ - ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, NULL, newblock, - EXT4_C2B(sbi, allocated_clusters), fb_flags); + err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags); + if (err) { + if (allocated_clusters) { + int fb_flags = 0; + + /* + * free data blocks we just allocated. + * not a good idea to call discard here directly, + * but otherwise we'd need to call it every free(). + */ + ext4_discard_preallocations(inode); + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; + ext4_free_blocks(handle, inode, NULL, newblock, + EXT4_C2B(sbi, allocated_clusters), + fb_flags); + } goto out2; } @@ -4491,7 +4268,7 @@ got_allocated_blocks: * clusters discovered to be delayed allocated. Once allocated, a * cluster is not included in the reserved count. */ - if (test_opt(inode->i_sb, DELALLOC) && !map_from_cluster) { + if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) { if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { /* * When allocating delayed allocated clusters, simply @@ -4645,10 +4422,6 @@ retry: epos = new_size; if (ext4_update_inode_size(inode, epos) & 0x1) inode->i_mtime = inode->i_ctime; - } else { - if (epos > inode->i_size) - ext4_set_inode_flag(inode, - EXT4_INODE_EOFBLOCKS); } ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -4802,16 +4575,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, } inode->i_mtime = inode->i_ctime = current_time(inode); - if (new_size) { + if (new_size) ext4_update_inode_size(inode, new_size); - } else { - /* - * Mark that we allocate beyond EOF so the subsequent truncate - * can proceed even if the new size is the same as i_size. - */ - if (offset + len > inode->i_size) - ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); - } ext4_mark_inode_dirty(handle, inode); /* Zero out partial block at the edges of the range */ @@ -5009,64 +4774,13 @@ int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end) return ret < 0 ? ret : err; } -/* - * If newes is not existing extent (newes->ec_pblk equals zero) find - * delayed extent at start of newes and update newes accordingly and - * return start of the next delayed extent. - * - * If newes is existing extent (newes->ec_pblk is not equal zero) - * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed - * extent found. Leave newes unmodified. - */ -static int ext4_find_delayed_extent(struct inode *inode, - struct extent_status *newes) -{ - struct extent_status es; - ext4_lblk_t block, next_del; - - if (newes->es_pblk == 0) { - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, - newes->es_lblk, - newes->es_lblk + newes->es_len - 1, - &es); - - /* - * No extent in extent-tree contains block @newes->es_pblk, - * then the block may stay in 1)a hole or 2)delayed-extent. - */ - if (es.es_len == 0) - /* A hole found. */ - return 0; - - if (es.es_lblk > newes->es_lblk) { - /* A hole found. */ - newes->es_len = min(es.es_lblk - newes->es_lblk, - newes->es_len); - return 0; - } - - newes->es_len = es.es_lblk + es.es_len - newes->es_lblk; - } - - block = newes->es_lblk + newes->es_len; - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, block, - EXT_MAX_BLOCKS, &es); - if (es.es_len == 0) - next_del = EXT_MAX_BLOCKS; - else - next_del = es.es_lblk; - - return next_del; -} - -static int ext4_xattr_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo) +static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap) { __u64 physical = 0; - __u64 length; - __u32 flags = FIEMAP_EXTENT_LAST; + __u64 length = 0; int blockbits = inode->i_sb->s_blocksize_bits; int error = 0; + u16 iomap_type; /* in-inode? */ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { @@ -5081,40 +4795,49 @@ static int ext4_xattr_fiemap(struct inode *inode, EXT4_I(inode)->i_extra_isize; physical += offset; length = EXT4_SB(inode->i_sb)->s_inode_size - offset; - flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); - } else { /* external block */ + iomap_type = IOMAP_INLINE; + } else if (EXT4_I(inode)->i_file_acl) { /* external block */ physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; + iomap_type = IOMAP_MAPPED; + } else { + /* no in-inode or external block for xattr, so return -ENOENT */ + error = -ENOENT; + goto out; } - if (physical) - error = fiemap_fill_next_extent(fieinfo, 0, physical, - length, flags); - return (error < 0 ? error : 0); + iomap->addr = physical; + iomap->offset = 0; + iomap->length = length; + iomap->type = iomap_type; + iomap->flags = 0; +out: + return error; } -static int _ext4_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len, - int (*fill)(struct inode *, ext4_lblk_t, - ext4_lblk_t, - struct fiemap_extent_info *)) +static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset, + loff_t length, unsigned flags, + struct iomap *iomap, struct iomap *srcmap) { - ext4_lblk_t start_blk; - u32 ext4_fiemap_flags = FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR; + int error; - int error = 0; - - if (ext4_has_inline_data(inode)) { - int has_inline = 1; + error = ext4_iomap_xattr_fiemap(inode, iomap); + if (error == 0 && (offset >= iomap->length)) + error = -ENOENT; + return error; +} - error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline, - start, len); +static const struct iomap_ops ext4_iomap_xattr_ops = { + .iomap_begin = ext4_iomap_xattr_begin, +}; - if (has_inline) - return error; - } +static int _ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len, bool from_es_cache) +{ + ext4_lblk_t start_blk; + u32 ext4_fiemap_flags = FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR; + int error = 0; if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { error = ext4_ext_precache(inode); @@ -5123,19 +4846,19 @@ static int _ext4_fiemap(struct inode *inode, fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; } - /* fallback to generic here if not in extents fmt */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && - fill == ext4_fill_fiemap_extents) - return generic_block_fiemap(inode, fieinfo, start, len, - ext4_get_block); - - if (fill == ext4_fill_es_cache_info) + if (from_es_cache) ext4_fiemap_flags &= FIEMAP_FLAG_XATTR; + if (fiemap_check_flags(fieinfo, ext4_fiemap_flags)) return -EBADR; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { - error = ext4_xattr_fiemap(inode, fieinfo); + fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR; + error = iomap_fiemap(inode, fieinfo, start, len, + &ext4_iomap_xattr_ops); + } else if (!from_es_cache) { + error = iomap_fiemap(inode, fieinfo, start, len, + &ext4_iomap_report_ops); } else { ext4_lblk_t len_blks; __u64 last_blk; @@ -5150,7 +4873,8 @@ static int _ext4_fiemap(struct inode *inode, * Walk the extent tree gathering extent information * and pushing extents back to the user. */ - error = fill(inode, start_blk, len_blks, fieinfo); + error = ext4_fill_es_cache_info(inode, start_blk, len_blks, + fieinfo); } return error; } @@ -5158,8 +4882,7 @@ static int _ext4_fiemap(struct inode *inode, int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { - return _ext4_fiemap(inode, fieinfo, start, len, - ext4_fill_fiemap_extents); + return _ext4_fiemap(inode, fieinfo, start, len, false); } int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, @@ -5175,8 +4898,7 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, return 0; } - return _ext4_fiemap(inode, fieinfo, start, len, - ext4_fill_es_cache_info); + return _ext4_fiemap(inode, fieinfo, start, len, true); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 5f225881176b..0d624250a62b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -872,6 +872,7 @@ const struct file_operations ext4_file_operations = { .llseek = ext4_llseek, .read_iter = ext4_file_read_iter, .write_iter = ext4_file_write_iter, + .iopoll = iomap_dio_iopoll, .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f95ee99091e4..b420c9dc444d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -196,10 +196,9 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_simulate_fail_bh(sb, bh, EXT4_SIM_IBITMAP_EIO); if (!buffer_uptodate(bh)) { put_bh(bh); - ext4_set_errno(sb, EIO); - ext4_error(sb, "Cannot read inode bitmap - " - "block_group = %u, inode_bitmap = %llu", - block_group, bitmap_blk); + ext4_error_err(sb, EIO, "Cannot read inode bitmap - " + "block_group = %u, inode_bitmap = %llu", + block_group, bitmap_blk); ext4_mark_group_bitmap_corrupted(sb, block_group, EXT4_GROUP_INFO_IBITMAP_CORRUPT); return ERR_PTR(-EIO); @@ -712,21 +711,34 @@ out: static int find_inode_bit(struct super_block *sb, ext4_group_t group, struct buffer_head *bitmap, unsigned long *ino) { + bool check_recently_deleted = EXT4_SB(sb)->s_journal == NULL; + unsigned long recently_deleted_ino = EXT4_INODES_PER_GROUP(sb); + next: *ino = ext4_find_next_zero_bit((unsigned long *) bitmap->b_data, EXT4_INODES_PER_GROUP(sb), *ino); if (*ino >= EXT4_INODES_PER_GROUP(sb)) - return 0; + goto not_found; - if ((EXT4_SB(sb)->s_journal == NULL) && - recently_deleted(sb, group, *ino)) { + if (check_recently_deleted && recently_deleted(sb, group, *ino)) { + recently_deleted_ino = *ino; *ino = *ino + 1; if (*ino < EXT4_INODES_PER_GROUP(sb)) goto next; - return 0; + goto not_found; } - + return 1; +not_found: + if (recently_deleted_ino >= EXT4_INODES_PER_GROUP(sb)) + return 0; + /* + * Not reusing recently deleted inodes is mostly a preference. We don't + * want to report ENOSPC or skew allocation patterns because of that. + * So return even recently deleted inode if we could find better in the + * given range. + */ + *ino = recently_deleted_ino; return 1; } @@ -1231,9 +1243,9 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { err = PTR_ERR(inode); - ext4_set_errno(sb, -err); - ext4_error(sb, "couldn't read orphan inode %lu (err %d)", - ino, err); + ext4_error_err(sb, -err, + "couldn't read orphan inode %lu (err %d)", + ino, err); return inode; } diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 569fc68e8975..107f0043f67f 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1019,7 +1019,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, * (should be rare). */ if (!bh) { - EXT4_ERROR_INODE_BLOCK(inode, nr, + ext4_error_inode_block(inode, nr, EIO, "Read failure"); continue; } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index fad82d08fca5..f35e289e17aa 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -98,10 +98,9 @@ int ext4_get_max_inline_size(struct inode *inode) error = ext4_get_inode_loc(inode, &iloc); if (error) { - ext4_set_errno(inode->i_sb, -error); - ext4_error_inode(inode, __func__, __LINE__, 0, - "can't get inode location %lu", - inode->i_ino); + ext4_error_inode_err(inode, __func__, __LINE__, 0, -error, + "can't get inode location %lu", + inode->i_ino); return 0; } @@ -1762,9 +1761,9 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) err = ext4_get_inode_loc(dir, &iloc); if (err) { - ext4_set_errno(dir->i_sb, -err); - EXT4_ERROR_INODE(dir, "error %d getting inode %lu block", - err, dir->i_ino); + EXT4_ERROR_INODE_ERR(dir, -err, + "error %d getting inode %lu block", + err, dir->i_ino); return true; } @@ -1857,47 +1856,6 @@ out: return error; } -int ext4_inline_data_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, - int *has_inline, __u64 start, __u64 len) -{ - __u64 physical = 0; - __u64 inline_len; - __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | - FIEMAP_EXTENT_LAST; - int error = 0; - struct ext4_iloc iloc; - - down_read(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - *has_inline = 0; - goto out; - } - inline_len = min_t(size_t, ext4_get_inline_size(inode), - i_size_read(inode)); - if (start >= inline_len) - goto out; - if (start + len < inline_len) - inline_len = start + len; - inline_len -= start; - - error = ext4_get_inode_loc(inode, &iloc); - if (error) - goto out; - - physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; - physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; - physical += offsetof(struct ext4_inode, i_block); - - brelse(iloc.bh); -out: - up_read(&EXT4_I(inode)->xattr_sem); - if (physical) - error = fiemap_fill_next_extent(fieinfo, start, physical, - inline_len, flags); - return (error < 0 ? error : 0); -} - int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index fa0ff78dc033..e416096fc081 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -269,10 +269,9 @@ void ext4_evict_inode(struct inode *inode) if (inode->i_blocks) { err = ext4_truncate(inode); if (err) { - ext4_set_errno(inode->i_sb, -err); - ext4_error(inode->i_sb, - "couldn't truncate inode %lu (err %d)", - inode->i_ino, err); + ext4_error_err(inode->i_sb, -err, + "couldn't truncate inode %lu (err %d)", + inode->i_ino, err); goto stop_handle; } } @@ -2478,10 +2477,9 @@ update_disksize: up_write(&EXT4_I(inode)->i_data_sem); err2 = ext4_mark_inode_dirty(handle, inode); if (err2) { - ext4_set_errno(inode->i_sb, -err2); - ext4_error(inode->i_sb, - "Failed to mark inode %lu dirty", - inode->i_ino); + ext4_error_err(inode->i_sb, -err2, + "Failed to mark inode %lu dirty", + inode->i_ino); } if (!err) err = err2; @@ -3212,7 +3210,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) return 0; } - return generic_block_bmap(mapping, block, ext4_get_block); + return iomap_bmap(mapping, block, &ext4_iomap_ops); } static int ext4_readpage(struct file *file, struct page *page) @@ -3333,6 +3331,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, iomap->offset = (u64) map->m_lblk << blkbits; iomap->length = (u64) map->m_len << blkbits; + if ((map->m_flags & EXT4_MAP_MAPPED) && + !ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + iomap->flags |= IOMAP_F_MERGED; + /* * Flags passed to ext4_map_blocks() for direct I/O writes can result * in m_flags having both EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits @@ -3542,12 +3544,28 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; + /* + * Fiemap callers may call for offset beyond s_bitmap_maxbytes. + * So handle it here itself instead of querying ext4_map_blocks(). + * Since ext4_map_blocks() will warn about it and will return + * -EIO error. + */ + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + + if (offset >= sbi->s_bitmap_maxbytes) { + map.m_flags = 0; + goto set_iomap; + } + } + ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) return ret; if (ret == 0) delalloc = ext4_iomap_is_delalloc(inode, &map); +set_iomap: ext4_set_iomap(inode, iomap, &map, offset, length); if (delalloc && iomap->type == IOMAP_HOLE) iomap->type = IOMAP_DELALLOC; @@ -4144,8 +4162,6 @@ int ext4_truncate(struct inode *inode) if (!ext4_can_truncate(inode)) return 0; - ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); - if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); @@ -4364,8 +4380,7 @@ make_io: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { simulate_eio: - ext4_set_errno(inode->i_sb, EIO); - EXT4_ERROR_INODE_BLOCK(inode, block, + ext4_error_inode_block(inode, block, EIO, "unable to read itable block"); brelse(bh); return -EIO; @@ -4517,7 +4532,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { if (flags & EXT4_IGET_HANDLE) return ERR_PTR(-ESTALE); - __ext4_error(sb, function, line, + __ext4_error(sb, function, line, EFSCORRUPTED, 0, "inode #%lu: comm %s: iget: illegal inode #", ino, current->comm); return ERR_PTR(-EFSCORRUPTED); @@ -4580,9 +4595,8 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, if (!ext4_inode_csum_verify(inode, raw_inode, ei) || ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) { - ext4_set_errno(inode->i_sb, EFSBADCRC); - ext4_error_inode(inode, function, line, 0, - "iget: checksum invalid"); + ext4_error_inode_err(inode, function, line, 0, EFSBADCRC, + "iget: checksum invalid"); ret = -EFSBADCRC; goto bad_inode; } @@ -4812,7 +4826,7 @@ static int ext4_inode_blocks_set(handle_t *handle, struct ext4_inode_info *ei) { struct inode *inode = &(ei->vfs_inode); - u64 i_blocks = inode->i_blocks; + u64 i_blocks = READ_ONCE(inode->i_blocks); struct super_block *sb = inode->i_sb; if (i_blocks <= ~0U) { @@ -4982,7 +4996,7 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_file_acl_high = cpu_to_le16(ei->i_file_acl >> 32); raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) { + if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) { ext4_isize_set(raw_inode, ei->i_disksize); need_datasync = 1; } @@ -5131,9 +5145,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { - ext4_set_errno(inode->i_sb, EIO); - EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, - "IO error syncing inode"); + ext4_error_inode_block(inode, iloc.bh->b_blocknr, EIO, + "IO error syncing inode"); err = -EIO; } brelse(iloc.bh); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 0c1d1720cf1a..bfc1281fc4cb 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -327,18 +327,6 @@ static int ext4_ioctl_setflags(struct inode *inode, if ((flags ^ oldflags) & EXT4_EXTENTS_FL) migrate = 1; - if (flags & EXT4_EOFBLOCKS_FL) { - /* we don't support adding EOFBLOCKS flag */ - if (!(oldflags & EXT4_EOFBLOCKS_FL)) { - err = -EOPNOTSUPP; - goto flags_out; - } - } else if (oldflags & EXT4_EOFBLOCKS_FL) { - err = ext4_truncate(inode); - if (err) - goto flags_out; - } - if ((flags ^ oldflags) & EXT4_CASEFOLD_FL) { if (!ext4_has_feature_casefold(sb)) { err = -EOPNOTSUPP; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 51a78eb65f3c..87c85be4c12e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1901,8 +1901,15 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, BUG_ON(buddy == NULL); k = mb_find_next_zero_bit(buddy, max, 0); - BUG_ON(k >= max); - + if (k >= max) { + ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0, + "%d free clusters of order %d. But found 0", + grp->bb_counters[i], i); + ext4_mark_group_bitmap_corrupted(ac->ac_sb, + e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); + break; + } ac->ac_found++; ac->ac_b_ex.fe_len = 1 << i; @@ -3914,9 +3921,9 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, bitmap_bh = ext4_read_block_bitmap(sb, group); if (IS_ERR(bitmap_bh)) { err = PTR_ERR(bitmap_bh); - ext4_set_errno(sb, -err); - ext4_error(sb, "Error %d reading block bitmap for %u", - err, group); + ext4_error_err(sb, -err, + "Error %d reading block bitmap for %u", + err, group); return 0; } @@ -4083,18 +4090,16 @@ repeat: err = ext4_mb_load_buddy_gfp(sb, group, &e4b, GFP_NOFS|__GFP_NOFAIL); if (err) { - ext4_set_errno(sb, -err); - ext4_error(sb, "Error %d loading buddy information for %u", - err, group); + ext4_error_err(sb, -err, "Error %d loading buddy information for %u", + err, group); continue; } bitmap_bh = ext4_read_block_bitmap(sb, group); if (IS_ERR(bitmap_bh)) { err = PTR_ERR(bitmap_bh); - ext4_set_errno(sb, -err); - ext4_error(sb, "Error %d reading block bitmap for %u", - err, group); + ext4_error_err(sb, -err, "Error %d reading block bitmap for %u", + err, group); ext4_mb_unload_buddy(&e4b); continue; } @@ -4302,7 +4307,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], - pa_inode_list) { + pa_inode_list, + lockdep_is_held(&lg->lg_prealloc_lock)) { spin_lock(&pa->pa_lock); if (atomic_read(&pa->pa_count)) { /* @@ -4347,9 +4353,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, err = ext4_mb_load_buddy_gfp(sb, group, &e4b, GFP_NOFS|__GFP_NOFAIL); if (err) { - ext4_set_errno(sb, -err); - ext4_error(sb, "Error %d loading buddy information for %u", - err, group); + ext4_error_err(sb, -err, "Error %d loading buddy information for %u", + err, group); continue; } ext4_lock_group(sb, group); @@ -4386,7 +4391,8 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* Add the prealloc space to lg */ spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], - pa_inode_list) { + pa_inode_list, + lockdep_is_held(&lg->lg_prealloc_lock)) { spin_lock(&tmp_pa->pa_lock); if (tmp_pa->pa_deleted) { spin_unlock(&tmp_pa->pa_lock); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 87f7551c5132..d34cb8c46655 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -175,8 +175,8 @@ static int kmmpd(void *data) */ if (retval) { if ((failed_writes % 60) == 0) { - ext4_set_errno(sb, -retval); - ext4_error(sb, "Error writing to MMP block"); + ext4_error_err(sb, -retval, + "Error writing to MMP block"); } failed_writes++; } @@ -208,9 +208,9 @@ static int kmmpd(void *data) retval = read_mmp_block(sb, &bh_check, mmp_block); if (retval) { - ext4_set_errno(sb, -retval); - ext4_error(sb, "error reading MMP data: %d", - retval); + ext4_error_err(sb, -retval, + "error reading MMP data: %d", + retval); goto exit_thread; } @@ -222,8 +222,7 @@ static int kmmpd(void *data) "Error while updating MMP info. " "The filesystem seems to have been" " multiply mounted."); - ext4_set_errno(sb, EBUSY); - ext4_error(sb, "abort"); + ext4_error_err(sb, EBUSY, "abort"); put_bh(bh_check); retval = -EBUSY; goto exit_thread; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 30ce3dc69378..1ed86fb6c302 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -422,8 +422,8 @@ repair_branches: block_len_in_page, 0, &err2); ext4_double_up_write_data_sem(orig_inode, donor_inode); if (replaced_count != block_len_in_page) { - EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), - "Unable to copy data block," + ext4_error_inode_block(orig_inode, (sector_t)(orig_blk_offset), + EIO, "Unable to copy data block," " data will be lost."); *err = -EIO; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b05ea72f38fd..a8aca4772aaa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -160,9 +160,9 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC)) set_buffer_verified(bh); else { - ext4_set_errno(inode->i_sb, EFSBADCRC); - ext4_error_inode(inode, func, line, block, - "Directory index failed checksum"); + ext4_error_inode_err(inode, func, line, block, + EFSBADCRC, + "Directory index failed checksum"); brelse(bh); return ERR_PTR(-EFSBADCRC); } @@ -172,9 +172,9 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC)) set_buffer_verified(bh); else { - ext4_set_errno(inode->i_sb, EFSBADCRC); - ext4_error_inode(inode, func, line, block, - "Directory block failed checksum"); + ext4_error_inode_err(inode, func, line, block, + EFSBADCRC, + "Directory block failed checksum"); brelse(bh); return ERR_PTR(-EFSBADCRC); } @@ -233,13 +233,13 @@ struct dx_root u8 unused_flags; } info; - struct dx_entry entries[0]; + struct dx_entry entries[]; }; struct dx_node { struct fake_dirent fake; - struct dx_entry entries[0]; + struct dx_entry entries[]; }; @@ -1532,9 +1532,9 @@ restart: goto next; wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - ext4_set_errno(sb, EIO); - EXT4_ERROR_INODE(dir, "reading directory lblock %lu", - (unsigned long) block); + EXT4_ERROR_INODE_ERR(dir, EIO, + "reading directory lblock %lu", + (unsigned long) block); brelse(bh); ret = ERR_PTR(-EIO); goto cleanup_and_exit; @@ -1543,9 +1543,9 @@ restart: !is_dx_internal_node(dir, block, (struct ext4_dir_entry *)bh->b_data) && !ext4_dirblock_csum_verify(dir, bh)) { - ext4_set_errno(sb, EFSBADCRC); - EXT4_ERROR_INODE(dir, "checksumming directory " - "block %lu", (unsigned long)block); + EXT4_ERROR_INODE_ERR(dir, EFSBADCRC, + "checksumming directory " + "block %lu", (unsigned long)block); brelse(bh); ret = ERR_PTR(-EFSBADCRC); goto cleanup_and_exit; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c8dff4c68141..9728e7b0e84f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -335,10 +335,12 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) #define ext4_get_tstamp(es, tstamp) \ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) -static void __save_error_info(struct super_block *sb, const char *func, - unsigned int line) +static void __save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; + int err; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (bdev_read_only(sb->s_bdev)) @@ -347,8 +349,62 @@ static void __save_error_info(struct super_block *sb, const char *func, ext4_update_tstamp(es, s_last_error_time); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); es->s_last_error_line = cpu_to_le32(line); - if (es->s_last_error_errcode == 0) - es->s_last_error_errcode = EXT4_ERR_EFSCORRUPTED; + es->s_last_error_ino = cpu_to_le32(ino); + es->s_last_error_block = cpu_to_le64(block); + switch (error) { + case EIO: + err = EXT4_ERR_EIO; + break; + case ENOMEM: + err = EXT4_ERR_ENOMEM; + break; + case EFSBADCRC: + err = EXT4_ERR_EFSBADCRC; + break; + case 0: + case EFSCORRUPTED: + err = EXT4_ERR_EFSCORRUPTED; + break; + case ENOSPC: + err = EXT4_ERR_ENOSPC; + break; + case ENOKEY: + err = EXT4_ERR_ENOKEY; + break; + case EROFS: + err = EXT4_ERR_EROFS; + break; + case EFBIG: + err = EXT4_ERR_EFBIG; + break; + case EEXIST: + err = EXT4_ERR_EEXIST; + break; + case ERANGE: + err = EXT4_ERR_ERANGE; + break; + case EOVERFLOW: + err = EXT4_ERR_EOVERFLOW; + break; + case EBUSY: + err = EXT4_ERR_EBUSY; + break; + case ENOTDIR: + err = EXT4_ERR_ENOTDIR; + break; + case ENOTEMPTY: + err = EXT4_ERR_ENOTEMPTY; + break; + case ESHUTDOWN: + err = EXT4_ERR_ESHUTDOWN; + break; + case EFAULT: + err = EXT4_ERR_EFAULT; + break; + default: + err = EXT4_ERR_UNKNOWN; + } + es->s_last_error_errcode = err; if (!es->s_first_error_time) { es->s_first_error_time = es->s_last_error_time; es->s_first_error_time_hi = es->s_last_error_time_hi; @@ -368,11 +424,13 @@ static void __save_error_info(struct super_block *sb, const char *func, le32_add_cpu(&es->s_error_count, 1); } -static void save_error_info(struct super_block *sb, const char *func, - unsigned int line) +static void save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) { - __save_error_info(sb, func, line); - ext4_commit_super(sb, 1); + __save_error_info(sb, error, ino, block, func, line); + if (!bdev_read_only(sb->s_bdev)) + ext4_commit_super(sb, 1); } /* @@ -477,7 +535,8 @@ static void ext4_handle_error(struct super_block *sb) "EXT4-fs error") void __ext4_error(struct super_block *sb, const char *function, - unsigned int line, const char *fmt, ...) + unsigned int line, int error, __u64 block, + const char *fmt, ...) { struct va_format vaf; va_list args; @@ -495,24 +554,21 @@ void __ext4_error(struct super_block *sb, const char *function, sb->s_id, function, line, current->comm, &vaf); va_end(args); } - save_error_info(sb, function, line); + save_error_info(sb, error, 0, block, function, line); ext4_handle_error(sb); } void __ext4_error_inode(struct inode *inode, const char *function, - unsigned int line, ext4_fsblk_t block, + unsigned int line, ext4_fsblk_t block, int error, const char *fmt, ...) { va_list args; struct va_format vaf; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return; trace_ext4_error(inode->i_sb, function, line); - es->s_last_error_ino = cpu_to_le32(inode->i_ino); - es->s_last_error_block = cpu_to_le64(block); if (ext4_error_ratelimit(inode->i_sb)) { va_start(args, fmt); vaf.fmt = fmt; @@ -529,7 +585,8 @@ void __ext4_error_inode(struct inode *inode, const char *function, current->comm, &vaf); va_end(args); } - save_error_info(inode->i_sb, function, line); + save_error_info(inode->i_sb, error, inode->i_ino, block, + function, line); ext4_handle_error(inode->i_sb); } @@ -548,7 +605,6 @@ void __ext4_error_file(struct file *file, const char *function, trace_ext4_error(inode->i_sb, function, line); es = EXT4_SB(inode->i_sb)->s_es; - es->s_last_error_ino = cpu_to_le32(inode->i_ino); if (ext4_error_ratelimit(inode->i_sb)) { path = file_path(file, pathname, sizeof(pathname)); if (IS_ERR(path)) @@ -570,7 +626,8 @@ void __ext4_error_file(struct file *file, const char *function, current->comm, path, &vaf); va_end(args); } - save_error_info(inode->i_sb, function, line); + save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block, + function, line); ext4_handle_error(inode->i_sb); } @@ -614,66 +671,6 @@ const char *ext4_decode_error(struct super_block *sb, int errno, return errstr; } -void ext4_set_errno(struct super_block *sb, int err) -{ - if (err < 0) - err = -err; - - switch (err) { - case EIO: - err = EXT4_ERR_EIO; - break; - case ENOMEM: - err = EXT4_ERR_ENOMEM; - break; - case EFSBADCRC: - err = EXT4_ERR_EFSBADCRC; - break; - case EFSCORRUPTED: - err = EXT4_ERR_EFSCORRUPTED; - break; - case ENOSPC: - err = EXT4_ERR_ENOSPC; - break; - case ENOKEY: - err = EXT4_ERR_ENOKEY; - break; - case EROFS: - err = EXT4_ERR_EROFS; - break; - case EFBIG: - err = EXT4_ERR_EFBIG; - break; - case EEXIST: - err = EXT4_ERR_EEXIST; - break; - case ERANGE: - err = EXT4_ERR_ERANGE; - break; - case EOVERFLOW: - err = EXT4_ERR_EOVERFLOW; - break; - case EBUSY: - err = EXT4_ERR_EBUSY; - break; - case ENOTDIR: - err = EXT4_ERR_ENOTDIR; - break; - case ENOTEMPTY: - err = EXT4_ERR_ENOTEMPTY; - break; - case ESHUTDOWN: - err = EXT4_ERR_ESHUTDOWN; - break; - case EFAULT: - err = EXT4_ERR_EFAULT; - break; - default: - err = EXT4_ERR_UNKNOWN; - } - EXT4_SB(sb)->s_es->s_last_error_errcode = err; -} - /* __ext4_std_error decodes expected errors from journaling functions * automatically and invokes the appropriate error response. */ @@ -698,8 +695,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, sb->s_id, function, line, errstr); } - ext4_set_errno(sb, -errno); - save_error_info(sb, function, line); + save_error_info(sb, -errno, 0, 0, function, line); ext4_handle_error(sb); } @@ -714,7 +710,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, */ void __ext4_abort(struct super_block *sb, const char *function, - unsigned int line, const char *fmt, ...) + unsigned int line, int error, const char *fmt, ...) { struct va_format vaf; va_list args; @@ -722,7 +718,7 @@ void __ext4_abort(struct super_block *sb, const char *function, if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) return; - save_error_info(sb, function, line); + save_error_info(sb, error, 0, 0, function, line); va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -741,7 +737,6 @@ void __ext4_abort(struct super_block *sb, const char *function, sb->s_flags |= SB_RDONLY; if (EXT4_SB(sb)->s_journal) jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); - save_error_info(sb, function, line); } if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) { if (EXT4_SB(sb)->s_journal && @@ -815,15 +810,12 @@ __acquires(bitlock) { struct va_format vaf; va_list args; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) return; trace_ext4_error(sb, function, line); - es->s_last_error_ino = cpu_to_le32(ino); - es->s_last_error_block = cpu_to_le64(block); - __save_error_info(sb, function, line); + __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); if (ext4_error_ratelimit(sb)) { va_start(args, fmt); @@ -1024,17 +1016,22 @@ static void ext4_put_super(struct super_block *sb) destroy_workqueue(sbi->rsv_conversion_wq); + /* + * Unregister sysfs before destroying jbd2 journal. + * Since we could still access attr_journal_task attribute via sysfs + * path which could have sbi->s_journal->j_task as NULL + */ + ext4_unregister_sysfs(sb); + if (sbi->s_journal) { aborted = is_journal_aborted(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; if ((err < 0) && !aborted) { - ext4_set_errno(sb, -err); - ext4_abort(sb, "Couldn't clean up the journal"); + ext4_abort(sb, -err, "Couldn't clean up the journal"); } } - ext4_unregister_sysfs(sb); ext4_es_unregister_shrinker(sbi); del_timer_sync(&sbi->s_err_report); ext4_release_system_zone(sb); @@ -2180,6 +2177,14 @@ static int parse_options(char *options, struct super_block *sb, } } #endif + if (test_opt(sb, DIOREAD_NOLOCK)) { + int blocksize = + BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); + if (blocksize < PAGE_SIZE) + ext4_msg(sb, KERN_WARNING, "Warning: mounting with an " + "experimental mount option 'dioread_nolock' " + "for blocksize < PAGE_SIZE"); + } return 1; } @@ -3609,7 +3614,8 @@ int ext4_calculate_overhead(struct super_block *sb) */ if (sbi->s_journal && !sbi->journal_bdev) overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); - else if (ext4_has_feature_journal(sb) && !sbi->s_journal) { + else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { + /* j_inum for internal journal is non-zero */ j_inode = ext4_get_journal_inode(sb, j_inum); if (j_inode) { j_blocks = j_inode->i_size >> sb->s_blocksize_bits; @@ -3785,7 +3791,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sb, NO_UID32); /* xattr user namespace & acls are now defaulted on */ set_opt(sb, XATTR_USER); - set_opt(sb, DIOREAD_NOLOCK); #ifdef CONFIG_EXT4_FS_POSIX_ACL set_opt(sb, POSIX_ACL); #endif @@ -3835,6 +3840,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + + if (blocksize == PAGE_SIZE) + set_opt(sb, DIOREAD_NOLOCK); + if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { ext4_msg(sb, KERN_ERR, @@ -4157,7 +4166,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", - sbi->s_blocks_per_group); + sbi->s_inodes_per_group); goto failed_mount; } sbi->s_itb_per_group = sbi->s_inodes_per_group / @@ -4286,9 +4295,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_BLOCKS_PER_GROUP(sb) - 1); do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - ext4_msg(sb, KERN_WARNING, "groups count too large: %u " + ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " "(block count %llu, first data block %u, " - "blocks per group %lu)", sbi->s_groups_count, + "blocks per group %lu)", blocks_count, ext4_blocks_count(es), le32_to_cpu(es->s_first_data_block), EXT4_BLOCKS_PER_GROUP(sb)); @@ -5433,7 +5442,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) - ext4_abort(sb, "Abort forced by user"); + ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); @@ -5622,10 +5631,8 @@ static int ext4_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = dquot->dq_dqb.dqb_bsoftlimit; - if (dquot->dq_dqb.dqb_bhardlimit && - (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) - limit = dquot->dq_dqb.dqb_bhardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, + dquot->dq_dqb.dqb_bhardlimit); limit >>= sb->s_blocksize_bits; if (limit && buf->f_blocks > limit) { @@ -5637,11 +5644,8 @@ static int ext4_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = dquot->dq_dqb.dqb_isoftlimit; - if (dquot->dq_dqb.dqb_ihardlimit && - (!limit || dquot->dq_dqb.dqb_ihardlimit < limit)) - limit = dquot->dq_dqb.dqb_ihardlimit; - + limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, + dquot->dq_dqb.dqb_ihardlimit); if (limit && buf->f_files > limit) { buf->f_files = limit; buf->f_ffree = diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8cac7d95c3ad..21df43a25328 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -245,7 +245,7 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, bh->b_data); errout: if (error) - __ext4_error_inode(inode, function, line, 0, + __ext4_error_inode(inode, function, line, 0, -error, "corrupted xattr block %llu", (unsigned long long) bh->b_blocknr); else @@ -269,7 +269,7 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, error = ext4_xattr_check_entries(IFIRST(header), end, IFIRST(header)); errout: if (error) - __ext4_error_inode(inode, function, line, 0, + __ext4_error_inode(inode, function, line, 0, -error, "corrupted in-inode xattr"); return error; } @@ -2880,9 +2880,9 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, if (IS_ERR(bh)) { error = PTR_ERR(bh); if (error == -EIO) { - ext4_set_errno(inode->i_sb, EIO); - EXT4_ERROR_INODE(inode, "block %llu read error", - EXT4_I(inode)->i_file_acl); + EXT4_ERROR_INODE_ERR(inode, EIO, + "block %llu read error", + EXT4_I(inode)->i_file_acl); } bh = NULL; goto cleanup; diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index f39cad2abe2a..ffe21ac77f78 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -48,7 +48,7 @@ struct ext4_xattr_entry { __le32 e_value_inum; /* inode in which the value is stored */ __le32 e_value_size; /* size of attribute value */ __le32 e_hash; /* hash value of name and value */ - char e_name[0]; /* attribute name */ + char e_name[]; /* attribute name */ }; #define EXT4_XATTR_PAD_BITS 2 @@ -118,7 +118,7 @@ struct ext4_xattr_ibody_find { struct ext4_xattr_inode_array { unsigned int count; /* # of used items in the array */ - struct inode *inodes[0]; + struct inode *inodes[]; }; extern const struct xattr_handler ext4_xattr_user_handler; diff --git a/fs/fs_parser.c b/fs/fs_parser.c index 7e6fb43f9541..ab53e42a874a 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -368,8 +368,6 @@ bool fs_validate_description(const char *name, const struct fs_parameter_spec *param, *p2; bool good = true; - pr_notice("*** VALIDATE %s ***\n", name); - for (param = desc; param->name; param++) { /* Check for duplicate parameter names */ for (p2 = desc; p2 < param; p2++) { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index aff8642f0c2e..991c60c7ffe0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -393,10 +393,9 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end) * In this case, we first scan the range and release found pages. * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv * maps and global counts. Page faults can not race with truncation - * in this routine. hugetlb_no_page() prevents page faults in the - * truncated range. It checks i_size before allocation, and again after - * with the page table lock for the page held. The same lock must be - * acquired to unmap a page. + * in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents + * page faults in the truncated range by checking i_size. i_size is + * modified while holding i_mmap_rwsem. * hole punch is indicated if end is not LLONG_MAX * In the hole punch case we scan the range and release found pages. * Only when releasing a page is the associated region/reserv map @@ -436,7 +435,15 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, index = page->index; hash = hugetlb_fault_mutex_hash(mapping, index); - mutex_lock(&hugetlb_fault_mutex_table[hash]); + if (!truncate_op) { + /* + * Only need to hold the fault mutex in the + * hole punch case. This prevents races with + * page faults. Races are not possible in the + * case of truncation. + */ + mutex_lock(&hugetlb_fault_mutex_table[hash]); + } /* * If page is mapped, it was faulted in after being @@ -450,7 +457,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, if (unlikely(page_mapped(page))) { BUG_ON(truncate_op); + mutex_unlock(&hugetlb_fault_mutex_table[hash]); i_mmap_lock_write(mapping); + mutex_lock(&hugetlb_fault_mutex_table[hash]); hugetlb_vmdelete_list(&mapping->i_mmap, index * pages_per_huge_page(h), (index + 1) * pages_per_huge_page(h)); @@ -477,7 +486,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, } unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); + if (!truncate_op) + mutex_unlock(&hugetlb_fault_mutex_table[hash]); } huge_pagevec_release(&pvec); cond_resched(); @@ -515,8 +525,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) BUG_ON(offset & ~huge_page_mask(h)); pgoff = offset >> PAGE_SHIFT; - i_size_write(inode, offset); i_mmap_lock_write(mapping); + i_size_write(inode, offset); if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); i_mmap_unlock_write(mapping); @@ -638,7 +648,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, /* addr is the offset within the file (zero based) */ addr = index * hpage_size; - /* mutex taken here, fault path and hole punch */ + /* + * fault mutex taken here, protects against fault path + * and hole punch. inode_lock previously taken protects + * against truncation. + */ hash = hugetlb_fault_mutex_hash(mapping, index); mutex_lock(&hugetlb_fault_mutex_table[hash]); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 7c84c4c027c4..f080f542911b 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -503,7 +503,8 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate); int iomap_releasepage(struct page *page, gfp_t gfp_mask) { - trace_iomap_releasepage(page->mapping->host, page, 0, 0); + trace_iomap_releasepage(page->mapping->host, page_offset(page), + PAGE_SIZE); /* * mm accommodates an old ext3 case where clean pages might not have had @@ -520,7 +521,7 @@ EXPORT_SYMBOL_GPL(iomap_releasepage); void iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len) { - trace_iomap_invalidatepage(page->mapping->host, page, offset, len); + trace_iomap_invalidatepage(page->mapping->host, offset, len); /* * If we are invalidating the entire page, clear the dirty state from it @@ -1519,7 +1520,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) u64 end_offset; loff_t offset; - trace_iomap_writepage(inode, page, 0, 0); + trace_iomap_writepage(inode, page_offset(page), PAGE_SIZE); /* * Refuse to write the page out if we are called from reclaim context. diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 23837926c0c5..20dde5aadcdd 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -534,8 +534,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, /* * We are about to drop our additional submission reference, which - * might be the last reference to the dio. There are three three - * different ways we can progress here: + * might be the last reference to the dio. There are three different + * ways we can progress here: * * (a) If this is the last reference we will always complete and free * the dio ourselves. diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 6dc227b8c47e..4df19c66f597 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -41,14 +41,12 @@ DEFINE_EVENT(iomap_readpage_class, name, \ DEFINE_READPAGE_EVENT(iomap_readpage); DEFINE_READPAGE_EVENT(iomap_readpages); -DECLARE_EVENT_CLASS(iomap_page_class, - TP_PROTO(struct inode *inode, struct page *page, unsigned long off, - unsigned int len), - TP_ARGS(inode, page, off, len), +DECLARE_EVENT_CLASS(iomap_range_class, + TP_PROTO(struct inode *inode, unsigned long off, unsigned int len), + TP_ARGS(inode, off, len), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, ino) - __field(pgoff_t, pgoff) __field(loff_t, size) __field(unsigned long, offset) __field(unsigned int, length) @@ -56,29 +54,26 @@ DECLARE_EVENT_CLASS(iomap_page_class, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->pgoff = page_offset(page); __entry->size = i_size_read(inode); __entry->offset = off; __entry->length = len; ), - TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " + TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset %lx " "length %x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, - __entry->pgoff, __entry->size, __entry->offset, __entry->length) ) -#define DEFINE_PAGE_EVENT(name) \ -DEFINE_EVENT(iomap_page_class, name, \ - TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \ - unsigned int len), \ - TP_ARGS(inode, page, off, len)) -DEFINE_PAGE_EVENT(iomap_writepage); -DEFINE_PAGE_EVENT(iomap_releasepage); -DEFINE_PAGE_EVENT(iomap_invalidatepage); +#define DEFINE_RANGE_EVENT(name) \ +DEFINE_EVENT(iomap_range_class, name, \ + TP_PROTO(struct inode *inode, unsigned long off, unsigned int len),\ + TP_ARGS(inode, off, len)) +DEFINE_RANGE_EVENT(iomap_writepage); +DEFINE_RANGE_EVENT(iomap_releasepage); +DEFINE_RANGE_EVENT(iomap_invalidatepage); #define IOMAP_TYPE_STRINGS \ { IOMAP_HOLE, "HOLE" }, \ diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 27373f5792a4..e855d8260433 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -997,9 +997,10 @@ restart_loop: * journalled data) we need to unmap buffer and clear * more bits. We also need to be careful about the check * because the data page mapping can get cleared under - * out hands, which alse need not to clear more bits - * because the page and buffers will be freed and can - * never be reused once we are done with them. + * our hands. Note that if mapping == NULL, we don't + * need to make buffer unmapped because the page is + * already detached from the mapping and buffers cannot + * get reused. */ mapping = READ_ONCE(bh->b_page->mapping); if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) { diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index d0f7a5abd9a9..fc2469a20fed 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -53,6 +53,8 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, int alloc) kn->iattr->ia_ctime = kn->iattr->ia_atime; simple_xattrs_init(&kn->iattr->xattrs); + atomic_set(&kn->iattr->nr_user_xattrs, 0); + atomic_set(&kn->iattr->user_xattr_size, 0); out_unlock: ret = kn->iattr; mutex_unlock(&iattr_mutex); @@ -303,7 +305,7 @@ int kernfs_xattr_set(struct kernfs_node *kn, const char *name, if (!attrs) return -ENOMEM; - return simple_xattr_set(&attrs->xattrs, name, value, size, flags); + return simple_xattr_set(&attrs->xattrs, name, value, size, flags, NULL); } static int kernfs_vfs_xattr_get(const struct xattr_handler *handler, @@ -327,6 +329,86 @@ static int kernfs_vfs_xattr_set(const struct xattr_handler *handler, return kernfs_xattr_set(kn, name, value, size, flags); } +static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn, + const char *full_name, + struct simple_xattrs *xattrs, + const void *value, size_t size, int flags) +{ + atomic_t *sz = &kn->iattr->user_xattr_size; + atomic_t *nr = &kn->iattr->nr_user_xattrs; + ssize_t removed_size; + int ret; + + if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) { + ret = -ENOSPC; + goto dec_count_out; + } + + if (atomic_add_return(size, sz) > KERNFS_USER_XATTR_SIZE_LIMIT) { + ret = -ENOSPC; + goto dec_size_out; + } + + ret = simple_xattr_set(xattrs, full_name, value, size, flags, + &removed_size); + + if (!ret && removed_size >= 0) + size = removed_size; + else if (!ret) + return 0; +dec_size_out: + atomic_sub(size, sz); +dec_count_out: + atomic_dec(nr); + return ret; +} + +static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn, + const char *full_name, + struct simple_xattrs *xattrs, + const void *value, size_t size, int flags) +{ + atomic_t *sz = &kn->iattr->user_xattr_size; + atomic_t *nr = &kn->iattr->nr_user_xattrs; + ssize_t removed_size; + int ret; + + ret = simple_xattr_set(xattrs, full_name, value, size, flags, + &removed_size); + + if (removed_size >= 0) { + atomic_sub(removed_size, sz); + atomic_dec(nr); + } + + return ret; +} + +static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *suffix, const void *value, + size_t size, int flags) +{ + const char *full_name = xattr_full_name(handler, suffix); + struct kernfs_node *kn = inode->i_private; + struct kernfs_iattrs *attrs; + + if (!(kernfs_root(kn)->flags & KERNFS_ROOT_SUPPORT_USER_XATTR)) + return -EOPNOTSUPP; + + attrs = kernfs_iattrs(kn); + if (!attrs) + return -ENOMEM; + + if (value) + return kernfs_vfs_user_xattr_add(kn, full_name, &attrs->xattrs, + value, size, flags); + else + return kernfs_vfs_user_xattr_rm(kn, full_name, &attrs->xattrs, + value, size, flags); + +} + static const struct xattr_handler kernfs_trusted_xattr_handler = { .prefix = XATTR_TRUSTED_PREFIX, .get = kernfs_vfs_xattr_get, @@ -339,8 +421,15 @@ static const struct xattr_handler kernfs_security_xattr_handler = { .set = kernfs_vfs_xattr_set, }; +static const struct xattr_handler kernfs_user_xattr_handler = { + .prefix = XATTR_USER_PREFIX, + .get = kernfs_vfs_xattr_get, + .set = kernfs_vfs_user_xattr_set, +}; + const struct xattr_handler *kernfs_xattr_handlers[] = { &kernfs_trusted_xattr_handler, &kernfs_security_xattr_handler, + &kernfs_user_xattr_handler, NULL }; diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 2f3c51d55261..7ee97ef59184 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -26,6 +26,8 @@ struct kernfs_iattrs { struct timespec64 ia_ctime; struct simple_xattrs xattrs; + atomic_t nr_user_xattrs; + atomic_t user_xattr_size; }; /* +1 to avoid triggering overflow warning when negating it */ diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 89bd5581f317..963800037609 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -152,12 +152,13 @@ static int nfs_dns_upcall(struct cache_detail *cd, struct cache_head *ch) { struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h); - int ret; - ret = nfs_cache_upcall(cd, key->hostname); - if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch); - return ret; + if (test_and_set_bit(CACHE_PENDING, &ch->flags)) + return 0; + if (!nfs_cache_upcall(cd, key->hostname)) + return 0; + clear_bit(CACHE_PENDING, &ch->flags); + return sunrpc_cache_pipe_upcall_timeout(cd, ch); } static int nfs_dns_match(struct cache_head *ca, diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f368f3215f88..99d2cae91bd6 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -136,7 +136,7 @@ config NFSD_FLEXFILELAYOUT config NFSD_V4_2_INTER_SSC bool "NFSv4.2 inter server to server COPY" - depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 + depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 && NFS_FS=y help This option enables support for NFSv4.2 inter server to server copy where the destination server calls the NFSv4.2 diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 15422c951fd1..cb777fe82988 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -23,6 +23,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_EXPORT @@ -50,6 +51,11 @@ static void expkey_put(struct kref *ref) kfree_rcu(key, ek_rcu); } +static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h); +} + static void expkey_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) @@ -140,7 +146,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) if (len == 0) { set_bit(CACHE_NEGATIVE, &key.h.flags); ek = svc_expkey_update(cd, &key, ek); - if (!ek) + if (ek) + trace_nfsd_expkey_update(ek, NULL); + else err = -ENOMEM; } else { err = kern_path(buf, 0, &key.ek_path); @@ -150,7 +158,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) dprintk("Found the path %s\n", buf); ek = svc_expkey_update(cd, &key, ek); - if (!ek) + if (ek) + trace_nfsd_expkey_update(ek, buf); + else err = -ENOMEM; path_put(&key.ek_path); } @@ -249,6 +259,7 @@ static const struct cache_detail svc_expkey_cache_template = { .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", .cache_put = expkey_put, + .cache_upcall = expkey_upcall, .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, @@ -330,6 +341,11 @@ static void svc_export_put(struct kref *ref) kfree_rcu(exp, ex_rcu); } +static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h); +} + static void svc_export_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) @@ -643,15 +659,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) } expp = svc_export_lookup(&exp); - if (expp) - expp = svc_export_update(&exp, expp); - else - err = -ENOMEM; - cache_flush(); - if (expp == NULL) + if (!expp) { err = -ENOMEM; - else + goto out4; + } + expp = svc_export_update(&exp, expp); + if (expp) { + trace_nfsd_export_update(expp); + cache_flush(); exp_put(expp); + } else + err = -ENOMEM; out4: nfsd4_fslocs_free(&exp.ex_fslocs); kfree(exp.ex_uuid); @@ -767,6 +785,7 @@ static const struct cache_detail svc_export_cache_template = { .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", .cache_put = svc_export_put, + .cache_upcall = svc_export_upcall, .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, @@ -832,8 +851,10 @@ exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type, if (ek == NULL) return ERR_PTR(-ENOMEM); err = cache_check(cd, &ek->h, reqp); - if (err) + if (err) { + trace_nfsd_exp_find_key(&key, err); return ERR_PTR(err); + } return ek; } @@ -855,8 +876,10 @@ exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp, if (exp == NULL) return ERR_PTR(-ENOMEM); err = cache_check(cd, &exp->h, reqp); - if (err) + if (err) { + trace_nfsd_exp_get_by_name(&key, err); return ERR_PTR(err); + } return exp; } diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 22e77ede9f14..82198d747c4c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -890,7 +890,7 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, unsigned char need = may_flags & NFSD_FILE_MAY_MASK; hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node) { + nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { if ((need & nf->nf_may) != need) continue; if (nf->nf_inode != inode) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 2baf32311e00..09aa545825bd 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -172,6 +172,8 @@ struct nfsd_net { unsigned int longest_chain_cachesize; struct shrinker nfsd_reply_cache_shrinker; + /* utsname taken from the the process that starts the server */ + char nfsd_name[UNX_MAXNODENAME+1]; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index d1f285245af8..9460be8a8321 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -122,6 +122,12 @@ idtoname_hash(struct ent *ent) return hash; } +static int +idtoname_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall_timeout(cd, h); +} + static void idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, int *blen) @@ -184,6 +190,7 @@ static const struct cache_detail idtoname_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", .cache_put = ent_put, + .cache_upcall = idtoname_upcall, .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, @@ -295,6 +302,12 @@ nametoid_hash(struct ent *ent) return hash_str(ent->name, ENT_HASHBITS); } +static int +nametoid_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall_timeout(cd, h); +} + static void nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, int *blen) @@ -347,6 +360,7 @@ static const struct cache_detail nametoid_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", .cache_put = ent_put, + .cache_upcall = nametoid_upcall, .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 65cfe9ab47be..e32ecedece0f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -494,6 +494,8 @@ find_any_file(struct nfs4_file *f) { struct nfsd_file *ret; + if (!f) + return NULL; spin_lock(&f->fi_lock); ret = __nfs4_get_fd(f, O_RDWR); if (!ret) { @@ -1309,6 +1311,12 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop) nfs4_free_stateowner(sop); } +static bool +nfs4_ol_stateid_unhashed(const struct nfs4_ol_stateid *stp) +{ + return list_empty(&stp->st_perfile); +} + static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; @@ -1379,9 +1387,11 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); + if (!unhash_ol_stateid(stp)) + return false; list_del_init(&stp->st_locks); nfs4_unhash_stid(&stp->st_stid); - return unhash_ol_stateid(stp); + return true; } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -1446,13 +1456,12 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { - bool unhashed; - lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); - unhashed = unhash_ol_stateid(stp); + if (!unhash_ol_stateid(stp)) + return false; release_open_stateid_locks(stp, reaplist); - return unhashed; + return true; } static void release_open_stateid(struct nfs4_ol_stateid *stp) @@ -2636,7 +2645,7 @@ static const struct file_operations client_ctl_fops = { static const struct tree_descr client_files[] = { [0] = {"info", &client_info_fops, S_IRUSR}, [1] = {"states", &client_states_fops, S_IRUSR}, - [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR}, + [2] = {"ctl", &client_ctl_fops, S_IWUSR}, [3] = {""}, }; @@ -4343,7 +4352,8 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval) { struct nfs4_file *fp; - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { + hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, + lockdep_is_held(&state_lock)) { if (fh_match(&fp->fi_fhandle, fh)) { if (refcount_inc_not_zero(&fp->fi_ref)) return fp; @@ -5521,15 +5531,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return status; - /* Client debugging aid. */ - if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { - char addr_str[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str, - sizeof(addr_str)); - pr_warn_ratelimited("NFSD: client %s testing state ID " - "with incorrect client ID\n", addr_str); + if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) return status; - } spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s) @@ -6393,21 +6396,21 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, } static struct nfs4_ol_stateid * -find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) +find_lock_stateid(const struct nfs4_lockowner *lo, + const struct nfs4_ol_stateid *ost) { struct nfs4_ol_stateid *lst; - struct nfs4_client *clp = lo->lo_owner.so_client; - lockdep_assert_held(&clp->cl_lock); + lockdep_assert_held(&ost->st_stid.sc_client->cl_lock); - list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { - if (lst->st_stid.sc_type != NFS4_LOCK_STID) - continue; - if (lst->st_stid.sc_file == fp) { - refcount_inc(&lst->st_stid.sc_count); - return lst; + /* If ost is not hashed, ost->st_locks will not be valid */ + if (!nfs4_ol_stateid_unhashed(ost)) + list_for_each_entry(lst, &ost->st_locks, st_locks) { + if (lst->st_stateowner == &lo->lo_owner) { + refcount_inc(&lst->st_stid.sc_count); + return lst; + } } - } return NULL; } @@ -6423,11 +6426,11 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&clp->cl_lock); - spin_lock(&fp->fi_lock); - retstp = find_lock_stateid(lo, fp); + if (nfs4_ol_stateid_unhashed(open_stp)) + goto out_close; + retstp = find_lock_stateid(lo, open_stp); if (retstp) - goto out_unlock; - + goto out_found; refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); @@ -6436,22 +6439,26 @@ retry: stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + spin_lock(&fp->fi_lock); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); -out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&clp->cl_lock); - if (retstp) { - if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { - nfs4_put_stid(&retstp->st_stid); - goto retry; - } - /* To keep mutex tracking happy */ - mutex_unlock(&stp->st_mutex); - stp = retstp; - } return stp; +out_found: + spin_unlock(&clp->cl_lock); + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } + /* To keep mutex tracking happy */ + mutex_unlock(&stp->st_mutex); + return retstp; +out_close: + spin_unlock(&clp->cl_lock); + mutex_unlock(&stp->st_mutex); + return NULL; } static struct nfs4_ol_stateid * @@ -6466,7 +6473,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, *new = false; spin_lock(&clp->cl_lock); - lst = find_lock_stateid(lo, fi); + lst = find_lock_stateid(lo, ost); spin_unlock(&clp->cl_lock); if (lst != NULL) { if (nfsd4_lock_ol_stateid(lst) == nfs_ok) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9761512674a0..996ac01ee977 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3591,23 +3591,22 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, __be32 nfserr; __be32 tmp; __be32 *p; - u32 zzz = 0; int pad; + /* + * svcrdma requires every READ payload to start somewhere + * in xdr->pages. + */ + if (xdr->iov == xdr->buf->head) { + xdr->iov = NULL; + xdr->end = xdr->p; + } + len = maxcount; v = 0; - - thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p)); - p = xdr_reserve_space(xdr, (thislen+3)&~3); - WARN_ON_ONCE(!p); - resp->rqstp->rq_vec[v].iov_base = p; - resp->rqstp->rq_vec[v].iov_len = thislen; - v++; - len -= thislen; - while (len) { thislen = min_t(long, len, PAGE_SIZE); - p = xdr_reserve_space(xdr, (thislen+3)&~3); + p = xdr_reserve_space(xdr, thislen); WARN_ON_ONCE(!p); resp->rqstp->rq_vec[v].iov_base = p; resp->rqstp->rq_vec[v].iov_len = thislen; @@ -3616,23 +3615,25 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, } read->rd_vlen = v; - len = maxcount; nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount, &eof); read->rd_length = maxcount; if (nfserr) return nfserr; - xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); + if (svc_encode_read_payload(resp->rqstp, starting_len + 8, maxcount)) + return nfserr_io; + xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); tmp = htonl(eof); write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); tmp = htonl(maxcount); write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + tmp = xdr_zero; pad = (maxcount&3) ? 4 - (maxcount&3) : 0; write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, - &zzz, pad); + &tmp, pad); return 0; } @@ -4005,11 +4006,12 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, int major_id_sz; int server_scope_sz; uint64_t minor_id = 0; + struct nfsd_net *nn = net_generic(SVC_NET(resp->rqstp), nfsd_net_id); - major_id = utsname()->nodename; - major_id_sz = strlen(major_id); - server_scope = utsname()->nodename; - server_scope_sz = strlen(server_scope); + major_id = nn->nfsd_name; + major_id_sz = strlen(nn->nfsd_name); + server_scope = nn->nfsd_name; + server_scope_sz = strlen(nn->nfsd_name); p = xdr_reserve_space(xdr, 8 /* eir_clientid */ + diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index e109a1007704..3bb2db947d29 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1333,6 +1333,7 @@ void nfsd_client_rmdir(struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); WARN_ON_ONCE(ret); + fsnotify_rmdir(dir, dentry); d_delete(dentry); inode_unlock(dir); } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index b319080288c3..37bc8f5f4514 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -14,6 +14,7 @@ #include "nfsd.h" #include "vfs.h" #include "auth.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_FH @@ -209,11 +210,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) } error = nfserr_stale; - if (PTR_ERR(exp) == -ENOENT) - return error; + if (IS_ERR(exp)) { + trace_nfsd_set_fh_dentry_badexport(rqstp, fhp, PTR_ERR(exp)); + + if (PTR_ERR(exp) == -ENOENT) + return error; - if (IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); + } if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) { /* Elevate privileges so that the lack of 'r' or 'x' @@ -267,6 +271,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, data_left, fileid_type, nfsd_acceptable, exp); + if (IS_ERR_OR_NULL(dentry)) + trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, + dentry ? PTR_ERR(dentry) : -ESTALE); } if (dentry == NULL) goto out; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 3b77b904212d..ca9fd348548b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -749,6 +749,9 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; + strlcpy(nn->nfsd_name, utsname()->nodename, + sizeof(nn->nfsd_name)); + error = nfsd_create_serv(net); if (error) goto out; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 06dd0d337049..78c574251c60 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,6 +9,7 @@ #define _NFSD_TRACE_H #include <linux/tracepoint.h> +#include "export.h" #include "nfsfh.h" TRACE_EVENT(nfsd_compound, @@ -50,6 +51,127 @@ TRACE_EVENT(nfsd_compound_status, __get_str(name), __entry->status) ) +DECLARE_EVENT_CLASS(nfsd_fh_err_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *fhp, + int status), + TP_ARGS(rqstp, fhp, status), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, fh_hash) + __field(int, status) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->status = status; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x status=%d", + __entry->xid, __entry->fh_hash, + __entry->status) +) + +#define DEFINE_NFSD_FH_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_fh_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ + int status), \ + TP_ARGS(rqstp, fhp, status)) + +DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport); +DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle); + +TRACE_EVENT(nfsd_exp_find_key, + TP_PROTO(const struct svc_expkey *key, + int status), + TP_ARGS(key, status), + TP_STRUCT__entry( + __field(int, fsidtype) + __array(u32, fsid, 6) + __string(auth_domain, key->ek_client->name) + __field(int, status) + ), + TP_fast_assign( + __entry->fsidtype = key->ek_fsidtype; + memcpy(__entry->fsid, key->ek_fsid, 4*6); + __assign_str(auth_domain, key->ek_client->name); + __entry->status = status; + ), + TP_printk("fsid=%x::%s domain=%s status=%d", + __entry->fsidtype, + __print_array(__entry->fsid, 6, 4), + __get_str(auth_domain), + __entry->status + ) +); + +TRACE_EVENT(nfsd_expkey_update, + TP_PROTO(const struct svc_expkey *key, const char *exp_path), + TP_ARGS(key, exp_path), + TP_STRUCT__entry( + __field(int, fsidtype) + __array(u32, fsid, 6) + __string(auth_domain, key->ek_client->name) + __string(path, exp_path) + __field(bool, cache) + ), + TP_fast_assign( + __entry->fsidtype = key->ek_fsidtype; + memcpy(__entry->fsid, key->ek_fsid, 4*6); + __assign_str(auth_domain, key->ek_client->name); + __assign_str(path, exp_path); + __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); + ), + TP_printk("fsid=%x::%s domain=%s path=%s cache=%s", + __entry->fsidtype, + __print_array(__entry->fsid, 6, 4), + __get_str(auth_domain), + __get_str(path), + __entry->cache ? "pos" : "neg" + ) +); + +TRACE_EVENT(nfsd_exp_get_by_name, + TP_PROTO(const struct svc_export *key, + int status), + TP_ARGS(key, status), + TP_STRUCT__entry( + __string(path, key->ex_path.dentry->d_name.name) + __string(auth_domain, key->ex_client->name) + __field(int, status) + ), + TP_fast_assign( + __assign_str(path, key->ex_path.dentry->d_name.name); + __assign_str(auth_domain, key->ex_client->name); + __entry->status = status; + ), + TP_printk("path=%s domain=%s status=%d", + __get_str(path), + __get_str(auth_domain), + __entry->status + ) +); + +TRACE_EVENT(nfsd_export_update, + TP_PROTO(const struct svc_export *key), + TP_ARGS(key), + TP_STRUCT__entry( + __string(path, key->ex_path.dentry->d_name.name) + __string(auth_domain, key->ex_client->name) + __field(bool, cache) + ), + TP_fast_assign( + __assign_str(path, key->ex_path.dentry->d_name.name); + __assign_str(auth_domain, key->ex_client->name); + __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); + ), + TP_printk("path=%s domain=%s cache=%s", + __get_str(path), + __get_str(auth_domain), + __entry->cache ? "pos" : "neg" + ) +); + DECLARE_EVENT_CLASS(nfsd_io_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 88534eb0e7c2..65b3abbcce4e 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1060,7 +1060,6 @@ bail: brelse(bhs[i]); bhs[i] = NULL; } - mlog_errno(status); } return status; } @@ -3942,7 +3941,7 @@ rotate: * above. * * This leaf needs to have space, either by the empty 1st - * extent record, or by virtue of an l_next_rec < l_count. + * extent record, or by virtue of an l_next_free_rec < l_count. */ ocfs2_rotate_leaf(el, insert_rec); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a368350d4c27..89d13e0705fe 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -101,8 +101,6 @@ static struct o2hb_callback { static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); -#define O2HB_DEFAULT_BLOCK_BITS 9 - enum o2hb_heartbeat_modes { O2HB_HEARTBEAT_LOCAL = 0, O2HB_HEARTBEAT_GLOBAL, @@ -1309,7 +1307,7 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) case O2HB_DB_TYPE_REGION_NUMBER: reg = (struct o2hb_region *)db->db_data; - out += snprintf(buf + out, PAGE_SIZE - out, "%d\n", + out += scnprintf(buf + out, PAGE_SIZE - out, "%d\n", reg->hr_region_num); goto done; @@ -1319,12 +1317,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) /* If 0, it has never been set before */ if (lts) lts = jiffies_to_msecs(jiffies - lts); - out += snprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts); + out += scnprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts); goto done; case O2HB_DB_TYPE_REGION_PINNED: reg = (struct o2hb_region *)db->db_data; - out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", + out += scnprintf(buf + out, PAGE_SIZE - out, "%u\n", !!reg->hr_item_pinned); goto done; @@ -1333,8 +1331,8 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) } while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len) - out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); - out += snprintf(buf + out, PAGE_SIZE - out, "\n"); + out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i); + out += scnprintf(buf + out, PAGE_SIZE - out, "\n"); done: i_size_write(inode, out); diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 02bf4a1774cc..667a5c5e1f66 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -443,8 +443,8 @@ static int o2net_fill_bitmap(char *buf, int len) o2net_fill_node_map(map, sizeof(map)); while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) - out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); - out += snprintf(buf + out, PAGE_SIZE - out, "\n"); + out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i); + out += scnprintf(buf + out, PAGE_SIZE - out, "\n"); return out; } diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 48a3398f0bf5..2c512b40a940 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1570,15 +1570,13 @@ static void o2net_start_connect(struct work_struct *work) struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; int ret = 0, stop; unsigned int timeout; - unsigned int noio_flag; + unsigned int nofs_flag; /* - * sock_create allocates the sock with GFP_KERNEL. We must set - * per-process flag PF_MEMALLOC_NOIO so that all allocations done - * by this process are done as if GFP_NOIO was specified. So we - * are not reentering filesystem while doing memory reclaim. + * sock_create allocates the sock with GFP_KERNEL. We must + * prevent the filesystem from being reentered by memory reclaim. */ - noio_flag = memalloc_noio_save(); + nofs_flag = memalloc_nofs_save(); /* if we're greater we initiate tx, otherwise we accept */ if (o2nm_this_node() <= o2net_num_from_nn(nn)) goto out; @@ -1683,7 +1681,7 @@ out: if (mynode) o2nm_node_put(mynode); - memalloc_noio_restore(noio_flag); + memalloc_nofs_restore(nofs_flag); return; } @@ -1810,15 +1808,13 @@ static int o2net_accept_one(struct socket *sock, int *more) struct o2nm_node *local_node = NULL; struct o2net_sock_container *sc = NULL; struct o2net_node *nn; - unsigned int noio_flag; + unsigned int nofs_flag; /* - * sock_create_lite allocates the sock with GFP_KERNEL. We must set - * per-process flag PF_MEMALLOC_NOIO so that all allocations done - * by this process are done as if GFP_NOIO was specified. So we - * are not reentering filesystem while doing memory reclaim. + * sock_create_lite allocates the sock with GFP_KERNEL. We must + * prevent the filesystem from being reentered by memory reclaim. */ - noio_flag = memalloc_noio_save(); + nofs_flag = memalloc_nofs_save(); BUG_ON(sock == NULL); *more = 0; @@ -1934,7 +1930,7 @@ out: if (sc) sc_put(sc); - memalloc_noio_restore(noio_flag); + memalloc_nofs_restore(nofs_flag); return ret; } @@ -1948,7 +1944,6 @@ static void o2net_accept_many(struct work_struct *work) { struct socket *sock = o2net_listen_sock; int more; - int err; /* * It is critical to note that due to interrupt moderation @@ -1963,7 +1958,7 @@ static void o2net_accept_many(struct work_struct *work) */ for (;;) { - err = o2net_accept_one(sock, &more); + o2net_accept_one(sock, &more); if (!more) break; cond_resched(); diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index de87cbffd175..736338f45c59 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -32,7 +32,7 @@ struct o2net_msg __be32 status; __be32 key; __be32 msg_num; - __u8 buf[0]; + __u8 buf[]; }; typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data, diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index bdef72c0f099..5761060d2ba8 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -676,7 +676,7 @@ static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, int ra_ptr = 0; /* Current index into readahead buffer */ int num = 0; - int nblocks, i, err; + int nblocks, i; sb = dir->i_sb; @@ -708,7 +708,7 @@ restart: num++; bh = NULL; - err = ocfs2_read_dir_block(dir, b++, &bh, + ocfs2_read_dir_block(dir, b++, &bh, OCFS2_BH_READAHEAD); bh_use[ra_max] = bh; } diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 0463dce65bb2..c8a444622faa 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -564,7 +564,7 @@ struct dlm_migratable_lockres // 48 bytes u8 lvb[DLM_LVB_LEN]; // 112 bytes - struct dlm_migratable_lock ml[0]; // 16 bytes each, begins at byte 112 + struct dlm_migratable_lock ml[]; // 16 bytes each, begins at byte 112 }; #define DLM_MIG_LOCKRES_MAX_LEN \ (sizeof(struct dlm_migratable_lockres) + \ @@ -601,7 +601,7 @@ struct dlm_convert_lock u8 name[O2NM_MAX_NAME_LEN]; - s8 lvb[0]; + s8 lvb[]; }; #define DLM_CONVERT_LOCK_MAX_LEN (sizeof(struct dlm_convert_lock)+DLM_LVB_LEN) @@ -616,7 +616,7 @@ struct dlm_unlock_lock u8 name[O2NM_MAX_NAME_LEN]; - s8 lvb[0]; + s8 lvb[]; }; #define DLM_UNLOCK_LOCK_MAX_LEN (sizeof(struct dlm_unlock_lock)+DLM_LVB_LEN) @@ -632,7 +632,7 @@ struct dlm_proxy_ast u8 name[O2NM_MAX_NAME_LEN]; - s8 lvb[0]; + s8 lvb[]; }; #define DLM_PROXY_AST_MAX_LEN (sizeof(struct dlm_proxy_ast)+DLM_LVB_LEN) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index c5c6efba7b5e..4b8b41d23e91 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -244,11 +244,11 @@ static int stringify_lockname(const char *lockname, int locklen, char *buf, memcpy((__be64 *)&inode_blkno_be, (char *)&lockname[OCFS2_DENTRY_LOCK_INO_START], sizeof(__be64)); - out += snprintf(buf + out, len - out, "%.*s%08x", + out += scnprintf(buf + out, len - out, "%.*s%08x", OCFS2_DENTRY_LOCK_INO_START - 1, lockname, (unsigned int)be64_to_cpu(inode_blkno_be)); } else - out += snprintf(buf + out, len - out, "%.*s", + out += scnprintf(buf + out, len - out, "%.*s", locklen, lockname); return out; } @@ -260,7 +260,7 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes, int i = -1; while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes) - out += snprintf(buf + out, len - out, "%d ", i); + out += scnprintf(buf + out, len - out, "%d ", i); return out; } @@ -278,34 +278,34 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) mle_type = "MIG"; out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", mle_type, mle->master, mle->new_master, !list_empty(&mle->hb_events), !!mle->inuse, kref_read(&mle->mle_refs)); - out += snprintf(buf + out, len - out, "Maybe="); + out += scnprintf(buf + out, len - out, "Maybe="); out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); - out += snprintf(buf + out, len - out, "Vote="); + out += scnprintf(buf + out, len - out, "Vote="); out += stringify_nodemap(mle->vote_map, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); - out += snprintf(buf + out, len - out, "Response="); + out += scnprintf(buf + out, len - out, "Response="); out += stringify_nodemap(mle->response_map, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); - out += snprintf(buf + out, len - out, "Node="); + out += scnprintf(buf + out, len - out, "Node="); out += stringify_nodemap(mle->node_map, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); return out; } @@ -353,7 +353,7 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len) int out = 0; unsigned long total = 0; - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Dumping Purgelist for Domain: %s\n", dlm->name); spin_lock(&dlm->spinlock); @@ -365,13 +365,13 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len) out += stringify_lockname(res->lockname.name, res->lockname.len, buf + out, len - out); - out += snprintf(buf + out, len - out, "\t%ld\n", + out += scnprintf(buf + out, len - out, "\t%ld\n", (jiffies - res->last_used)/HZ); spin_unlock(&res->spinlock); } spin_unlock(&dlm->spinlock); - out += snprintf(buf + out, len - out, "Total on list: %lu\n", total); + out += scnprintf(buf + out, len - out, "Total on list: %lu\n", total); return out; } @@ -410,7 +410,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) int i, out = 0; unsigned long total = 0, longest = 0, bucket_count = 0; - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Dumping MLEs for Domain: %s\n", dlm->name); spin_lock(&dlm->master_lock); @@ -428,7 +428,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) } spin_unlock(&dlm->master_lock); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Total: %lu, Longest: %lu\n", total, longest); return out; } @@ -467,7 +467,7 @@ static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len) #define DEBUG_LOCK_VERSION 1 spin_lock(&lock->spinlock); - out = snprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d," + out = scnprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d," "%d,%d,%d,%d\n", DEBUG_LOCK_VERSION, list_type, lock->ml.type, lock->ml.convert_type, @@ -491,13 +491,13 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) int i; int out = 0; - out += snprintf(buf + out, len - out, "NAME:"); + out += scnprintf(buf + out, len - out, "NAME:"); out += stringify_lockname(res->lockname.name, res->lockname.len, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); #define DEBUG_LRES_VERSION 1 - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "LRES:%d,%d,%d,%ld,%d,%d,%d,%d,%d,%d,%d\n", DEBUG_LRES_VERSION, res->owner, res->state, res->last_used, @@ -509,17 +509,17 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) kref_read(&res->refs)); /* refmap */ - out += snprintf(buf + out, len - out, "RMAP:"); + out += scnprintf(buf + out, len - out, "RMAP:"); out += stringify_nodemap(res->refmap, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); /* lvb */ - out += snprintf(buf + out, len - out, "LVBX:"); + out += scnprintf(buf + out, len - out, "LVBX:"); for (i = 0; i < DLM_LVB_LEN; i++) - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%02x", (unsigned char)res->lvb[i]); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); /* granted */ list_for_each_entry(lock, &res->granted, list) @@ -533,7 +533,7 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) list_for_each_entry(lock, &res->blocked, list) out += dump_lock(lock, 2, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); return out; } @@ -683,41 +683,41 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) } /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Domain: %s Key: 0x%08x Protocol: %d.%d\n", dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, dlm->dlm_locking_proto.pv_minor); /* Thread Pid: xxx Node: xxx State: xxxxx */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Thread Pid: %d Node: %d State: %s\n", task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state); /* Number of Joins: xxx Joining Node: xxx */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Number of Joins: %d Joining Node: %d\n", dlm->num_joins, dlm->joining_node); /* Domain Map: xx xx xx */ - out += snprintf(buf + out, len - out, "Domain Map: "); + out += scnprintf(buf + out, len - out, "Domain Map: "); out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); /* Exit Domain Map: xx xx xx */ - out += snprintf(buf + out, len - out, "Exit Domain Map: "); + out += scnprintf(buf + out, len - out, "Exit Domain Map: "); out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); /* Live Map: xx xx xx */ - out += snprintf(buf + out, len - out, "Live Map: "); + out += scnprintf(buf + out, len - out, "Live Map: "); out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); /* Lock Resources: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Lock Resources: %d (%d)\n", atomic_read(&dlm->res_cur_count), atomic_read(&dlm->res_tot_count)); @@ -729,29 +729,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) cur_mles += atomic_read(&dlm->mle_cur_count[i]); /* MLEs: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "MLEs: %d (%d)\n", cur_mles, tot_mles); /* Blocking: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, " Blocking: %d (%d)\n", atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]), atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK])); /* Mastery: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, " Mastery: %d (%d)\n", atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]), atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER])); /* Migration: xxx (xxx) */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, " Migration: %d (%d)\n", atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]), atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION])); /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Lists: Dirty=%s Purge=%s PendingASTs=%s " "PendingBASTs=%s\n", (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), @@ -760,12 +760,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) (list_empty(&dlm->pending_basts) ? "Empty" : "InUse")); /* Purge Count: xxx Refs: xxx */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Purge Count: %d Refs: %d\n", dlm->purge_count, kref_read(&dlm->dlm_refs)); /* Dead Node: xxx */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Dead Node: %d\n", dlm->reco.dead_node); /* What about DLM_RECO_STATE_FINALIZE? */ @@ -775,19 +775,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) state = "INACTIVE"; /* Recovery Pid: xxxx Master: xxx State: xxxx */ - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "Recovery Pid: %d Master: %d State: %s\n", task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master, state); /* Recovery Map: xx xx */ - out += snprintf(buf + out, len - out, "Recovery Map: "); + out += scnprintf(buf + out, len - out, "Recovery Map: "); out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); /* Recovery Node State: */ - out += snprintf(buf + out, len - out, "Recovery Node State:\n"); + out += scnprintf(buf + out, len - out, "Recovery Node State:\n"); list_for_each_entry(node, &dlm->reco.node_data, list) { switch (node->state) { case DLM_RECO_NODE_DATA_INIT: @@ -815,7 +815,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) state = "BAD"; break; } - out += snprintf(buf + out, len - out, "\t%u - %s\n", + out += scnprintf(buf + out, len - out, "\t%u - %s\n", node->node_num, state); } diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 900f7e466d11..55a6512e9fde 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2749,8 +2749,6 @@ leave: return ret; } -#define DLM_MIGRATION_RETRY_MS 100 - /* * Should be called only after beginning the domain leave process. * There should not be any remaining locks on nonlocal lock resources, diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index fd40c17cd022..5ccc4ff0b82a 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -39,8 +39,6 @@ static int dlm_thread(void *data); static void dlm_flush_asts(struct dlm_ctxt *dlm); -#define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) - /* will exit holding res->spinlock, but may drop in function */ /* waits until flags are cleared on res->state */ void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags) @@ -680,7 +678,6 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm) #define DLM_THREAD_TIMEOUT_MS (4 * 1000) #define DLM_THREAD_MAX_DIRTY 100 -#define DLM_THREAD_MAX_ASTS 10 static int dlm_thread(void *data) { diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index cb9e6a73bea9..152a0fc4e905 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2133,7 +2133,7 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, } #define OCFS2_SEC_BITS 34 -#define OCFS2_SEC_SHIFT (64 - 34) +#define OCFS2_SEC_SHIFT (64 - OCFS2_SEC_BITS) #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) /* LVB only has room for 64 bits of time here so we pack it for diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 68ba354cf361..b425f0b01dce 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -91,7 +91,7 @@ enum ocfs2_replay_state { struct ocfs2_replay_map { unsigned int rm_slots; enum ocfs2_replay_state rm_state; - unsigned char rm_replay_slots[0]; + unsigned char rm_replay_slots[]; }; static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index da65251ef815..5381020aaa9a 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -406,7 +406,7 @@ static int ocfs2_mknod(struct inode *dir, if (status < 0) { mlog_errno(status); - goto leave; + goto roll_back; } if (si.enable) { @@ -414,7 +414,7 @@ static int ocfs2_mknod(struct inode *dir, meta_ac, data_ac); if (status < 0) { mlog_errno(status); - goto leave; + goto roll_back; } } @@ -427,7 +427,7 @@ static int ocfs2_mknod(struct inode *dir, OCFS2_I(dir)->ip_blkno); if (status) { mlog_errno(status); - goto leave; + goto roll_back; } dl = dentry->d_fsdata; @@ -437,12 +437,19 @@ static int ocfs2_mknod(struct inode *dir, &lookup); if (status < 0) { mlog_errno(status); - goto leave; + goto roll_back; } insert_inode_hash(inode); d_instantiate(dentry, inode); status = 0; + +roll_back: + if (status < 0 && S_ISDIR(mode)) { + ocfs2_add_links_count(dirfe, -1); + drop_nlink(dir); + } + leave: if (status < 0 && did_quota_inode) dquot_free_inode(inode); diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 0db4a7ec58a2..0dd8c41bafd4 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -470,7 +470,7 @@ struct ocfs2_extent_list { __le16 l_reserved1; __le64 l_reserved2; /* Pad to sizeof(ocfs2_extent_rec) */ -/*10*/ struct ocfs2_extent_rec l_recs[0]; /* Extent records */ +/*10*/ struct ocfs2_extent_rec l_recs[]; /* Extent records */ }; /* @@ -484,7 +484,7 @@ struct ocfs2_chain_list { __le16 cl_count; /* Total chains in this list */ __le16 cl_next_free_rec; /* Next unused chain slot */ __le64 cl_reserved1; -/*10*/ struct ocfs2_chain_rec cl_recs[0]; /* Chain records */ +/*10*/ struct ocfs2_chain_rec cl_recs[]; /* Chain records */ }; /* @@ -496,7 +496,7 @@ struct ocfs2_truncate_log { /*00*/ __le16 tl_count; /* Total records in this log */ __le16 tl_used; /* Number of records in use */ __le32 tl_reserved1; -/*08*/ struct ocfs2_truncate_rec tl_recs[0]; /* Truncate records */ +/*08*/ struct ocfs2_truncate_rec tl_recs[]; /* Truncate records */ }; /* @@ -640,7 +640,7 @@ struct ocfs2_local_alloc __le16 la_size; /* Size of included bitmap, in bytes */ __le16 la_reserved1; __le64 la_reserved2; -/*10*/ __u8 la_bitmap[0]; +/*10*/ __u8 la_bitmap[]; }; /* @@ -653,7 +653,7 @@ struct ocfs2_inline_data * for data, starting at id_data */ __le16 id_reserved0; __le32 id_reserved1; - __u8 id_data[0]; /* Start of user data */ + __u8 id_data[]; /* Start of user data */ }; /* @@ -798,7 +798,7 @@ struct ocfs2_dx_entry_list { * possible in de_entries */ __le16 de_num_used; /* Current number of * de_entries entries */ - struct ocfs2_dx_entry de_entries[0]; /* Indexed dir entries + struct ocfs2_dx_entry de_entries[]; /* Indexed dir entries * in a packed array of * length de_num_used */ }; @@ -935,7 +935,7 @@ struct ocfs2_refcount_list { __le16 rl_used; /* Current number of used records */ __le32 rl_reserved2; __le64 rl_reserved1; /* Pad to sizeof(ocfs2_refcount_record) */ -/*10*/ struct ocfs2_refcount_rec rl_recs[0]; /* Refcount records */ +/*10*/ struct ocfs2_refcount_rec rl_recs[]; /* Refcount records */ }; @@ -1021,7 +1021,7 @@ struct ocfs2_xattr_header { buckets. A block uses xb_check and sets this field to zero.) */ - struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ + struct ocfs2_xattr_entry xh_entries[]; /* xattr entry list. */ }; /* @@ -1207,7 +1207,7 @@ struct ocfs2_local_disk_dqinfo { /* Header of one chunk of a quota file */ struct ocfs2_local_disk_chunk { __le32 dqc_free; /* Number of free entries in the bitmap */ - __u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding + __u8 dqc_bitmap[]; /* Bitmap of entries in the corresponding * chunk of quota file */ }; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ee43e51188be..cfb77f70c888 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -154,6 +154,7 @@ ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci) } static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci) +__acquires(&rf->rf_lock) { struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); @@ -161,6 +162,7 @@ static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci) } static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci) +__releases(&rf->rf_lock) { struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index 0249e8ca1028..bf3842e34fb9 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -33,9 +33,6 @@ static DEFINE_SPINLOCK(resv_lock); -#define OCFS2_MIN_RESV_WINDOW_BITS 8 -#define OCFS2_MAX_RESV_WINDOW_BITS 1024 - int ocfs2_dir_resv_allowed(struct ocfs2_super *osb) { return (osb->osb_resv_level && osb->osb_dir_resv_level); diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 8aa6a667860c..a191094694c6 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -656,8 +656,6 @@ error: * and easier to preserve the name. */ -#define FS_OCFS2_NM 1 - static struct ctl_table ocfs2_nm_table[] = { { .procname = "hb_ctl_path", diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 939df99d2dec..4836becb7578 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2509,9 +2509,6 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, bail: brelse(group_bh); - - if (status) - mlog_errno(status); return status; } @@ -2582,8 +2579,6 @@ static int _ocfs2_free_clusters(handle_t *handle, num_clusters); out: - if (status) - mlog_errno(status); return status; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 05dd68ade293..ac61eeaf3837 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -220,31 +220,31 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) int i, out = 0; unsigned long flags; - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", "Device", osb->dev_str, osb->uuid_str, osb->fs_generation, osb->vol_label); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => State: %d Flags: 0x%lX\n", "Volume", atomic_read(&osb->vol_state), osb->osb_flags); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => Block: %lu Cluster: %d\n", "Sizes", osb->sb->s_blocksize, osb->s_clustersize); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => Compat: 0x%X Incompat: 0x%X " "ROcompat: 0x%X\n", "Features", osb->s_feature_compat, osb->s_feature_incompat, osb->s_feature_ro_compat); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", osb->s_mount_opt, osb->s_atime_quantum); if (cconn) { - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => Stack: %s Name: %*s " "Version: %d.%d\n", "Cluster", (*osb->osb_cluster_stack == '\0' ? @@ -255,7 +255,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) } spin_lock_irqsave(&osb->dc_task_lock, flags); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => Pid: %d Count: %lu WakeSeq: %lu " "WorkSeq: %lu\n", "DownCnvt", (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), @@ -264,32 +264,32 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) spin_unlock_irqrestore(&osb->dc_task_lock, flags); spin_lock(&osb->osb_lock); - out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", + out += scnprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", "Recovery", (osb->recovery_thread_task ? task_pid_nr(osb->recovery_thread_task) : -1)); if (rm->rm_used == 0) - out += snprintf(buf + out, len - out, " None\n"); + out += scnprintf(buf + out, len - out, " None\n"); else { for (i = 0; i < rm->rm_used; i++) - out += snprintf(buf + out, len - out, " %d", + out += scnprintf(buf + out, len - out, " %d", rm->rm_entries[i]); - out += snprintf(buf + out, len - out, "\n"); + out += scnprintf(buf + out, len - out, "\n"); } spin_unlock(&osb->osb_lock); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => Pid: %d Interval: %lu\n", "Commit", (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), osb->osb_commit_interval); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => State: %d TxnId: %lu NumTxns: %d\n", "Journal", osb->journal->j_state, osb->journal->j_trans_id, atomic_read(&osb->journal->j_num_trans)); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => GlobalAllocs: %d LocalAllocs: %d " "SubAllocs: %d LAWinMoves: %d SAExtends: %d\n", "Stats", @@ -299,7 +299,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) atomic_read(&osb->alloc_stats.moves), atomic_read(&osb->alloc_stats.bg_extends)); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => State: %u Descriptor: %llu Size: %u bits " "Default: %u bits\n", "LocalAlloc", osb->local_alloc_state, @@ -307,7 +307,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) osb->local_alloc_bits, osb->local_alloc_default_bits); spin_lock(&osb->osb_lock); - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s => InodeSlot: %d StolenInodes: %d, " "MetaSlot: %d StolenMeta: %d\n", "Steal", osb->s_inode_steal_slot, @@ -316,20 +316,20 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) atomic_read(&osb->s_num_meta_stolen)); spin_unlock(&osb->osb_lock); - out += snprintf(buf + out, len - out, "OrphanScan => "); - out += snprintf(buf + out, len - out, "Local: %u Global: %u ", + out += scnprintf(buf + out, len - out, "OrphanScan => "); + out += scnprintf(buf + out, len - out, "Local: %u Global: %u ", os->os_count, os->os_seqno); - out += snprintf(buf + out, len - out, " Last Scan: "); + out += scnprintf(buf + out, len - out, " Last Scan: "); if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) - out += snprintf(buf + out, len - out, "Disabled\n"); + out += scnprintf(buf + out, len - out, "Disabled\n"); else - out += snprintf(buf + out, len - out, "%lu seconds ago\n", + out += scnprintf(buf + out, len - out, "%lu seconds ago\n", (unsigned long)(ktime_get_seconds() - os->os_scantime)); - out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", + out += scnprintf(buf + out, len - out, "%10s => %3s %10s\n", "Slots", "Num", "RecoGen"); for (i = 0; i < osb->max_slots; ++i) { - out += snprintf(buf + out, len - out, + out += scnprintf(buf + out, len - out, "%10s %c %3d %10d\n", " ", (i == osb->slot_num ? '*' : ' '), diff --git a/fs/pipe.c b/fs/pipe.c index 2144507447c5..16fb72e9abf7 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -146,7 +146,7 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, struct page *page = buf->page; if (page_count(page) == 1) { - memcg_kmem_uncharge(page, 0); + memcg_kmem_uncharge_page(page, 0); __SetPageLocked(page); return 0; } diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 5afe0e7ff7cd..64e6a6698935 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -416,15 +416,18 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); /** - * __compat_only_sysfs_link_entry_to_kobj - add a symlink to a kobject pointing + * compat_only_sysfs_link_entry_to_kobj - add a symlink to a kobject pointing * to a group or an attribute * @kobj: The kobject containing the group. * @target_kobj: The target kobject. * @target_name: The name of the target group or attribute. + * @symlink_name: The name of the symlink file (target_name will be + * considered if symlink_name is NULL). */ -int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, - struct kobject *target_kobj, - const char *target_name) +int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name) { struct kernfs_node *target; struct kernfs_node *entry; @@ -449,15 +452,18 @@ int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, return -ENOENT; } - link = kernfs_create_link(kobj->sd, target_name, entry); + if (!symlink_name) + symlink_name = target_name; + + link = kernfs_create_link(kobj->sd, symlink_name, entry); if (PTR_ERR(link) == -EEXIST) - sysfs_warn_dup(kobj->sd, target_name); + sysfs_warn_dup(kobj->sd, symlink_name); kernfs_put(entry); kernfs_put(target); return PTR_ERR_OR_ZERO(link); } -EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); +EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, const struct attribute_group *grp, diff --git a/fs/unicode/.gitignore b/fs/unicode/.gitignore index 0381e2221480..9b2467e77b2d 100644 --- a/fs/unicode/.gitignore +++ b/fs/unicode/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only mkutf8data utf8data.h diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 37df7c9eedb1..703c1c3faa6e 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -334,6 +334,30 @@ out: return ret; } +/* Should pair with userfaultfd_signal_pending() */ +static inline long userfaultfd_get_blocking_state(unsigned int flags) +{ + if (flags & FAULT_FLAG_INTERRUPTIBLE) + return TASK_INTERRUPTIBLE; + + if (flags & FAULT_FLAG_KILLABLE) + return TASK_KILLABLE; + + return TASK_UNINTERRUPTIBLE; +} + +/* Should pair with userfaultfd_get_blocking_state() */ +static inline bool userfaultfd_signal_pending(unsigned int flags) +{ + if (flags & FAULT_FLAG_INTERRUPTIBLE) + return signal_pending(current); + + if (flags & FAULT_FLAG_KILLABLE) + return fatal_signal_pending(current); + + return false; +} + /* * The locking rules involved in returning VM_FAULT_RETRY depending on * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and @@ -355,7 +379,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) struct userfaultfd_ctx *ctx; struct userfaultfd_wait_queue uwq; vm_fault_t ret = VM_FAULT_SIGBUS; - bool must_wait, return_to_userland; + bool must_wait; long blocking_state; /* @@ -462,11 +486,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) uwq.ctx = ctx; uwq.waken = false; - return_to_userland = - (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) == - (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE); - blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : - TASK_KILLABLE; + blocking_state = userfaultfd_get_blocking_state(vmf->flags); spin_lock_irq(&ctx->fault_pending_wqh.lock); /* @@ -492,8 +512,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) up_read(&mm->mmap_sem); if (likely(must_wait && !READ_ONCE(ctx->released) && - (return_to_userland ? !signal_pending(current) : - !fatal_signal_pending(current)))) { + !userfaultfd_signal_pending(vmf->flags))) { wake_up_poll(&ctx->fd_wqh, EPOLLIN); schedule(); ret |= VM_FAULT_MAJOR; @@ -515,8 +534,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) set_current_state(blocking_state); if (READ_ONCE(uwq.waken) || READ_ONCE(ctx->released) || - (return_to_userland ? signal_pending(current) : - fatal_signal_pending(current))) + userfaultfd_signal_pending(vmf->flags)) break; schedule(); } @@ -524,30 +542,6 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) __set_current_state(TASK_RUNNING); - if (return_to_userland) { - if (signal_pending(current) && - !fatal_signal_pending(current)) { - /* - * If we got a SIGSTOP or SIGCONT and this is - * a normal userland page fault, just let - * userland return so the signal will be - * handled and gdb debugging works. The page - * fault code immediately after we return from - * this function is going to release the - * mmap_sem and it's not depending on it - * (unlike gup would if we were not to return - * VM_FAULT_RETRY). - * - * If a fatal signal is pending we still take - * the streamlined VM_FAULT_RETRY failure path - * and there's no need to retake the mmap_sem - * in such case. - */ - down_read(&mm->mmap_sem); - ret = VM_FAULT_NOPAGE; - } - } - /* * Here we race with the list_del; list_add in * userfaultfd_ctx_read(), however because we don't ever run diff --git a/fs/xattr.c b/fs/xattr.c index 90dd78f0eb27..e13265e65871 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -817,7 +817,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) if (len < sizeof(*new_xattr)) return NULL; - new_xattr = kmalloc(len, GFP_KERNEL); + new_xattr = kvmalloc(len, GFP_KERNEL); if (!new_xattr) return NULL; @@ -860,6 +860,7 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, * @value: value of the xattr. If %NULL, will remove the attribute. * @size: size of the new xattr * @flags: %XATTR_{CREATE|REPLACE} + * @removed_size: returns size of the removed xattr, -1 if none removed * * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist; @@ -868,7 +869,8 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, * Returns 0 on success, -errno on failure. */ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, - const void *value, size_t size, int flags) + const void *value, size_t size, int flags, + ssize_t *removed_size) { struct simple_xattr *xattr; struct simple_xattr *new_xattr = NULL; @@ -882,7 +884,7 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, new_xattr->name = kstrdup(name, GFP_KERNEL); if (!new_xattr->name) { - kfree(new_xattr); + kvfree(new_xattr); return -ENOMEM; } } @@ -895,8 +897,12 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, err = -EEXIST; } else if (new_xattr) { list_replace(&xattr->list, &new_xattr->list); + if (removed_size) + *removed_size = xattr->size; } else { list_del(&xattr->list); + if (removed_size) + *removed_size = xattr->size; } goto out; } @@ -908,11 +914,14 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, list_add(&new_xattr->list, &xattrs->head); xattr = NULL; } + + if (removed_size) + *removed_size = -1; out: spin_unlock(&xattrs->lock); if (xattr) { kfree(xattr->name); - kfree(xattr); + kvfree(xattr); } return err; diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index aceca2f9a3db..4f95df476181 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -26,6 +26,7 @@ xfs-y += $(addprefix libxfs/, \ xfs_bmap.o \ xfs_bmap_btree.o \ xfs_btree.o \ + xfs_btree_staging.o \ xfs_da_btree.o \ xfs_defer.o \ xfs_dir2.o \ diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 08d6beb54f8c..9d84007a5c65 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -231,7 +231,7 @@ xfs_sbblock_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; xfs_sb_to_disk(dsb, &mp->m_sb); dsb->sb_inprogress = 1; @@ -243,7 +243,7 @@ xfs_agfblock_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + struct xfs_agf *agf = bp->b_addr; xfs_extlen_t tmpsize; agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); @@ -301,7 +301,7 @@ xfs_agflblock_init( uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); } - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); + agfl_bno = xfs_buf_to_agfl_bno(bp); for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); } @@ -312,7 +312,7 @@ xfs_agiblock_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); + struct xfs_agi *agi = bp->b_addr; int bucket; agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); @@ -502,7 +502,7 @@ xfs_ag_extend_space( if (error) return error; - agi = XFS_BUF_TO_AGI(bp); + agi = bp->b_addr; be32_add_cpu(&agi->agi_length, len); ASSERT(id->agno == mp->m_sb.sb_agcount - 1 || be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks); @@ -515,7 +515,7 @@ xfs_ag_extend_space( if (error) return error; - agf = XFS_BUF_TO_AGF(bp); + agf = bp->b_addr; be32_add_cpu(&agf->agf_length, len); ASSERT(agf->agf_length == agi->agi_length); xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); @@ -569,11 +569,11 @@ xfs_ag_get_geometry( memset(ageo, 0, sizeof(*ageo)); ageo->ag_number = agno; - agi = XFS_BUF_TO_AGI(agi_bp); + agi = agi_bp->b_addr; ageo->ag_icount = be32_to_cpu(agi->agi_count); ageo->ag_ifree = be32_to_cpu(agi->agi_freecount); - agf = XFS_BUF_TO_AGF(agf_bp); + agf = agf_bp->b_addr; ageo->ag_length = be32_to_cpu(agf->agf_length); freeblks = pag->pagf_freeblks + pag->pagf_flcount + diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index d8053bc96c4d..203e74fa64aa 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -151,7 +151,7 @@ xfs_alloc_lookup_eq( cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); - cur->bc_private.a.priv.abt.active = (*stat == 1); + cur->bc_ag.abt.active = (*stat == 1); return error; } @@ -171,7 +171,7 @@ xfs_alloc_lookup_ge( cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); - cur->bc_private.a.priv.abt.active = (*stat == 1); + cur->bc_ag.abt.active = (*stat == 1); return error; } @@ -190,7 +190,7 @@ xfs_alloc_lookup_le( cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); - cur->bc_private.a.priv.abt.active = (*stat == 1); + cur->bc_ag.abt.active = (*stat == 1); return error; } @@ -198,7 +198,7 @@ static inline bool xfs_alloc_cur_active( struct xfs_btree_cur *cur) { - return cur && cur->bc_private.a.priv.abt.active; + return cur && cur->bc_ag.abt.active; } /* @@ -230,7 +230,7 @@ xfs_alloc_get_rec( int *stat) /* output: success/failure */ { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_private.a.agno; + xfs_agnumber_t agno = cur->bc_ag.agno; union xfs_btree_rec *rec; int error; @@ -589,6 +589,7 @@ xfs_agfl_verify( { struct xfs_mount *mp = bp->b_mount; struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); + __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp); int i; /* @@ -614,8 +615,8 @@ xfs_agfl_verify( return __this_address; for (i = 0; i < xfs_agfl_size(mp); i++) { - if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && - be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) + if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK && + be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks) return __this_address; } @@ -713,7 +714,7 @@ xfs_alloc_update_counters( struct xfs_buf *agbp, long len) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_agf *agf = agbp->b_addr; pag->pagf_freeblks += len; be32_add_cpu(&agf->agf_freeblks, len); @@ -721,7 +722,7 @@ xfs_alloc_update_counters( xfs_trans_agblocks_delta(tp, len); if (unlikely(be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))) { - xfs_buf_corruption_error(agbp); + xfs_buf_mark_corrupt(agbp); return -EFSCORRUPTED; } @@ -907,7 +908,7 @@ xfs_alloc_cur_check( deactivate = true; out: if (deactivate) - cur->bc_private.a.priv.abt.active = false; + cur->bc_ag.abt.active = false; trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff, *new); return 0; @@ -922,13 +923,13 @@ xfs_alloc_cur_finish( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur) { + struct xfs_agf __maybe_unused *agf = args->agbp->b_addr; int error; ASSERT(acur->cnt && acur->bnolt); ASSERT(acur->bno >= acur->rec_bno); ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len); - ASSERT(acur->rec_bno + acur->rec_len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length)); error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno, acur->rec_len, acur->bno, acur->len, 0); @@ -1026,6 +1027,7 @@ xfs_alloc_ag_vextent_small( xfs_extlen_t *flenp, /* result length */ int *stat) /* status: 0-freelist, 1-normal/none */ { + struct xfs_agf *agf = args->agbp->b_addr; int error = 0; xfs_agblock_t fbno = NULLAGBLOCK; xfs_extlen_t flen = 0; @@ -1054,8 +1056,7 @@ xfs_alloc_ag_vextent_small( if (args->minlen != 1 || args->alignment != 1 || args->resv == XFS_AG_RESV_AGFL || - (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <= - args->minleft)) + be32_to_cpu(agf->agf_flcount) <= args->minleft) goto out; error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); @@ -1079,9 +1080,7 @@ xfs_alloc_ag_vextent_small( } *fbnop = args->agbno = fbno; *flenp = args->len = 1; - if (XFS_IS_CORRUPT(args->mp, - fbno >= be32_to_cpu( - XFS_BUF_TO_AGF(args->agbp)->agf_length))) { + if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) { error = -EFSCORRUPTED; goto error; } @@ -1203,6 +1202,7 @@ STATIC int /* error */ xfs_alloc_ag_vextent_exact( xfs_alloc_arg_t *args) /* allocation argument structure */ { + struct xfs_agf __maybe_unused *agf = args->agbp->b_addr; xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ int error; @@ -1281,8 +1281,7 @@ xfs_alloc_ag_vextent_exact( */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); - ASSERT(args->agbno + args->len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length)); error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, args->len, XFSA_FIXUP_BNO_OK); if (error) { @@ -1353,7 +1352,7 @@ xfs_alloc_walk_iter( if (error) return error; if (i == 0) - cur->bc_private.a.priv.abt.active = false; + cur->bc_ag.abt.active = false; if (count > 0) count--; @@ -1468,7 +1467,7 @@ xfs_alloc_ag_vextent_locality( if (error) return error; if (i) { - acur->cnt->bc_private.a.priv.abt.active = true; + acur->cnt->bc_ag.abt.active = true; fbcur = acur->cnt; fbinc = false; } @@ -1515,7 +1514,7 @@ xfs_alloc_ag_vextent_lastblock( * maxlen, go to the start of this block, and skip all those smaller * than minlen. */ - if (len || args->alignment > 1) { + if (*len || args->alignment > 1) { acur->cnt->bc_ptrs[0] = 1; do { error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); @@ -1661,6 +1660,7 @@ STATIC int /* error */ xfs_alloc_ag_vextent_size( xfs_alloc_arg_t *args) /* allocation argument structure */ { + struct xfs_agf *agf = args->agbp->b_addr; xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ int error; /* error result */ @@ -1851,8 +1851,7 @@ restart: args->agbno = rbno; if (XFS_IS_CORRUPT(args->mp, args->agbno + args->len > - be32_to_cpu( - XFS_BUF_TO_AGF(args->agbp)->agf_length))) { + be32_to_cpu(agf->agf_length))) { error = -EFSCORRUPTED; goto error0; } @@ -2424,7 +2423,7 @@ xfs_agfl_reset( struct xfs_perag *pag) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_agf *agf = agbp->b_addr; ASSERT(pag->pagf_agflreset); trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); @@ -2655,7 +2654,7 @@ xfs_alloc_get_freelist( xfs_agblock_t *bnop, /* block address retrieved from freelist */ int btreeblk) /* destination is a AGF btree */ { - xfs_agf_t *agf; /* a.g. freespace structure */ + struct xfs_agf *agf = agbp->b_addr; xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ xfs_agblock_t bno; /* block number returned */ __be32 *agfl_bno; @@ -2667,7 +2666,6 @@ xfs_alloc_get_freelist( /* * Freelist is empty, give up. */ - agf = XFS_BUF_TO_AGF(agbp); if (!agf->agf_flcount) { *bnop = NULLAGBLOCK; return 0; @@ -2684,7 +2682,7 @@ xfs_alloc_get_freelist( /* * Get the block number and update the data structures. */ - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + agfl_bno = xfs_buf_to_agfl_bno(agflbp); bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); @@ -2745,7 +2743,7 @@ xfs_alloc_log_agf( sizeof(xfs_agf_t) }; - trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); + trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); @@ -2783,18 +2781,15 @@ xfs_alloc_put_freelist( xfs_agblock_t bno, /* block being freed */ int btreeblk) /* block came from a AGF btree */ { - xfs_agf_t *agf; /* a.g. freespace structure */ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agf *agf = agbp->b_addr; __be32 *blockp;/* pointer to array entry */ int error; int logflags; - xfs_mount_t *mp; /* mount structure */ xfs_perag_t *pag; /* per allocation group data */ __be32 *agfl_bno; int startoff; - agf = XFS_BUF_TO_AGF(agbp); - mp = tp->t_mountp; - if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), &agflbp))) return error; @@ -2820,7 +2815,7 @@ xfs_alloc_put_freelist( ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + agfl_bno = xfs_buf_to_agfl_bno(agflbp); blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; *blockp = cpu_to_be32(bno); startoff = (char *)blockp - (char *)agflbp->b_addr; @@ -2838,13 +2833,12 @@ xfs_agf_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + struct xfs_agf *agf = bp->b_addr; if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (!xfs_log_check_lsn(mp, - be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) + if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn))) return __this_address; } @@ -2858,6 +2852,13 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) return __this_address; + if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks) + return __this_address; + + if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) || + be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length)) + return __this_address; + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || @@ -2869,6 +2870,10 @@ xfs_agf_verify( be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) return __this_address; + if (xfs_sb_version_hasrmapbt(&mp->m_sb) && + be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length)) + return __this_address; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -2883,6 +2888,11 @@ xfs_agf_verify( return __this_address; if (xfs_sb_version_hasreflink(&mp->m_sb) && + be32_to_cpu(agf->agf_refcount_blocks) > + be32_to_cpu(agf->agf_length)) + return __this_address; + + if (xfs_sb_version_hasreflink(&mp->m_sb) && (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) return __this_address; @@ -2914,6 +2924,7 @@ xfs_agf_write_verify( { struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_agf *agf = bp->b_addr; xfs_failaddr_t fa; fa = xfs_agf_verify(bp); @@ -2926,7 +2937,7 @@ xfs_agf_write_verify( return; if (bip) - XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); + agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); } @@ -2994,7 +3005,7 @@ xfs_alloc_read_agf( return error; ASSERT(!(*bpp)->b_error); - agf = XFS_BUF_TO_AGF(*bpp); + agf = (*bpp)->b_addr; pag = xfs_perag_get(mp, agno); if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); @@ -3275,6 +3286,7 @@ __xfs_free_extent( struct xfs_buf *agbp; xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno); + struct xfs_agf *agf; int error; unsigned int busy_flags = 0; @@ -3288,6 +3300,7 @@ __xfs_free_extent( error = xfs_free_extent_fix_freelist(tp, agno, &agbp); if (error) return error; + agf = agbp->b_addr; if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { error = -EFSCORRUPTED; @@ -3295,9 +3308,7 @@ __xfs_free_extent( } /* validate the extent size is legal now we have the agf locked */ - if (XFS_IS_CORRUPT(mp, - agbno + len > - be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length))) { + if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) { error = -EFSCORRUPTED; goto err; } @@ -3408,7 +3419,7 @@ xfs_agfl_walk( unsigned int i; int error; - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + agfl_bno = xfs_buf_to_agfl_bno(agflbp); i = be32_to_cpu(agf->agf_flfirst); /* Nothing to walk in an empty AGFL. */ diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7380fbe4a3ff..a851bf77f17b 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -236,4 +236,13 @@ typedef int (*xfs_agfl_walk_fn)(struct xfs_mount *mp, xfs_agblock_t bno, int xfs_agfl_walk(struct xfs_mount *mp, struct xfs_agf *agf, struct xfs_buf *agflbp, xfs_agfl_walk_fn walk_fn, void *priv); +static inline __be32 * +xfs_buf_to_agfl_bno( + struct xfs_buf *bp) +{ + if (xfs_sb_version_hascrc(&bp->b_mount->m_sb)) + return bp->b_addr + sizeof(struct xfs_agfl); + return bp->b_addr; +} + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 279694d73e4e..60c453cb3ee3 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -12,6 +12,7 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" @@ -25,7 +26,7 @@ xfs_allocbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_ag.agbp, cur->bc_ag.agno, cur->bc_btnum); } @@ -35,8 +36,8 @@ xfs_allocbt_set_root( union xfs_btree_ptr *ptr, int inc) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); int btnum = cur->bc_btnum; struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); @@ -62,7 +63,7 @@ xfs_allocbt_alloc_block( xfs_agblock_t bno; /* Allocate the new block from the freelist. If we can't, give up. */ - error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp, &bno, 1); if (error) return error; @@ -72,7 +73,7 @@ xfs_allocbt_alloc_block( return 0; } - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); + xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, false); xfs_trans_agbtree_delta(cur->bc_tp, 1); new->s = cpu_to_be32(bno); @@ -86,8 +87,8 @@ xfs_allocbt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; xfs_agblock_t bno; int error; @@ -113,7 +114,7 @@ xfs_allocbt_update_lastrec( int ptr, int reason) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); struct xfs_perag *pag; __be32 len; @@ -162,7 +163,7 @@ xfs_allocbt_update_lastrec( pag = xfs_perag_get(cur->bc_mp, seqno); pag->pagf_longest = be32_to_cpu(len); xfs_perag_put(pag); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST); } STATIC int @@ -226,9 +227,9 @@ xfs_allocbt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_roots[cur->bc_btnum]; } @@ -471,18 +472,14 @@ static const struct xfs_btree_ops xfs_cntbt_ops = { .recs_inorder = xfs_cntbt_recs_inorder, }; -/* - * Allocate a new allocation btree cursor. - */ -struct xfs_btree_cur * /* new alloc btree cursor */ -xfs_allocbt_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for agf structure */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_btnum_t btnum) /* btree identifier */ +/* Allocate most of a new allocation btree cursor. */ +STATIC struct xfs_btree_cur * +xfs_allocbt_init_common( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_btnum_t btnum) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); struct xfs_btree_cur *cur; ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); @@ -495,19 +492,16 @@ xfs_allocbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; if (btnum == XFS_BTNUM_CNT) { - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); cur->bc_ops = &xfs_cntbt_ops; - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; } else { - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); cur->bc_ops = &xfs_bnobt_ops; - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); } - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; - cur->bc_private.a.priv.abt.active = false; + cur->bc_ag.agno = agno; + cur->bc_ag.abt.active = false; if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -516,6 +510,73 @@ xfs_allocbt_init_cursor( } /* + * Allocate a new allocation btree cursor. + */ +struct xfs_btree_cur * /* new alloc btree cursor */ +xfs_allocbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for agf structure */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_btnum_t btnum) /* btree identifier */ +{ + struct xfs_agf *agf = agbp->b_addr; + struct xfs_btree_cur *cur; + + cur = xfs_allocbt_init_common(mp, tp, agno, btnum); + if (btnum == XFS_BTNUM_CNT) + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + else + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + + cur->bc_ag.agbp = agbp; + + return cur; +} + +/* Create a free space btree cursor with a fake root for staging. */ +struct xfs_btree_cur * +xfs_allocbt_stage_cursor( + struct xfs_mount *mp, + struct xbtree_afakeroot *afake, + xfs_agnumber_t agno, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + + cur = xfs_allocbt_init_common(mp, NULL, agno, btnum); + xfs_btree_stage_afakeroot(cur, afake); + return cur; +} + +/* + * Install a new free space btree root. Caller is responsible for invalidating + * and freeing the old btree blocks. + */ +void +xfs_allocbt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root); + agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); + + if (cur->bc_btnum == XFS_BTNUM_BNO) { + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_bnobt_ops); + } else { + cur->bc_flags |= XFS_BTREE_LASTREC_UPDATE; + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_cntbt_ops); + } +} + +/* * Calculate number of records in an alloc btree block. */ int diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index c9305ebb69f6..047f09f0be3c 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -13,6 +13,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xbtree_afakeroot; /* * Btree block header size depends on a superblock flag. @@ -48,8 +49,14 @@ struct xfs_mount; extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, xfs_btnum_t); +struct xfs_btree_cur *xfs_allocbt_stage_cursor(struct xfs_mount *mp, + struct xbtree_afakeroot *afake, xfs_agnumber_t agno, + xfs_btnum_t btnum); extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, unsigned long long len); +void xfs_allocbt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, struct xfs_buf *agbp); + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index e6149720ce02..e4fe3dca9883 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -56,33 +56,6 @@ STATIC int xfs_attr_node_removename(xfs_da_args_t *args); STATIC int xfs_attr_fillstate(xfs_da_state_t *state); STATIC int xfs_attr_refillstate(xfs_da_state_t *state); - -STATIC int -xfs_attr_args_init( - struct xfs_da_args *args, - struct xfs_inode *dp, - const unsigned char *name, - size_t namelen, - int flags) -{ - - if (!name) - return -EINVAL; - - memset(args, 0, sizeof(*args)); - args->geo = dp->i_mount->m_attr_geo; - args->whichfork = XFS_ATTR_FORK; - args->dp = dp; - args->flags = flags; - args->name = name; - args->namelen = namelen; - if (args->namelen >= MAXNAMELEN) - return -EFAULT; /* match IRIX behaviour */ - - args->hashval = xfs_da_hashname(args->name, args->namelen); - return 0; -} - int xfs_inode_hasattr( struct xfs_inode *ip) @@ -104,85 +77,60 @@ xfs_inode_hasattr( */ int xfs_attr_get_ilocked( - struct xfs_inode *ip, struct xfs_da_args *args) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + ASSERT(xfs_isilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - if (!xfs_inode_hasattr(ip)) + if (!xfs_inode_hasattr(args->dp)) return -ENOATTR; - else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) + + if (args->dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) return xfs_attr_shortform_getvalue(args); - else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) + if (xfs_bmap_one_block(args->dp, XFS_ATTR_FORK)) return xfs_attr_leaf_get(args); - else - return xfs_attr_node_get(args); + return xfs_attr_node_get(args); } /* * Retrieve an extended attribute by name, and its value if requested. * - * If ATTR_KERNOVAL is set in @flags, then the caller does not want the value, - * just an indication whether the attribute exists and the size of the value if - * it exists. The size is returned in @valuelenp, + * If args->valuelen is zero, then the caller does not want the value, just an + * indication whether the attribute exists and the size of the value if it + * exists. The size is returned in args.valuelen. * - * If the attribute is found, but exceeds the size limit set by the caller in - * @valuelenp, return -ERANGE with the size of the attribute that was found in - * @valuelenp. + * If args->value is NULL but args->valuelen is non-zero, allocate the buffer + * for the value after existence of the attribute has been determined. The + * caller always has to free args->value if it is set, no matter if this + * function was successful or not. * - * If ATTR_ALLOC is set in @flags, allocate the buffer for the value after - * existence of the attribute has been determined. On success, return that - * buffer to the caller and leave them to free it. On failure, free any - * allocated buffer and ensure the buffer pointer returned to the caller is - * null. + * If the attribute is found, but exceeds the size limit set by the caller in + * args->valuelen, return -ERANGE with the size of the attribute that was found + * in args->valuelen. */ int xfs_attr_get( - struct xfs_inode *ip, - const unsigned char *name, - size_t namelen, - unsigned char **value, - int *valuelenp, - int flags) + struct xfs_da_args *args) { - struct xfs_da_args args; uint lock_mode; int error; - ASSERT((flags & (ATTR_ALLOC | ATTR_KERNOVAL)) || *value); - - XFS_STATS_INC(ip->i_mount, xs_attr_get); + XFS_STATS_INC(args->dp->i_mount, xs_attr_get); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(args->dp->i_mount)) return -EIO; - error = xfs_attr_args_init(&args, ip, name, namelen, flags); - if (error) - return error; + args->geo = args->dp->i_mount->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + args->hashval = xfs_da_hashname(args->name, args->namelen); /* Entirely possible to look up a name which doesn't exist */ - args.op_flags = XFS_DA_OP_OKNOENT; - if (flags & ATTR_ALLOC) - args.op_flags |= XFS_DA_OP_ALLOCVAL; - else - args.value = *value; - args.valuelen = *valuelenp; + args->op_flags = XFS_DA_OP_OKNOENT; - lock_mode = xfs_ilock_attr_map_shared(ip); - error = xfs_attr_get_ilocked(ip, &args); - xfs_iunlock(ip, lock_mode); - *valuelenp = args.valuelen; + lock_mode = xfs_ilock_attr_map_shared(args->dp); + error = xfs_attr_get_ilocked(args); + xfs_iunlock(args->dp, lock_mode); - /* on error, we have to clean up allocated value buffers */ - if (error) { - if (flags & ATTR_ALLOC) { - kmem_free(args.value); - *value = NULL; - } - return error; - } - *value = args.value; - return 0; + return error; } /* @@ -238,7 +186,7 @@ xfs_attr_try_sf_addname( * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). */ - if (!error && (args->flags & ATTR_KERNOTIME) == 0) + if (!error && !(args->op_flags & XFS_DA_OP_NOTIME)) xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); if (mp->m_flags & XFS_MOUNT_WSYNC) @@ -336,188 +284,127 @@ xfs_attr_remove_args( return error; } +/* + * Note: If args->value is NULL the attribute will be removed, just like the + * Linux ->setattr API. + */ int xfs_attr_set( - struct xfs_inode *dp, - const unsigned char *name, - size_t namelen, - unsigned char *value, - int valuelen, - int flags) + struct xfs_da_args *args) { + struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; - struct xfs_da_args args; struct xfs_trans_res tres; - int rsvd = (flags & ATTR_ROOT) != 0; + bool rsvd = (args->attr_filter & XFS_ATTR_ROOT); int error, local; - - XFS_STATS_INC(mp, xs_attr_set); + unsigned int total; if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - error = xfs_attr_args_init(&args, dp, name, namelen, flags); - if (error) - return error; - - args.value = value; - args.valuelen = valuelen; - args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - args.total = xfs_attr_calc_size(&args, &local); - error = xfs_qm_dqattach(dp); if (error) return error; - /* - * If the inode doesn't have an attribute fork, add one. - * (inode must not be locked when we call this routine) - */ - if (XFS_IFORK_Q(dp) == 0) { - int sf_size = sizeof(xfs_attr_sf_hdr_t) + - XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); - - error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); - if (error) - return error; - } - - tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + - M_RES(mp)->tr_attrsetrt.tr_logres * args.total; - tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; - tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; - - /* - * Root fork attributes can use reserved data blocks for this - * operation if necessary - */ - error = xfs_trans_alloc(mp, &tres, args.total, 0, - rsvd ? XFS_TRANS_RESERVE : 0, &args.trans); - if (error) - return error; - - xfs_ilock(dp, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, - rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : - XFS_QMOPT_RES_REGBLKS); - if (error) - goto out_trans_cancel; - - xfs_trans_ijoin(args.trans, dp, 0); - error = xfs_attr_set_args(&args); - if (error) - goto out_trans_cancel; - if (!args.trans) { - /* shortform attribute has already been committed */ - goto out_unlock; - } - - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(args.trans); - - if ((flags & ATTR_KERNOTIME) == 0) - xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + args->geo = mp->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + args->hashval = xfs_da_hashname(args->name, args->namelen); /* - * Commit the last in the sequence of transactions. + * We have no control over the attribute names that userspace passes us + * to remove, so we have to allow the name lookup prior to attribute + * removal to fail as well. */ - xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans); -out_unlock: - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; + args->op_flags = XFS_DA_OP_OKNOENT; -out_trans_cancel: - if (args.trans) - xfs_trans_cancel(args.trans); - goto out_unlock; -} + if (args->value) { + XFS_STATS_INC(mp, xs_attr_set); -/* - * Generic handler routine to remove a name from an attribute list. - * Transitions attribute list from Btree to shortform as necessary. - */ -int -xfs_attr_remove( - struct xfs_inode *dp, - const unsigned char *name, - size_t namelen, - int flags) -{ - struct xfs_mount *mp = dp->i_mount; - struct xfs_da_args args; - int error; - - XFS_STATS_INC(mp, xs_attr_remove); + args->op_flags |= XFS_DA_OP_ADDNAME; + args->total = xfs_attr_calc_size(args, &local); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return -EIO; + /* + * If the inode doesn't have an attribute fork, add one. + * (inode must not be locked when we call this routine) + */ + if (XFS_IFORK_Q(dp) == 0) { + int sf_size = sizeof(struct xfs_attr_sf_hdr) + + XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, + args->valuelen); - error = xfs_attr_args_init(&args, dp, name, namelen, flags); - if (error) - return error; + error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); + if (error) + return error; + } - /* - * we have no control over the attribute names that userspace passes us - * to remove, so we have to allow the name lookup prior to attribute - * removal to fail. - */ - args.op_flags = XFS_DA_OP_OKNOENT; + tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + + M_RES(mp)->tr_attrsetrt.tr_logres * + args->total; + tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; + tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; + total = args->total; + } else { + XFS_STATS_INC(mp, xs_attr_remove); - error = xfs_qm_dqattach(dp); - if (error) - return error; + tres = M_RES(mp)->tr_attrrm; + total = XFS_ATTRRM_SPACE_RES(mp); + } /* * Root fork attributes can use reserved data blocks for this * operation if necessary */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm, - XFS_ATTRRM_SPACE_RES(mp), 0, - (flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0, - &args.trans); + error = xfs_trans_alloc(mp, &tres, total, 0, + rsvd ? XFS_TRANS_RESERVE : 0, &args->trans); if (error) return error; xfs_ilock(dp, XFS_ILOCK_EXCL); - /* - * No need to make quota reservations here. We expect to release some - * blocks not allocate in the common case. - */ - xfs_trans_ijoin(args.trans, dp, 0); - - error = xfs_attr_remove_args(&args); - if (error) - goto out; + xfs_trans_ijoin(args->trans, dp, 0); + if (args->value) { + unsigned int quota_flags = XFS_QMOPT_RES_REGBLKS; + + if (rsvd) + quota_flags |= XFS_QMOPT_FORCE_RES; + error = xfs_trans_reserve_quota_nblks(args->trans, dp, + args->total, 0, quota_flags); + if (error) + goto out_trans_cancel; + error = xfs_attr_set_args(args); + if (error) + goto out_trans_cancel; + /* shortform attribute has already been committed */ + if (!args->trans) + goto out_unlock; + } else { + error = xfs_attr_remove_args(args); + if (error) + goto out_trans_cancel; + } /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(args.trans); + xfs_trans_set_sync(args->trans); - if ((flags & ATTR_KERNOTIME) == 0) - xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + if (!(args->op_flags & XFS_DA_OP_NOTIME)) + xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. */ - xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); + error = xfs_trans_commit(args->trans); +out_unlock: xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; -out: - if (args.trans) - xfs_trans_cancel(args.trans); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; +out_trans_cancel: + if (args->trans) + xfs_trans_cancel(args->trans); + goto out_unlock; } /*======================================================================== @@ -536,10 +423,10 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) trace_xfs_attr_sf_addname(args); retval = xfs_attr_shortform_lookup(args); - if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { + if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) return retval; - } else if (retval == -EEXIST) { - if (args->flags & ATTR_CREATE) + if (retval == -EEXIST) { + if (args->attr_flags & XATTR_CREATE) return retval; retval = xfs_attr_shortform_remove(args); if (retval) @@ -549,7 +436,7 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) * that the leaf format add routine won't trip over the attr * not being around. */ - args->flags &= ~ATTR_REPLACE; + args->attr_flags &= ~XATTR_REPLACE; } if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || @@ -602,14 +489,11 @@ xfs_attr_leaf_addname( * the given flags produce an error or call for an atomic rename. */ retval = xfs_attr3_leaf_lookup_int(bp, args); - if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { - xfs_trans_brelse(args->trans, bp); - return retval; - } else if (retval == -EEXIST) { - if (args->flags & ATTR_CREATE) { /* pure create op */ - xfs_trans_brelse(args->trans, bp); - return retval; - } + if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) + goto out_brelse; + if (retval == -EEXIST) { + if (args->attr_flags & XATTR_CREATE) + goto out_brelse; trace_xfs_attr_leaf_replace(args); @@ -750,6 +634,9 @@ xfs_attr_leaf_addname( error = xfs_attr3_leaf_clearflag(args); } return error; +out_brelse: + xfs_trans_brelse(args->trans, bp); + return retval; } /* @@ -876,10 +763,10 @@ restart: goto out; blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { + if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) goto out; - } else if (retval == -EEXIST) { - if (args->flags & ATTR_CREATE) + if (retval == -EEXIST) { + if (args->attr_flags & XATTR_CREATE) goto out; trace_xfs_attr_node_replace(args); @@ -1011,7 +898,7 @@ restart: * The INCOMPLETE flag means that we will find the "old" * attr, not the "new" one. */ - args->op_flags |= XFS_DA_OP_INCOMPLETE; + args->attr_filter |= XFS_ATTR_INCOMPLETE; state = xfs_da_state_alloc(); state->args = args; state->mp = mp; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 4243b2272642..0d2d05908537 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -21,39 +21,6 @@ struct xfs_attr_list_context; * as possible so as to fit into the literal area of the inode. */ -/*======================================================================== - * External interfaces - *========================================================================*/ - - -#define ATTR_DONTFOLLOW 0x0001 /* -- ignored, from IRIX -- */ -#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ -#define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */ -#define ATTR_SECURE 0x0008 /* use attrs in security namespace */ -#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */ -#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */ - -#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ -#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ - -#define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ -#define ATTR_ALLOC 0x8000 /* [kernel] allocate xattr buffer on demand */ - -#define ATTR_KERNEL_FLAGS \ - (ATTR_KERNOTIME | ATTR_KERNOVAL | ATTR_INCOMPLETE | ATTR_ALLOC) - -#define XFS_ATTR_FLAGS \ - { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ - { ATTR_ROOT, "ROOT" }, \ - { ATTR_TRUST, "TRUST" }, \ - { ATTR_SECURE, "SECURE" }, \ - { ATTR_CREATE, "CREATE" }, \ - { ATTR_REPLACE, "REPLACE" }, \ - { ATTR_KERNOTIME, "KERNOTIME" }, \ - { ATTR_KERNOVAL, "KERNOVAL" }, \ - { ATTR_INCOMPLETE, "INCOMPLETE" }, \ - { ATTR_ALLOC, "ALLOC" } - /* * The maximum size (into the kernel or returned from the kernel) of an * attribute value or the buffer used for an attr_list() call. Larger @@ -62,45 +29,16 @@ struct xfs_attr_list_context; #define ATTR_MAX_VALUELEN (64*1024) /* max length of a value */ /* - * Define how lists of attribute names are returned to the user from - * the attr_list() call. A large, 32bit aligned, buffer is passed in - * along with its size. We put an array of offsets at the top that each - * reference an attrlist_ent_t and pack the attrlist_ent_t's at the bottom. - */ -typedef struct attrlist { - __s32 al_count; /* number of entries in attrlist */ - __s32 al_more; /* T/F: more attrs (do call again) */ - __s32 al_offset[1]; /* byte offsets of attrs [var-sized] */ -} attrlist_t; - -/* - * Show the interesting info about one attribute. This is what the - * al_offset[i] entry points to. - */ -typedef struct attrlist_ent { /* data from attr_list() */ - __u32 a_valuelen; /* number bytes in value of attr */ - char a_name[1]; /* attr name (NULL terminated) */ -} attrlist_ent_t; - -/* - * Given a pointer to the (char*) buffer containing the attr_list() result, - * and an index, return a pointer to the indicated attribute in the buffer. - */ -#define ATTR_ENTRY(buffer, index) \ - ((attrlist_ent_t *) \ - &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ]) - -/* * Kernel-internal version of the attrlist cursor. */ -typedef struct attrlist_cursor_kern { +struct xfs_attrlist_cursor_kern { __u32 hashval; /* hash value of next entry to add */ __u32 blkno; /* block containing entry (suggestion) */ __u32 offset; /* offset in list of equal-hashvals */ __u16 pad1; /* padding to match user-level */ __u8 pad2; /* padding to match user-level */ __u8 initted; /* T/F: cursor has been initialized */ -} attrlist_cursor_kern_t; +}; /*======================================================================== @@ -112,27 +50,28 @@ typedef struct attrlist_cursor_kern { typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int, unsigned char *, int, int); -typedef struct xfs_attr_list_context { - struct xfs_trans *tp; - struct xfs_inode *dp; /* inode */ - struct attrlist_cursor_kern *cursor; /* position in list */ - char *alist; /* output buffer */ +struct xfs_attr_list_context { + struct xfs_trans *tp; + struct xfs_inode *dp; /* inode */ + struct xfs_attrlist_cursor_kern cursor; /* position in list */ + void *buffer; /* output buffer */ /* * Abort attribute list iteration if non-zero. Can be used to pass * error values to the xfs_attr_list caller. */ - int seen_enough; + int seen_enough; + bool allow_incomplete; - ssize_t count; /* num used entries */ - int dupcnt; /* count dup hashvals seen */ - int bufsize; /* total buffer size */ - int firstu; /* first used byte in buffer */ - int flags; /* from VOP call */ - int resynch; /* T/F: resynch with cursor */ - put_listent_func_t put_listent; /* list output fmt function */ - int index; /* index into output buffer */ -} xfs_attr_list_context_t; + ssize_t count; /* num used entries */ + int dupcnt; /* count dup hashvals seen */ + int bufsize; /* total buffer size */ + int firstu; /* first used byte in buffer */ + unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE} */ + int resynch; /* T/F: resynch with cursor */ + put_listent_func_t put_listent; /* list output fmt function */ + int index; /* index into output buffer */ +}; /*======================================================================== @@ -143,21 +82,14 @@ typedef struct xfs_attr_list_context { * Overall external interface routines. */ int xfs_attr_inactive(struct xfs_inode *dp); -int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *); -int xfs_attr_list_int(struct xfs_attr_list_context *); +int xfs_attr_list_ilocked(struct xfs_attr_list_context *); +int xfs_attr_list(struct xfs_attr_list_context *); int xfs_inode_hasattr(struct xfs_inode *ip); -int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args); -int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, - size_t namelen, unsigned char **value, int *valuelenp, - int flags); -int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, - size_t namelen, unsigned char *value, int valuelen, int flags); +int xfs_attr_get_ilocked(struct xfs_da_args *args); +int xfs_attr_get(struct xfs_da_args *args); +int xfs_attr_set(struct xfs_da_args *args); int xfs_attr_set_args(struct xfs_da_args *args); -int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, - size_t namelen, int flags); int xfs_attr_remove_args(struct xfs_da_args *args); -int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, - int flags, struct attrlist_cursor_kern *cursor); bool xfs_attr_namecheck(const void *name, size_t length); #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index fed537a4353d..863444e2dda7 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -445,14 +445,25 @@ xfs_attr3_leaf_read( * Namespace helper routines *========================================================================*/ -/* - * If namespace bits don't match return 0. - * If all match then return 1. - */ -STATIC int -xfs_attr_namesp_match(int arg_flags, int ondisk_flags) +static bool +xfs_attr_match( + struct xfs_da_args *args, + uint8_t namelen, + unsigned char *name, + int flags) { - return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); + if (args->namelen != namelen) + return false; + if (memcmp(args->name, name, namelen) != 0) + return false; + /* + * If we are looking for incomplete entries, show only those, else only + * show complete entries. + */ + if (args->attr_filter != + (flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE))) + return false; + return true; } static int @@ -464,7 +475,7 @@ xfs_attr_copy_value( /* * No copy if all we have to do is get the length */ - if (args->flags & ATTR_KERNOVAL) { + if (!args->valuelen) { args->valuelen = valuelen; return 0; } @@ -477,7 +488,7 @@ xfs_attr_copy_value( return -ERANGE; } - if (args->op_flags & XFS_DA_OP_ALLOCVAL) { + if (!args->value) { args->value = kmem_alloc_large(valuelen, 0); if (!args->value) return -ENOMEM; @@ -526,7 +537,7 @@ xfs_attr_shortform_bytesfit( int offset; /* rounded down */ - offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; + offset = (XFS_LITINO(mp) - bytes) >> 3; if (dp->i_d.di_format == XFS_DINODE_FMT_DEV) { minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; @@ -593,8 +604,7 @@ xfs_attr_shortform_bytesfit( minforkoff = roundup(minforkoff, 8) >> 3; /* attr fork btree root can have at least this many key/ptr pairs */ - maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) - - XFS_BMDR_SPACE_CALC(MINABTPTRS); + maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); maxforkoff = maxforkoff >> 3; /* rounded down */ if (offset >= maxforkoff) @@ -678,15 +688,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { -#ifdef DEBUG - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - ASSERT(0); -#endif + ASSERT(!xfs_attr_match(args, sfe->namelen, sfe->nameval, + sfe->flags)); } offset = (char *)sfe - (char *)sf; @@ -697,7 +700,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) sfe->namelen = args->namelen; sfe->valuelen = args->valuelen; - sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); + sfe->flags = args->attr_filter; memcpy(sfe->nameval, args->name, args->namelen); memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); sf->hdr.count++; @@ -749,13 +752,9 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), base += size, i++) { size = XFS_ATTR_SF_ENTSIZE(sfe); - if (sfe->namelen != args->namelen) - continue; - if (memcmp(sfe->nameval, args->name, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - break; + if (xfs_attr_match(args, sfe->namelen, sfe->nameval, + sfe->flags)) + break; } if (i == end) return -ENOATTR; @@ -816,13 +815,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - return -EEXIST; + if (xfs_attr_match(args, sfe->namelen, sfe->nameval, + sfe->flags)) + return -EEXIST; } return -ENOATTR; } @@ -830,9 +825,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) /* * Retrieve the attribute value and length. * - * If ATTR_KERNOVAL is specified, only the length needs to be returned. - * Unlike a lookup, we only return an error if the attribute does not - * exist or we can't retrieve the value. + * If args->valuelen is zero, only the length needs to be returned. Unlike a + * lookup, we only return an error if the attribute does not exist or we can't + * retrieve the value. */ int xfs_attr_shortform_getvalue( @@ -847,14 +842,10 @@ xfs_attr_shortform_getvalue( sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - return xfs_attr_copy_value(args, &sfe->nameval[args->namelen], - sfe->valuelen); + if (xfs_attr_match(args, sfe->namelen, sfe->nameval, + sfe->flags)) + return xfs_attr_copy_value(args, + &sfe->nameval[args->namelen], sfe->valuelen); } return -ENOATTR; } @@ -918,7 +909,7 @@ xfs_attr_shortform_to_leaf( nargs.valuelen = sfe->valuelen; nargs.hashval = xfs_da_hashname(sfe->nameval, sfe->namelen); - nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); + nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK; error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ ASSERT(error == -ENOATTR); error = xfs_attr3_leaf_add(bp, &nargs); @@ -1124,7 +1115,7 @@ xfs_attr3_leaf_to_shortform( nargs.value = &name_loc->nameval[nargs.namelen]; nargs.valuelen = be16_to_cpu(name_loc->valuelen); nargs.hashval = be32_to_cpu(entry->hashval); - nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); + nargs.attr_filter = entry->flags & XFS_ATTR_NSP_ONDISK_MASK; xfs_attr_shortform_add(&nargs, forkoff); } error = 0; @@ -1449,8 +1440,9 @@ xfs_attr3_leaf_add_work( entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base + ichdr->freemap[mapindex].size); entry->hashval = cpu_to_be32(args->hashval); - entry->flags = tmp ? XFS_ATTR_LOCAL : 0; - entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); + entry->flags = args->attr_filter; + if (tmp) + entry->flags |= XFS_ATTR_LOCAL; if (args->op_flags & XFS_DA_OP_RENAME) { entry->flags |= XFS_ATTR_INCOMPLETE; if ((args->blkno2 == args->blkno) && @@ -2346,7 +2338,7 @@ xfs_attr3_leaf_lookup_int( xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); if (ichdr.count >= args->geo->blksize / 8) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } @@ -2365,11 +2357,11 @@ xfs_attr3_leaf_lookup_int( break; } if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } @@ -2399,33 +2391,17 @@ xfs_attr3_leaf_lookup_int( /* * GROT: Add code to remove incomplete entries. */ - /* - * If we are looking for INCOMPLETE entries, show only those. - * If we are looking for complete entries, show only those. - */ - if (!!(args->op_flags & XFS_DA_OP_INCOMPLETE) != - !!(entry->flags & XFS_ATTR_INCOMPLETE)) { - continue; - } if (entry->flags & XFS_ATTR_LOCAL) { name_loc = xfs_attr3_leaf_name_local(leaf, probe); - if (name_loc->namelen != args->namelen) - continue; - if (memcmp(args->name, name_loc->nameval, - args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, entry->flags)) + if (!xfs_attr_match(args, name_loc->namelen, + name_loc->nameval, entry->flags)) continue; args->index = probe; return -EEXIST; } else { name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); - if (name_rmt->namelen != args->namelen) - continue; - if (memcmp(args->name, name_rmt->name, - args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, entry->flags)) + if (!xfs_attr_match(args, name_rmt->namelen, + name_rmt->name, entry->flags)) continue; args->index = probe; args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); @@ -2444,9 +2420,9 @@ xfs_attr3_leaf_lookup_int( * Get the value associated with an attribute name from a leaf attribute * list structure. * - * If ATTR_KERNOVAL is specified, only the length needs to be returned. - * Unlike a lookup, we only return an error if the attribute does not - * exist or we can't retrieve the value. + * If args->valuelen is zero, only the length needs to be returned. Unlike a + * lookup, we only return an error if the attribute does not exist or we can't + * retrieve the value. */ int xfs_attr3_leaf_getvalue( diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 73615b1dd1a8..6dd2d937a42a 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -8,7 +8,6 @@ #define __XFS_ATTR_LEAF_H__ struct attrlist; -struct attrlist_cursor_kern; struct xfs_attr_list_context; struct xfs_da_args; struct xfs_da_state; diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 8b7f74b3bea2..01ad7f353e08 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -397,7 +397,7 @@ xfs_attr_rmtval_get( trace_xfs_attr_rmtval_get(args); - ASSERT(!(args->flags & ATTR_KERNOVAL)); + ASSERT(args->valuelen != 0); ASSERT(args->rmtvaluelen == args->valuelen); valuelen = args->rmtvaluelen; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 9a6d7a84689a..fda13cd7add0 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -193,14 +193,12 @@ xfs_default_attroffset( struct xfs_mount *mp = ip->i_mount; uint offset; - if (mp->m_sb.sb_inodesize == 256) { - offset = XFS_LITINO(mp, ip->i_d.di_version) - - XFS_BMDR_SPACE_CALC(MINABTPTRS); - } else { + if (mp->m_sb.sb_inodesize == 256) + offset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); + else offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); - } - ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version)); + ASSERT(offset < XFS_LITINO(mp)); return offset; } @@ -690,7 +688,7 @@ xfs_bmap_extents_to_btree( * Need a cursor. Can't allocate until bb_level is filled in. */ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; + cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0; /* * Convert to a btree with two levels, one record in root. */ @@ -727,7 +725,7 @@ xfs_bmap_extents_to_btree( ASSERT(tp->t_firstblock == NULLFSBLOCK || args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock)); tp->t_firstblock = args.fsbno; - cur->bc_private.b.allocated++; + cur->bc_ino.allocated++; ip->i_d.di_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); error = xfs_trans_get_buf(tp, mp->m_ddev_targp, @@ -953,7 +951,7 @@ xfs_bmap_add_attrfork_btree( xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return -ENOSPC; } - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); } return 0; @@ -980,7 +978,7 @@ xfs_bmap_add_attrfork_extents( error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags, XFS_DATA_FORK); if (cur) { - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_btree_del_cursor(cur, error); } return error; @@ -1178,13 +1176,13 @@ xfs_iread_bmbt_block( { struct xfs_iread_state *ir = priv; struct xfs_mount *mp = cur->bc_mp; - struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_inode *ip = cur->bc_ino.ip; struct xfs_btree_block *block; struct xfs_buf *bp; struct xfs_bmbt_rec *frp; xfs_extnum_t num_recs; xfs_extnum_t j; - int whichfork = cur->bc_private.b.whichfork; + int whichfork = cur->bc_ino.whichfork; block = xfs_btree_get_block(cur, level, &bp); @@ -1528,7 +1526,7 @@ xfs_bmap_add_extent_delay_real( ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || - (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); @@ -1818,7 +1816,7 @@ xfs_bmap_add_extent_delay_real( temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + (bma->cur ? bma->cur->bc_ino.allocated : 0)); PREV.br_startoff = new_endoff; PREV.br_blockcount = temp; @@ -1904,7 +1902,7 @@ xfs_bmap_add_extent_delay_real( temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + (bma->cur ? bma->cur->bc_ino.allocated : 0)); PREV.br_startblock = nullstartblock(da_new); PREV.br_blockcount = temp; @@ -2025,8 +2023,8 @@ xfs_bmap_add_extent_delay_real( xfs_mod_delalloc(mp, (int64_t)da_new - da_old); if (bma->cur) { - da_new += bma->cur->bc_private.b.allocated; - bma->cur->bc_private.b.allocated = 0; + da_new += bma->cur->bc_ino.allocated; + bma->cur->bc_ino.allocated = 0; } /* adjust for changes in reserved delayed indirect blocks */ @@ -2573,7 +2571,7 @@ xfs_bmap_add_extent_unwritten_real( /* clear out the allocated field, done with it now in any case. */ if (cur) { - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; *curp = cur; } @@ -2752,7 +2750,7 @@ xfs_bmap_add_extent_hole_real( struct xfs_bmbt_irec old; ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2955,7 +2953,7 @@ xfs_bmap_add_extent_hole_real( /* clear out the allocated field, done with it now in any case. */ if (cur) - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_bmap_check_leaf_extents(cur, ip, whichfork); done: @@ -4187,8 +4185,8 @@ xfs_bmapi_allocate( bma->nallocs++; if (bma->cur) - bma->cur->bc_private.b.flags = - bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; + bma->cur->bc_ino.flags = + bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0; bma->got.br_startoff = bma->offset; bma->got.br_startblock = bma->blkno; @@ -4709,7 +4707,7 @@ xfs_bmapi_remap( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; } got.br_startoff = bno; @@ -5364,7 +5362,7 @@ __xfs_bunmapi( if (ifp->if_flags & XFS_IFBROOT) { ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; } else cur = NULL; @@ -5620,7 +5618,7 @@ error0: xfs_trans_log_inode(tp, ip, logflags); if (cur) { if (!error) - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_btree_del_cursor(cur, error); } return error; @@ -5839,7 +5837,7 @@ xfs_bmap_collapse_extents( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; } if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) { @@ -5956,7 +5954,7 @@ xfs_bmap_insert_extents( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; } if (*next_fsb == NULLFSBLOCK) { @@ -6025,8 +6023,8 @@ del_cursor: * @split_fsb is a block where the extents is split. If split_fsb lies in a * hole or the first block of extents, just return 0. */ -STATIC int -xfs_bmap_split_extent_at( +int +xfs_bmap_split_extent( struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t split_fsb) @@ -6074,7 +6072,7 @@ xfs_bmap_split_extent_at( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) goto del_cursor; @@ -6133,7 +6131,7 @@ xfs_bmap_split_extent_at( del_cursor: if (cur) { - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_btree_del_cursor(cur, error); } @@ -6142,34 +6140,6 @@ del_cursor: return error; } -int -xfs_bmap_split_extent( - struct xfs_inode *ip, - xfs_fileoff_t split_fsb) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); - if (error) - return error; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - error = xfs_bmap_split_extent_at(tp, ip, split_fsb); - if (error) - goto out; - - return xfs_trans_commit(tp); - -out: - xfs_trans_cancel(tp); - return error; -} - /* Deferred mapping is only for real extents in the data fork. */ static bool xfs_bmap_is_update_needed( diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 14d25e0b7d9c..f3259ad5c22c 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -222,7 +222,8 @@ int xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off, int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, xfs_fileoff_t stop_fsb); -int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); +int xfs_bmap_split_extent(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t split_offset); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index ffe608d2a2d9..295a59cf8840 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -166,13 +166,13 @@ xfs_bmbt_dup_cursor( struct xfs_btree_cur *new; new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.b.ip, cur->bc_private.b.whichfork); + cur->bc_ino.ip, cur->bc_ino.whichfork); /* * Copy the firstblock, dfops, and flags values, * since init cursor doesn't get them. */ - new->bc_private.b.flags = cur->bc_private.b.flags; + new->bc_ino.flags = cur->bc_ino.flags; return new; } @@ -183,12 +183,12 @@ xfs_bmbt_update_cursor( struct xfs_btree_cur *dst) { ASSERT((dst->bc_tp->t_firstblock != NULLFSBLOCK) || - (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); + (dst->bc_ino.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); - dst->bc_private.b.allocated += src->bc_private.b.allocated; + dst->bc_ino.allocated += src->bc_ino.allocated; dst->bc_tp->t_firstblock = src->bc_tp->t_firstblock; - src->bc_private.b.allocated = 0; + src->bc_ino.allocated = 0; } STATIC int @@ -205,8 +205,8 @@ xfs_bmbt_alloc_block( args.tp = cur->bc_tp; args.mp = cur->bc_mp; args.fsbno = cur->bc_tp->t_firstblock; - xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino, - cur->bc_private.b.whichfork); + xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino, + cur->bc_ino.whichfork); if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); @@ -230,7 +230,7 @@ xfs_bmbt_alloc_block( } args.minlen = args.maxlen = args.prod = 1; - args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; + args.wasdel = cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL; if (!args.wasdel && args.tp->t_blk_res == 0) { error = -ENOSPC; goto error0; @@ -259,10 +259,10 @@ xfs_bmbt_alloc_block( ASSERT(args.len == 1); cur->bc_tp->t_firstblock = args.fsbno; - cur->bc_private.b.allocated++; - cur->bc_private.b.ip->i_d.di_nblocks++; - xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, + cur->bc_ino.allocated++; + cur->bc_ino.ip->i_d.di_nblocks++; + xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE); + xfs_trans_mod_dquot_byino(args.tp, cur->bc_ino.ip, XFS_TRANS_DQ_BCOUNT, 1L); new->l = cpu_to_be64(args.fsbno); @@ -280,12 +280,12 @@ xfs_bmbt_free_block( struct xfs_buf *bp) { struct xfs_mount *mp = cur->bc_mp; - struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_inode *ip = cur->bc_ino.ip; struct xfs_trans *tp = cur->bc_tp; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); struct xfs_owner_info oinfo; - xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork); + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); xfs_bmap_add_free(cur->bc_tp, fsbno, 1, &oinfo); ip->i_d.di_nblocks--; @@ -302,8 +302,8 @@ xfs_bmbt_get_minrecs( if (level == cur->bc_nlevels - 1) { struct xfs_ifork *ifp; - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, - cur->bc_private.b.whichfork); + ifp = XFS_IFORK_PTR(cur->bc_ino.ip, + cur->bc_ino.whichfork); return xfs_bmbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, level == 0) / 2; @@ -320,8 +320,8 @@ xfs_bmbt_get_maxrecs( if (level == cur->bc_nlevels - 1) { struct xfs_ifork *ifp; - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, - cur->bc_private.b.whichfork); + ifp = XFS_IFORK_PTR(cur->bc_ino.ip, + cur->bc_ino.whichfork); return xfs_bmbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, level == 0); @@ -347,7 +347,7 @@ xfs_bmbt_get_dmaxrecs( { if (level != cur->bc_nlevels - 1) return cur->bc_mp->m_bmap_dmxr[level != 0]; - return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0); + return xfs_bmdr_maxrecs(cur->bc_ino.forksize, level == 0); } STATIC void @@ -566,11 +566,11 @@ xfs_bmbt_init_cursor( if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); - cur->bc_private.b.ip = ip; - cur->bc_private.b.allocated = 0; - cur->bc_private.b.flags = 0; - cur->bc_private.b.whichfork = whichfork; + cur->bc_ino.forksize = XFS_IFORK_SIZE(ip, whichfork); + cur->bc_ino.ip = ip; + cur->bc_ino.allocated = 0; + cur->bc_ino.flags = 0; + cur->bc_ino.whichfork = whichfork; return cur; } @@ -644,7 +644,7 @@ xfs_bmbt_change_owner( cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); if (!cur) return -ENOMEM; - cur->bc_private.b.flags |= XFS_BTCUR_BPRV_INVALID_OWNER; + cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER; error = xfs_btree_change_owner(cur, new_owner, buffer_list); xfs_btree_del_cursor(cur, error); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index fd300dc93ca4..2d25bab68764 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -20,6 +20,7 @@ #include "xfs_trace.h" #include "xfs_alloc.h" #include "xfs_log.h" +#include "xfs_btree_staging.h" /* * Cursor allocation zone. @@ -214,7 +215,7 @@ xfs_btree_check_sptr( { if (level <= 0) return false; - return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno); + return xfs_verify_agbno(cur->bc_mp, cur->bc_ag.agno, agbno); } /* @@ -234,8 +235,8 @@ xfs_btree_check_ptr( return 0; xfs_err(cur->bc_mp, "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.", - cur->bc_private.b.ip->i_ino, - cur->bc_private.b.whichfork, cur->bc_btnum, + cur->bc_ino.ip->i_ino, + cur->bc_ino.whichfork, cur->bc_btnum, level, index); } else { if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]), @@ -243,7 +244,7 @@ xfs_btree_check_ptr( return 0; xfs_err(cur->bc_mp, "AG %u: Corrupt btree %d pointer at level %d index %d.", - cur->bc_private.a.agno, cur->bc_btnum, + cur->bc_ag.agno, cur->bc_btnum, level, index); } @@ -378,10 +379,12 @@ xfs_btree_del_cursor( * allocated indirect blocks' accounting. */ ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || - cur->bc_private.b.allocated == 0); + cur->bc_ino.allocated == 0); /* * Free the cursor. */ + if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) + kmem_free((void *)cur->bc_ops); kmem_cache_free(xfs_btree_cur_zone, cur); } @@ -642,6 +645,17 @@ xfs_btree_ptr_addr( ((char *)block + xfs_btree_ptr_offset(cur, n, level)); } +struct xfs_ifork * +xfs_btree_ifork_ptr( + struct xfs_btree_cur *cur) +{ + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + + if (cur->bc_flags & XFS_BTREE_STAGING) + return cur->bc_ino.ifake->if_fork; + return XFS_IFORK_PTR(cur->bc_ino.ip, cur->bc_ino.whichfork); +} + /* * Get the root block which is stored in the inode. * @@ -652,9 +666,8 @@ STATIC struct xfs_btree_block * xfs_btree_get_iroot( struct xfs_btree_cur *cur) { - struct xfs_ifork *ifp; + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); return (struct xfs_btree_block *)ifp->if_broot; } @@ -881,13 +894,13 @@ xfs_btree_readahead_sblock( if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.agno, left, 1, cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.agno, right, 1, cur->bc_ops->buf_ops); rval++; } @@ -945,7 +958,7 @@ xfs_btree_ptr_to_daddr( *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno); } else { agbno = be32_to_cpu(ptr->s); - *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, + *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.agno, agbno); } @@ -1014,7 +1027,7 @@ xfs_btree_ptr_is_null( return ptr->s == cpu_to_be32(NULLAGBLOCK); } -STATIC void +void xfs_btree_set_ptr_null( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) @@ -1050,7 +1063,7 @@ xfs_btree_get_sibling( } } -STATIC void +void xfs_btree_set_sibling( struct xfs_btree_cur *cur, struct xfs_btree_block *block, @@ -1128,7 +1141,7 @@ xfs_btree_init_block( btnum, level, numrecs, owner, 0); } -STATIC void +void xfs_btree_init_block_cur( struct xfs_btree_cur *cur, struct xfs_buf *bp, @@ -1144,9 +1157,9 @@ xfs_btree_init_block_cur( * code. */ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - owner = cur->bc_private.b.ip->i_ino; + owner = cur->bc_ino.ip->i_ino; else - owner = cur->bc_private.a.agno; + owner = cur->bc_ag.agno; xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, cur->bc_btnum, level, numrecs, @@ -1220,7 +1233,7 @@ xfs_btree_set_refs( } } -STATIC int +int xfs_btree_get_buf_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, @@ -1280,7 +1293,7 @@ xfs_btree_read_buf_block( /* * Copy keys from one btree block to another. */ -STATIC void +void xfs_btree_copy_keys( struct xfs_btree_cur *cur, union xfs_btree_key *dst_key, @@ -1308,11 +1321,11 @@ xfs_btree_copy_recs( /* * Copy block pointers from one btree block to another. */ -STATIC void +void xfs_btree_copy_ptrs( struct xfs_btree_cur *cur, union xfs_btree_ptr *dst_ptr, - union xfs_btree_ptr *src_ptr, + const union xfs_btree_ptr *src_ptr, int numptrs) { ASSERT(numptrs >= 0); @@ -1393,8 +1406,8 @@ xfs_btree_log_keys( xfs_btree_key_offset(cur, first), xfs_btree_key_offset(cur, last + 1) - 1); } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, + xfs_ilog_fbroot(cur->bc_ino.whichfork)); } } @@ -1436,8 +1449,8 @@ xfs_btree_log_ptrs( xfs_btree_ptr_offset(cur, first, level), xfs_btree_ptr_offset(cur, last + 1, level) - 1); } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, + xfs_ilog_fbroot(cur->bc_ino.whichfork)); } } @@ -1505,8 +1518,8 @@ xfs_btree_log_block( xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(cur->bc_tp, bp, first, last); } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, + xfs_ilog_fbroot(cur->bc_ino.whichfork)); } } @@ -1743,10 +1756,10 @@ xfs_btree_lookup_get_block( /* Check the inode owner since the verifiers don't. */ if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && - !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) && + !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) && (cur->bc_flags & XFS_BTREE_LONG_PTRS) && be64_to_cpu((*blkp)->bb_u.l.bb_owner) != - cur->bc_private.b.ip->i_ino) + cur->bc_ino.ip->i_ino) goto out_bad; /* Did we get the level we were looking for? */ @@ -1762,7 +1775,7 @@ xfs_btree_lookup_get_block( out_bad: *blkp = NULL; - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); xfs_trans_brelse(cur->bc_tp, bp); return -EFSCORRUPTED; } @@ -2938,9 +2951,9 @@ xfs_btree_new_iroot( xfs_btree_copy_ptrs(cur, pp, &nptr, 1); - xfs_iroot_realloc(cur->bc_private.b.ip, + xfs_iroot_realloc(cur->bc_ino.ip, 1 - xfs_btree_get_numrecs(cblock), - cur->bc_private.b.whichfork); + cur->bc_ino.whichfork); xfs_btree_setbuf(cur, level, cbp); @@ -2953,7 +2966,7 @@ xfs_btree_new_iroot( xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); *logflags |= - XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); + XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork); *stat = 1; return 0; error0: @@ -3105,11 +3118,11 @@ xfs_btree_make_block_unfull( if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && level == cur->bc_nlevels - 1) { - struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_inode *ip = cur->bc_ino.ip; if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { /* A root block that can be made bigger. */ - xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); + xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork); *stat = 1; } else { /* A root block that needs replacing */ @@ -3455,8 +3468,8 @@ STATIC int xfs_btree_kill_iroot( struct xfs_btree_cur *cur) { - int whichfork = cur->bc_private.b.whichfork; - struct xfs_inode *ip = cur->bc_private.b.ip; + int whichfork = cur->bc_ino.whichfork; + struct xfs_inode *ip = cur->bc_ino.ip; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_block *block; struct xfs_btree_block *cblock; @@ -3514,8 +3527,8 @@ xfs_btree_kill_iroot( index = numrecs - cur->bc_ops->get_maxrecs(cur, level); if (index) { - xfs_iroot_realloc(cur->bc_private.b.ip, index, - cur->bc_private.b.whichfork); + xfs_iroot_realloc(cur->bc_ino.ip, index, + cur->bc_ino.whichfork); block = ifp->if_broot; } @@ -3544,7 +3557,7 @@ xfs_btree_kill_iroot( cur->bc_bufs[level - 1] = NULL; be16_add_cpu(&block->bb_level, -1); xfs_trans_log_inode(cur->bc_tp, ip, - XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork)); cur->bc_nlevels--; out0: return 0; @@ -3712,8 +3725,8 @@ xfs_btree_delrec( */ if (level == cur->bc_nlevels - 1) { if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { - xfs_iroot_realloc(cur->bc_private.b.ip, -1, - cur->bc_private.b.whichfork); + xfs_iroot_realloc(cur->bc_ino.ip, -1, + cur->bc_ino.whichfork); error = xfs_btree_kill_iroot(cur); if (error) diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 3eff7c321d43..8626c5a81aad 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -10,6 +10,7 @@ struct xfs_buf; struct xfs_inode; struct xfs_mount; struct xfs_trans; +struct xfs_ifork; extern kmem_zone_t *xfs_btree_cur_zone; @@ -177,15 +178,37 @@ union xfs_btree_irec { struct xfs_refcount_irec rc; }; -/* Per-AG btree private information. */ -union xfs_btree_cur_private { - struct { - unsigned long nr_ops; /* # record updates */ - int shape_changes; /* # of extent splits */ - } refc; - struct { - bool active; /* allocation cursor state */ - } abt; +/* Per-AG btree information. */ +struct xfs_btree_cur_ag { + union { + struct xfs_buf *agbp; + struct xbtree_afakeroot *afake; /* for staging cursor */ + }; + xfs_agnumber_t agno; + union { + struct { + unsigned long nr_ops; /* # record updates */ + int shape_changes; /* # of extent splits */ + } refc; + struct { + bool active; /* allocation cursor state */ + } abt; + }; +}; + +/* Btree-in-inode cursor information */ +struct xfs_btree_cur_ino { + struct xfs_inode *ip; + struct xbtree_ifakeroot *ifake; /* for staging cursor */ + int allocated; + short forksize; + char whichfork; + char flags; +/* We are converting a delalloc reservation */ +#define XFS_BTCUR_BMBT_WASDEL (1 << 0) + +/* For extent swap, ignore owner check in verifier */ +#define XFS_BTCUR_BMBT_INVALID_OWNER (1 << 1) }; /* @@ -209,21 +232,9 @@ typedef struct xfs_btree_cur xfs_btnum_t bc_btnum; /* identifies which btree type */ int bc_statoff; /* offset of btre stats array */ union { - struct { /* needed for BNO, CNT, INO */ - struct xfs_buf *agbp; /* agf/agi buffer pointer */ - xfs_agnumber_t agno; /* ag number */ - union xfs_btree_cur_private priv; - } a; - struct { /* needed for BMAP */ - struct xfs_inode *ip; /* pointer to our inode */ - int allocated; /* count of alloced */ - short forksize; /* fork's inode space */ - char whichfork; /* data or attr fork */ - char flags; /* flags */ -#define XFS_BTCUR_BPRV_WASDEL (1<<0) /* was delayed */ -#define XFS_BTCUR_BPRV_INVALID_OWNER (1<<1) /* for ext swap */ - } b; - } bc_private; /* per-btree type data */ + struct xfs_btree_cur_ag bc_ag; + struct xfs_btree_cur_ino bc_ino; + }; } xfs_btree_cur_t; /* cursor flags */ @@ -232,6 +243,12 @@ typedef struct xfs_btree_cur #define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ #define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */ #define XFS_BTREE_OVERLAPPING (1<<4) /* overlapping intervals */ +/* + * The root of this btree is a fakeroot structure so that we can stage a btree + * rebuild without leaving it accessible via primary metadata. The ops struct + * is dynamically allocated and must be freed when the cursor is deleted. + */ +#define XFS_BTREE_STAGING (1<<5) #define XFS_BTREE_NOERROR 0 @@ -494,6 +511,7 @@ union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, union xfs_btree_irec *high, bool *exists); bool xfs_btree_has_more_records(struct xfs_btree_cur *cur); +struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur); /* Does this cursor point to the last block in the given level? */ static inline bool @@ -512,4 +530,20 @@ xfs_btree_islastblock( return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); } +void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr); +int xfs_btree_get_buf_block(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, + struct xfs_btree_block **block, struct xfs_buf **bpp); +void xfs_btree_set_sibling(struct xfs_btree_cur *cur, + struct xfs_btree_block *block, union xfs_btree_ptr *ptr, + int lr); +void xfs_btree_init_block_cur(struct xfs_btree_cur *cur, + struct xfs_buf *bp, int level, int numrecs); +void xfs_btree_copy_ptrs(struct xfs_btree_cur *cur, + union xfs_btree_ptr *dst_ptr, + const union xfs_btree_ptr *src_ptr, int numptrs); +void xfs_btree_copy_keys(struct xfs_btree_cur *cur, + union xfs_btree_key *dst_key, union xfs_btree_key *src_key, + int numkeys); + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c new file mode 100644 index 000000000000..f464a7c7cf22 --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -0,0 +1,879 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_btree.h" +#include "xfs_trace.h" +#include "xfs_btree_staging.h" + +/* + * Staging Cursors and Fake Roots for Btrees + * ========================================= + * + * A staging btree cursor is a special type of btree cursor that callers must + * use to construct a new btree index using the btree bulk loader code. The + * bulk loading code uses the staging btree cursor to abstract the details of + * initializing new btree blocks and filling them with records or key/ptr + * pairs. Regular btree operations (e.g. queries and modifications) are not + * supported with staging cursors, and callers must not invoke them. + * + * Fake root structures contain all the information about a btree that is under + * construction by the bulk loading code. Staging btree cursors point to fake + * root structures instead of the usual AG header or inode structure. + * + * Callers are expected to initialize a fake root structure and pass it into + * the _stage_cursor function for a specific btree type. When bulk loading is + * complete, callers should call the _commit_staged_btree function for that + * specific btree type to commit the new btree into the filesystem. + */ + +/* + * Don't allow staging cursors to be duplicated because they're supposed to be + * kept private to a single thread. + */ +STATIC struct xfs_btree_cur * +xfs_btree_fakeroot_dup_cursor( + struct xfs_btree_cur *cur) +{ + ASSERT(0); + return NULL; +} + +/* + * Don't allow block allocation for a staging cursor, because staging cursors + * do not support regular btree modifications. + * + * Bulk loading uses a separate callback to obtain new blocks from a + * preallocated list, which prevents ENOSPC failures during loading. + */ +STATIC int +xfs_btree_fakeroot_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start_bno, + union xfs_btree_ptr *new_bno, + int *stat) +{ + ASSERT(0); + return -EFSCORRUPTED; +} + +/* + * Don't allow block freeing for a staging cursor, because staging cursors + * do not support regular btree modifications. + */ +STATIC int +xfs_btree_fakeroot_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + ASSERT(0); + return -EFSCORRUPTED; +} + +/* Initialize a pointer to the root block from the fakeroot. */ +STATIC void +xfs_btree_fakeroot_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xbtree_afakeroot *afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + afake = cur->bc_ag.afake; + ptr->s = cpu_to_be32(afake->af_root); +} + +/* + * Bulk Loading for AG Btrees + * ========================== + * + * For a btree rooted in an AG header, pass a xbtree_afakeroot structure to the + * staging cursor. Callers should initialize this to zero. + * + * The _stage_cursor() function for a specific btree type should call + * xfs_btree_stage_afakeroot to set up the in-memory cursor as a staging + * cursor. The corresponding _commit_staged_btree() function should log the + * new root and call xfs_btree_commit_afakeroot() to transform the staging + * cursor into a regular btree cursor. + */ + +/* Update the btree root information for a per-AG fake root. */ +STATIC void +xfs_btree_afakeroot_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int inc) +{ + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + afake->af_root = be32_to_cpu(ptr->s); + afake->af_levels += inc; +} + +/* + * Initialize a AG-rooted btree cursor with the given AG btree fake root. + * The btree cursor's bc_ops will be overridden as needed to make the staging + * functionality work. + */ +void +xfs_btree_stage_afakeroot( + struct xfs_btree_cur *cur, + struct xbtree_afakeroot *afake) +{ + struct xfs_btree_ops *nops; + + ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING)); + ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)); + ASSERT(cur->bc_tp == NULL); + + nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS); + memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops)); + nops->alloc_block = xfs_btree_fakeroot_alloc_block; + nops->free_block = xfs_btree_fakeroot_free_block; + nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur; + nops->set_root = xfs_btree_afakeroot_set_root; + nops->dup_cursor = xfs_btree_fakeroot_dup_cursor; + + cur->bc_ag.afake = afake; + cur->bc_nlevels = afake->af_levels; + cur->bc_ops = nops; + cur->bc_flags |= XFS_BTREE_STAGING; +} + +/* + * Transform an AG-rooted staging btree cursor back into a regular cursor by + * substituting a real btree root for the fake one and restoring normal btree + * cursor ops. The caller must log the btree root change prior to calling + * this. + */ +void +xfs_btree_commit_afakeroot( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp, + const struct xfs_btree_ops *ops) +{ + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + ASSERT(cur->bc_tp == NULL); + + trace_xfs_btree_commit_afakeroot(cur); + + kmem_free((void *)cur->bc_ops); + cur->bc_ag.agbp = agbp; + cur->bc_ops = ops; + cur->bc_flags &= ~XFS_BTREE_STAGING; + cur->bc_tp = tp; +} + +/* + * Bulk Loading for Inode-Rooted Btrees + * ==================================== + * + * For a btree rooted in an inode fork, pass a xbtree_ifakeroot structure to + * the staging cursor. This structure should be initialized as follows: + * + * - if_fork_size field should be set to the number of bytes available to the + * fork in the inode. + * + * - if_fork should point to a freshly allocated struct xfs_ifork. + * + * - if_format should be set to the appropriate fork type (e.g. + * XFS_DINODE_FMT_BTREE). + * + * All other fields must be zero. + * + * The _stage_cursor() function for a specific btree type should call + * xfs_btree_stage_ifakeroot to set up the in-memory cursor as a staging + * cursor. The corresponding _commit_staged_btree() function should log the + * new root and call xfs_btree_commit_ifakeroot() to transform the staging + * cursor into a regular btree cursor. + */ + +/* + * Initialize an inode-rooted btree cursor with the given inode btree fake + * root. The btree cursor's bc_ops will be overridden as needed to make the + * staging functionality work. If new_ops is not NULL, these new ops will be + * passed out to the caller for further overriding. + */ +void +xfs_btree_stage_ifakeroot( + struct xfs_btree_cur *cur, + struct xbtree_ifakeroot *ifake, + struct xfs_btree_ops **new_ops) +{ + struct xfs_btree_ops *nops; + + ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING)); + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_tp == NULL); + + nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS); + memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops)); + nops->alloc_block = xfs_btree_fakeroot_alloc_block; + nops->free_block = xfs_btree_fakeroot_free_block; + nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur; + nops->dup_cursor = xfs_btree_fakeroot_dup_cursor; + + cur->bc_ino.ifake = ifake; + cur->bc_nlevels = ifake->if_levels; + cur->bc_ops = nops; + cur->bc_flags |= XFS_BTREE_STAGING; + + if (new_ops) + *new_ops = nops; +} + +/* + * Transform an inode-rooted staging btree cursor back into a regular cursor by + * substituting a real btree root for the fake one and restoring normal btree + * cursor ops. The caller must log the btree root change prior to calling + * this. + */ +void +xfs_btree_commit_ifakeroot( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + int whichfork, + const struct xfs_btree_ops *ops) +{ + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + ASSERT(cur->bc_tp == NULL); + + trace_xfs_btree_commit_ifakeroot(cur); + + kmem_free((void *)cur->bc_ops); + cur->bc_ino.ifake = NULL; + cur->bc_ino.whichfork = whichfork; + cur->bc_ops = ops; + cur->bc_flags &= ~XFS_BTREE_STAGING; + cur->bc_tp = tp; +} + +/* + * Bulk Loading of Staged Btrees + * ============================= + * + * This interface is used with a staged btree cursor to create a totally new + * btree with a large number of records (i.e. more than what would fit in a + * single root block). When the creation is complete, the new root can be + * linked atomically into the filesystem by committing the staged cursor. + * + * Creation of a new btree proceeds roughly as follows: + * + * The first step is to initialize an appropriate fake btree root structure and + * then construct a staged btree cursor. Refer to the block comments about + * "Bulk Loading for AG Btrees" and "Bulk Loading for Inode-Rooted Btrees" for + * more information about how to do this. + * + * The second step is to initialize a struct xfs_btree_bload context as + * documented in the structure definition. + * + * The third step is to call xfs_btree_bload_compute_geometry to compute the + * height of and the number of blocks needed to construct the btree. See the + * section "Computing the Geometry of the New Btree" for details about this + * computation. + * + * In step four, the caller must allocate xfs_btree_bload.nr_blocks blocks and + * save them for later use by ->claim_block(). Bulk loading requires all + * blocks to be allocated beforehand to avoid ENOSPC failures midway through a + * rebuild, and to minimize seek distances of the new btree. + * + * Step five is to call xfs_btree_bload() to start constructing the btree. + * + * The final step is to commit the staging btree cursor, which logs the new + * btree root and turns the staging cursor into a regular cursor. The caller + * is responsible for cleaning up the previous btree blocks, if any. + * + * Computing the Geometry of the New Btree + * ======================================= + * + * The number of items placed in each btree block is computed via the following + * algorithm: For leaf levels, the number of items for the level is nr_records + * in the bload structure. For node levels, the number of items for the level + * is the number of blocks in the next lower level of the tree. For each + * level, the desired number of items per block is defined as: + * + * desired = max(minrecs, maxrecs - slack factor) + * + * The number of blocks for the level is defined to be: + * + * blocks = floor(nr_items / desired) + * + * Note this is rounded down so that the npb calculation below will never fall + * below minrecs. The number of items that will actually be loaded into each + * btree block is defined as: + * + * npb = nr_items / blocks + * + * Some of the leftmost blocks in the level will contain one extra record as + * needed to handle uneven division. If the number of records in any block + * would exceed maxrecs for that level, blocks is incremented and npb is + * recalculated. + * + * In other words, we compute the number of blocks needed to satisfy a given + * loading level, then spread the items as evenly as possible. + * + * The height and number of fs blocks required to create the btree are computed + * and returned via btree_height and nr_blocks. + */ + +/* + * Put a btree block that we're loading onto the ordered list and release it. + * The btree blocks will be written to disk when bulk loading is finished. + */ +static void +xfs_btree_bload_drop_buf( + struct list_head *buffers_list, + struct xfs_buf **bpp) +{ + if (*bpp == NULL) + return; + + if (!xfs_buf_delwri_queue(*bpp, buffers_list)) + ASSERT(0); + + xfs_buf_relse(*bpp); + *bpp = NULL; +} + +/* + * Allocate and initialize one btree block for bulk loading. + * + * The new btree block will have its level and numrecs fields set to the values + * of the level and nr_this_block parameters, respectively. + * + * The caller should ensure that ptrp, bpp, and blockp refer to the left + * sibling of the new block, if there is any. On exit, ptrp, bpp, and blockp + * will all point to the new block. + */ +STATIC int +xfs_btree_bload_prep_block( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + struct list_head *buffers_list, + unsigned int level, + unsigned int nr_this_block, + union xfs_btree_ptr *ptrp, /* in/out */ + struct xfs_buf **bpp, /* in/out */ + struct xfs_btree_block **blockp, /* in/out */ + void *priv) +{ + union xfs_btree_ptr new_ptr; + struct xfs_buf *new_bp; + struct xfs_btree_block *new_block; + int ret; + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); + size_t new_size; + + ASSERT(*bpp == NULL); + + /* Allocate a new incore btree root block. */ + new_size = bbl->iroot_size(cur, nr_this_block, priv); + ifp->if_broot = kmem_zalloc(new_size, 0); + ifp->if_broot_bytes = (int)new_size; + ifp->if_flags |= XFS_IFBROOT; + + /* Initialize it and send it out. */ + xfs_btree_init_block_int(cur->bc_mp, ifp->if_broot, + XFS_BUF_DADDR_NULL, cur->bc_btnum, level, + nr_this_block, cur->bc_ino.ip->i_ino, + cur->bc_flags); + + *bpp = NULL; + *blockp = ifp->if_broot; + xfs_btree_set_ptr_null(cur, ptrp); + return 0; + } + + /* Claim one of the caller's preallocated blocks. */ + xfs_btree_set_ptr_null(cur, &new_ptr); + ret = bbl->claim_block(cur, &new_ptr, priv); + if (ret) + return ret; + + ASSERT(!xfs_btree_ptr_is_null(cur, &new_ptr)); + + ret = xfs_btree_get_buf_block(cur, &new_ptr, &new_block, &new_bp); + if (ret) + return ret; + + /* + * The previous block (if any) is the left sibling of the new block, + * so set its right sibling pointer to the new block and drop it. + */ + if (*blockp) + xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB); + xfs_btree_bload_drop_buf(buffers_list, bpp); + + /* Initialize the new btree block. */ + xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block); + xfs_btree_set_sibling(cur, new_block, ptrp, XFS_BB_LEFTSIB); + + /* Set the out parameters. */ + *bpp = new_bp; + *blockp = new_block; + xfs_btree_copy_ptrs(cur, ptrp, &new_ptr, 1); + return 0; +} + +/* Load one leaf block. */ +STATIC int +xfs_btree_bload_leaf( + struct xfs_btree_cur *cur, + unsigned int recs_this_block, + xfs_btree_bload_get_record_fn get_record, + struct xfs_btree_block *block, + void *priv) +{ + unsigned int j; + int ret; + + /* Fill the leaf block with records. */ + for (j = 1; j <= recs_this_block; j++) { + union xfs_btree_rec *block_rec; + + ret = get_record(cur, priv); + if (ret) + return ret; + block_rec = xfs_btree_rec_addr(cur, j, block); + cur->bc_ops->init_rec_from_cur(cur, block_rec); + } + + return 0; +} + +/* + * Load one node block with key/ptr pairs. + * + * child_ptr must point to a block within the next level down in the tree. A + * key/ptr entry will be created in the new node block to the block pointed to + * by child_ptr. On exit, child_ptr points to the next block on the child + * level that needs processing. + */ +STATIC int +xfs_btree_bload_node( + struct xfs_btree_cur *cur, + unsigned int recs_this_block, + union xfs_btree_ptr *child_ptr, + struct xfs_btree_block *block) +{ + unsigned int j; + int ret; + + /* Fill the node block with keys and pointers. */ + for (j = 1; j <= recs_this_block; j++) { + union xfs_btree_key child_key; + union xfs_btree_ptr *block_ptr; + union xfs_btree_key *block_key; + struct xfs_btree_block *child_block; + struct xfs_buf *child_bp; + + ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr)); + + ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block, + &child_bp); + if (ret) + return ret; + + block_ptr = xfs_btree_ptr_addr(cur, j, block); + xfs_btree_copy_ptrs(cur, block_ptr, child_ptr, 1); + + block_key = xfs_btree_key_addr(cur, j, block); + xfs_btree_get_keys(cur, child_block, &child_key); + xfs_btree_copy_keys(cur, block_key, &child_key, 1); + + xfs_btree_get_sibling(cur, child_block, child_ptr, + XFS_BB_RIGHTSIB); + xfs_buf_relse(child_bp); + } + + return 0; +} + +/* + * Compute the maximum number of records (or keyptrs) per block that we want to + * install at this level in the btree. Caller is responsible for having set + * @cur->bc_ino.forksize to the desired fork size, if appropriate. + */ +STATIC unsigned int +xfs_btree_bload_max_npb( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + unsigned int level) +{ + unsigned int ret; + + if (level == cur->bc_nlevels - 1 && cur->bc_ops->get_dmaxrecs) + return cur->bc_ops->get_dmaxrecs(cur, level); + + ret = cur->bc_ops->get_maxrecs(cur, level); + if (level == 0) + ret -= bbl->leaf_slack; + else + ret -= bbl->node_slack; + return ret; +} + +/* + * Compute the desired number of records (or keyptrs) per block that we want to + * install at this level in the btree, which must be somewhere between minrecs + * and max_npb. The caller is free to install fewer records per block. + */ +STATIC unsigned int +xfs_btree_bload_desired_npb( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + unsigned int level) +{ + unsigned int npb = xfs_btree_bload_max_npb(cur, bbl, level); + + /* Root blocks are not subject to minrecs rules. */ + if (level == cur->bc_nlevels - 1) + return max(1U, npb); + + return max_t(unsigned int, cur->bc_ops->get_minrecs(cur, level), npb); +} + +/* + * Compute the number of records to be stored in each block at this level and + * the number of blocks for this level. For leaf levels, we must populate an + * empty root block even if there are no records, so we have to have at least + * one block. + */ +STATIC void +xfs_btree_bload_level_geometry( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + unsigned int level, + uint64_t nr_this_level, + unsigned int *avg_per_block, + uint64_t *blocks, + uint64_t *blocks_with_extra) +{ + uint64_t npb; + uint64_t dontcare; + unsigned int desired_npb; + unsigned int maxnr; + + maxnr = cur->bc_ops->get_maxrecs(cur, level); + + /* + * Compute the number of blocks we need to fill each block with the + * desired number of records/keyptrs per block. Because desired_npb + * could be minrecs, we use regular integer division (which rounds + * the block count down) so that in the next step the effective # of + * items per block will never be less than desired_npb. + */ + desired_npb = xfs_btree_bload_desired_npb(cur, bbl, level); + *blocks = div64_u64_rem(nr_this_level, desired_npb, &dontcare); + *blocks = max(1ULL, *blocks); + + /* + * Compute the number of records that we will actually put in each + * block, assuming that we want to spread the records evenly between + * the blocks. Take care that the effective # of items per block (npb) + * won't exceed maxrecs even for the blocks that get an extra record, + * since desired_npb could be maxrecs, and in the previous step we + * rounded the block count down. + */ + npb = div64_u64_rem(nr_this_level, *blocks, blocks_with_extra); + if (npb > maxnr || (npb == maxnr && *blocks_with_extra > 0)) { + (*blocks)++; + npb = div64_u64_rem(nr_this_level, *blocks, blocks_with_extra); + } + + *avg_per_block = min_t(uint64_t, npb, nr_this_level); + + trace_xfs_btree_bload_level_geometry(cur, level, nr_this_level, + *avg_per_block, desired_npb, *blocks, + *blocks_with_extra); +} + +/* + * Ensure a slack value is appropriate for the btree. + * + * If the slack value is negative, set slack so that we fill the block to + * halfway between minrecs and maxrecs. Make sure the slack is never so large + * that we can underflow minrecs. + */ +static void +xfs_btree_bload_ensure_slack( + struct xfs_btree_cur *cur, + int *slack, + int level) +{ + int maxr; + int minr; + + maxr = cur->bc_ops->get_maxrecs(cur, level); + minr = cur->bc_ops->get_minrecs(cur, level); + + /* + * If slack is negative, automatically set slack so that we load the + * btree block approximately halfway between minrecs and maxrecs. + * Generally, this will net us 75% loading. + */ + if (*slack < 0) + *slack = maxr - ((maxr + minr) >> 1); + + *slack = min(*slack, maxr - minr); +} + +/* + * Prepare a btree cursor for a bulk load operation by computing the geometry + * fields in bbl. Caller must ensure that the btree cursor is a staging + * cursor. This function can be called multiple times. + */ +int +xfs_btree_bload_compute_geometry( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + uint64_t nr_records) +{ + uint64_t nr_blocks = 0; + uint64_t nr_this_level; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + /* + * Make sure that the slack values make sense for traditional leaf and + * node blocks. Inode-rooted btrees will return different minrecs and + * maxrecs values for the root block (bc_nlevels == level - 1). We're + * checking levels 0 and 1 here, so set bc_nlevels such that the btree + * code doesn't interpret either as the root level. + */ + cur->bc_nlevels = XFS_BTREE_MAXLEVELS - 1; + xfs_btree_bload_ensure_slack(cur, &bbl->leaf_slack, 0); + xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1); + + bbl->nr_records = nr_this_level = nr_records; + for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) { + uint64_t level_blocks; + uint64_t dontcare64; + unsigned int level = cur->bc_nlevels - 1; + unsigned int avg_per_block; + + xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, + &avg_per_block, &level_blocks, &dontcare64); + + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + /* + * If all the items we want to store at this level + * would fit in the inode root block, then we have our + * btree root and are done. + * + * Note that bmap btrees forbid records in the root. + */ + if (level != 0 && nr_this_level <= avg_per_block) { + nr_blocks++; + break; + } + + /* + * Otherwise, we have to store all the items for this + * level in traditional btree blocks and therefore need + * another level of btree to point to those blocks. + * + * We have to re-compute the geometry for each level of + * an inode-rooted btree because the geometry differs + * between a btree root in an inode fork and a + * traditional btree block. + * + * This distinction is made in the btree code based on + * whether level == bc_nlevels - 1. Based on the + * previous root block size check against the root + * block geometry, we know that we aren't yet ready to + * populate the root. Increment bc_nevels and + * recalculate the geometry for a traditional + * block-based btree level. + */ + cur->bc_nlevels++; + xfs_btree_bload_level_geometry(cur, bbl, level, + nr_this_level, &avg_per_block, + &level_blocks, &dontcare64); + } else { + /* + * If all the items we want to store at this level + * would fit in a single root block, we're done. + */ + if (nr_this_level <= avg_per_block) { + nr_blocks++; + break; + } + + /* Otherwise, we need another level of btree. */ + cur->bc_nlevels++; + } + + nr_blocks += level_blocks; + nr_this_level = level_blocks; + } + + if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS) + return -EOVERFLOW; + + bbl->btree_height = cur->bc_nlevels; + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + bbl->nr_blocks = nr_blocks - 1; + else + bbl->nr_blocks = nr_blocks; + return 0; +} + +/* Bulk load a btree given the parameters and geometry established in bbl. */ +int +xfs_btree_bload( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + void *priv) +{ + struct list_head buffers_list; + union xfs_btree_ptr child_ptr; + union xfs_btree_ptr ptr; + struct xfs_buf *bp = NULL; + struct xfs_btree_block *block = NULL; + uint64_t nr_this_level = bbl->nr_records; + uint64_t blocks; + uint64_t i; + uint64_t blocks_with_extra; + uint64_t total_blocks = 0; + unsigned int avg_per_block; + unsigned int level = 0; + int ret; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + INIT_LIST_HEAD(&buffers_list); + cur->bc_nlevels = bbl->btree_height; + xfs_btree_set_ptr_null(cur, &child_ptr); + xfs_btree_set_ptr_null(cur, &ptr); + + xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, + &avg_per_block, &blocks, &blocks_with_extra); + + /* Load each leaf block. */ + for (i = 0; i < blocks; i++) { + unsigned int nr_this_block = avg_per_block; + + /* + * Due to rounding, btree blocks will not be evenly populated + * in most cases. blocks_with_extra tells us how many blocks + * will receive an extra record to distribute the excess across + * the current level as evenly as possible. + */ + if (i < blocks_with_extra) + nr_this_block++; + + ret = xfs_btree_bload_prep_block(cur, bbl, &buffers_list, level, + nr_this_block, &ptr, &bp, &block, priv); + if (ret) + goto out; + + trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr, + nr_this_block); + + ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record, + block, priv); + if (ret) + goto out; + + /* + * Record the leftmost leaf pointer so we know where to start + * with the first node level. + */ + if (i == 0) + xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1); + } + total_blocks += blocks; + xfs_btree_bload_drop_buf(&buffers_list, &bp); + + /* Populate the internal btree nodes. */ + for (level = 1; level < cur->bc_nlevels; level++) { + union xfs_btree_ptr first_ptr; + + nr_this_level = blocks; + block = NULL; + xfs_btree_set_ptr_null(cur, &ptr); + + xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, + &avg_per_block, &blocks, &blocks_with_extra); + + /* Load each node block. */ + for (i = 0; i < blocks; i++) { + unsigned int nr_this_block = avg_per_block; + + if (i < blocks_with_extra) + nr_this_block++; + + ret = xfs_btree_bload_prep_block(cur, bbl, + &buffers_list, level, nr_this_block, + &ptr, &bp, &block, priv); + if (ret) + goto out; + + trace_xfs_btree_bload_block(cur, level, i, blocks, + &ptr, nr_this_block); + + ret = xfs_btree_bload_node(cur, nr_this_block, + &child_ptr, block); + if (ret) + goto out; + + /* + * Record the leftmost node pointer so that we know + * where to start the next node level above this one. + */ + if (i == 0) + xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1); + } + total_blocks += blocks; + xfs_btree_bload_drop_buf(&buffers_list, &bp); + xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1); + } + + /* Initialize the new root. */ + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); + cur->bc_ino.ifake->if_levels = cur->bc_nlevels; + cur->bc_ino.ifake->if_blocks = total_blocks - 1; + } else { + cur->bc_ag.afake->af_root = be32_to_cpu(ptr.s); + cur->bc_ag.afake->af_levels = cur->bc_nlevels; + cur->bc_ag.afake->af_blocks = total_blocks; + } + + /* + * Write the new blocks to disk. If the ordered list isn't empty after + * that, then something went wrong and we have to fail. This should + * never happen, but we'll check anyway. + */ + ret = xfs_buf_delwri_submit(&buffers_list); + if (ret) + goto out; + if (!list_empty(&buffers_list)) { + ASSERT(list_empty(&buffers_list)); + ret = -EIO; + } + +out: + xfs_buf_delwri_cancel(&buffers_list); + if (bp) + xfs_buf_relse(bp); + return ret; +} diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h new file mode 100644 index 000000000000..643f0f9b2994 --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree_staging.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_BTREE_STAGING_H__ +#define __XFS_BTREE_STAGING_H__ + +/* Fake root for an AG-rooted btree. */ +struct xbtree_afakeroot { + /* AG block number of the new btree root. */ + xfs_agblock_t af_root; + + /* Height of the new btree. */ + unsigned int af_levels; + + /* Number of blocks used by the btree. */ + unsigned int af_blocks; +}; + +/* Cursor interactions with with fake roots for AG-rooted btrees. */ +void xfs_btree_stage_afakeroot(struct xfs_btree_cur *cur, + struct xbtree_afakeroot *afake); +void xfs_btree_commit_afakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp, + struct xfs_buf *agbp, const struct xfs_btree_ops *ops); + +/* Fake root for an inode-rooted btree. */ +struct xbtree_ifakeroot { + /* Fake inode fork. */ + struct xfs_ifork *if_fork; + + /* Number of blocks used by the btree. */ + int64_t if_blocks; + + /* Height of the new btree. */ + unsigned int if_levels; + + /* Number of bytes available for this fork in the inode. */ + unsigned int if_fork_size; + + /* Fork format. */ + unsigned int if_format; + + /* Number of records. */ + unsigned int if_extents; +}; + +/* Cursor interactions with with fake roots for inode-rooted btrees. */ +void xfs_btree_stage_ifakeroot(struct xfs_btree_cur *cur, + struct xbtree_ifakeroot *ifake, + struct xfs_btree_ops **new_ops); +void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp, + int whichfork, const struct xfs_btree_ops *ops); + +/* Bulk loading of staged btrees. */ +typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv); +typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, void *priv); +typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur, + unsigned int nr_this_level, void *priv); + +struct xfs_btree_bload { + /* + * This function will be called nr_records times to load records into + * the btree. The function does this by setting the cursor's bc_rec + * field in in-core format. Records must be returned in sort order. + */ + xfs_btree_bload_get_record_fn get_record; + + /* + * This function will be called nr_blocks times to obtain a pointer + * to a new btree block on disk. Callers must preallocate all space + * for the new btree before calling xfs_btree_bload, and this function + * is what claims that reservation. + */ + xfs_btree_bload_claim_block_fn claim_block; + + /* + * This function should return the size of the in-core btree root + * block. It is only necessary for XFS_BTREE_ROOT_IN_INODE btree + * types. + */ + xfs_btree_bload_iroot_size_fn iroot_size; + + /* + * The caller should set this to the number of records that will be + * stored in the new btree. + */ + uint64_t nr_records; + + /* + * Number of free records to leave in each leaf block. If the caller + * sets this to -1, the slack value will be calculated to be be halfway + * between maxrecs and minrecs. This typically leaves the block 75% + * full. Note that slack values are not enforced on inode root blocks. + */ + int leaf_slack; + + /* + * Number of free key/ptrs pairs to leave in each node block. This + * field has the same semantics as leaf_slack. + */ + int node_slack; + + /* + * The xfs_btree_bload_compute_geometry function will set this to the + * number of btree blocks needed to store nr_records records. + */ + uint64_t nr_blocks; + + /* + * The xfs_btree_bload_compute_geometry function will set this to the + * height of the new btree. + */ + unsigned int btree_height; +}; + +int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, uint64_t nr_records); +int xfs_btree_bload(struct xfs_btree_cur *cur, struct xfs_btree_bload *bbl, + void *priv); + +#endif /* __XFS_BTREE_STAGING_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 875e04f82541..897749c41f36 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -590,7 +590,7 @@ xfs_da3_split( node = oldblk->bp->b_addr; if (node->hdr.info.forw) { if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { - xfs_buf_corruption_error(oldblk->bp); + xfs_buf_mark_corrupt(oldblk->bp); error = -EFSCORRUPTED; goto out; } @@ -603,7 +603,7 @@ xfs_da3_split( node = oldblk->bp->b_addr; if (node->hdr.info.back) { if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { - xfs_buf_corruption_error(oldblk->bp); + xfs_buf_mark_corrupt(oldblk->bp); error = -EFSCORRUPTED; goto out; } @@ -1624,7 +1624,7 @@ xfs_da3_node_lookup_int( } if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { - xfs_buf_corruption_error(blk->bp); + xfs_buf_mark_corrupt(blk->bp); return -EFSCORRUPTED; } @@ -1639,7 +1639,7 @@ xfs_da3_node_lookup_int( /* Tree taller than we can handle; bail out! */ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { - xfs_buf_corruption_error(blk->bp); + xfs_buf_mark_corrupt(blk->bp); return -EFSCORRUPTED; } @@ -1647,7 +1647,7 @@ xfs_da3_node_lookup_int( if (blkno == args->geo->leafblk) expected_level = nodehdr.level - 1; else if (expected_level != nodehdr.level) { - xfs_buf_corruption_error(blk->bp); + xfs_buf_mark_corrupt(blk->bp); return -EFSCORRUPTED; } else expected_level--; @@ -1986,7 +1986,8 @@ xfs_da3_path_shift( ASSERT(path != NULL); ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); level = (path->active-1) - 1; /* skip bottom layer in path */ - for (blk = &path->blk[level]; level >= 0; blk--, level--) { + for (; level >= 0; level--) { + blk = &path->blk[level]; xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, blk->bp->b_addr); @@ -2520,8 +2521,10 @@ xfs_dabuf_map( */ if (nirecs > 1) { map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS); - if (!map) + if (!map) { + error = -ENOMEM; goto out_free_irecs; + } *mapp = map; } diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 0f4fbb0889ff..53e503b6f186 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -57,9 +57,10 @@ typedef struct xfs_da_args { const uint8_t *name; /* string (maybe not NULL terminated) */ int namelen; /* length of string (maybe no NULL) */ uint8_t filetype; /* filetype of inode for directories */ - uint8_t *value; /* set of bytes (maybe contain NULLs) */ + void *value; /* set of bytes (maybe contain NULLs) */ int valuelen; /* length of value */ - int flags; /* argument flags (eg: ATTR_NOCREATE) */ + unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */ + unsigned int attr_flags; /* XATTR_{CREATE,REPLACE} */ xfs_dahash_t hashval; /* hash value of name */ xfs_ino_t inumber; /* input/output inode number */ struct xfs_inode *dp; /* directory inode to manipulate */ @@ -88,8 +89,7 @@ typedef struct xfs_da_args { #define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */ #define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ #define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ -#define XFS_DA_OP_ALLOCVAL 0x0020 /* lookup to alloc buffer if found */ -#define XFS_DA_OP_INCOMPLETE 0x0040 /* lookup INCOMPLETE attr keys */ +#define XFS_DA_OP_NOTIME 0x0020 /* don't update inode timestamps */ #define XFS_DA_OP_FLAGS \ { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ @@ -97,8 +97,7 @@ typedef struct xfs_da_args { { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \ - { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" }, \ - { XFS_DA_OP_INCOMPLETE, "INCOMPLETE" } + { XFS_DA_OP_NOTIME, "NOTIME" } /* * Storage for holding state during Btree searches and split/join ops. diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 734837a9b51a..08c0a4d98b89 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -692,19 +692,7 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) #define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT) #define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) - -/* - * Conversion macros for converting namespace bits from argument flags - * to ondisk flags. - */ -#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE) #define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) -#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK) -#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK) -#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ - ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) -#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ - ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) /* * Alignment for namelist and valuelist entries (since they are mixed diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index d6ced59b9567..1dbf2f980a26 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -114,6 +114,23 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = { .verify_struct = xfs_dir3_block_verify, }; +static xfs_failaddr_t +xfs_dir3_block_header_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = dp->i_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (be64_to_cpu(hdr3->owner) != dp->i_ino) + return __this_address; + } + + return NULL; +} + int xfs_dir3_block_read( struct xfs_trans *tp, @@ -121,12 +138,24 @@ xfs_dir3_block_read( struct xfs_buf **bpp) { struct xfs_mount *mp = dp->i_mount; + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, 0, bpp, XFS_DATA_FORK, &xfs_dir3_block_buf_ops); - if (!err && tp && *bpp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); + if (err || !*bpp) + return err; + + /* Check things that we can't do in the verifier. */ + fa = xfs_dir3_block_header_check(dp, *bpp); + if (fa) { + __xfs_buf_mark_corrupt(*bpp, fa); + xfs_trans_brelse(tp, *bpp); + *bpp = NULL; + return -EFSCORRUPTED; + } + + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); return err; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index b9eba8213180..375b3edb2ad2 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -394,6 +394,22 @@ static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { .verify_write = xfs_dir3_data_write_verify, }; +static xfs_failaddr_t +xfs_dir3_data_header_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = dp->i_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_data_hdr *hdr3 = bp->b_addr; + + if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino) + return __this_address; + } + + return NULL; +} int xfs_dir3_data_read( @@ -403,12 +419,24 @@ xfs_dir3_data_read( unsigned int flags, struct xfs_buf **bpp) { + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, bno, flags, bpp, XFS_DATA_FORK, &xfs_dir3_data_buf_ops); - if (!err && tp && *bpp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); + if (err || !*bpp) + return err; + + /* Check things that we can't do in the verifier. */ + fa = xfs_dir3_data_header_check(dp, *bpp); + if (fa) { + __xfs_buf_mark_corrupt(*bpp, fa); + xfs_trans_brelse(tp, *bpp); + *bpp = NULL; + return -EFSCORRUPTED; + } + + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); return err; } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index a131b520aac7..95d2a3f92d75 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -1383,7 +1383,7 @@ xfs_dir2_leaf_removename( ltp = xfs_dir2_leaf_tail_p(geo, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[db]) != oldbest) { - xfs_buf_corruption_error(lbp); + xfs_buf_mark_corrupt(lbp); return -EFSCORRUPTED; } /* diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index a0cc5e240306..6ac4aad98cd7 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -194,6 +194,8 @@ xfs_dir3_free_header_check( return __this_address; if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) return __this_address; + if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino) + return __this_address; } else { struct xfs_dir2_free_hdr *hdr = bp->b_addr; @@ -226,8 +228,9 @@ __xfs_dir3_free_read( /* Check things that we can't do in the verifier. */ fa = xfs_dir3_free_header_check(dp, fbno, *bpp); if (fa) { - xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); + __xfs_buf_mark_corrupt(*bpp, fa); xfs_trans_brelse(tp, *bpp); + *bpp = NULL; return -EFSCORRUPTED; } @@ -439,7 +442,7 @@ xfs_dir2_leaf_to_node( ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); if (be32_to_cpu(ltp->bestcount) > (uint)dp->i_d.di_size / args->geo->blksize) { - xfs_buf_corruption_error(lbp); + xfs_buf_mark_corrupt(lbp); return -EFSCORRUPTED; } @@ -513,7 +516,7 @@ xfs_dir2_leafn_add( * into other peoples memory */ if (index < 0) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } @@ -800,7 +803,7 @@ xfs_dir2_leafn_lookup_for_entry( xfs_dir3_leaf_check(dp, bp); if (leafhdr.count <= 0) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 77e9fa385980..045556e78ee2 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -497,6 +497,23 @@ static inline bool xfs_sb_version_hascrc(struct xfs_sb *sbp) return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } +/* + * v5 file systems support V3 inodes only, earlier file systems support + * v2 and v1 inodes. + */ +static inline bool xfs_sb_version_has_v3inode(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; +} + +static inline bool xfs_dinode_good_version(struct xfs_sb *sbp, + uint8_t version) +{ + if (xfs_sb_version_has_v3inode(sbp)) + return version == 3; + return version == 1 || version == 2; +} + static inline bool xfs_sb_version_has_pquotino(struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; @@ -560,7 +577,6 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) #define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ #define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) -#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr)) #define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d)) #define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \ @@ -707,7 +723,6 @@ typedef struct xfs_agf { /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) -#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) /* * Size of the unlinked inode hash table in the agi. @@ -775,7 +790,6 @@ typedef struct xfs_agi { /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) -#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) /* * The third a.g. block contains the a.g. freelist, an array @@ -783,21 +797,15 @@ typedef struct xfs_agi { */ #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) -#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) +#define XFS_BUF_TO_AGFL(bp) ((struct xfs_agfl *)((bp)->b_addr)) -#define XFS_BUF_TO_AGFL_BNO(mp, bp) \ - (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ - &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \ - (__be32 *)(bp)->b_addr) - -typedef struct xfs_agfl { +struct xfs_agfl { __be32 agfl_magicnum; __be32 agfl_seqno; uuid_t agfl_uuid; __be64 agfl_lsn; __be32 agfl_crc; - __be32 agfl_bno[]; /* actually xfs_agfl_size(mp) */ -} __attribute__((packed)) xfs_agfl_t; +} __attribute__((packed)); #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) @@ -946,8 +954,12 @@ enum xfs_dinode_fmt { /* * Inode size for given fs. */ -#define XFS_LITINO(mp, version) \ - ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) +#define XFS_DINODE_SIZE(sbp) \ + (xfs_sb_version_has_v3inode(sbp) ? \ + sizeof(struct xfs_dinode) : \ + offsetof(struct xfs_dinode, di_crc)) +#define XFS_LITINO(mp) \ + ((mp)->m_sb.sb_inodesize - XFS_DINODE_SIZE(&(mp)->m_sb)) /* * Inode data & attribute fork sizes, per inode. @@ -956,13 +968,9 @@ enum xfs_dinode_fmt { #define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3)) #define XFS_DFORK_DSIZE(dip,mp) \ - (XFS_DFORK_Q(dip) ? \ - XFS_DFORK_BOFF(dip) : \ - XFS_LITINO(mp, (dip)->di_version)) + (XFS_DFORK_Q(dip) ? XFS_DFORK_BOFF(dip) : XFS_LITINO(mp)) #define XFS_DFORK_ASIZE(dip,mp) \ - (XFS_DFORK_Q(dip) ? \ - XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \ - 0) + (XFS_DFORK_Q(dip) ? XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : 0) #define XFS_DFORK_SIZE(dip,mp,w) \ ((w) == XFS_DATA_FORK ? \ XFS_DFORK_DSIZE(dip, mp) : \ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index ef95ca07d084..245188e4f6d3 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -568,10 +568,40 @@ typedef struct xfs_fsop_setdm_handlereq { struct fsdmidata __user *data; /* DMAPI data */ } xfs_fsop_setdm_handlereq_t; +/* + * Flags passed in xfs_attr_multiop.am_flags for the attr ioctl interface. + * + * NOTE: Must match the values declared in libattr without the XFS_IOC_ prefix. + */ +#define XFS_IOC_ATTR_ROOT 0x0002 /* use attrs in root namespace */ +#define XFS_IOC_ATTR_SECURE 0x0008 /* use attrs in security namespace */ +#define XFS_IOC_ATTR_CREATE 0x0010 /* fail if attr already exists */ +#define XFS_IOC_ATTR_REPLACE 0x0020 /* fail if attr does not exist */ + typedef struct xfs_attrlist_cursor { __u32 opaque[4]; } xfs_attrlist_cursor_t; +/* + * Define how lists of attribute names are returned to userspace from the + * XFS_IOC_ATTRLIST_BY_HANDLE ioctl. struct xfs_attrlist is the header at the + * beginning of the returned buffer, and a each entry in al_offset contains the + * relative offset of an xfs_attrlist_ent containing the actual entry. + * + * NOTE: struct xfs_attrlist must match struct attrlist defined in libattr, and + * struct xfs_attrlist_ent must match struct attrlist_ent defined in libattr. + */ +struct xfs_attrlist { + __s32 al_count; /* number of entries in attrlist */ + __s32 al_more; /* T/F: more attrs (do call again) */ + __s32 al_offset[1]; /* byte offsets of attrs [var-sized] */ +}; + +struct xfs_attrlist_ent { /* data from attr_list() */ + __u32 a_valuelen; /* number bytes in value of attr */ + char a_name[1]; /* attr name (NULL terminated) */ +}; + typedef struct xfs_fsop_attrlist_handlereq { struct xfs_fsop_handlereq hreq; /* handle interface structure */ struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ @@ -589,7 +619,7 @@ typedef struct xfs_attr_multiop { void __user *am_attrname; void __user *am_attrvalue; __u32 am_length; - __u32 am_flags; + __u32 am_flags; /* XFS_IOC_ATTR_* */ } xfs_attr_multiop_t; typedef struct xfs_fsop_attrmulti_handlereq { diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index bf161e930f1d..7fcf62b324b0 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -105,7 +105,7 @@ xfs_inobt_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_private.a.agno; + xfs_agnumber_t agno = cur->bc_ag.agno; union xfs_btree_rec *rec; int error; uint64_t realfree; @@ -177,7 +177,7 @@ xfs_inobt_insert( xfs_btnum_t btnum) { struct xfs_btree_cur *cur; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agino_t thisino; int i; @@ -304,7 +304,7 @@ xfs_ialloc_inode_init( * That means for v3 inode we log the entire buffer rather than just the * inode cores. */ - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { version = 3; ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno)); @@ -339,7 +339,7 @@ xfs_ialloc_inode_init( xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { int ioffset = i << mp->m_sb.sb_inodelog; - uint isize = xfs_dinode_size(version); + uint isize = XFS_DINODE_SIZE(&mp->m_sb); free = xfs_make_iptr(mp, fbuf, i); free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); @@ -525,7 +525,7 @@ xfs_inobt_insert_sprec( bool merge) /* merge or replace */ { struct xfs_btree_cur *cur; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); int error; int i; @@ -658,7 +658,7 @@ xfs_ialloc_ag_alloc( * chunk of inodes. If the filesystem is striped, this will fill * an entire stripe unit with inodes. */ - agi = XFS_BUF_TO_AGI(agbp); + agi = agbp->b_addr; newino = be32_to_cpu(agi->agi_newino); agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + @@ -1130,7 +1130,7 @@ xfs_dialloc_ag_inobt( xfs_ino_t *inop) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); @@ -1583,7 +1583,7 @@ xfs_dialloc_ag( xfs_ino_t *inop) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); @@ -1943,7 +1943,7 @@ xfs_difree_inobt( struct xfs_icluster *xic, struct xfs_inobt_rec_incore *orec) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); struct xfs_perag *pag; struct xfs_btree_cur *cur; @@ -2079,7 +2079,7 @@ xfs_difree_finobt( xfs_agino_t agino, struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; @@ -2489,9 +2489,8 @@ xfs_ialloc_log_agi( sizeof(xfs_agi_t) }; #ifdef DEBUG - xfs_agi_t *agi; /* allocation group header */ + struct xfs_agi *agi = bp->b_addr; - agi = XFS_BUF_TO_AGI(bp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif @@ -2523,14 +2522,13 @@ xfs_agi_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; - struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); + struct xfs_agi *agi = bp->b_addr; int i; if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (!xfs_log_check_lsn(mp, - be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) + if (!xfs_log_check_lsn(mp, be64_to_cpu(agi->agi_lsn))) return __this_address; } @@ -2593,6 +2591,7 @@ xfs_agi_write_verify( { struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_agi *agi = bp->b_addr; xfs_failaddr_t fa; fa = xfs_agi_verify(bp); @@ -2605,7 +2604,7 @@ xfs_agi_write_verify( return; if (bip) - XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); + agi->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); } @@ -2661,7 +2660,7 @@ xfs_ialloc_read_agi( if (error) return error; - agi = XFS_BUF_TO_AGI(*bpp); + agi = (*bpp)->b_addr; pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); @@ -2873,7 +2872,7 @@ xfs_ialloc_setup_geometry( * cannot change the behavior. */ igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { int new_size = igeo->inode_cluster_size_raw; new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index b82992f795aa..b2c122ad8f0e 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -12,6 +12,7 @@ #include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" @@ -20,7 +21,6 @@ #include "xfs_trans.h" #include "xfs_rmap.h" - STATIC int xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, @@ -34,7 +34,7 @@ xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_ag.agbp, cur->bc_ag.agno, cur->bc_btnum); } @@ -44,8 +44,8 @@ xfs_inobt_set_root( union xfs_btree_ptr *nptr, int inc) /* level change */ { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agi *agi = agbp->b_addr; agi->agi_root = nptr->s; be32_add_cpu(&agi->agi_level, inc); @@ -58,8 +58,8 @@ xfs_finobt_set_root( union xfs_btree_ptr *nptr, int inc) /* level change */ { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agi *agi = agbp->b_addr; agi->agi_free_root = nptr->s; be32_add_cpu(&agi->agi_free_level, inc); @@ -83,7 +83,7 @@ __xfs_inobt_alloc_block( args.tp = cur->bc_tp; args.mp = cur->bc_mp; args.oinfo = XFS_RMAP_OINFO_INOBT; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_ag.agno, sbno); args.minlen = 1; args.maxlen = 1; args.prod = 1; @@ -212,9 +212,9 @@ xfs_inobt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + struct xfs_agi *agi = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agi->agi_seqno)); ptr->s = agi->agi_root; } @@ -224,9 +224,9 @@ xfs_finobt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + struct xfs_agi *agi = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agi->agi_seqno)); ptr->s = agi->agi_free_root; } @@ -400,32 +400,27 @@ static const struct xfs_btree_ops xfs_finobt_ops = { }; /* - * Allocate a new inode btree cursor. + * Initialize a new inode btree cursor. */ -struct xfs_btree_cur * /* new inode btree cursor */ -xfs_inobt_init_cursor( +static struct xfs_btree_cur * +xfs_inobt_init_common( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for agi structure */ xfs_agnumber_t agno, /* allocation group number */ xfs_btnum_t btnum) /* ialloc or free ino btree */ { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); struct xfs_btree_cur *cur; cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); - cur->bc_tp = tp; cur->bc_mp = mp; cur->bc_btnum = btnum; if (btnum == XFS_BTNUM_INO) { - cur->bc_nlevels = be32_to_cpu(agi->agi_level); - cur->bc_ops = &xfs_inobt_ops; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); + cur->bc_ops = &xfs_inobt_ops; } else { - cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); - cur->bc_ops = &xfs_finobt_ops; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2); + cur->bc_ops = &xfs_finobt_ops; } cur->bc_blocklog = mp->m_sb.sb_blocklog; @@ -433,12 +428,75 @@ xfs_inobt_init_cursor( if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; + cur->bc_ag.agno = agno; + return cur; +} + +/* Create an inode btree cursor. */ +struct xfs_btree_cur * +xfs_inobt_init_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + struct xfs_agi *agi = agbp->b_addr; + cur = xfs_inobt_init_common(mp, tp, agno, btnum); + if (btnum == XFS_BTNUM_INO) + cur->bc_nlevels = be32_to_cpu(agi->agi_level); + else + cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); + cur->bc_ag.agbp = agbp; return cur; } +/* Create an inode btree cursor with a fake root for staging. */ +struct xfs_btree_cur * +xfs_inobt_stage_cursor( + struct xfs_mount *mp, + struct xbtree_afakeroot *afake, + xfs_agnumber_t agno, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + + cur = xfs_inobt_init_common(mp, NULL, agno, btnum); + xfs_btree_stage_afakeroot(cur, afake); + return cur; +} + +/* + * Install a new inobt btree root. Caller is responsible for invalidating + * and freeing the old btree blocks. + */ +void +xfs_inobt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp) +{ + struct xfs_agi *agi = agbp->b_addr; + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + if (cur->bc_btnum == XFS_BTNUM_INO) { + agi->agi_root = cpu_to_be32(afake->af_root); + agi->agi_level = cpu_to_be32(afake->af_levels); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_inobt_ops); + } else { + agi->agi_free_root = cpu_to_be32(afake->af_root); + agi->agi_free_level = cpu_to_be32(afake->af_levels); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREE_ROOT | + XFS_AGI_FREE_LEVEL); + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_finobt_ops); + } +} + /* * Calculate number of records in an inobt btree block. */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 951305ecaae1..35bbd978c272 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -48,6 +48,9 @@ struct xfs_mount; extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, xfs_btnum_t); +struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_mount *mp, + struct xbtree_afakeroot *afake, xfs_agnumber_t agno, + xfs_btnum_t btnum); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); /* ir_holemask to inode allocation bitmap conversion */ @@ -68,4 +71,7 @@ int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_btnum_t btnum, struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); +void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, struct xfs_buf *agbp); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 8afacfe4be0a..39c5a6e24915 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -44,17 +44,6 @@ xfs_inobp_check( } #endif -bool -xfs_dinode_good_version( - struct xfs_mount *mp, - __u8 version) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return version == 3; - - return version == 1 || version == 2; -} - /* * If we are doing readahead on an inode buffer, we might be in log recovery * reading an inode allocation buffer that hasn't yet been replayed, and hence @@ -93,7 +82,7 @@ xfs_inode_buf_verify( dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); unlinked_ino = be32_to_cpu(dip->di_next_unlinked); di_ok = xfs_verify_magic16(bp, dip->di_magic) && - xfs_dinode_good_version(mp, dip->di_version) && + xfs_dinode_good_version(&mp->m_sb, dip->di_version) && xfs_verify_agino_or_null(mp, agno, unlinked_ino); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP))) { @@ -205,16 +194,14 @@ xfs_inode_from_disk( struct xfs_icdinode *to = &ip->i_d; struct inode *inode = VFS_I(ip); - /* * Convert v1 inodes immediately to v2 inode format as this is the * minimum inode version format we support in the rest of the code. + * They will also be unconditionally written back to disk as v2 inodes. */ - to->di_version = from->di_version; - if (to->di_version == 1) { + if (unlikely(from->di_version == 1)) { set_nlink(inode, be16_to_cpu(from->di_onlink)); to->di_projid = 0; - to->di_version = 2; } else { set_nlink(inode, be32_to_cpu(from->di_nlink)); to->di_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | @@ -222,8 +209,8 @@ xfs_inode_from_disk( } to->di_format = from->di_format; - to->di_uid = be32_to_cpu(from->di_uid); - to->di_gid = be32_to_cpu(from->di_gid); + i_uid_write(inode, be32_to_cpu(from->di_uid)); + i_gid_write(inode, be32_to_cpu(from->di_gid)); to->di_flushiter = be16_to_cpu(from->di_flushiter); /* @@ -252,7 +239,7 @@ xfs_inode_from_disk( to->di_dmstate = be16_to_cpu(from->di_dmstate); to->di_flags = be16_to_cpu(from->di_flags); - if (to->di_version == 3) { + if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { inode_set_iversion_queried(inode, be64_to_cpu(from->di_changecount)); to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec); @@ -274,10 +261,9 @@ xfs_inode_to_disk( to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); to->di_onlink = 0; - to->di_version = from->di_version; to->di_format = from->di_format; - to->di_uid = cpu_to_be32(from->di_uid); - to->di_gid = cpu_to_be32(from->di_gid); + to->di_uid = cpu_to_be32(i_uid_read(inode)); + to->di_gid = cpu_to_be32(i_gid_read(inode)); to->di_projid_lo = cpu_to_be16(from->di_projid & 0xffff); to->di_projid_hi = cpu_to_be16(from->di_projid >> 16); @@ -303,7 +289,8 @@ xfs_inode_to_disk( to->di_dmstate = cpu_to_be16(from->di_dmstate); to->di_flags = cpu_to_be16(from->di_flags); - if (from->di_version == 3) { + if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { + to->di_version = 3; to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); @@ -315,6 +302,7 @@ xfs_inode_to_disk( uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); to->di_flushiter = 0; } else { + to->di_version = 2; to->di_flushiter = cpu_to_be16(from->di_flushiter); } } @@ -428,7 +416,7 @@ xfs_dinode_verify_forkoff( case XFS_DINODE_FMT_LOCAL: /* fall through ... */ case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ case XFS_DINODE_FMT_BTREE: - if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3)) + if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3)) return __this_address; break; default: @@ -454,7 +442,7 @@ xfs_dinode_verify( /* Verify v3 integrity information first */ if (dip->di_version >= 3) { - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_sb_version_has_v3inode(&mp->m_sb)) return __this_address; if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF)) @@ -629,10 +617,9 @@ xfs_iread( /* shortcut IO on inode allocation if possible */ if ((iget_flags & XFS_IGET_CREATE) && - xfs_sb_version_hascrc(&mp->m_sb) && + xfs_sb_version_has_v3inode(&mp->m_sb) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { VFS_I(ip)->i_generation = prandom_u32(); - ip->i_d.di_version = 3; return 0; } @@ -674,7 +661,6 @@ xfs_iread( * Partial initialisation of the in-core inode. Just the bits * that xfs_ialloc won't overwrite or relies on being correct. */ - ip->i_d.di_version = dip->di_version; VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen); ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); @@ -688,7 +674,6 @@ xfs_iread( VFS_I(ip)->i_mode = 0; } - ASSERT(ip->i_d.di_version >= 2); ip->i_delayed_blks = 0; /* diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index fd94b1078722..9b373dcf9e34 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -16,11 +16,8 @@ struct xfs_dinode; * format specific structures at the appropriate time. */ struct xfs_icdinode { - int8_t di_version; /* inode version */ int8_t di_format; /* format of di_c data */ uint16_t di_flushiter; /* incremented on flush */ - uint32_t di_uid; /* owner's user id */ - uint32_t di_gid; /* owner's group id */ uint32_t di_projid; /* owner's project id */ xfs_fsize_t di_size; /* number of bytes in file */ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ @@ -61,8 +58,6 @@ void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, struct xfs_dinode *to); -bool xfs_dinode_good_version(struct xfs_mount *mp, __u8 version); - #if defined(DEBUG) void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #else diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index ad2b9c313fd2..518c6f0ec3a6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -183,7 +183,7 @@ xfs_iformat_local( */ if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad size %d for local fork, size = %d).", + "corrupt inode %Lu (bad size %d for local fork, size = %zd).", (unsigned long long) ip->i_ino, size, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); xfs_inode_verifier_error(ip, -EFSCORRUPTED, diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 500333d0101e..668ee942be22 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -46,14 +46,9 @@ struct xfs_ifork { (ip)->i_afp : \ (ip)->i_cowfp)) #define XFS_IFORK_DSIZE(ip) \ - (XFS_IFORK_Q(ip) ? \ - XFS_IFORK_BOFF(ip) : \ - XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version)) + (XFS_IFORK_Q(ip) ? XFS_IFORK_BOFF(ip) : XFS_LITINO((ip)->i_mount)) #define XFS_IFORK_ASIZE(ip) \ - (XFS_IFORK_Q(ip) ? \ - XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \ - XFS_IFORK_BOFF(ip) : \ - 0) + (XFS_IFORK_Q(ip) ? XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : 0) #define XFS_IFORK_SIZE(ip,w) \ ((w) == XFS_DATA_FORK ? \ XFS_IFORK_DSIZE(ip) : \ diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 9bac0d2e56dc..e3400c9c71cd 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -424,12 +424,10 @@ struct xfs_log_dinode { /* structure must be padded to 64 bit alignment */ }; -static inline uint xfs_log_dinode_size(int version) -{ - if (version == 3) - return sizeof(struct xfs_log_dinode); - return offsetof(struct xfs_log_dinode, di_next_unlinked); -} +#define xfs_log_dinode_size(mp) \ + (xfs_sb_version_has_v3inode(&(mp)->m_sb) ? \ + sizeof(struct xfs_log_dinode) : \ + offsetof(struct xfs_log_dinode, di_next_unlinked)) /* * Buffer Log Format definitions diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 6e1665f2cb67..2076627243b0 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -46,7 +46,7 @@ xfs_refcount_lookup_le( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -63,7 +63,7 @@ xfs_refcount_lookup_ge( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, XFS_LOOKUP_GE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -80,7 +80,7 @@ xfs_refcount_lookup_eq( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -108,7 +108,7 @@ xfs_refcount_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_private.a.agno; + xfs_agnumber_t agno = cur->bc_ag.agno; union xfs_btree_rec *rec; int error; xfs_agblock_t realstart; @@ -119,7 +119,7 @@ xfs_refcount_get_rec( xfs_refcount_btrec_to_irec(rec, irec); - agno = cur->bc_private.a.agno; + agno = cur->bc_ag.agno; if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; @@ -144,7 +144,7 @@ xfs_refcount_get_rec( if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) goto out_bad_rec; - trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec); + trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.agno, irec); return 0; out_bad_rec: @@ -169,14 +169,14 @@ xfs_refcount_update( union xfs_btree_rec rec; int error; - trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec); + trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.agno, irec); rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); error = xfs_btree_update(cur, &rec); if (error) trace_xfs_refcount_update_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -193,7 +193,7 @@ xfs_refcount_insert( { int error; - trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec); + trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.agno, irec); cur->bc_rec.rc.rc_startblock = irec->rc_startblock; cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; cur->bc_rec.rc.rc_refcount = irec->rc_refcount; @@ -208,7 +208,7 @@ xfs_refcount_insert( out_error: if (error) trace_xfs_refcount_insert_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -234,7 +234,7 @@ xfs_refcount_delete( error = -EFSCORRUPTED; goto out_error; } - trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec); + trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.agno, &irec); error = xfs_btree_delete(cur, i); if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { error = -EFSCORRUPTED; @@ -246,7 +246,7 @@ xfs_refcount_delete( out_error: if (error) trace_xfs_refcount_delete_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -366,7 +366,7 @@ xfs_refcount_split_extent( return 0; *shape_changed = true; - trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.agno, &rcext, agbno); /* Establish the right extent. */ @@ -391,7 +391,7 @@ xfs_refcount_split_extent( out_error: trace_xfs_refcount_split_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -411,7 +411,7 @@ xfs_refcount_merge_center_extents( int found_rec; trace_xfs_refcount_merge_center_extents(cur->bc_mp, - cur->bc_private.a.agno, left, center, right); + cur->bc_ag.agno, left, center, right); /* * Make sure the center and right extents are not in the btree. @@ -468,7 +468,7 @@ xfs_refcount_merge_center_extents( out_error: trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -487,7 +487,7 @@ xfs_refcount_merge_left_extent( int found_rec; trace_xfs_refcount_merge_left_extent(cur->bc_mp, - cur->bc_private.a.agno, left, cleft); + cur->bc_ag.agno, left, cleft); /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { @@ -530,7 +530,7 @@ xfs_refcount_merge_left_extent( out_error: trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -548,7 +548,7 @@ xfs_refcount_merge_right_extent( int found_rec; trace_xfs_refcount_merge_right_extent(cur->bc_mp, - cur->bc_private.a.agno, cright, right); + cur->bc_ag.agno, cright, right); /* * If the extent ending at agbno+aglen (cright) wasn't synthesized, @@ -594,7 +594,7 @@ xfs_refcount_merge_right_extent( out_error: trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -679,13 +679,13 @@ xfs_refcount_find_left_extents( cleft->rc_blockcount = aglen; cleft->rc_refcount = 1; } - trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.agno, left, cleft, agbno); return error; out_error: trace_xfs_refcount_find_left_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -768,13 +768,13 @@ xfs_refcount_find_right_extents( cright->rc_blockcount = aglen; cright->rc_refcount = 1; } - trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.agno, cright, right, agbno + aglen); return error; out_error: trace_xfs_refcount_find_right_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -883,7 +883,7 @@ xfs_refcount_still_have_space( { unsigned long overhead; - overhead = cur->bc_private.a.priv.refc.shape_changes * + overhead = cur->bc_ag.refc.shape_changes * xfs_allocfree_log_count(cur->bc_mp, 1); overhead *= cur->bc_mp->m_sb.sb_blocksize; @@ -891,17 +891,17 @@ xfs_refcount_still_have_space( * Only allow 2 refcount extent updates per transaction if the * refcount continue update "error" has been injected. */ - if (cur->bc_private.a.priv.refc.nr_ops > 2 && + if (cur->bc_ag.refc.nr_ops > 2 && XFS_TEST_ERROR(false, cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) return false; - if (cur->bc_private.a.priv.refc.nr_ops == 0) + if (cur->bc_ag.refc.nr_ops == 0) return true; else if (overhead > cur->bc_tp->t_log_res) return false; return cur->bc_tp->t_log_res - overhead > - cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; + cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; } /* @@ -952,7 +952,7 @@ xfs_refcount_adjust_extents( ext.rc_startblock - *agbno); tmp.rc_refcount = 1 + adj; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_private.a.agno, &tmp); + cur->bc_ag.agno, &tmp); /* * Either cover the hole (increment) or @@ -968,10 +968,10 @@ xfs_refcount_adjust_extents( error = -EFSCORRUPTED; goto out_error; } - cur->bc_private.a.priv.refc.nr_ops++; + cur->bc_ag.refc.nr_ops++; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, - cur->bc_private.a.agno, + cur->bc_ag.agno, tmp.rc_startblock); xfs_bmap_add_free(cur->bc_tp, fsbno, tmp.rc_blockcount, oinfo); @@ -998,12 +998,12 @@ xfs_refcount_adjust_extents( goto skip; ext.rc_refcount += adj; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_private.a.agno, &ext); + cur->bc_ag.agno, &ext); if (ext.rc_refcount > 1) { error = xfs_refcount_update(cur, &ext); if (error) goto out_error; - cur->bc_private.a.priv.refc.nr_ops++; + cur->bc_ag.refc.nr_ops++; } else if (ext.rc_refcount == 1) { error = xfs_refcount_delete(cur, &found_rec); if (error) @@ -1012,11 +1012,11 @@ xfs_refcount_adjust_extents( error = -EFSCORRUPTED; goto out_error; } - cur->bc_private.a.priv.refc.nr_ops++; + cur->bc_ag.refc.nr_ops++; goto advloop; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, - cur->bc_private.a.agno, + cur->bc_ag.agno, ext.rc_startblock); xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, oinfo); @@ -1035,7 +1035,7 @@ advloop: return error; out_error: trace_xfs_refcount_modify_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1057,10 +1057,10 @@ xfs_refcount_adjust( *new_agbno = agbno; *new_aglen = aglen; if (adj == XFS_REFCOUNT_ADJUST_INCREASE) - trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.agno, agbno, aglen); else - trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.agno, agbno, aglen); /* @@ -1088,7 +1088,7 @@ xfs_refcount_adjust( if (shape_changed) shape_changes++; if (shape_changes) - cur->bc_private.a.priv.refc.shape_changes++; + cur->bc_ag.refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, @@ -1099,7 +1099,7 @@ xfs_refcount_adjust( return 0; out_error: - trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1115,7 +1115,7 @@ xfs_refcount_finish_one_cleanup( if (rcur == NULL) return; - agbp = rcur->bc_private.a.agbp; + agbp = rcur->bc_ag.agbp; xfs_btree_del_cursor(rcur, error); if (error) xfs_trans_brelse(tp, agbp); @@ -1165,9 +1165,9 @@ xfs_refcount_finish_one( * the startblock, get one now. */ rcur = *pcur; - if (rcur != NULL && rcur->bc_private.a.agno != agno) { - nr_ops = rcur->bc_private.a.priv.refc.nr_ops; - shape_changes = rcur->bc_private.a.priv.refc.shape_changes; + if (rcur != NULL && rcur->bc_ag.agno != agno) { + nr_ops = rcur->bc_ag.refc.nr_ops; + shape_changes = rcur->bc_ag.refc.shape_changes; xfs_refcount_finish_one_cleanup(tp, rcur, 0); rcur = NULL; *pcur = NULL; @@ -1183,8 +1183,8 @@ xfs_refcount_finish_one( error = -ENOMEM; goto out_cur; } - rcur->bc_private.a.priv.refc.nr_ops = nr_ops; - rcur->bc_private.a.priv.refc.shape_changes = shape_changes; + rcur->bc_ag.refc.nr_ops = nr_ops; + rcur->bc_ag.refc.shape_changes = shape_changes; } *pcur = rcur; @@ -1303,7 +1303,7 @@ xfs_refcount_find_shared( int have; int error; - trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.agno, agbno, aglen); /* By default, skip the whole range */ @@ -1383,12 +1383,12 @@ xfs_refcount_find_shared( done: trace_xfs_refcount_find_shared_result(cur->bc_mp, - cur->bc_private.a.agno, *fbno, *flen); + cur->bc_ag.agno, *fbno, *flen); out_error: if (error) trace_xfs_refcount_find_shared_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1485,7 +1485,7 @@ xfs_refcount_adjust_cow_extents( tmp.rc_blockcount = aglen; tmp.rc_refcount = 1; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_private.a.agno, &tmp); + cur->bc_ag.agno, &tmp); error = xfs_refcount_insert(cur, &tmp, &found_tmp); @@ -1513,7 +1513,7 @@ xfs_refcount_adjust_cow_extents( ext.rc_refcount = 0; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_private.a.agno, &ext); + cur->bc_ag.agno, &ext); error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; @@ -1529,7 +1529,7 @@ xfs_refcount_adjust_cow_extents( return error; out_error: trace_xfs_refcount_modify_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1575,7 +1575,7 @@ xfs_refcount_adjust_cow( return 0; out_error: - trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1589,7 +1589,7 @@ __xfs_refcount_cow_alloc( xfs_agblock_t agbno, xfs_extlen_t aglen) { - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, + trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.agno, agbno, aglen); /* Add refcount btree reservation */ @@ -1606,7 +1606,7 @@ __xfs_refcount_cow_free( xfs_agblock_t agbno, xfs_extlen_t aglen) { - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, + trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.agno, agbno, aglen); /* Remove refcount btree reservation */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 38529dbacd55..7fd6044a4f78 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -12,6 +12,7 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_refcount_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -25,7 +26,7 @@ xfs_refcountbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno); + cur->bc_ag.agbp, cur->bc_ag.agno); } STATIC void @@ -34,8 +35,8 @@ xfs_refcountbt_set_root( union xfs_btree_ptr *ptr, int inc) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); @@ -57,8 +58,8 @@ xfs_refcountbt_alloc_block( union xfs_btree_ptr *new, int *stat) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; struct xfs_alloc_arg args; /* block allocation args */ int error; /* error return value */ @@ -66,7 +67,7 @@ xfs_refcountbt_alloc_block( args.tp = cur->bc_tp; args.mp = cur->bc_mp; args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, + args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.agno, xfs_refc_block(args.mp)); args.oinfo = XFS_RMAP_OINFO_REFC; args.minlen = args.maxlen = args.prod = 1; @@ -75,13 +76,13 @@ xfs_refcountbt_alloc_block( error = xfs_alloc_vextent(&args); if (error) goto out_error; - trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.agno, args.agbno, 1); if (args.fsbno == NULLFSBLOCK) { *stat = 0; return 0; } - ASSERT(args.agno == cur->bc_private.a.agno); + ASSERT(args.agno == cur->bc_ag.agno); ASSERT(args.len == 1); new->s = cpu_to_be32(args.agbno); @@ -101,12 +102,12 @@ xfs_refcountbt_free_block( struct xfs_buf *bp) { struct xfs_mount *mp = cur->bc_mp; - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); int error; - trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.agno, XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); @@ -169,9 +170,9 @@ xfs_refcountbt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_refcount_root; } @@ -311,41 +312,90 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = { }; /* - * Allocate a new refcount btree cursor. + * Initialize a new refcount btree cursor. */ -struct xfs_btree_cur * -xfs_refcountbt_init_cursor( +static struct xfs_btree_cur * +xfs_refcountbt_init_common( struct xfs_mount *mp, struct xfs_trans *tp, - struct xfs_buf *agbp, xfs_agnumber_t agno) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); struct xfs_btree_cur *cur; ASSERT(agno != NULLAGNUMBER); ASSERT(agno < mp->m_sb.sb_agcount); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; cur->bc_btnum = XFS_BTNUM_REFC; cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_refcountbt_ops; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); - cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); - - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; + cur->bc_ag.agno = agno; cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_private.a.priv.refc.nr_ops = 0; - cur->bc_private.a.priv.refc.shape_changes = 0; + cur->bc_ag.refc.nr_ops = 0; + cur->bc_ag.refc.shape_changes = 0; + cur->bc_ops = &xfs_refcountbt_ops; + return cur; +} +/* Create a btree cursor. */ +struct xfs_btree_cur * +xfs_refcountbt_init_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xfs_btree_cur *cur; + + cur = xfs_refcountbt_init_common(mp, tp, agno); + cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); + cur->bc_ag.agbp = agbp; return cur; } +/* Create a btree cursor with a fake root for staging. */ +struct xfs_btree_cur * +xfs_refcountbt_stage_cursor( + struct xfs_mount *mp, + struct xbtree_afakeroot *afake, + xfs_agnumber_t agno) +{ + struct xfs_btree_cur *cur; + + cur = xfs_refcountbt_init_common(mp, NULL, agno); + xfs_btree_stage_afakeroot(cur, afake); + return cur; +} + +/* + * Swap in the new btree root. Once we pass this point the newly rebuilt btree + * is in place and we have to kill off all the old btree blocks. + */ +void +xfs_refcountbt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + agf->agf_refcount_root = cpu_to_be32(afake->af_root); + agf->agf_refcount_level = cpu_to_be32(afake->af_levels); + agf->agf_refcount_blocks = cpu_to_be32(afake->af_blocks); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_REFCOUNT_BLOCKS | + XFS_AGF_REFCOUNT_ROOT | + XFS_AGF_REFCOUNT_LEVEL); + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_refcountbt_ops); +} + /* * Calculate the number of records in a refcount btree block. */ @@ -420,7 +470,7 @@ xfs_refcountbt_calc_reserves( if (error) return error; - agf = XFS_BUF_TO_AGF(agbp); + agf = agbp->b_addr; agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_refcount_blocks); xfs_trans_brelse(tp, agbp); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index ba416f71c824..69dc515db671 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -13,6 +13,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xbtree_afakeroot; /* * Btree block header size @@ -46,6 +47,8 @@ struct xfs_mount; extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno); +struct xfs_btree_cur *xfs_refcountbt_stage_cursor(struct xfs_mount *mp, + struct xbtree_afakeroot *afake, xfs_agnumber_t agno); extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf); extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); @@ -58,4 +61,7 @@ extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); +void xfs_refcountbt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, struct xfs_buf *agbp); + #endif /* __XFS_REFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index ff9412f113c4..27c39268c31f 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -79,7 +79,7 @@ xfs_rmap_update( union xfs_btree_rec rec; int error; - trace_xfs_rmap_update(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); @@ -91,7 +91,7 @@ xfs_rmap_update( error = xfs_btree_update(cur, &rec); if (error) trace_xfs_rmap_update_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -107,7 +107,7 @@ xfs_rmap_insert( int i; int error; - trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_private.a.agno, agbno, + trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.agno, agbno, len, owner, offset, flags); error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); @@ -133,7 +133,7 @@ xfs_rmap_insert( done: if (error) trace_xfs_rmap_insert_error(rcur->bc_mp, - rcur->bc_private.a.agno, error, _RET_IP_); + rcur->bc_ag.agno, error, _RET_IP_); return error; } @@ -149,7 +149,7 @@ xfs_rmap_delete( int i; int error; - trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_private.a.agno, agbno, + trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.agno, agbno, len, owner, offset, flags); error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); @@ -170,7 +170,7 @@ xfs_rmap_delete( done: if (error) trace_xfs_rmap_delete_error(rcur->bc_mp, - rcur->bc_private.a.agno, error, _RET_IP_); + rcur->bc_ag.agno, error, _RET_IP_); return error; } @@ -197,7 +197,7 @@ xfs_rmap_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_private.a.agno; + xfs_agnumber_t agno = cur->bc_ag.agno; union xfs_btree_rec *rec; int error; @@ -260,7 +260,7 @@ xfs_rmap_find_left_neighbor_helper( struct xfs_find_left_neighbor_info *info = priv; trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp, - cur->bc_private.a.agno, rec->rm_startblock, + cur->bc_ag.agno, rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, rec->rm_offset, rec->rm_flags); @@ -312,7 +312,7 @@ xfs_rmap_find_left_neighbor( info.stat = stat; trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp, - cur->bc_private.a.agno, bno, 0, owner, offset, flags); + cur->bc_ag.agno, bno, 0, owner, offset, flags); error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_find_left_neighbor_helper, &info); @@ -320,7 +320,7 @@ xfs_rmap_find_left_neighbor( error = 0; if (*stat) trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, irec->rm_startblock, + cur->bc_ag.agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); return error; @@ -336,7 +336,7 @@ xfs_rmap_lookup_le_range_helper( struct xfs_find_left_neighbor_info *info = priv; trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp, - cur->bc_private.a.agno, rec->rm_startblock, + cur->bc_ag.agno, rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, rec->rm_offset, rec->rm_flags); @@ -385,14 +385,14 @@ xfs_rmap_lookup_le_range( info.stat = stat; trace_xfs_rmap_lookup_le_range(cur->bc_mp, - cur->bc_private.a.agno, bno, 0, owner, offset, flags); + cur->bc_ag.agno, bno, 0, owner, offset, flags); error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_lookup_le_range_helper, &info); if (error == -ECANCELED) error = 0; if (*stat) trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, irec->rm_startblock, + cur->bc_ag.agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); return error; @@ -498,7 +498,7 @@ xfs_rmap_unmap( (flags & XFS_RMAP_BMBT_BLOCK); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_unmap(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* @@ -522,7 +522,7 @@ xfs_rmap_unmap( goto out_error; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, ltrec.rm_startblock, + cur->bc_ag.agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); ltoff = ltrec.rm_offset; @@ -588,7 +588,7 @@ xfs_rmap_unmap( if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); @@ -666,7 +666,7 @@ xfs_rmap_unmap( else cur->bc_rec.r.rm_offset = offset + len; cur->bc_rec.r.rm_flags = flags; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, cur->bc_rec.r.rm_startblock, cur->bc_rec.r.rm_blockcount, cur->bc_rec.r.rm_owner, @@ -678,11 +678,11 @@ xfs_rmap_unmap( } out_done: - trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_unmap_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); out_error: if (error) - trace_xfs_rmap_unmap_error(mp, cur->bc_private.a.agno, + trace_xfs_rmap_unmap_error(mp, cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -773,7 +773,7 @@ xfs_rmap_map( (flags & XFS_RMAP_BMBT_BLOCK); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_map(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); @@ -795,7 +795,7 @@ xfs_rmap_map( goto out_error; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, ltrec.rm_startblock, + cur->bc_ag.agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); @@ -831,7 +831,7 @@ xfs_rmap_map( goto out_error; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, gtrec.rm_startblock, + cur->bc_ag.agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); if (!xfs_rmap_is_mergeable(>rec, owner, flags)) @@ -870,7 +870,7 @@ xfs_rmap_map( * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr| */ ltrec.rm_blockcount += gtrec.rm_blockcount; - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, @@ -921,7 +921,7 @@ xfs_rmap_map( cur->bc_rec.r.rm_owner = owner; cur->bc_rec.r.rm_offset = offset; cur->bc_rec.r.rm_flags = flags; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, owner, offset, flags); error = xfs_btree_insert(cur, &i); if (error) @@ -932,11 +932,11 @@ xfs_rmap_map( } } - trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_map_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); out_error: if (error) - trace_xfs_rmap_map_error(mp, cur->bc_private.a.agno, + trace_xfs_rmap_map_error(mp, cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1010,7 +1010,7 @@ xfs_rmap_convert( (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; new_endoff = offset + len; - trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_convert(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* @@ -1034,7 +1034,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, PREV.rm_startblock, + cur->bc_ag.agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1076,7 +1076,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, LEFT.rm_startblock, + cur->bc_ag.agno, LEFT.rm_startblock, LEFT.rm_blockcount, LEFT.rm_owner, LEFT.rm_offset, LEFT.rm_flags); if (LEFT.rm_startblock + LEFT.rm_blockcount == bno && @@ -1114,7 +1114,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, RIGHT.rm_startblock, + cur->bc_ag.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (bno + len == RIGHT.rm_startblock && @@ -1132,7 +1132,7 @@ xfs_rmap_convert( RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) state &= ~RMAP_RIGHT_CONTIG; - trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state, + trace_xfs_rmap_convert_state(mp, cur->bc_ag.agno, state, _RET_IP_); /* reset the cursor back to PREV */ @@ -1162,7 +1162,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); @@ -1180,7 +1180,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1210,7 +1210,7 @@ xfs_rmap_convert( * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1247,7 +1247,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); @@ -1326,7 +1326,7 @@ xfs_rmap_convert( NEW.rm_blockcount = len; NEW.rm_flags = newext; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1383,7 +1383,7 @@ xfs_rmap_convert( NEW.rm_blockcount = len; NEW.rm_flags = newext; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1414,7 +1414,7 @@ xfs_rmap_convert( NEW = PREV; NEW.rm_blockcount = offset - PREV.rm_offset; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); @@ -1441,7 +1441,7 @@ xfs_rmap_convert( /* new middle extent - newext */ cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN; cur->bc_rec.r.rm_flags |= newext; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1465,12 +1465,12 @@ xfs_rmap_convert( ASSERT(0); } - trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_convert_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); done: if (error) trace_xfs_rmap_convert_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1506,7 +1506,7 @@ xfs_rmap_convert_shared( (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; new_endoff = offset + len; - trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_convert(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* @@ -1573,7 +1573,7 @@ xfs_rmap_convert_shared( goto done; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, RIGHT.rm_startblock, + cur->bc_ag.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (xfs_rmap_is_mergeable(&RIGHT, owner, newext)) @@ -1589,7 +1589,7 @@ xfs_rmap_convert_shared( RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) state &= ~RMAP_RIGHT_CONTIG; - trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state, + trace_xfs_rmap_convert_state(mp, cur->bc_ag.agno, state, _RET_IP_); /* * Switch out based on the FILLING and CONTIG state bits. @@ -1880,12 +1880,12 @@ xfs_rmap_convert_shared( ASSERT(0); } - trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_convert_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); done: if (error) trace_xfs_rmap_convert_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1923,7 +1923,7 @@ xfs_rmap_unmap_shared( xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_unmap(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* @@ -2072,12 +2072,12 @@ xfs_rmap_unmap_shared( goto out_error; } - trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_unmap_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_unmap_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -2112,7 +2112,7 @@ xfs_rmap_map_shared( xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_map(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* Is there a left record that abuts our range? */ @@ -2138,7 +2138,7 @@ xfs_rmap_map_shared( goto out_error; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, gtrec.rm_startblock, + cur->bc_ag.agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); @@ -2231,12 +2231,12 @@ xfs_rmap_map_shared( goto out_error; } - trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_map_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_map_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -2336,7 +2336,7 @@ xfs_rmap_finish_one_cleanup( if (rcur == NULL) return; - agbp = rcur->bc_private.a.agbp; + agbp = rcur->bc_ag.agbp; xfs_btree_del_cursor(rcur, error); if (error) xfs_trans_brelse(tp, agbp); @@ -2386,7 +2386,7 @@ xfs_rmap_finish_one( * the startblock, get one now. */ rcur = *pcur; - if (rcur != NULL && rcur->bc_private.a.agno != agno) { + if (rcur != NULL && rcur->bc_ag.agno != agno) { xfs_rmap_finish_one_cleanup(tp, rcur, 0); rcur = NULL; *pcur = NULL; @@ -2694,7 +2694,6 @@ struct xfs_rmap_key_state { uint64_t owner; uint64_t offset; unsigned int flags; - bool has_rmap; }; /* For each rmap given, figure out if it doesn't match the key we want. */ @@ -2709,7 +2708,6 @@ xfs_rmap_has_other_keys_helper( if (rks->owner == rec->rm_owner && rks->offset == rec->rm_offset && ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags) return 0; - rks->has_rmap = true; return -ECANCELED; } @@ -2731,7 +2729,7 @@ xfs_rmap_has_other_keys( int error; xfs_owner_info_unpack(oinfo, &rks.owner, &rks.offset, &rks.flags); - rks.has_rmap = false; + *has_rmap = false; low.rm_startblock = bno; memset(&high, 0xFF, sizeof(high)); @@ -2739,11 +2737,12 @@ xfs_rmap_has_other_keys( error = xfs_rmap_query_range(cur, &low, &high, xfs_rmap_has_other_keys_helper, &rks); - if (error < 0) - return error; + if (error == -ECANCELED) { + *has_rmap = true; + return 0; + } - *has_rmap = rks.has_rmap; - return 0; + return error; } const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = { diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index fc78efa52c94..b7c05314d07c 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -14,6 +14,7 @@ #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" #include "xfs_trace.h" @@ -51,7 +52,7 @@ xfs_rmapbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno); + cur->bc_ag.agbp, cur->bc_ag.agno); } STATIC void @@ -60,8 +61,8 @@ xfs_rmapbt_set_root( union xfs_btree_ptr *ptr, int inc) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); int btnum = cur->bc_btnum; struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); @@ -83,25 +84,25 @@ xfs_rmapbt_alloc_block( union xfs_btree_ptr *new, int *stat) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; int error; xfs_agblock_t bno; /* Allocate the new block from the freelist. If we can't, give up. */ - error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp, &bno, 1); if (error) return error; - trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_ag.agno, bno, 1); if (bno == NULLAGBLOCK) { *stat = 0; return 0; } - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, + xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, false); xfs_trans_agbtree_delta(cur->bc_tp, 1); @@ -109,7 +110,7 @@ xfs_rmapbt_alloc_block( be32_add_cpu(&agf->agf_rmap_blocks, 1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); - xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_private.a.agno); + xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_ag.agno); *stat = 1; return 0; @@ -120,13 +121,13 @@ xfs_rmapbt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; xfs_agblock_t bno; int error; bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); - trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_ag.agno, bno, 1); be32_add_cpu(&agf->agf_rmap_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); @@ -138,7 +139,7 @@ xfs_rmapbt_free_block( XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); - xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_private.a.agno); + xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_ag.agno); return 0; } @@ -215,9 +216,9 @@ xfs_rmapbt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_roots[cur->bc_btnum]; } @@ -448,17 +449,12 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = { .recs_inorder = xfs_rmapbt_recs_inorder, }; -/* - * Allocate a new allocation btree cursor. - */ -struct xfs_btree_cur * -xfs_rmapbt_init_cursor( +static struct xfs_btree_cur * +xfs_rmapbt_init_common( struct xfs_mount *mp, struct xfs_trans *tp, - struct xfs_buf *agbp, xfs_agnumber_t agno) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); struct xfs_btree_cur *cur; cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); @@ -468,17 +464,68 @@ xfs_rmapbt_init_cursor( cur->bc_btnum = XFS_BTNUM_RMAP; cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); + cur->bc_ag.agno = agno; cur->bc_ops = &xfs_rmapbt_ops; + + return cur; +} + +/* Create a new reverse mapping btree cursor. */ +struct xfs_btree_cur * +xfs_rmapbt_init_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xfs_btree_cur *cur; + + cur = xfs_rmapbt_init_common(mp, tp, agno); cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); + cur->bc_ag.agbp = agbp; + return cur; +} - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; +/* Create a new reverse mapping btree cursor with a fake root for staging. */ +struct xfs_btree_cur * +xfs_rmapbt_stage_cursor( + struct xfs_mount *mp, + struct xbtree_afakeroot *afake, + xfs_agnumber_t agno) +{ + struct xfs_btree_cur *cur; + cur = xfs_rmapbt_init_common(mp, NULL, agno); + xfs_btree_stage_afakeroot(cur, afake); return cur; } /* + * Install a new reverse mapping btree root. Caller is responsible for + * invalidating and freeing the old btree blocks. + */ +void +xfs_rmapbt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root); + agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels); + agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS | + XFS_AGF_RMAP_BLOCKS); + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops); +} + +/* * Calculate number of records in an rmap btree block. */ int @@ -569,7 +616,7 @@ xfs_rmapbt_calc_reserves( if (error) return error; - agf = XFS_BUF_TO_AGF(agbp); + agf = agbp->b_addr; agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_rmap_blocks); xfs_trans_brelse(tp, agbp); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 820d668b063d..115c3455a734 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -9,6 +9,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xbtree_afakeroot; /* rmaps only exist on crc enabled filesystems */ #define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN @@ -43,6 +44,10 @@ struct xfs_mount; struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *bp, xfs_agnumber_t agno); +struct xfs_btree_cur *xfs_rmapbt_stage_cursor(struct xfs_mount *mp, + struct xbtree_afakeroot *afake, xfs_agnumber_t agno); +void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, struct xfs_buf *agbp); int xfs_rmapbt_maxrecs(int blocklen, int leaf); extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 2f60fc3c99a0..00266de58954 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -220,7 +220,7 @@ xfs_validate_sb_common( struct xfs_buf *bp, struct xfs_sb *sbp) { - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; uint32_t agcount = 0; uint32_t rem; @@ -681,7 +681,7 @@ xfs_sb_read_verify( { struct xfs_sb sb; struct xfs_mount *mp = bp->b_mount; - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; int error; /* @@ -707,7 +707,7 @@ xfs_sb_read_verify( * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ - __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); + __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; @@ -730,7 +730,7 @@ static void xfs_sb_quiet_read_verify( struct xfs_buf *bp) { - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ @@ -748,13 +748,14 @@ xfs_sb_write_verify( struct xfs_sb sb; struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_dsb *dsb = bp->b_addr; int error; /* * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ - __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); + __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; @@ -766,7 +767,7 @@ xfs_sb_write_verify( return; if (bip) - XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); + dsb->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); return; @@ -927,7 +928,7 @@ xfs_log_sb( mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); } @@ -1007,7 +1008,7 @@ xfs_update_secondary_sbs( bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_buf_delwri_queue(bp, &buffer_list); xfs_buf_relse(bp); diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 7a9c04920505..d1a0848cb52e 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -187,7 +187,7 @@ xfs_calc_inode_chunk_res( XFS_FSB_TO_B(mp, 1)); if (alloc) { /* icreate tx uses ordered buffers */ - if (xfs_sb_version_hascrc(&mp->m_sb)) + if (xfs_sb_version_has_v3inode(&mp->m_sb)) return res; size = XFS_FSB_TO_B(mp, 1); } diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index ba0f747c82e8..e9bcf1faa183 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -92,7 +92,7 @@ xchk_superblock( if (!xchk_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) return error; - sb = XFS_BUF_TO_SBP(bp); + sb = bp->b_addr; /* * Verify the geometries match. Fields that are permanently @@ -358,7 +358,7 @@ static inline void xchk_agf_xref_freeblks( struct xfs_scrub *sc) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + struct xfs_agf *agf = sc->sa.agf_bp->b_addr; xfs_extlen_t blocks = 0; int error; @@ -378,7 +378,7 @@ static inline void xchk_agf_xref_cntbt( struct xfs_scrub *sc) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + struct xfs_agf *agf = sc->sa.agf_bp->b_addr; xfs_agblock_t agbno; xfs_extlen_t blocks; int have; @@ -410,7 +410,7 @@ STATIC void xchk_agf_xref_btreeblks( struct xfs_scrub *sc) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + struct xfs_agf *agf = sc->sa.agf_bp->b_addr; struct xfs_mount *mp = sc->mp; xfs_agblock_t blocks; xfs_agblock_t btreeblks; @@ -456,7 +456,7 @@ static inline void xchk_agf_xref_refcblks( struct xfs_scrub *sc) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + struct xfs_agf *agf = sc->sa.agf_bp->b_addr; xfs_agblock_t blocks; int error; @@ -525,7 +525,7 @@ xchk_agf( goto out; xchk_buffer_recheck(sc, sc->sa.agf_bp); - agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + agf = sc->sa.agf_bp->b_addr; /* Check the AG length */ eoag = be32_to_cpu(agf->agf_length); @@ -711,7 +711,7 @@ xchk_agfl( goto out; /* Allocate buffer to ensure uniqueness of AGFL entries. */ - agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + agf = sc->sa.agf_bp->b_addr; agflcount = be32_to_cpu(agf->agf_flcount); if (agflcount > xfs_agfl_size(sc->mp)) { xchk_block_set_corrupt(sc, sc->sa.agf_bp); @@ -728,7 +728,7 @@ xchk_agfl( } /* Check the blocks in the AGFL. */ - error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), + error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, sc->sa.agfl_bp, xchk_agfl_block, &sai); if (error == -ECANCELED) { error = 0; @@ -765,7 +765,7 @@ static inline void xchk_agi_xref_icounts( struct xfs_scrub *sc) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); + struct xfs_agi *agi = sc->sa.agi_bp->b_addr; xfs_agino_t icount; xfs_agino_t freecount; int error; @@ -834,7 +834,7 @@ xchk_agi( goto out; xchk_buffer_recheck(sc, sc->sa.agi_bp); - agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); + agi = sc->sa.agi_bp->b_addr; /* Check the AG length */ eoag = be32_to_cpu(agi->agi_length); diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index d5e6db9af434..bca2ab1d4be9 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -49,7 +49,7 @@ xrep_superblock( /* Copy AG 0's superblock to this one. */ xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); /* Write this to disk. */ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF); @@ -140,7 +140,7 @@ xrep_agf_find_btrees( struct xrep_find_ag_btree *fab, struct xfs_buf *agfl_bp) { - struct xfs_agf *old_agf = XFS_BUF_TO_AGF(agf_bp); + struct xfs_agf *old_agf = agf_bp->b_addr; int error; /* Go find the root data. */ @@ -181,7 +181,7 @@ xrep_agf_init_header( struct xfs_agf *old_agf) { struct xfs_mount *mp = sc->mp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + struct xfs_agf *agf = agf_bp->b_addr; memcpy(old_agf, agf, sizeof(*old_agf)); memset(agf, 0, BBTOB(agf_bp->b_length)); @@ -238,7 +238,7 @@ xrep_agf_calc_from_btrees( { struct xrep_agf_allocbt raa = { .sc = sc }; struct xfs_btree_cur *cur = NULL; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + struct xfs_agf *agf = agf_bp->b_addr; struct xfs_mount *mp = sc->mp; xfs_agblock_t btreeblks; xfs_agblock_t blocks; @@ -302,7 +302,7 @@ xrep_agf_commit_new( struct xfs_buf *agf_bp) { struct xfs_perag *pag; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + struct xfs_agf *agf = agf_bp->b_addr; /* Trigger fdblocks recalculation */ xfs_force_summary_recalc(sc->mp); @@ -376,7 +376,7 @@ xrep_agf( if (error) return error; agf_bp->b_ops = &xfs_agf_buf_ops; - agf = XFS_BUF_TO_AGF(agf_bp); + agf = agf_bp->b_addr; /* * Load the AGFL so that we can screen out OWN_AG blocks that are on @@ -395,7 +395,7 @@ xrep_agf( * Spot-check the AGFL blocks; if they're obviously corrupt then * there's nothing we can do but bail out. */ - error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(agf_bp), agfl_bp, + error = xfs_agfl_walk(sc->mp, agf_bp->b_addr, agfl_bp, xrep_agf_check_agfl_block, sc); if (error) return error; @@ -429,10 +429,10 @@ out_revert: struct xrep_agfl { /* Bitmap of other OWN_AG metadata blocks. */ - struct xfs_bitmap agmetablocks; + struct xbitmap agmetablocks; /* Bitmap of free space. */ - struct xfs_bitmap *freesp; + struct xbitmap *freesp; struct xfs_scrub *sc; }; @@ -453,14 +453,14 @@ xrep_agfl_walk_rmap( /* Record all the OWN_AG blocks. */ if (rec->rm_owner == XFS_RMAP_OWN_AG) { - fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, + fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.agno, rec->rm_startblock); - error = xfs_bitmap_set(ra->freesp, fsb, rec->rm_blockcount); + error = xbitmap_set(ra->freesp, fsb, rec->rm_blockcount); if (error) return error; } - return xfs_bitmap_set_btcur_path(&ra->agmetablocks, cur); + return xbitmap_set_btcur_path(&ra->agmetablocks, cur); } /* @@ -476,19 +476,17 @@ STATIC int xrep_agfl_collect_blocks( struct xfs_scrub *sc, struct xfs_buf *agf_bp, - struct xfs_bitmap *agfl_extents, + struct xbitmap *agfl_extents, xfs_agblock_t *flcount) { struct xrep_agfl ra; struct xfs_mount *mp = sc->mp; struct xfs_btree_cur *cur; - struct xfs_bitmap_range *br; - struct xfs_bitmap_range *n; int error; ra.sc = sc; ra.freesp = agfl_extents; - xfs_bitmap_init(&ra.agmetablocks); + xbitmap_init(&ra.agmetablocks); /* Find all space used by the free space btrees & rmapbt. */ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno); @@ -500,7 +498,7 @@ xrep_agfl_collect_blocks( /* Find all blocks currently being used by the bnobt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, XFS_BTNUM_BNO); - error = xfs_bitmap_set_btblocks(&ra.agmetablocks, cur); + error = xbitmap_set_btblocks(&ra.agmetablocks, cur); if (error) goto err; xfs_btree_del_cursor(cur, error); @@ -508,7 +506,7 @@ xrep_agfl_collect_blocks( /* Find all blocks currently being used by the cntbt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, XFS_BTNUM_CNT); - error = xfs_bitmap_set_btblocks(&ra.agmetablocks, cur); + error = xbitmap_set_btblocks(&ra.agmetablocks, cur); if (error) goto err; @@ -518,8 +516,8 @@ xrep_agfl_collect_blocks( * Drop the freesp meta blocks that are in use by btrees. * The remaining blocks /should/ be AGFL blocks. */ - error = xfs_bitmap_disunion(agfl_extents, &ra.agmetablocks); - xfs_bitmap_destroy(&ra.agmetablocks); + error = xbitmap_disunion(agfl_extents, &ra.agmetablocks); + xbitmap_destroy(&ra.agmetablocks); if (error) return error; @@ -527,18 +525,12 @@ xrep_agfl_collect_blocks( * Calculate the new AGFL size. If we found more blocks than fit in * the AGFL we'll free them later. */ - *flcount = 0; - for_each_xfs_bitmap_extent(br, n, agfl_extents) { - *flcount += br->len; - if (*flcount > xfs_agfl_size(mp)) - break; - } - if (*flcount > xfs_agfl_size(mp)) - *flcount = xfs_agfl_size(mp); + *flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents), + xfs_agfl_size(mp)); return 0; err: - xfs_bitmap_destroy(&ra.agmetablocks); + xbitmap_destroy(&ra.agmetablocks); xfs_btree_del_cursor(cur, error); return error; } @@ -550,7 +542,7 @@ xrep_agfl_update_agf( struct xfs_buf *agf_bp, xfs_agblock_t flcount) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + struct xfs_agf *agf = agf_bp->b_addr; ASSERT(flcount <= xfs_agfl_size(sc->mp)); @@ -573,13 +565,13 @@ STATIC void xrep_agfl_init_header( struct xfs_scrub *sc, struct xfs_buf *agfl_bp, - struct xfs_bitmap *agfl_extents, + struct xbitmap *agfl_extents, xfs_agblock_t flcount) { struct xfs_mount *mp = sc->mp; __be32 *agfl_bno; - struct xfs_bitmap_range *br; - struct xfs_bitmap_range *n; + struct xbitmap_range *br; + struct xbitmap_range *n; struct xfs_agfl *agfl; xfs_agblock_t agbno; unsigned int fl_off; @@ -602,8 +594,8 @@ xrep_agfl_init_header( * step. */ fl_off = 0; - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agfl_bp); - for_each_xfs_bitmap_extent(br, n, agfl_extents) { + agfl_bno = xfs_buf_to_agfl_bno(agfl_bp); + for_each_xbitmap_extent(br, n, agfl_extents) { agbno = XFS_FSB_TO_AGBNO(mp, br->start); trace_xrep_agfl_insert(mp, sc->sa.agno, agbno, br->len); @@ -637,7 +629,7 @@ int xrep_agfl( struct xfs_scrub *sc) { - struct xfs_bitmap agfl_extents; + struct xbitmap agfl_extents; struct xfs_mount *mp = sc->mp; struct xfs_buf *agf_bp; struct xfs_buf *agfl_bp; @@ -649,7 +641,7 @@ xrep_agfl( return -EOPNOTSUPP; xchk_perag_get(sc->mp, &sc->sa); - xfs_bitmap_init(&agfl_extents); + xbitmap_init(&agfl_extents); /* * Read the AGF so that we can query the rmapbt. We hope that there's @@ -696,10 +688,10 @@ xrep_agfl( goto err; /* Dump any AGFL overflow. */ - return xrep_reap_extents(sc, &agfl_extents, &XFS_RMAP_OINFO_AG, + error = xrep_reap_extents(sc, &agfl_extents, &XFS_RMAP_OINFO_AG, XFS_AG_RESV_AGFL); err: - xfs_bitmap_destroy(&agfl_extents); + xbitmap_destroy(&agfl_extents); return error; } @@ -761,7 +753,7 @@ xrep_agi_init_header( struct xfs_buf *agi_bp, struct xfs_agi *old_agi) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agi_bp); + struct xfs_agi *agi = agi_bp->b_addr; struct xfs_mount *mp = sc->mp; memcpy(old_agi, agi, sizeof(*old_agi)); @@ -807,7 +799,7 @@ xrep_agi_calc_from_btrees( struct xfs_buf *agi_bp) { struct xfs_btree_cur *cur; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agi_bp); + struct xfs_agi *agi = agi_bp->b_addr; struct xfs_mount *mp = sc->mp; xfs_agino_t count; xfs_agino_t freecount; @@ -835,7 +827,7 @@ xrep_agi_commit_new( struct xfs_buf *agi_bp) { struct xfs_perag *pag; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agi_bp); + struct xfs_agi *agi = agi_bp->b_addr; /* Trigger inode count recalculation */ xfs_force_summary_recalc(sc->mp); @@ -892,7 +884,7 @@ xrep_agi( if (error) return error; agi_bp->b_ops = &xfs_agi_buf_ops; - agi = XFS_BUF_TO_AGI(agi_bp); + agi = agi_bp->b_addr; /* Find the AGI btree roots. */ error = xrep_agi_find_btrees(sc, fab); diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 5533e48e605d..73d924e47565 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -94,7 +94,7 @@ xchk_allocbt_rec( union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agnumber_t agno = bs->cur->bc_ag.agno; xfs_agblock_t bno; xfs_extlen_t len; diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index d9f0dd444b80..9faddb334a2c 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -98,7 +98,7 @@ struct xchk_xattr { /* * Check that an extended attribute key can be looked up by hash. * - * We use the XFS attribute list iterator (i.e. xfs_attr_list_int_ilocked) + * We use the XFS attribute list iterator (i.e. xfs_attr_list_ilocked) * to call this function for every attribute key in an inode. Once * we're here, we load the attribute value to see if any errors happen, * or if we get more or less data than we expected. @@ -147,11 +147,8 @@ xchk_xattr_listent( return; } - args.flags = ATTR_KERNOTIME; - if (flags & XFS_ATTR_ROOT) - args.flags |= ATTR_ROOT; - else if (flags & XFS_ATTR_SECURE) - args.flags |= ATTR_SECURE; + args.op_flags = XFS_DA_OP_NOTIME; + args.attr_filter = flags & XFS_ATTR_NSP_ONDISK_MASK; args.geo = context->dp->i_mount->m_attr_geo; args.whichfork = XFS_ATTR_FORK; args.dp = context->dp; @@ -162,7 +159,10 @@ xchk_xattr_listent( args.value = xchk_xattr_valuebuf(sx->sc); args.valuelen = valuelen; - error = xfs_attr_get_ilocked(context->dp, &args); + error = xfs_attr_get_ilocked(&args); + /* ENODATA means the hash lookup failed and the attr is bad */ + if (error == -ENODATA) + error = -EFSCORRUPTED; if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, &error)) goto fail_xref; @@ -474,7 +474,6 @@ xchk_xattr( struct xfs_scrub *sc) { struct xchk_xattr sx; - struct attrlist_cursor_kern cursor = { 0 }; xfs_dablk_t last_checked = -1U; int error = 0; @@ -493,11 +492,10 @@ xchk_xattr( /* Check that every attr key can also be looked up by hash. */ sx.context.dp = sc->ip; - sx.context.cursor = &cursor; sx.context.resynch = 1; sx.context.put_listent = xchk_xattr_listent; sx.context.tp = sc->tp; - sx.context.flags = ATTR_INCOMPLETE; + sx.context.allow_incomplete = true; sx.sc = sc; /* @@ -516,7 +514,7 @@ xchk_xattr( * iteration, which doesn't really follow the usual buffer * locking order. */ - error = xfs_attr_list_int_ilocked(&sx.context); + error = xfs_attr_list_ilocked(&sx.context); if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) goto out; diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index 18a684e18a69..f88694f22d05 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -18,14 +18,14 @@ * This is the logical equivalent of bitmap |= mask(start, len). */ int -xfs_bitmap_set( - struct xfs_bitmap *bitmap, +xbitmap_set( + struct xbitmap *bitmap, uint64_t start, uint64_t len) { - struct xfs_bitmap_range *bmr; + struct xbitmap_range *bmr; - bmr = kmem_alloc(sizeof(struct xfs_bitmap_range), KM_MAYFAIL); + bmr = kmem_alloc(sizeof(struct xbitmap_range), KM_MAYFAIL); if (!bmr) return -ENOMEM; @@ -39,13 +39,13 @@ xfs_bitmap_set( /* Free everything related to this bitmap. */ void -xfs_bitmap_destroy( - struct xfs_bitmap *bitmap) +xbitmap_destroy( + struct xbitmap *bitmap) { - struct xfs_bitmap_range *bmr; - struct xfs_bitmap_range *n; + struct xbitmap_range *bmr; + struct xbitmap_range *n; - for_each_xfs_bitmap_extent(bmr, n, bitmap) { + for_each_xbitmap_extent(bmr, n, bitmap) { list_del(&bmr->list); kmem_free(bmr); } @@ -53,24 +53,24 @@ xfs_bitmap_destroy( /* Set up a per-AG block bitmap. */ void -xfs_bitmap_init( - struct xfs_bitmap *bitmap) +xbitmap_init( + struct xbitmap *bitmap) { INIT_LIST_HEAD(&bitmap->list); } /* Compare two btree extents. */ static int -xfs_bitmap_range_cmp( +xbitmap_range_cmp( void *priv, struct list_head *a, struct list_head *b) { - struct xfs_bitmap_range *ap; - struct xfs_bitmap_range *bp; + struct xbitmap_range *ap; + struct xbitmap_range *bp; - ap = container_of(a, struct xfs_bitmap_range, list); - bp = container_of(b, struct xfs_bitmap_range, list); + ap = container_of(a, struct xbitmap_range, list); + bp = container_of(b, struct xbitmap_range, list); if (ap->start > bp->start) return 1; @@ -96,14 +96,14 @@ xfs_bitmap_range_cmp( #define LEFT_ALIGNED (1 << 0) #define RIGHT_ALIGNED (1 << 1) int -xfs_bitmap_disunion( - struct xfs_bitmap *bitmap, - struct xfs_bitmap *sub) +xbitmap_disunion( + struct xbitmap *bitmap, + struct xbitmap *sub) { struct list_head *lp; - struct xfs_bitmap_range *br; - struct xfs_bitmap_range *new_br; - struct xfs_bitmap_range *sub_br; + struct xbitmap_range *br; + struct xbitmap_range *new_br; + struct xbitmap_range *sub_br; uint64_t sub_start; uint64_t sub_len; int state; @@ -113,8 +113,8 @@ xfs_bitmap_disunion( return 0; ASSERT(!list_empty(&sub->list)); - list_sort(NULL, &bitmap->list, xfs_bitmap_range_cmp); - list_sort(NULL, &sub->list, xfs_bitmap_range_cmp); + list_sort(NULL, &bitmap->list, xbitmap_range_cmp); + list_sort(NULL, &sub->list, xbitmap_range_cmp); /* * Now that we've sorted both lists, we iterate bitmap once, rolling @@ -124,11 +124,11 @@ xfs_bitmap_disunion( * list traversal is similar to merge sort, but we're deleting * instead. In this manner we avoid O(n^2) operations. */ - sub_br = list_first_entry(&sub->list, struct xfs_bitmap_range, + sub_br = list_first_entry(&sub->list, struct xbitmap_range, list); lp = bitmap->list.next; while (lp != &bitmap->list) { - br = list_entry(lp, struct xfs_bitmap_range, list); + br = list_entry(lp, struct xbitmap_range, list); /* * Advance sub_br and/or br until we find a pair that @@ -181,7 +181,7 @@ xfs_bitmap_disunion( * Deleting from the middle: add the new right extent * and then shrink the left extent. */ - new_br = kmem_alloc(sizeof(struct xfs_bitmap_range), + new_br = kmem_alloc(sizeof(struct xbitmap_range), KM_MAYFAIL); if (!new_br) { error = -ENOMEM; @@ -247,8 +247,8 @@ out: * blocks going from the leaf towards the root. */ int -xfs_bitmap_set_btcur_path( - struct xfs_bitmap *bitmap, +xbitmap_set_btcur_path( + struct xbitmap *bitmap, struct xfs_btree_cur *cur) { struct xfs_buf *bp; @@ -261,7 +261,7 @@ xfs_bitmap_set_btcur_path( if (!bp) continue; fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); - error = xfs_bitmap_set(bitmap, fsb, 1); + error = xbitmap_set(bitmap, fsb, 1); if (error) return error; } @@ -271,12 +271,12 @@ xfs_bitmap_set_btcur_path( /* Collect a btree's block in the bitmap. */ STATIC int -xfs_bitmap_collect_btblock( +xbitmap_collect_btblock( struct xfs_btree_cur *cur, int level, void *priv) { - struct xfs_bitmap *bitmap = priv; + struct xbitmap *bitmap = priv; struct xfs_buf *bp; xfs_fsblock_t fsbno; @@ -285,15 +285,30 @@ xfs_bitmap_collect_btblock( return 0; fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); - return xfs_bitmap_set(bitmap, fsbno, 1); + return xbitmap_set(bitmap, fsbno, 1); } /* Walk the btree and mark the bitmap wherever a btree block is found. */ int -xfs_bitmap_set_btblocks( - struct xfs_bitmap *bitmap, +xbitmap_set_btblocks( + struct xbitmap *bitmap, struct xfs_btree_cur *cur) { - return xfs_btree_visit_blocks(cur, xfs_bitmap_collect_btblock, + return xfs_btree_visit_blocks(cur, xbitmap_collect_btblock, XFS_BTREE_VISIT_ALL, bitmap); } + +/* How many bits are set in this bitmap? */ +uint64_t +xbitmap_hweight( + struct xbitmap *bitmap) +{ + struct xbitmap_range *bmr; + struct xbitmap_range *n; + uint64_t ret = 0; + + for_each_xbitmap_extent(bmr, n, bitmap) + ret += bmr->len; + + return ret; +} diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h index ae8ecbce6fa6..900646b72de1 100644 --- a/fs/xfs/scrub/bitmap.h +++ b/fs/xfs/scrub/bitmap.h @@ -6,31 +6,32 @@ #ifndef __XFS_SCRUB_BITMAP_H__ #define __XFS_SCRUB_BITMAP_H__ -struct xfs_bitmap_range { +struct xbitmap_range { struct list_head list; uint64_t start; uint64_t len; }; -struct xfs_bitmap { +struct xbitmap { struct list_head list; }; -void xfs_bitmap_init(struct xfs_bitmap *bitmap); -void xfs_bitmap_destroy(struct xfs_bitmap *bitmap); +void xbitmap_init(struct xbitmap *bitmap); +void xbitmap_destroy(struct xbitmap *bitmap); -#define for_each_xfs_bitmap_extent(bex, n, bitmap) \ +#define for_each_xbitmap_extent(bex, n, bitmap) \ list_for_each_entry_safe((bex), (n), &(bitmap)->list, list) -#define for_each_xfs_bitmap_block(b, bex, n, bitmap) \ +#define for_each_xbitmap_block(b, bex, n, bitmap) \ list_for_each_entry_safe((bex), (n), &(bitmap)->list, list) \ - for ((b) = bex->start; (b) < bex->start + bex->len; (b)++) + for ((b) = (bex)->start; (b) < (bex)->start + (bex)->len; (b)++) -int xfs_bitmap_set(struct xfs_bitmap *bitmap, uint64_t start, uint64_t len); -int xfs_bitmap_disunion(struct xfs_bitmap *bitmap, struct xfs_bitmap *sub); -int xfs_bitmap_set_btcur_path(struct xfs_bitmap *bitmap, +int xbitmap_set(struct xbitmap *bitmap, uint64_t start, uint64_t len); +int xbitmap_disunion(struct xbitmap *bitmap, struct xbitmap *sub); +int xbitmap_set_btcur_path(struct xbitmap *bitmap, struct xfs_btree_cur *cur); -int xfs_bitmap_set_btblocks(struct xfs_bitmap *bitmap, +int xbitmap_set_btblocks(struct xbitmap *bitmap, struct xfs_btree_cur *cur); +uint64_t xbitmap_hweight(struct xbitmap *bitmap); #endif /* __XFS_SCRUB_BITMAP_H__ */ diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index fa6ea6407992..add8598eacd5 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -374,7 +374,7 @@ xchk_bmapbt_rec( struct xfs_bmbt_irec iext_irec; struct xfs_iext_cursor icur; struct xchk_bmap_info *info = bs->private; - struct xfs_inode *ip = bs->cur->bc_private.b.ip; + struct xfs_inode *ip = bs->cur->bc_ino.ip; struct xfs_buf *bp = NULL; struct xfs_btree_block *block; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, info->whichfork); @@ -501,7 +501,7 @@ xchk_bmap_check_rmap( xchk_fblock_set_corrupt(sc, sbcri->whichfork, rec->rm_offset); if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, - cur->bc_private.a.agno, rec->rm_startblock)) + cur->bc_ag.agno, rec->rm_startblock)) xchk_fblock_set_corrupt(sc, sbcri->whichfork, rec->rm_offset); if (irec.br_blockcount > rec->rm_blockcount) diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 97a15b6f2865..9a2e27ac1300 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -219,19 +219,21 @@ xchk_da_btree_block_check_sibling( int direction, xfs_dablk_t sibling) { + struct xfs_da_state_path *path = &ds->state->path; + struct xfs_da_state_path *altpath = &ds->state->altpath; int retval; + int plevel; int error; - memcpy(&ds->state->altpath, &ds->state->path, - sizeof(ds->state->altpath)); + memcpy(altpath, path, sizeof(ds->state->altpath)); /* * If the pointer is null, we shouldn't be able to move the upper * level pointer anywhere. */ if (sibling == 0) { - error = xfs_da3_path_shift(ds->state, &ds->state->altpath, - direction, false, &retval); + error = xfs_da3_path_shift(ds->state, altpath, direction, + false, &retval); if (error == 0 && retval == 0) xchk_da_set_corrupt(ds, level); error = 0; @@ -239,27 +241,33 @@ xchk_da_btree_block_check_sibling( } /* Move the alternate cursor one block in the direction given. */ - error = xfs_da3_path_shift(ds->state, &ds->state->altpath, - direction, false, &retval); + error = xfs_da3_path_shift(ds->state, altpath, direction, false, + &retval); if (!xchk_da_process_error(ds, level, &error)) - return error; + goto out; if (retval) { xchk_da_set_corrupt(ds, level); - return error; + goto out; } - if (ds->state->altpath.blk[level].bp) - xchk_buffer_recheck(ds->sc, - ds->state->altpath.blk[level].bp); + if (altpath->blk[level].bp) + xchk_buffer_recheck(ds->sc, altpath->blk[level].bp); /* Compare upper level pointer to sibling pointer. */ - if (ds->state->altpath.blk[level].blkno != sibling) + if (altpath->blk[level].blkno != sibling) xchk_da_set_corrupt(ds, level); - if (ds->state->altpath.blk[level].bp) { - xfs_trans_brelse(ds->dargs.trans, - ds->state->altpath.blk[level].bp); - ds->state->altpath.blk[level].bp = NULL; - } + out: + /* Free all buffers in the altpath that aren't referenced from path. */ + for (plevel = 0; plevel < altpath->active; plevel++) { + if (altpath->blk[plevel].bp == NULL || + (plevel < path->active && + altpath->blk[plevel].bp == path->blk[plevel].bp)) + continue; + + xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp); + altpath->blk[plevel].bp = NULL; + } + return error; } diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 266da4e4bde6..fe2a6e030c8a 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -155,6 +155,9 @@ xchk_dir_actor( xname.type = XFS_DIR3_FT_UNKNOWN; error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL); + /* ENOENT means the hash lookup failed and the dir is corrupt */ + if (error == -ENOENT) + error = -EFSCORRUPTED; if (!xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, &error)) goto out; @@ -500,7 +503,7 @@ xchk_directory_leaf1_bestfree( /* Read the free space block. */ error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, &bp); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) - goto out; + return error; xchk_buffer_recheck(sc, bp); leaf = bp->b_addr; @@ -565,9 +568,10 @@ xchk_directory_leaf1_bestfree( xchk_directory_check_freesp(sc, lblk, dbp, best); xfs_trans_brelse(sc->tp, dbp); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - goto out; + break; } out: + xfs_trans_brelse(sc->tp, bp); return error; } @@ -589,7 +593,7 @@ xchk_directory_free_bestfree( /* Read the free space block */ error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) - goto out; + return error; xchk_buffer_recheck(sc, bp); if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { @@ -612,7 +616,7 @@ xchk_directory_free_bestfree( 0, &dbp); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) - break; + goto out; xchk_directory_check_freesp(sc, lblk, dbp, best); xfs_trans_brelse(sc->tp, dbp); } @@ -620,6 +624,7 @@ xchk_directory_free_bestfree( if (freehdr.nused + stale != freehdr.nvalid) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); out: + xfs_trans_brelse(sc->tp, bp); return error; } diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 681758704fda..64c217eb06a7 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -104,7 +104,7 @@ xchk_iallocbt_chunk( xfs_extlen_t len) { struct xfs_mount *mp = bs->cur->bc_mp; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agnumber_t agno = bs->cur->bc_ag.agno; xfs_agblock_t bno; bno = XFS_AGINO_TO_AGBNO(mp, agino); @@ -164,7 +164,7 @@ xchk_iallocbt_check_cluster_ifree( * the record, compute which fs inode we're talking about. */ agino = irec->ir_startino + irec_ino; - fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino); + fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_ag.agno, agino); irec_free = (irec->ir_free & XFS_INOBT_MASK(irec_ino)); if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || @@ -215,7 +215,7 @@ xchk_iallocbt_check_cluster( struct xfs_dinode *dip; struct xfs_buf *cluster_bp; unsigned int nr_inodes; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agnumber_t agno = bs->cur->bc_ag.agno; xfs_agblock_t agbno; unsigned int cluster_index; uint16_t cluster_mask = 0; @@ -426,7 +426,7 @@ xchk_iallocbt_rec( struct xchk_iallocbt *iabt = bs->private; struct xfs_inobt_rec_incore irec; uint64_t holes; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agnumber_t agno = bs->cur->bc_ag.agno; xfs_agino_t agino; xfs_extlen_t len; int holecount; diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 0cab11a5d390..beaeb6fa3119 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -336,7 +336,7 @@ xchk_refcountbt_rec( { struct xfs_mount *mp = bs->cur->bc_mp; xfs_agblock_t *cow_blocks = bs->private; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agnumber_t agno = bs->cur->bc_ag.agno; xfs_agblock_t bno; xfs_extlen_t len; xfs_nlink_t refcount; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index e489d7a8446a..db3cfd12803d 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -208,8 +208,10 @@ xrep_calc_ag_resblks( /* Now grab the block counters from the AGF. */ error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp); if (!error) { - aglen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_length); - freelen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_freeblks); + struct xfs_agf *agf = bp->b_addr; + + aglen = be32_to_cpu(agf->agf_length); + freelen = be32_to_cpu(agf->agf_freeblks); usedlen = aglen - freelen; xfs_buf_relse(bp); } @@ -434,10 +436,10 @@ xrep_init_btblock( int xrep_invalidate_blocks( struct xfs_scrub *sc, - struct xfs_bitmap *bitmap) + struct xbitmap *bitmap) { - struct xfs_bitmap_range *bmr; - struct xfs_bitmap_range *n; + struct xbitmap_range *bmr; + struct xbitmap_range *n; struct xfs_buf *bp; xfs_fsblock_t fsbno; @@ -449,7 +451,7 @@ xrep_invalidate_blocks( * because we never own those; and if we can't TRYLOCK the buffer we * assume it's owned by someone else. */ - for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) { + for_each_xbitmap_block(fsbno, bmr, n, bitmap) { /* Skip AG headers and post-EOFS blocks */ if (!xfs_verify_fsbno(sc->mp, fsbno)) continue; @@ -595,18 +597,18 @@ out_free: int xrep_reap_extents( struct xfs_scrub *sc, - struct xfs_bitmap *bitmap, + struct xbitmap *bitmap, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type) { - struct xfs_bitmap_range *bmr; - struct xfs_bitmap_range *n; + struct xbitmap_range *bmr; + struct xbitmap_range *n; xfs_fsblock_t fsbno; int error = 0; ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb)); - for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) { + for_each_xbitmap_block(fsbno, bmr, n, bitmap) { ASSERT(sc->ip != NULL || XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno); trace_xrep_dispose_btree_extent(sc->mp, @@ -615,11 +617,9 @@ xrep_reap_extents( error = xrep_reap_block(sc, fsbno, oinfo, type); if (error) - goto out; + break; } -out: - xfs_bitmap_destroy(bitmap); return error; } @@ -879,7 +879,7 @@ xrep_find_ag_btree_roots( ri.sc = sc; ri.btree_info = btree_info; - ri.agf = XFS_BUF_TO_AGF(agf_bp); + ri.agf = agf_bp->b_addr; ri.agfl_bp = agfl_bp; for (fab = btree_info; fab->buf_ops; fab++) { ASSERT(agfl_bp || fab->rmap_owner != XFS_RMAP_OWN_AG); diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index c3422403b169..04a47d45605b 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -28,11 +28,11 @@ int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb, struct xfs_buf **bpp, xfs_btnum_t btnum, const struct xfs_buf_ops *ops); -struct xfs_bitmap; +struct xbitmap; int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink); -int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xfs_bitmap *btlist); -int xrep_reap_extents(struct xfs_scrub *sc, struct xfs_bitmap *exlist, +int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xbitmap *btlist); +int xrep_reap_extents(struct xfs_scrub *sc, struct xbitmap *exlist, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type); struct xrep_find_ag_btree { diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 8d4cefd761c1..f4fcb4719f41 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -92,7 +92,7 @@ xchk_rmapbt_rec( { struct xfs_mount *mp = bs->cur->bc_mp; struct xfs_rmap_irec irec; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agnumber_t agno = bs->cur->bc_ag.agno; bool non_inode; bool is_unwritten; bool is_bmbt; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index f1775bb19313..8ebf35b115ce 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -168,6 +168,7 @@ xchk_teardown( xfs_irele(sc->ip); sc->ip = NULL; } + sb_end_write(sc->mp->m_super); if (sc->flags & XCHK_REAPING_DISABLED) xchk_start_reaping(sc); if (sc->flags & XCHK_HAS_QUOTAOFFLOCK) { @@ -490,6 +491,14 @@ xfs_scrub_metadata( sc.ops = &meta_scrub_ops[sm->sm_type]; sc.sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type); retry_op: + /* + * If freeze runs concurrently with a scrub, the freeze can be delayed + * indefinitely as we walk the filesystem and iterate over metadata + * buffers. Freeze quiesces the log (which waits for the buffer LRU to + * be emptied) and that won't happen while checking is running. + */ + sb_start_write(mp->m_super); + /* Set up for the operation. */ error = sc.ops->setup(&sc, ip); if (error) diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 9eaab2eb5ed3..2c6c248be823 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -24,9 +24,9 @@ xchk_btree_cur_fsbno( return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn); else if (level == cur->bc_nlevels - 1 && cur->bc_flags & XFS_BTREE_LONG_PTRS) - return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_private.b.ip->i_ino); + return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_ino.ip->i_ino); else if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) - return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, 0); + return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.agno, 0); return NULLFSBLOCK; } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 096203119934..e46f5cef90da 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -379,7 +379,7 @@ TRACE_EVENT(xchk_ifork_btree_op_error, xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); __entry->dev = sc->mp->m_super->s_dev; __entry->ino = sc->ip->i_ino; - __entry->whichfork = cur->bc_private.b.whichfork; + __entry->whichfork = cur->bc_ino.whichfork; __entry->type = sc->sm->sm_type; __entry->btnum = cur->bc_btnum; __entry->level = level; @@ -459,7 +459,7 @@ TRACE_EVENT(xchk_ifork_btree_error, xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); __entry->dev = sc->mp->m_super->s_dev; __entry->ino = sc->ip->i_ino; - __entry->whichfork = cur->bc_private.b.whichfork; + __entry->whichfork = cur->bc_ino.whichfork; __entry->type = sc->sm->sm_type; __entry->btnum = cur->bc_btnum; __entry->level = level; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index cd743fad8478..d4c687b5cd06 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -14,6 +14,8 @@ #include "xfs_trace.h" #include "xfs_error.h" #include "xfs_acl.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include <linux/posix_acl_xattr.h> @@ -67,10 +69,12 @@ xfs_acl_from_disk( switch (acl_e->e_tag) { case ACL_USER: - acl_e->e_uid = xfs_uid_to_kuid(be32_to_cpu(ace->ae_id)); + acl_e->e_uid = make_kuid(&init_user_ns, + be32_to_cpu(ace->ae_id)); break; case ACL_GROUP: - acl_e->e_gid = xfs_gid_to_kgid(be32_to_cpu(ace->ae_id)); + acl_e->e_gid = make_kgid(&init_user_ns, + be32_to_cpu(ace->ae_id)); break; case ACL_USER_OBJ: case ACL_GROUP_OBJ: @@ -103,10 +107,12 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) ace->ae_tag = cpu_to_be32(acl_e->e_tag); switch (acl_e->e_tag) { case ACL_USER: - ace->ae_id = cpu_to_be32(xfs_kuid_to_uid(acl_e->e_uid)); + ace->ae_id = cpu_to_be32( + from_kuid(&init_user_ns, acl_e->e_uid)); break; case ACL_GROUP: - ace->ae_id = cpu_to_be32(xfs_kgid_to_gid(acl_e->e_gid)); + ace->ae_id = cpu_to_be32( + from_kgid(&init_user_ns, acl_e->e_gid)); break; default: ace->ae_id = cpu_to_be32(ACL_UNDEFINED_ID); @@ -120,102 +126,86 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) struct posix_acl * xfs_get_acl(struct inode *inode, int type) { - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl = NULL; - struct xfs_acl *xfs_acl = NULL; - unsigned char *ea_name; - int error; - int len; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct posix_acl *acl = NULL; + struct xfs_da_args args = { + .dp = ip, + .attr_filter = XFS_ATTR_ROOT, + .valuelen = XFS_ACL_MAX_SIZE(mp), + }; + int error; trace_xfs_get_acl(ip); switch (type) { case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; + args.name = SGI_ACL_FILE; break; case ACL_TYPE_DEFAULT: - ea_name = SGI_ACL_DEFAULT; + args.name = SGI_ACL_DEFAULT; break; default: BUG(); } + args.namelen = strlen(args.name); /* - * If we have a cached ACLs value just return it, not need to - * go out to the disk. + * If the attribute doesn't exist make sure we have a negative cache + * entry, for any other error assume it is transient. */ - len = XFS_ACL_MAX_SIZE(ip->i_mount); - error = xfs_attr_get(ip, ea_name, strlen(ea_name), - (unsigned char **)&xfs_acl, &len, - ATTR_ALLOC | ATTR_ROOT); - if (error) { - /* - * If the attribute doesn't exist make sure we have a negative - * cache entry, for any other error assume it is transient. - */ - if (error != -ENOATTR) - acl = ERR_PTR(error); - } else { - acl = xfs_acl_from_disk(ip->i_mount, xfs_acl, len, - XFS_ACL_MAX_ENTRIES(ip->i_mount)); - kmem_free(xfs_acl); + error = xfs_attr_get(&args); + if (!error) { + acl = xfs_acl_from_disk(mp, args.value, args.valuelen, + XFS_ACL_MAX_ENTRIES(mp)); + } else if (error != -ENOATTR) { + acl = ERR_PTR(error); } + + kmem_free(args.value); return acl; } int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct xfs_inode *ip = XFS_I(inode); - unsigned char *ea_name; - int error; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_da_args args = { + .dp = ip, + .attr_filter = XFS_ATTR_ROOT, + }; + int error; switch (type) { case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; + args.name = SGI_ACL_FILE; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; - ea_name = SGI_ACL_DEFAULT; + args.name = SGI_ACL_DEFAULT; break; default: return -EINVAL; } + args.namelen = strlen(args.name); if (acl) { - struct xfs_acl *xfs_acl; - int len = XFS_ACL_MAX_SIZE(ip->i_mount); - - xfs_acl = kmem_zalloc_large(len, 0); - if (!xfs_acl) + args.valuelen = XFS_ACL_SIZE(acl->a_count); + args.value = kmem_zalloc_large(args.valuelen, 0); + if (!args.value) return -ENOMEM; - - xfs_acl_to_disk(xfs_acl, acl); - - /* subtract away the unused acl entries */ - len -= sizeof(struct xfs_acl_entry) * - (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count); - - error = xfs_attr_set(ip, ea_name, strlen(ea_name), - (unsigned char *)xfs_acl, len, ATTR_ROOT); - - kmem_free(xfs_acl); - } else { - /* - * A NULL ACL argument means we want to remove the ACL. - */ - error = xfs_attr_remove(ip, ea_name, - strlen(ea_name), - ATTR_ROOT); - - /* - * If the attribute didn't exist to start with that's fine. - */ - if (error == -ENOATTR) - error = 0; + xfs_acl_to_disk(args.value, acl); } + error = xfs_attr_set(&args); + kmem_free(args.value); + + /* + * If the attribute didn't exist to start with that's fine. + */ + if (!acl && error == -ENOATTR) + error = 0; if (!error) set_cached_acl(inode, type, acl); return error; @@ -275,3 +265,19 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return error; } + +/* + * Invalidate any cached ACLs if the user has bypassed the ACL interface. + * We don't validate the content whatsoever so it is caller responsibility to + * provide data in valid format and ensure i_mode is consistent. + */ +void +xfs_forget_acl( + struct inode *inode, + const char *name) +{ + if (!strcmp(name, SGI_ACL_FILE)) + forget_cached_acl(inode, ACL_TYPE_ACCESS); + else if (!strcmp(name, SGI_ACL_DEFAULT)) + forget_cached_acl(inode, ACL_TYPE_DEFAULT); +} diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 94615e34bc86..c042c0868016 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -13,14 +13,16 @@ struct posix_acl; extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +void xfs_forget_acl(struct inode *inode, const char *name); #else static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) { return NULL; } # define xfs_set_acl NULL +static inline void xfs_forget_acl(struct inode *inode, const char *name) +{ +} #endif /* CONFIG_XFS_POSIX_ACL */ -extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags); - #endif /* __XFS_ACL_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 58e937be24ce..9d9cebf18726 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -539,7 +539,7 @@ xfs_discard_page( if (XFS_FORCED_SHUTDOWN(mp)) goto out_invalidate; - xfs_alert(mp, + xfs_alert_ratelimited(mp, "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", page, ip->i_ino, offset); diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index bbfa6ba84dcd..c42f90e16b4f 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -145,8 +145,8 @@ xfs_attr3_node_inactive( * Since this code is recursive (gasp!) we must protect ourselves. */ if (level > XFS_DA_NODE_MAXDEPTH) { + xfs_buf_mark_corrupt(bp); xfs_trans_brelse(*trans, bp); /* no locks for later trans */ - xfs_buf_corruption_error(bp); return -EFSCORRUPTED; } @@ -194,7 +194,7 @@ xfs_attr3_node_inactive( error = xfs_attr3_leaf_inactive(trans, dp, child_bp); break; default: - xfs_buf_corruption_error(child_bp); + xfs_buf_mark_corrupt(child_bp); xfs_trans_brelse(*trans, child_bp); error = -EFSCORRUPTED; break; @@ -289,7 +289,7 @@ xfs_attr3_root_inactive( break; default: error = -EFSCORRUPTED; - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); xfs_trans_brelse(*trans, bp); break; } diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index d37743bdf274..5ff1d929d3b5 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -52,24 +52,19 @@ static int xfs_attr_shortform_list( struct xfs_attr_list_context *context) { - struct attrlist_cursor_kern *cursor; + struct xfs_attrlist_cursor_kern *cursor = &context->cursor; + struct xfs_inode *dp = context->dp; struct xfs_attr_sf_sort *sbuf, *sbp; struct xfs_attr_shortform *sf; struct xfs_attr_sf_entry *sfe; - struct xfs_inode *dp; int sbsize, nsbuf, count, i; int error = 0; - ASSERT(context != NULL); - dp = context->dp; - ASSERT(dp != NULL); ASSERT(dp->i_afp != NULL); sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; ASSERT(sf != NULL); if (!sf->hdr.count) return 0; - cursor = context->cursor; - ASSERT(cursor != NULL); trace_xfs_attr_list_sf(context); @@ -205,7 +200,7 @@ out: STATIC int xfs_attr_node_list_lookup( struct xfs_attr_list_context *context, - struct attrlist_cursor_kern *cursor, + struct xfs_attrlist_cursor_kern *cursor, struct xfs_buf **pbp) { struct xfs_da3_icnode_hdr nodehdr; @@ -279,7 +274,7 @@ xfs_attr_node_list_lookup( return 0; out_corruptbuf: - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); xfs_trans_brelse(tp, bp); return -EFSCORRUPTED; } @@ -288,8 +283,8 @@ STATIC int xfs_attr_node_list( struct xfs_attr_list_context *context) { + struct xfs_attrlist_cursor_kern *cursor = &context->cursor; struct xfs_attr3_icleaf_hdr leafhdr; - struct attrlist_cursor_kern *cursor; struct xfs_attr_leafblock *leaf; struct xfs_da_intnode *node; struct xfs_buf *bp; @@ -299,7 +294,6 @@ xfs_attr_node_list( trace_xfs_attr_node_list(context); - cursor = context->cursor; cursor->initted = 1; /* @@ -394,7 +388,7 @@ xfs_attr3_leaf_list_int( struct xfs_buf *bp, struct xfs_attr_list_context *context) { - struct attrlist_cursor_kern *cursor; + struct xfs_attrlist_cursor_kern *cursor = &context->cursor; struct xfs_attr_leafblock *leaf; struct xfs_attr3_icleaf_hdr ichdr; struct xfs_attr_leaf_entry *entries; @@ -408,7 +402,6 @@ xfs_attr3_leaf_list_int( xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); - cursor = context->cursor; cursor->initted = 1; /* @@ -452,8 +445,8 @@ xfs_attr3_leaf_list_int( } if ((entry->flags & XFS_ATTR_INCOMPLETE) && - !(context->flags & ATTR_INCOMPLETE)) - continue; /* skip incomplete entries */ + !context->allow_incomplete) + continue; if (entry->flags & XFS_ATTR_LOCAL) { xfs_attr_leaf_name_local_t *name_loc; @@ -488,14 +481,15 @@ xfs_attr3_leaf_list_int( * Copy out attribute entries for attr_list(), for leaf attribute lists. */ STATIC int -xfs_attr_leaf_list(xfs_attr_list_context_t *context) +xfs_attr_leaf_list( + struct xfs_attr_list_context *context) { - int error; - struct xfs_buf *bp; + struct xfs_buf *bp; + int error; trace_xfs_attr_leaf_list(context); - context->cursor->blkno = 0; + context->cursor.blkno = 0; error = xfs_attr3_leaf_read(context->tp, context->dp, 0, &bp); if (error) return error; @@ -506,7 +500,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) } int -xfs_attr_list_int_ilocked( +xfs_attr_list_ilocked( struct xfs_attr_list_context *context) { struct xfs_inode *dp = context->dp; @@ -526,12 +520,12 @@ xfs_attr_list_int_ilocked( } int -xfs_attr_list_int( - xfs_attr_list_context_t *context) +xfs_attr_list( + struct xfs_attr_list_context *context) { - int error; - xfs_inode_t *dp = context->dp; - uint lock_mode; + struct xfs_inode *dp = context->dp; + uint lock_mode; + int error; XFS_STATS_INC(dp->i_mount, xs_attr_list); @@ -539,130 +533,7 @@ xfs_attr_list_int( return -EIO; lock_mode = xfs_ilock_attr_map_shared(dp); - error = xfs_attr_list_int_ilocked(context); + error = xfs_attr_list_ilocked(context); xfs_iunlock(dp, lock_mode); return error; } - -#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \ - (((struct attrlist_ent *) 0)->a_name - (char *) 0) -#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \ - ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(uint32_t)-1) \ - & ~(sizeof(uint32_t)-1)) - -/* - * Format an attribute and copy it out to the user's buffer. - * Take care to check values and protect against them changing later, - * we may be reading them directly out of a user buffer. - */ -STATIC void -xfs_attr_put_listent( - xfs_attr_list_context_t *context, - int flags, - unsigned char *name, - int namelen, - int valuelen) -{ - struct attrlist *alist = (struct attrlist *)context->alist; - attrlist_ent_t *aep; - int arraytop; - - ASSERT(!context->seen_enough); - ASSERT(!(context->flags & ATTR_KERNOVAL)); - ASSERT(context->count >= 0); - ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); - ASSERT(context->firstu >= sizeof(*alist)); - ASSERT(context->firstu <= context->bufsize); - - /* - * Only list entries in the right namespace. - */ - if (((context->flags & ATTR_SECURE) == 0) != - ((flags & XFS_ATTR_SECURE) == 0)) - return; - if (((context->flags & ATTR_ROOT) == 0) != - ((flags & XFS_ATTR_ROOT) == 0)) - return; - - arraytop = sizeof(*alist) + - context->count * sizeof(alist->al_offset[0]); - context->firstu -= ATTR_ENTSIZE(namelen); - if (context->firstu < arraytop) { - trace_xfs_attr_list_full(context); - alist->al_more = 1; - context->seen_enough = 1; - return; - } - - aep = (attrlist_ent_t *)&context->alist[context->firstu]; - aep->a_valuelen = valuelen; - memcpy(aep->a_name, name, namelen); - aep->a_name[namelen] = 0; - alist->al_offset[context->count++] = context->firstu; - alist->al_count = context->count; - trace_xfs_attr_list_add(context); - return; -} - -/* - * Generate a list of extended attribute names and optionally - * also value lengths. Positive return value follows the XFS - * convention of being an error, zero or negative return code - * is the length of the buffer returned (negated), indicating - * success. - */ -int -xfs_attr_list( - xfs_inode_t *dp, - char *buffer, - int bufsize, - int flags, - attrlist_cursor_kern_t *cursor) -{ - xfs_attr_list_context_t context; - struct attrlist *alist; - int error; - - /* - * Validate the cursor. - */ - if (cursor->pad1 || cursor->pad2) - return -EINVAL; - if ((cursor->initted == 0) && - (cursor->hashval || cursor->blkno || cursor->offset)) - return -EINVAL; - - /* Only internal consumers can retrieve incomplete attrs. */ - if (flags & ATTR_INCOMPLETE) - return -EINVAL; - - /* - * Check for a properly aligned buffer. - */ - if (((long)buffer) & (sizeof(int)-1)) - return -EFAULT; - if (flags & ATTR_KERNOVAL) - bufsize = 0; - - /* - * Initialize the output buffer. - */ - memset(&context, 0, sizeof(context)); - context.dp = dp; - context.cursor = cursor; - context.resynch = 1; - context.flags = flags; - context.alist = buffer; - context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */ - context.firstu = context.bufsize; - context.put_listent = xfs_attr_put_listent; - - alist = (struct attrlist *)context.alist; - alist->al_count = 0; - alist->al_more = 0; - alist->al_offset[0] = context.bufsize; - - error = xfs_attr_list_int(&context); - ASSERT(error <= 0); - return error; -} diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index e62fb5216341..4f800f7fe888 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1062,7 +1062,6 @@ xfs_collapse_file_space( int error; xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len); xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); - uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); bool done = false; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); @@ -1078,32 +1077,34 @@ xfs_collapse_file_space( if (error) return error; - while (!error && !done) { - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, - &tp); - if (error) - break; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); + if (error) + return error; - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, - ip->i_gdquot, ip->i_pdquot, resblks, 0, - XFS_QMOPT_RES_REGBLKS); - if (error) - goto out_trans_cancel; - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + while (!done) { error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb, &done); if (error) goto out_trans_cancel; + if (done) + break; - error = xfs_trans_commit(tp); + /* finish any deferred frees and roll the transaction */ + error = xfs_defer_finish(&tp); + if (error) + goto out_trans_cancel; } + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_trans_cancel: xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -1146,35 +1147,41 @@ xfs_insert_file_space( if (error) return error; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + /* * The extent shifting code works on extent granularity. So, if stop_fsb * is not the starting block of extent, we need to split the extent at * stop_fsb. */ - error = xfs_bmap_split_extent(ip, stop_fsb); + error = xfs_bmap_split_extent(tp, ip, stop_fsb); if (error) - return error; + goto out_trans_cancel; - while (!error && !done) { - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, - &tp); + do { + error = xfs_trans_roll_inode(&tp, ip); if (error) - break; + goto out_trans_cancel; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb, &done, stop_fsb); if (error) goto out_trans_cancel; + } while (!done); - error = xfs_trans_commit(tp); - } - + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_trans_cancel: xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -1442,12 +1449,12 @@ xfs_swap_extent_forks( * event of a crash. Set the owner change log flags now and leave the * bmbt scan as the last step. */ - if (ip->i_d.di_version == 3 && - ip->i_d.di_format == XFS_DINODE_FMT_BTREE) - (*target_log_flags) |= XFS_ILOG_DOWNER; - if (tip->i_d.di_version == 3 && - tip->i_d.di_format == XFS_DINODE_FMT_BTREE) - (*src_log_flags) |= XFS_ILOG_DOWNER; + if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { + if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) + (*target_log_flags) |= XFS_ILOG_DOWNER; + if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) + (*src_log_flags) |= XFS_ILOG_DOWNER; + } /* * Swap the data forks of the inodes @@ -1482,7 +1489,7 @@ xfs_swap_extent_forks( (*src_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: - ASSERT(ip->i_d.di_version < 3 || + ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) || (*src_log_flags & XFS_ILOG_DOWNER)); (*src_log_flags) |= XFS_ILOG_DBROOT; break; @@ -1494,7 +1501,7 @@ xfs_swap_extent_forks( break; case XFS_DINODE_FMT_BTREE: (*target_log_flags) |= XFS_ILOG_DBROOT; - ASSERT(tip->i_d.di_version < 3 || + ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) || (*target_log_flags & XFS_ILOG_DOWNER)); break; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 217e4f82a44a..f880141a2268 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -727,8 +727,9 @@ found: if (!bp->b_addr) { error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { - xfs_warn(target->bt_mount, - "%s: failed to map pagesn", __func__); + xfs_warn_ratelimited(target->bt_mount, + "%s: failed to map %u pages", __func__, + bp->b_page_count); xfs_buf_relse(bp); return error; } @@ -1238,7 +1239,7 @@ xfs_buf_ioerror_alert( struct xfs_buf *bp, xfs_failaddr_t func) { - xfs_alert(bp->b_mount, + xfs_alert_ratelimited(bp->b_mount, "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, -bp->b_error); @@ -1573,6 +1574,28 @@ xfs_buf_zero( } /* + * Log a message about and stale a buffer that a caller has decided is corrupt. + * + * This function should be called for the kinds of metadata corruption that + * cannot be detect from a verifier, such as incorrect inter-block relationship + * data. Do /not/ call this function from a verifier function. + * + * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will + * be marked stale, but b_error will not be set. The caller is responsible for + * releasing the buffer or fixing it. + */ +void +__xfs_buf_mark_corrupt( + struct xfs_buf *bp, + xfs_failaddr_t fa) +{ + ASSERT(bp->b_flags & XBF_DONE); + + xfs_buf_corruption_error(bp, fa); + xfs_buf_stale(bp); +} + +/* * Handling of buffer targets (buftargs). */ diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d79a1fe5d738..9a04c53c2488 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -272,6 +272,8 @@ static inline int xfs_buf_submit(struct xfs_buf *bp) } void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize); +void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa); +#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address) /* Buffer Utility Routines */ extern void *xfs_buf_offset(struct xfs_buf *, size_t); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 663810e6cd59..1545657c3ca0 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -345,7 +345,7 @@ xfs_buf_item_format( * occurs during recovery. */ if (bip->bli_flags & XFS_BLI_INODE_BUF) { - if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) || + if (xfs_sb_version_has_v3inode(&lip->li_mountp->m_sb) || !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && xfs_log_item_in_current_chkpt(lip))) bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 0d3b640cf1cc..871ec22c9aee 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -147,7 +147,7 @@ xfs_dir2_block_getdents( xfs_off_t cook; struct xfs_da_geometry *geo = args->geo; int lock_mode; - unsigned int offset; + unsigned int offset, next_offset; unsigned int end; /* @@ -173,9 +173,10 @@ xfs_dir2_block_getdents( * Loop over the data portion of the block. * Each object is a real entry (dep) or an unused one (dup). */ - offset = geo->data_entry_offset; end = xfs_dir3_data_end_offset(geo, bp->b_addr); - while (offset < end) { + for (offset = geo->data_entry_offset; + offset < end; + offset = next_offset) { struct xfs_dir2_data_unused *dup = bp->b_addr + offset; struct xfs_dir2_data_entry *dep = bp->b_addr + offset; uint8_t filetype; @@ -184,14 +185,15 @@ xfs_dir2_block_getdents( * Unused, skip it. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - offset += be16_to_cpu(dup->length); + next_offset = offset + be16_to_cpu(dup->length); continue; } /* * Bump pointer for the next iteration. */ - offset += xfs_dir2_data_entsize(dp->i_mount, dep->namelen); + next_offset = offset + + xfs_dir2_data_entsize(dp->i_mount, dep->namelen); /* * The entry is before the desired starting point, skip it. diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 0b8350e84d28..f979d0d7e6cd 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -31,6 +31,7 @@ xfs_trim_extents( struct block_device *bdev = mp->m_ddev_targp->bt_bdev; struct xfs_btree_cur *cur; struct xfs_buf *agbp; + struct xfs_agf *agf; struct xfs_perag *pag; int error; int i; @@ -47,14 +48,14 @@ xfs_trim_extents( error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) goto out_put_perag; + agf = agbp->b_addr; cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); /* * Look up the longest btree in the AGF and start with it. */ - error = xfs_alloc_lookup_ge(cur, 0, - be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); + error = xfs_alloc_lookup_ge(cur, 0, be32_to_cpu(agf->agf_longest), &i); if (error) goto out_del_cursor; @@ -75,7 +76,7 @@ xfs_trim_extents( error = -EFSCORRUPTED; goto out_del_cursor; } - ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); + ASSERT(flen <= be32_to_cpu(agf->agf_longest)); /* * use daddr format for all range/len calculations as that is diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index d223e1ae90a6..711376ca269f 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -829,9 +829,9 @@ xfs_qm_id_for_quotatype( { switch (type) { case XFS_DQ_USER: - return ip->i_d.di_uid; + return i_uid_read(VFS_I(ip)); case XFS_DQ_GROUP: - return ip->i_d.di_gid; + return i_gid_read(VFS_I(ip)); case XFS_DQ_PROJ: return ip->i_d.di_projid; } diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index d60647d7197b..cf65e2e43c6e 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -307,36 +307,62 @@ xfs_qm_qoffend_logitem_committed( { struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; - struct xfs_ail *ailp = qfs->qql_item.li_ailp; - /* - * Delete the qoff-start logitem from the AIL. - * xfs_trans_ail_delete() drops the AIL lock. - */ - spin_lock(&ailp->ail_lock); - xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR); + xfs_qm_qoff_logitem_relse(qfs); - kmem_free(qfs->qql_item.li_lv_shadow); kmem_free(lip->li_lv_shadow); - kmem_free(qfs); kmem_free(qfe); return (xfs_lsn_t)-1; } +STATIC void +xfs_qm_qoff_logitem_release( + struct xfs_log_item *lip) +{ + struct xfs_qoff_logitem *qoff = QOFF_ITEM(lip); + + if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { + if (qoff->qql_start_lip) + xfs_qm_qoff_logitem_relse(qoff->qql_start_lip); + xfs_qm_qoff_logitem_relse(qoff); + } +} + static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { .iop_size = xfs_qm_qoff_logitem_size, .iop_format = xfs_qm_qoff_logitem_format, .iop_committed = xfs_qm_qoffend_logitem_committed, .iop_push = xfs_qm_qoff_logitem_push, + .iop_release = xfs_qm_qoff_logitem_release, }; static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { .iop_size = xfs_qm_qoff_logitem_size, .iop_format = xfs_qm_qoff_logitem_format, .iop_push = xfs_qm_qoff_logitem_push, + .iop_release = xfs_qm_qoff_logitem_release, }; /* + * Delete the quotaoff intent from the AIL and free it. On success, + * this should only be called for the start item. It can be used for + * either on shutdown or abort. + */ +void +xfs_qm_qoff_logitem_relse( + struct xfs_qoff_logitem *qoff) +{ + struct xfs_log_item *lip = &qoff->qql_item; + + ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags) || + test_bit(XFS_LI_ABORTED, &lip->li_flags) || + XFS_FORCED_SHUTDOWN(lip->li_mountp)); + xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); + kmem_free(lip->li_lv_shadow); + kmem_free(qoff); +} + +/* * Allocate and initialize an quotaoff item of the correct quota type(s). */ struct xfs_qoff_logitem * diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 3bb19e556ade..2b86a43d7ce2 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -28,6 +28,7 @@ void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp); struct xfs_qoff_logitem *xfs_qm_qoff_logitem_init(struct xfs_mount *mp, struct xfs_qoff_logitem *start, uint flags); +void xfs_qm_qoff_logitem_relse(struct xfs_qoff_logitem *); struct xfs_qoff_logitem *xfs_trans_get_qoff_item(struct xfs_trans *tp, struct xfs_qoff_logitem *startqoff, uint flags); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 331765afc53e..a21e9cc6516a 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -345,16 +345,19 @@ xfs_corruption_error( * Complain about the kinds of metadata corruption that we can't detect from a * verifier, such as incorrect inter-block relationship data. Does not set * bp->b_error. + * + * Call xfs_buf_mark_corrupt, not this function. */ void xfs_buf_corruption_error( - struct xfs_buf *bp) + struct xfs_buf *bp, + xfs_failaddr_t fa) { struct xfs_mount *mp = bp->b_mount; xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, "Metadata corruption detected at %pS, %s block 0x%llx", - __return_address, bp->b_ops->name, bp->b_bn); + fa, bp->b_ops->name, bp->b_bn); xfs_alert(mp, "Unmount and run xfs_repair"); diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 31a5d321ba9a..1717b7508356 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -15,7 +15,7 @@ extern void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp, const void *buf, size_t bufsize, const char *filename, int linenum, xfs_failaddr_t failaddr); -void xfs_buf_corruption_error(struct xfs_buf *bp); +void xfs_buf_corruption_error(struct xfs_buf *bp, xfs_failaddr_t fa); extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error, const char *name, const void *buf, size_t bufsz, xfs_failaddr_t failaddr); diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 918456ca29e1..4eebcec4aae6 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -344,7 +344,7 @@ xfs_getfsmap_datadev_helper( xfs_fsblock_t fsb; xfs_daddr_t rec_daddr; - fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->rm_startblock); + fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.agno, rec->rm_startblock); rec_daddr = XFS_FSB_TO_DADDR(mp, fsb); return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); @@ -362,7 +362,7 @@ xfs_getfsmap_datadev_bnobt_helper( struct xfs_rmap_irec irec; xfs_daddr_t rec_daddr; - rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_private.a.agno, + rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_ag.agno, rec->ar_startblock); irec.rm_startblock = rec->ar_startblock; @@ -896,6 +896,14 @@ xfs_getfsmap( info.format_arg = arg; info.head = head; + /* + * If fsmap runs concurrently with a scrub, the freeze can be delayed + * indefinitely as we walk the rmapbt and iterate over metadata + * buffers. Freeze quiesces the log (which waits for the buffer LRU to + * be emptied) and that won't happen while we're reading buffers. + */ + sb_start_write(mp->m_super); + /* For each device we support... */ for (i = 0; i < XFS_GETFSMAP_DEVS; i++) { /* Is this device within the range the user asked for? */ @@ -935,6 +943,7 @@ xfs_getfsmap( if (tp) xfs_trans_cancel(tp); + sb_end_write(mp->m_super); head->fmh_oflags = FMH_OF_DEV_T; return error; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 8dc2e5414276..a7be7a9e5c1a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -289,6 +289,8 @@ xfs_reinit_inode( uint64_t version = inode_peek_iversion(inode); umode_t mode = inode->i_mode; dev_t dev = inode->i_rdev; + kuid_t uid = inode->i_uid; + kgid_t gid = inode->i_gid; error = inode_init_always(mp->m_super, inode); @@ -297,6 +299,8 @@ xfs_reinit_inode( inode_set_iversion_queried(inode, version); inode->i_mode = mode; inode->i_rdev = dev; + inode->i_uid = uid; + inode->i_gid = gid; return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c5077e6326c7..14b922f2a6db 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -801,26 +801,18 @@ xfs_ialloc( return error; ASSERT(ip != NULL); inode = VFS_I(ip); - - /* - * We always convert v1 inodes to v2 now - we only support filesystems - * with >= v2 inode capability, so there is no reason for ever leaving - * an inode in v1 format. - */ - if (ip->i_d.di_version == 1) - ip->i_d.di_version = 2; - inode->i_mode = mode; set_nlink(inode, nlink); - ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid()); - ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid()); + inode->i_uid = current_fsuid(); inode->i_rdev = rdev; ip->i_d.di_projid = prid; if (pip && XFS_INHERIT_GID(pip)) { - ip->i_d.di_gid = pip->i_d.di_gid; + inode->i_gid = VFS_I(pip)->i_gid; if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode)) inode->i_mode |= S_ISGID; + } else { + inode->i_gid = current_fsgid(); } /* @@ -828,9 +820,8 @@ xfs_ialloc( * ID or one of the supplementary group IDs, the S_ISGID bit is cleared * (and only if the irix_sgid_inherit compatibility variable is set). */ - if ((irix_sgid_inherit) && - (inode->i_mode & S_ISGID) && - (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) + if (irix_sgid_inherit && + (inode->i_mode & S_ISGID) && !in_group_p(inode->i_gid)) inode->i_mode &= ~S_ISGID; ip->i_d.di_size = 0; @@ -847,14 +838,13 @@ xfs_ialloc( ip->i_d.di_dmstate = 0; ip->i_d.di_flags = 0; - if (ip->i_d.di_version == 3) { + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { inode_set_iversion(inode, 1); ip->i_d.di_flags2 = 0; ip->i_d.di_cowextsize = 0; ip->i_d.di_crtime = tv; } - flags = XFS_ILOG_CORE; switch (mode & S_IFMT) { case S_IFIFO: @@ -907,20 +897,13 @@ xfs_ialloc( ip->i_d.di_flags |= di_flags; } - if (pip && - (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) && - pip->i_d.di_version == 3 && - ip->i_d.di_version == 3) { - uint64_t di_flags2 = 0; - + if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY)) { if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) { - di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_d.di_cowextsize = pip->i_d.di_cowextsize; } if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) - di_flags2 |= XFS_DIFLAG2_DAX; - - ip->i_d.di_flags2 |= di_flags2; + ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX; } /* FALLTHROUGH */ case S_IFLNK: @@ -1122,7 +1105,6 @@ xfs_bumplink( { xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - ASSERT(ip->i_d.di_version > 1); inc_nlink(VFS_I(ip)); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } @@ -1158,8 +1140,7 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), - xfs_kgid_to_gid(current_fsgid()), prid, + error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -1309,8 +1290,7 @@ xfs_create_tmpfile( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), - xfs_kgid_to_gid(current_fsgid()), prid, + error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -2119,7 +2099,7 @@ xfs_iunlink_update_bucket( unsigned int bucket_index, xfs_agino_t new_agino) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); + struct xfs_agi *agi = agibp->b_addr; xfs_agino_t old_value; int offset; @@ -2135,7 +2115,7 @@ xfs_iunlink_update_bucket( * head of the list. */ if (old_value == new_agino) { - xfs_buf_corruption_error(agibp); + xfs_buf_mark_corrupt(agibp); return -EFSCORRUPTED; } @@ -2259,7 +2239,7 @@ xfs_iunlink( error = xfs_read_agi(mp, tp, agno, &agibp); if (error) return error; - agi = XFS_BUF_TO_AGI(agibp); + agi = agibp->b_addr; /* * Get the index into the agi hash table for the list this inode will @@ -2269,7 +2249,7 @@ xfs_iunlink( next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); if (next_agino == agino || !xfs_verify_agino_or_null(mp, agno, next_agino)) { - xfs_buf_corruption_error(agibp); + xfs_buf_mark_corrupt(agibp); return -EFSCORRUPTED; } @@ -2443,7 +2423,7 @@ xfs_iunlink_remove( error = xfs_read_agi(mp, tp, agno, &agibp); if (error) return error; - agi = XFS_BUF_TO_AGI(agibp); + agi = agibp->b_addr; /* * Get the index into the agi hash table for the list this inode will @@ -3807,7 +3787,6 @@ xfs_iflush_int( ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); ASSERT(iip != NULL && iip->ili_fields != 0); - ASSERT(ip->i_d.di_version > 1); /* set *dip = inode's place in the buffer */ dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -3868,7 +3847,7 @@ xfs_iflush_int( * backwards compatibility with old kernels that predate logging all * inode changes. */ - if (ip->i_d.di_version < 3) + if (!xfs_sb_version_has_v3inode(&mp->m_sb)) ip->i_d.di_flushiter++; /* Check the inline fork data before we write out. */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 8bd5d0de6321..4a3d13d4a022 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -125,7 +125,7 @@ xfs_inode_item_size( *nvecs += 2; *nbytes += sizeof(struct xfs_inode_log_format) + - xfs_log_dinode_size(ip->i_d.di_version); + xfs_log_dinode_size(ip->i_mount); xfs_inode_item_data_fork_size(iip, nvecs, nbytes); if (XFS_IFORK_Q(ip)) @@ -305,11 +305,9 @@ xfs_inode_to_log_dinode( struct inode *inode = VFS_I(ip); to->di_magic = XFS_DINODE_MAGIC; - - to->di_version = from->di_version; to->di_format = from->di_format; - to->di_uid = from->di_uid; - to->di_gid = from->di_gid; + to->di_uid = i_uid_read(inode); + to->di_gid = i_gid_read(inode); to->di_projid_lo = from->di_projid & 0xffff; to->di_projid_hi = from->di_projid >> 16; @@ -339,7 +337,8 @@ xfs_inode_to_log_dinode( /* log a dummy value to ensure log structure is fully initialised */ to->di_next_unlinked = NULLAGINO; - if (from->di_version == 3) { + if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { + to->di_version = 3; to->di_changecount = inode_peek_iversion(inode); to->di_crtime.t_sec = from->di_crtime.tv_sec; to->di_crtime.t_nsec = from->di_crtime.tv_nsec; @@ -351,6 +350,7 @@ xfs_inode_to_log_dinode( uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); to->di_flushiter = 0; } else { + to->di_version = 2; to->di_flushiter = from->di_flushiter; } } @@ -370,7 +370,7 @@ xfs_inode_item_format_core( dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE); xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn); - xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version)); + xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_mount)); } /* @@ -395,8 +395,6 @@ xfs_inode_item_format( struct xfs_log_iovec *vecp = NULL; struct xfs_inode_log_format *ilf; - ASSERT(ip->i_d.di_version > 1); - ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); ilf->ilf_type = XFS_LI_INODE; ilf->ilf_ino = ip->i_ino; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d42de92cb283..cdfb3cd9a25b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -35,6 +35,8 @@ #include "xfs_health.h" #include "xfs_reflink.h" #include "xfs_ioctl.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include <linux/mount.h> #include <linux/namei.h> @@ -292,62 +294,173 @@ xfs_readlink_by_handle( return error; } -STATIC int -xfs_attrlist_by_handle( - struct file *parfilp, - void __user *arg) +/* + * Format an attribute and copy it out to the user's buffer. + * Take care to check values and protect against them changing later, + * we may be reading them directly out of a user buffer. + */ +static void +xfs_ioc_attr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen) { - int error = -ENOMEM; - attrlist_cursor_kern_t *cursor; - struct xfs_fsop_attrlist_handlereq __user *p = arg; - xfs_fsop_attrlist_handlereq_t al_hreq; - struct dentry *dentry; - char *kbuf; + struct xfs_attrlist *alist = context->buffer; + struct xfs_attrlist_ent *aep; + int arraytop; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) - return -EFAULT; - if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XFS_XATTR_LIST_MAX) + ASSERT(!context->seen_enough); + ASSERT(context->count >= 0); + ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); + ASSERT(context->firstu >= sizeof(*alist)); + ASSERT(context->firstu <= context->bufsize); + + /* + * Only list entries in the right namespace. + */ + if (context->attr_filter != (flags & XFS_ATTR_NSP_ONDISK_MASK)) + return; + + arraytop = sizeof(*alist) + + context->count * sizeof(alist->al_offset[0]); + + /* decrement by the actual bytes used by the attr */ + context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) + + namelen + 1, sizeof(uint32_t)); + if (context->firstu < arraytop) { + trace_xfs_attr_list_full(context); + alist->al_more = 1; + context->seen_enough = 1; + return; + } + + aep = context->buffer + context->firstu; + aep->a_valuelen = valuelen; + memcpy(aep->a_name, name, namelen); + aep->a_name[namelen] = 0; + alist->al_offset[context->count++] = context->firstu; + alist->al_count = context->count; + trace_xfs_attr_list_add(context); +} + +static unsigned int +xfs_attr_filter( + u32 ioc_flags) +{ + if (ioc_flags & XFS_IOC_ATTR_ROOT) + return XFS_ATTR_ROOT; + if (ioc_flags & XFS_IOC_ATTR_SECURE) + return XFS_ATTR_SECURE; + return 0; +} + +static unsigned int +xfs_attr_flags( + u32 ioc_flags) +{ + if (ioc_flags & XFS_IOC_ATTR_CREATE) + return XATTR_CREATE; + if (ioc_flags & XFS_IOC_ATTR_REPLACE) + return XATTR_REPLACE; + return 0; +} + +int +xfs_ioc_attr_list( + struct xfs_inode *dp, + void __user *ubuf, + int bufsize, + int flags, + struct xfs_attrlist_cursor __user *ucursor) +{ + struct xfs_attr_list_context context = { }; + struct xfs_attrlist *alist; + void *buffer; + int error; + + if (bufsize < sizeof(struct xfs_attrlist) || + bufsize > XFS_XATTR_LIST_MAX) return -EINVAL; /* * Reject flags, only allow namespaces. */ - if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + if (flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) + return -EINVAL; + if (flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) return -EINVAL; - dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + /* + * Validate the cursor. + */ + if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor))) + return -EFAULT; + if (context.cursor.pad1 || context.cursor.pad2) + return -EINVAL; + if (!context.cursor.initted && + (context.cursor.hashval || context.cursor.blkno || + context.cursor.offset)) + return -EINVAL; - kbuf = kmem_zalloc_large(al_hreq.buflen, 0); - if (!kbuf) - goto out_dput; + buffer = kmem_zalloc_large(bufsize, 0); + if (!buffer) + return -ENOMEM; - cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = xfs_attr_list(XFS_I(d_inode(dentry)), kbuf, al_hreq.buflen, - al_hreq.flags, cursor); + /* + * Initialize the output buffer. + */ + context.dp = dp; + context.resynch = 1; + context.attr_filter = xfs_attr_filter(flags); + context.buffer = buffer; + context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.firstu = context.bufsize; + context.put_listent = xfs_ioc_attr_put_listent; + + alist = context.buffer; + alist->al_count = 0; + alist->al_more = 0; + alist->al_offset[0] = context.bufsize; + + error = xfs_attr_list(&context); if (error) - goto out_kfree; + goto out_free; - if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { + if (copy_to_user(ubuf, buffer, bufsize) || + copy_to_user(ucursor, &context.cursor, sizeof(context.cursor))) error = -EFAULT; - goto out_kfree; - } +out_free: + kmem_free(buffer); + return error; +} - if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) - error = -EFAULT; +STATIC int +xfs_attrlist_by_handle( + struct file *parfilp, + struct xfs_fsop_attrlist_handlereq __user *p) +{ + struct xfs_fsop_attrlist_handlereq al_hreq; + struct dentry *dentry; + int error = -ENOMEM; -out_kfree: - kmem_free(kbuf); -out_dput: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&al_hreq, p, sizeof(al_hreq))) + return -EFAULT; + + dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = xfs_ioc_attr_list(XFS_I(d_inode(dentry)), al_hreq.buffer, + al_hreq.buflen, al_hreq.flags, &p->pos); dput(dentry); return error; } -int +static int xfs_attrmulti_attr_get( struct inode *inode, unsigned char *name, @@ -355,31 +468,33 @@ xfs_attrmulti_attr_get( uint32_t *len, uint32_t flags) { - unsigned char *kbuf; - int error = -EFAULT; - size_t namelen; + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .attr_flags = xfs_attr_flags(flags), + .name = name, + .namelen = strlen(name), + .valuelen = *len, + }; + int error; if (*len > XFS_XATTR_SIZE_MAX) return -EINVAL; - kbuf = kmem_zalloc_large(*len, 0); - if (!kbuf) - return -ENOMEM; - namelen = strlen(name); - error = xfs_attr_get(XFS_I(inode), name, namelen, &kbuf, (int *)len, - flags); + error = xfs_attr_get(&args); if (error) goto out_kfree; - if (copy_to_user(ubuf, kbuf, *len)) + *len = args.valuelen; + if (copy_to_user(ubuf, args.value, args.valuelen)) error = -EFAULT; out_kfree: - kmem_free(kbuf); + kmem_free(args.value); return error; } -int +static int xfs_attrmulti_attr_set( struct inode *inode, unsigned char *name, @@ -387,42 +502,75 @@ xfs_attrmulti_attr_set( uint32_t len, uint32_t flags) { - unsigned char *kbuf; + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .attr_flags = xfs_attr_flags(flags), + .name = name, + .namelen = strlen(name), + }; int error; - size_t namelen; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - if (len > XFS_XATTR_SIZE_MAX) - return -EINVAL; - kbuf = memdup_user(ubuf, len); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); + if (ubuf) { + if (len > XFS_XATTR_SIZE_MAX) + return -EINVAL; + args.value = memdup_user(ubuf, len); + if (IS_ERR(args.value)) + return PTR_ERR(args.value); + args.valuelen = len; + } - namelen = strlen(name); - error = xfs_attr_set(XFS_I(inode), name, namelen, kbuf, len, flags); - if (!error) - xfs_forget_acl(inode, name, flags); - kfree(kbuf); + error = xfs_attr_set(&args); + if (!error && (flags & XFS_IOC_ATTR_ROOT)) + xfs_forget_acl(inode, name); + kfree(args.value); return error; } int -xfs_attrmulti_attr_remove( +xfs_ioc_attrmulti_one( + struct file *parfilp, struct inode *inode, - unsigned char *name, + uint32_t opcode, + void __user *uname, + void __user *value, + uint32_t *len, uint32_t flags) { + unsigned char *name; int error; - size_t namelen; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - namelen = strlen(name); - error = xfs_attr_remove(XFS_I(inode), name, namelen, flags); - if (!error) - xfs_forget_acl(inode, name, flags); + if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE)) + return -EINVAL; + + name = strndup_user(uname, MAXNAMELEN); + if (IS_ERR(name)) + return PTR_ERR(name); + + switch (opcode) { + case ATTR_OP_GET: + error = xfs_attrmulti_attr_get(inode, name, value, len, flags); + break; + case ATTR_OP_REMOVE: + value = NULL; + *len = 0; + /* fall through */ + case ATTR_OP_SET: + error = mnt_want_write_file(parfilp); + if (error) + break; + error = xfs_attrmulti_attr_set(inode, name, value, *len, flags); + mnt_drop_write_file(parfilp); + break; + default: + error = -EINVAL; + break; + } + + kfree(name); return error; } @@ -436,7 +584,6 @@ xfs_attrmulti_by_handle( xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -462,63 +609,17 @@ xfs_attrmulti_by_handle( goto out_dput; } - error = -ENOMEM; - attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); - if (!attr_name) - goto out_kfree_ops; - error = 0; for (i = 0; i < am_hreq.opcount; i++) { - if ((ops[i].am_flags & ATTR_ROOT) && - (ops[i].am_flags & ATTR_SECURE)) { - ops[i].am_error = -EINVAL; - continue; - } - ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; - - ops[i].am_error = strncpy_from_user((char *)attr_name, - ops[i].am_attrname, MAXNAMELEN); - if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; - if (ops[i].am_error < 0) - break; - - switch (ops[i].am_opcode) { - case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get( - d_inode(dentry), attr_name, - ops[i].am_attrvalue, &ops[i].am_length, - ops[i].am_flags); - break; - case ATTR_OP_SET: - ops[i].am_error = mnt_want_write_file(parfilp); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_set( - d_inode(dentry), attr_name, - ops[i].am_attrvalue, ops[i].am_length, - ops[i].am_flags); - mnt_drop_write_file(parfilp); - break; - case ATTR_OP_REMOVE: - ops[i].am_error = mnt_want_write_file(parfilp); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_remove( - d_inode(dentry), attr_name, - ops[i].am_flags); - mnt_drop_write_file(parfilp); - break; - default: - ops[i].am_error = -EINVAL; - } + ops[i].am_error = xfs_ioc_attrmulti_one(parfilp, + d_inode(dentry), ops[i].am_opcode, + ops[i].am_attrname, ops[i].am_attrvalue, + &ops[i].am_length, ops[i].am_flags); } if (copy_to_user(am_hreq.ops, ops, size)) error = -EFAULT; - kfree(attr_name); - out_kfree_ops: kfree(ops); out_dput: dput(dentry); @@ -1162,7 +1263,7 @@ xfs_ioctl_setattr_xflags( /* diflags2 only valid for v3 inodes. */ di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); - if (di_flags2 && ip->i_d.di_version < 3) + if (di_flags2 && !xfs_sb_version_has_v3inode(&mp->m_sb)) return -EINVAL; ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags); @@ -1372,8 +1473,7 @@ xfs_ioctl_setattr_check_cowextsize( if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) return 0; - if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) || - ip->i_d.di_version != 3) + if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb)) return -EINVAL; if (fa->fsx_cowextsize == 0) @@ -1434,9 +1534,9 @@ xfs_ioctl_setattr( * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp)) { - code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, - ip->i_d.di_gid, fa->fsx_projid, - XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); + code = xfs_qm_vop_dqalloc(ip, VFS_I(ip)->i_uid, + VFS_I(ip)->i_gid, fa->fsx_projid, + XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); if (code) return code; } @@ -1501,7 +1601,6 @@ xfs_ioctl_setattr( olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_pdquot, pdqp); } - ASSERT(ip->i_d.di_version > 1); ip->i_d.di_projid = fa->fsx_projid; } @@ -1514,7 +1613,7 @@ xfs_ioctl_setattr( ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; else ip->i_d.di_extsize = 0; - if (ip->i_d.di_version == 3 && + if (xfs_sb_version_has_v3inode(&mp->m_sb) && (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) ip->i_d.di_cowextsize = fa->fsx_cowextsize >> mp->m_sb.sb_blocklog; diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 420bd95dc326..bab6a5a92407 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -6,6 +6,11 @@ #ifndef __XFS_IOCTL_H__ #define __XFS_IOCTL_H__ +struct xfs_bstat; +struct xfs_ibulk; +struct xfs_inogrp; + + extern int xfs_ioc_space( struct file *filp, @@ -30,27 +35,11 @@ xfs_readlink_by_handle( struct file *parfilp, xfs_fsop_handlereq_t *hreq); -extern int -xfs_attrmulti_attr_get( - struct inode *inode, - unsigned char *name, - unsigned char __user *ubuf, - uint32_t *len, - uint32_t flags); - -extern int -xfs_attrmulti_attr_set( - struct inode *inode, - unsigned char *name, - const unsigned char __user *ubuf, - uint32_t len, - uint32_t flags); - -extern int -xfs_attrmulti_attr_remove( - struct inode *inode, - unsigned char *name, - uint32_t flags); +int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode, + uint32_t opcode, void __user *uname, void __user *value, + uint32_t *len, uint32_t flags); +int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, int bufsize, + int flags, struct xfs_attrlist_cursor __user *ucursor); extern struct dentry * xfs_handle_to_dentry( @@ -70,10 +59,6 @@ xfs_file_compat_ioctl( unsigned int cmd, unsigned long arg); -struct xfs_ibulk; -struct xfs_bstat; -struct xfs_inogrp; - int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq, const struct xfs_bulkstat *bstat); int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp); diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 769581a79c58..c1771e728117 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -352,56 +352,24 @@ xfs_compat_handlereq_to_dentry( STATIC int xfs_compat_attrlist_by_handle( struct file *parfilp, - void __user *arg) + compat_xfs_fsop_attrlist_handlereq_t __user *p) { - int error; - attrlist_cursor_kern_t *cursor; - compat_xfs_fsop_attrlist_handlereq_t __user *p = arg; compat_xfs_fsop_attrlist_handlereq_t al_hreq; struct dentry *dentry; - char *kbuf; + int error; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&al_hreq, arg, - sizeof(compat_xfs_fsop_attrlist_handlereq_t))) + if (copy_from_user(&al_hreq, p, sizeof(al_hreq))) return -EFAULT; - if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XFS_XATTR_LIST_MAX) - return -EINVAL; - - /* - * Reject flags, only allow namespaces. - */ - if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -EINVAL; dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); if (IS_ERR(dentry)) return PTR_ERR(dentry); - error = -ENOMEM; - kbuf = kmem_zalloc_large(al_hreq.buflen, 0); - if (!kbuf) - goto out_dput; - - cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = xfs_attr_list(XFS_I(d_inode(dentry)), kbuf, al_hreq.buflen, - al_hreq.flags, cursor); - if (error) - goto out_kfree; - - if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { - error = -EFAULT; - goto out_kfree; - } - - if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) - error = -EFAULT; - -out_kfree: - kmem_free(kbuf); -out_dput: + error = xfs_ioc_attr_list(XFS_I(d_inode(dentry)), + compat_ptr(al_hreq.buffer), al_hreq.buflen, + al_hreq.flags, &p->pos); dput(dentry); return error; } @@ -416,7 +384,6 @@ xfs_compat_attrmulti_by_handle( compat_xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -443,64 +410,18 @@ xfs_compat_attrmulti_by_handle( goto out_dput; } - error = -ENOMEM; - attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); - if (!attr_name) - goto out_kfree_ops; - error = 0; for (i = 0; i < am_hreq.opcount; i++) { - if ((ops[i].am_flags & ATTR_ROOT) && - (ops[i].am_flags & ATTR_SECURE)) { - ops[i].am_error = -EINVAL; - continue; - } - ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; - - ops[i].am_error = strncpy_from_user((char *)attr_name, + ops[i].am_error = xfs_ioc_attrmulti_one(parfilp, + d_inode(dentry), ops[i].am_opcode, compat_ptr(ops[i].am_attrname), - MAXNAMELEN); - if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; - if (ops[i].am_error < 0) - break; - - switch (ops[i].am_opcode) { - case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get( - d_inode(dentry), attr_name, - compat_ptr(ops[i].am_attrvalue), - &ops[i].am_length, ops[i].am_flags); - break; - case ATTR_OP_SET: - ops[i].am_error = mnt_want_write_file(parfilp); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_set( - d_inode(dentry), attr_name, - compat_ptr(ops[i].am_attrvalue), - ops[i].am_length, ops[i].am_flags); - mnt_drop_write_file(parfilp); - break; - case ATTR_OP_REMOVE: - ops[i].am_error = mnt_want_write_file(parfilp); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_remove( - d_inode(dentry), attr_name, - ops[i].am_flags); - mnt_drop_write_file(parfilp); - break; - default: - ops[i].am_error = -EINVAL; - } + compat_ptr(ops[i].am_attrvalue), + &ops[i].am_length, ops[i].am_flags); } if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) error = -EFAULT; - kfree(attr_name); - out_kfree_ops: kfree(ops); out_dput: dput(dentry); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 81f2f93caec0..f7a99b3bbcf7 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -22,7 +22,6 @@ #include "xfs_iomap.h" #include "xfs_error.h" -#include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/security.h> #include <linux/iversion.h> @@ -50,10 +49,15 @@ xfs_initxattrs( int error = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - error = xfs_attr_set(ip, xattr->name, - strlen(xattr->name), - xattr->value, xattr->value_len, - ATTR_SECURE); + struct xfs_da_args args = { + .dp = ip, + .attr_filter = XFS_ATTR_SECURE, + .name = xattr->name, + .namelen = strlen(xattr->name), + .value = xattr->value, + .valuelen = xattr->value_len, + }; + error = xfs_attr_set(&args); if (error < 0) break; } @@ -553,7 +557,7 @@ xfs_vn_getattr( stat->blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); - if (ip->i_d.di_version == 3) { + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; stat->btime = ip->i_d.di_crtime; @@ -692,9 +696,7 @@ xfs_setattr_nonsize( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid), - xfs_kgid_to_gid(gid), - ip->i_d.di_projid, + error = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, qflags, &udqp, &gdqp, NULL); if (error) return error; @@ -763,7 +765,6 @@ xfs_setattr_nonsize( olddquot1 = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } - ip->i_d.di_uid = xfs_kuid_to_uid(uid); inode->i_uid = uid; } if (!gid_eq(igid, gid)) { @@ -775,7 +776,6 @@ xfs_setattr_nonsize( olddquot2 = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - ip->i_d.di_gid = xfs_kgid_to_gid(gid); inode->i_gid = gid; } } @@ -1304,9 +1304,6 @@ xfs_setup_inode( /* make the inode look hashed for the writeback code */ inode_fake_hash(inode); - inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); - inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); - i_size_write(inode, ip->i_d.di_size); xfs_diflags_to_iflags(inode, ip); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 4b31c29b7e6b..ff2da28fed90 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -86,8 +86,8 @@ xfs_bulkstat_one_int( */ buf->bs_projectid = ip->i_d.di_projid; buf->bs_ino = ino; - buf->bs_uid = dic->di_uid; - buf->bs_gid = dic->di_gid; + buf->bs_uid = i_uid_read(inode); + buf->bs_gid = i_gid_read(inode); buf->bs_size = dic->di_size; buf->bs_nlink = inode->i_nlink; @@ -110,7 +110,7 @@ xfs_bulkstat_one_int( buf->bs_forkoff = XFS_IFORK_BOFF(ip); buf->bs_version = XFS_BULKSTAT_VERSION_V5; - if (dic->di_version == 3) { + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE) buf->bs_cowextsize_blks = dic->di_cowextsize; } diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 8738bb03f253..9f70d2f68e05 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -60,6 +60,7 @@ typedef __u32 xfs_nlink_t; #include <linux/list_sort.h> #include <linux/ratelimit.h> #include <linux/rhashtable.h> +#include <linux/xattr.h> #include <asm/page.h> #include <asm/div64.h> @@ -163,32 +164,6 @@ struct xstats { extern struct xstats xfsstats; -/* Kernel uid/gid conversion. These are used to convert to/from the on disk - * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. - * The conversion here is type only, the value will remain the same since we - * are converting to the init_user_ns. The uid is later mapped to a particular - * user namespace value when crossing the kernel/user boundary. - */ -static inline uint32_t xfs_kuid_to_uid(kuid_t uid) -{ - return from_kuid(&init_user_ns, uid); -} - -static inline kuid_t xfs_uid_to_kuid(uint32_t uid) -{ - return make_kuid(&init_user_ns, uid); -} - -static inline uint32_t xfs_kgid_to_gid(kgid_t gid) -{ - return from_kgid(&init_user_ns, gid); -} - -static inline kgid_t xfs_gid_to_kgid(uint32_t gid) -{ - return make_kgid(&init_user_ns, gid); -} - static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev) { return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f6006d94a581..4a53768c5397 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -47,8 +47,7 @@ xlog_dealloc_log( /* local state machine functions */ STATIC void xlog_state_done_syncing( - struct xlog_in_core *iclog, - bool aborted); + struct xlog_in_core *iclog); STATIC int xlog_state_get_iclog_space( struct xlog *log, @@ -63,11 +62,6 @@ xlog_state_switch_iclogs( struct xlog_in_core *iclog, int eventual_size); STATIC void -xlog_state_want_sync( - struct xlog *log, - struct xlog_in_core *iclog); - -STATIC void xlog_grant_push_ail( struct xlog *log, int need_bytes); @@ -597,26 +591,21 @@ xlog_state_release_iclog( return 0; } -int +void xfs_log_release_iclog( - struct xfs_mount *mp, struct xlog_in_core *iclog) { - struct xlog *log = mp->m_log; - bool sync; - - if (iclog->ic_state == XLOG_STATE_IOERROR) { - xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); - return -EIO; - } + struct xlog *log = iclog->ic_log; + bool sync = false; if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) { - sync = __xlog_state_release_iclog(log, iclog); + if (iclog->ic_state != XLOG_STATE_IOERROR) + sync = __xlog_state_release_iclog(log, iclog); spin_unlock(&log->l_icloglock); - if (sync) - xlog_sync(log, iclog); } - return 0; + + if (sync) + xlog_sync(log, iclog); } /* @@ -855,6 +844,31 @@ xfs_log_mount_cancel( } /* + * Wait for the iclog to be written disk, or return an error if the log has been + * shut down. + */ +static int +xlog_wait_on_iclog( + struct xlog_in_core *iclog) + __releases(iclog->ic_log->l_icloglock) +{ + struct xlog *log = iclog->ic_log; + + if (!XLOG_FORCED_SHUTDOWN(log) && + iclog->ic_state != XLOG_STATE_ACTIVE && + iclog->ic_state != XLOG_STATE_DIRTY) { + XFS_STATS_INC(log->l_mp, xs_log_force_sleep); + xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); + } else { + spin_unlock(&log->l_icloglock); + } + + if (XLOG_FORCED_SHUTDOWN(log)) + return -EIO; + return 0; +} + +/* * Final log writes as part of unmount. * * Mark the filesystem clean as unmount happens. Note that during relocation @@ -919,20 +933,13 @@ out_err: spin_lock(&log->l_icloglock); iclog = log->l_iclog; atomic_inc(&iclog->ic_refcnt); - xlog_state_want_sync(log, iclog); + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(log, iclog, 0); + else + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || + iclog->ic_state == XLOG_STATE_IOERROR); error = xlog_state_release_iclog(log, iclog); - switch (iclog->ic_state) { - default: - if (!XLOG_FORCED_SHUTDOWN(log)) { - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); - break; - } - /* fall through */ - case XLOG_STATE_ACTIVE: - case XLOG_STATE_DIRTY: - spin_unlock(&log->l_icloglock); - break; - } + xlog_wait_on_iclog(iclog); if (tic) { trace_xfs_log_umount_write(log, tic); @@ -941,6 +948,18 @@ out_err: } } +static void +xfs_log_unmount_verify_iclog( + struct xlog *log) +{ + struct xlog_in_core *iclog = log->l_iclog; + + do { + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + ASSERT(iclog->ic_offset == 0); + } while ((iclog = iclog->ic_next) != log->l_iclog); +} + /* * Unmount record used to have a string "Unmount filesystem--" in the * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE). @@ -948,16 +967,11 @@ out_err: * currently architecture converted and "Unmount" is a bit foo. * As far as I know, there weren't any dependencies on the old behaviour. */ - -static int -xfs_log_unmount_write(xfs_mount_t *mp) +static void +xfs_log_unmount_write( + struct xfs_mount *mp) { - struct xlog *log = mp->m_log; - xlog_in_core_t *iclog; -#ifdef DEBUG - xlog_in_core_t *first_iclog; -#endif - int error; + struct xlog *log = mp->m_log; /* * Don't write out unmount record on norecovery mounts or ro devices. @@ -966,57 +980,16 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (mp->m_flags & XFS_MOUNT_NORECOVERY || xfs_readonly_buftarg(log->l_targ)) { ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); - return 0; + return; } - error = xfs_log_force(mp, XFS_LOG_SYNC); - ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); - -#ifdef DEBUG - first_iclog = iclog = log->l_iclog; - do { - if (iclog->ic_state != XLOG_STATE_IOERROR) { - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); - ASSERT(iclog->ic_offset == 0); - } - iclog = iclog->ic_next; - } while (iclog != first_iclog); -#endif - if (! (XLOG_FORCED_SHUTDOWN(log))) { - xfs_log_write_unmount_record(mp); - } else { - /* - * We're already in forced_shutdown mode, couldn't - * even attempt to write out the unmount transaction. - * - * Go through the motions of sync'ing and releasing - * the iclog, even though no I/O will actually happen, - * we need to wait for other log I/Os that may already - * be in progress. Do this as a separate section of - * code so we'll know if we ever get stuck here that - * we're in this odd situation of trying to unmount - * a file system that went into forced_shutdown as - * the result of an unmount.. - */ - spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - atomic_inc(&iclog->ic_refcnt); - xlog_state_want_sync(log, iclog); - error = xlog_state_release_iclog(log, iclog); - switch (iclog->ic_state) { - case XLOG_STATE_ACTIVE: - case XLOG_STATE_DIRTY: - case XLOG_STATE_IOERROR: - spin_unlock(&log->l_icloglock); - break; - default: - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); - break; - } - } + xfs_log_force(mp, XFS_LOG_SYNC); - return error; -} /* xfs_log_unmount_write */ + if (XLOG_FORCED_SHUTDOWN(log)) + return; + xfs_log_unmount_verify_iclog(log); + xfs_log_write_unmount_record(mp); +} /* * Empty the log for unmount/freeze. @@ -1279,7 +1252,6 @@ xlog_ioend_work( struct xlog_in_core *iclog = container_of(work, struct xlog_in_core, ic_end_io_work); struct xlog *log = iclog->ic_log; - bool aborted = false; int error; error = blk_status_to_errno(iclog->ic_bio.bi_status); @@ -1295,17 +1267,9 @@ xlog_ioend_work( if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) { xfs_alert(log->l_mp, "log I/O error %d", error); xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); - /* - * This flag will be propagated to the trans-committed - * callback routines to let them know that the log-commit - * didn't succeed. - */ - aborted = true; - } else if (iclog->ic_state == XLOG_STATE_IOERROR) { - aborted = true; } - xlog_state_done_syncing(iclog, aborted); + xlog_state_done_syncing(iclog); bio_uninit(&iclog->ic_bio); /* @@ -1739,7 +1703,7 @@ xlog_bio_end_io( &iclog->ic_end_io_work); } -static void +static int xlog_map_iclog_data( struct bio *bio, void *data, @@ -1750,11 +1714,14 @@ xlog_map_iclog_data( unsigned int off = offset_in_page(data); size_t len = min_t(size_t, count, PAGE_SIZE - off); - WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len); + if (bio_add_page(bio, page, len, off) != len) + return -EIO; data += len; count -= len; } while (count); + + return 0; } STATIC void @@ -1784,7 +1751,7 @@ xlog_write_iclog( * the buffer manually, the code needs to be kept in sync * with the I/O completion path. */ - xlog_state_done_syncing(iclog, true); + xlog_state_done_syncing(iclog); up(&iclog->ic_sema); return; } @@ -1798,7 +1765,10 @@ xlog_write_iclog( if (need_flush) iclog->ic_bio.bi_opf |= REQ_PREFLUSH; - xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count); + if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); + return; + } if (is_vmalloc_addr(iclog->ic_data)) flush_kernel_vmap_range(iclog->ic_data, count); @@ -2328,7 +2298,11 @@ xlog_write_copy_finish( *record_cnt = 0; *data_cnt = 0; - xlog_state_want_sync(log, iclog); + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(log, iclog, 0); + else + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || + iclog->ic_state == XLOG_STATE_IOERROR); if (!commit_iclog) goto release_iclog; spin_unlock(&log->l_icloglock); @@ -2575,111 +2549,106 @@ next_lv: ***************************************************************************** */ +static void +xlog_state_activate_iclog( + struct xlog_in_core *iclog, + int *iclogs_changed) +{ + ASSERT(list_empty_careful(&iclog->ic_callbacks)); + + /* + * If the number of ops in this iclog indicate it just contains the + * dummy transaction, we can change state into IDLE (the second time + * around). Otherwise we should change the state into NEED a dummy. + * We don't need to cover the dummy. + */ + if (*iclogs_changed == 0 && + iclog->ic_header.h_num_logops == cpu_to_be32(XLOG_COVER_OPS)) { + *iclogs_changed = 1; + } else { + /* + * We have two dirty iclogs so start over. This could also be + * num of ops indicating this is not the dummy going out. + */ + *iclogs_changed = 2; + } + + iclog->ic_state = XLOG_STATE_ACTIVE; + iclog->ic_offset = 0; + iclog->ic_header.h_num_logops = 0; + memset(iclog->ic_header.h_cycle_data, 0, + sizeof(iclog->ic_header.h_cycle_data)); + iclog->ic_header.h_lsn = 0; +} + /* - * An iclog has just finished IO completion processing, so we need to update - * the iclog state and propagate that up into the overall log state. Hence we - * prepare the iclog for cleaning, and then clean all the pending dirty iclogs - * starting from the head, and then wake up any threads that are waiting for the - * iclog to be marked clean. - * - * The ordering of marking iclogs ACTIVE must be maintained, so an iclog - * doesn't become ACTIVE beyond one that is SYNCING. This is also required to - * maintain the notion that we use a ordered wait queue to hold off would be - * writers to the log when every iclog is trying to sync to disk. - * - * Caller must hold the icloglock before calling us. - * - * State Change: !IOERROR -> DIRTY -> ACTIVE + * Loop through all iclogs and mark all iclogs currently marked DIRTY as + * ACTIVE after iclog I/O has completed. */ -STATIC void -xlog_state_clean_iclog( +static void +xlog_state_activate_iclogs( struct xlog *log, - struct xlog_in_core *dirty_iclog) + int *iclogs_changed) { - struct xlog_in_core *iclog; - int changed = 0; - - /* Prepare the completed iclog. */ - if (dirty_iclog->ic_state != XLOG_STATE_IOERROR) - dirty_iclog->ic_state = XLOG_STATE_DIRTY; + struct xlog_in_core *iclog = log->l_iclog; - /* Walk all the iclogs to update the ordered active state. */ - iclog = log->l_iclog; do { - if (iclog->ic_state == XLOG_STATE_DIRTY) { - iclog->ic_state = XLOG_STATE_ACTIVE; - iclog->ic_offset = 0; - ASSERT(list_empty_careful(&iclog->ic_callbacks)); - /* - * If the number of ops in this iclog indicate it just - * contains the dummy transaction, we can - * change state into IDLE (the second time around). - * Otherwise we should change the state into - * NEED a dummy. - * We don't need to cover the dummy. - */ - if (!changed && - (be32_to_cpu(iclog->ic_header.h_num_logops) == - XLOG_COVER_OPS)) { - changed = 1; - } else { - /* - * We have two dirty iclogs so start over - * This could also be num of ops indicates - * this is not the dummy going out. - */ - changed = 2; - } - iclog->ic_header.h_num_logops = 0; - memset(iclog->ic_header.h_cycle_data, 0, - sizeof(iclog->ic_header.h_cycle_data)); - iclog->ic_header.h_lsn = 0; - } else if (iclog->ic_state == XLOG_STATE_ACTIVE) - /* do nothing */; - else - break; /* stop cleaning */ - iclog = iclog->ic_next; - } while (iclog != log->l_iclog); - + if (iclog->ic_state == XLOG_STATE_DIRTY) + xlog_state_activate_iclog(iclog, iclogs_changed); + /* + * The ordering of marking iclogs ACTIVE must be maintained, so + * an iclog doesn't become ACTIVE beyond one that is SYNCING. + */ + else if (iclog->ic_state != XLOG_STATE_ACTIVE) + break; + } while ((iclog = iclog->ic_next) != log->l_iclog); +} +static int +xlog_covered_state( + int prev_state, + int iclogs_changed) +{ /* - * Wake up threads waiting in xfs_log_force() for the dirty iclog - * to be cleaned. + * We usually go to NEED. But we go to NEED2 if the changed indicates we + * are done writing the dummy record. If we are done with the second + * dummy recored (DONE2), then we go to IDLE. */ - wake_up_all(&dirty_iclog->ic_force_wait); + switch (prev_state) { + case XLOG_STATE_COVER_IDLE: + case XLOG_STATE_COVER_NEED: + case XLOG_STATE_COVER_NEED2: + break; + case XLOG_STATE_COVER_DONE: + if (iclogs_changed == 1) + return XLOG_STATE_COVER_NEED2; + break; + case XLOG_STATE_COVER_DONE2: + if (iclogs_changed == 1) + return XLOG_STATE_COVER_IDLE; + break; + default: + ASSERT(0); + } - /* - * Change state for the dummy log recording. - * We usually go to NEED. But we go to NEED2 if the changed indicates - * we are done writing the dummy record. - * If we are done with the second dummy recored (DONE2), then - * we go to IDLE. - */ - if (changed) { - switch (log->l_covered_state) { - case XLOG_STATE_COVER_IDLE: - case XLOG_STATE_COVER_NEED: - case XLOG_STATE_COVER_NEED2: - log->l_covered_state = XLOG_STATE_COVER_NEED; - break; + return XLOG_STATE_COVER_NEED; +} - case XLOG_STATE_COVER_DONE: - if (changed == 1) - log->l_covered_state = XLOG_STATE_COVER_NEED2; - else - log->l_covered_state = XLOG_STATE_COVER_NEED; - break; +STATIC void +xlog_state_clean_iclog( + struct xlog *log, + struct xlog_in_core *dirty_iclog) +{ + int iclogs_changed = 0; - case XLOG_STATE_COVER_DONE2: - if (changed == 1) - log->l_covered_state = XLOG_STATE_COVER_IDLE; - else - log->l_covered_state = XLOG_STATE_COVER_NEED; - break; + dirty_iclog->ic_state = XLOG_STATE_DIRTY; - default: - ASSERT(0); - } + xlog_state_activate_iclogs(log, &iclogs_changed); + wake_up_all(&dirty_iclog->ic_force_wait); + + if (iclogs_changed) { + log->l_covered_state = xlog_covered_state(log->l_covered_state, + iclogs_changed); } } @@ -2808,8 +2777,7 @@ xlog_state_iodone_process_iclog( static void xlog_state_do_iclog_callbacks( struct xlog *log, - struct xlog_in_core *iclog, - bool aborted) + struct xlog_in_core *iclog) __releases(&log->l_icloglock) __acquires(&log->l_icloglock) { @@ -2821,7 +2789,7 @@ xlog_state_do_iclog_callbacks( list_splice_init(&iclog->ic_callbacks, &tmp); spin_unlock(&iclog->ic_callback_lock); - xlog_cil_process_committed(&tmp, aborted); + xlog_cil_process_committed(&tmp); spin_lock(&iclog->ic_callback_lock); } @@ -2836,8 +2804,7 @@ xlog_state_do_iclog_callbacks( STATIC void xlog_state_do_callback( - struct xlog *log, - bool aborted) + struct xlog *log) { struct xlog_in_core *iclog; struct xlog_in_core *first_iclog; @@ -2878,9 +2845,11 @@ xlog_state_do_callback( * we'll have to run at least one more complete loop. */ cycled_icloglock = true; - xlog_state_do_iclog_callbacks(log, iclog, aborted); - - xlog_state_clean_iclog(log, iclog); + xlog_state_do_iclog_callbacks(log, iclog); + if (XLOG_FORCED_SHUTDOWN(log)) + wake_up_all(&iclog->ic_force_wait); + else + xlog_state_clean_iclog(log, iclog); iclog = iclog->ic_next; } while (first_iclog != iclog); @@ -2916,25 +2885,22 @@ xlog_state_do_callback( */ STATIC void xlog_state_done_syncing( - struct xlog_in_core *iclog, - bool aborted) + struct xlog_in_core *iclog) { struct xlog *log = iclog->ic_log; spin_lock(&log->l_icloglock); - ASSERT(atomic_read(&iclog->ic_refcnt) == 0); /* * If we got an error, either on the first buffer, or in the case of - * split log writes, on the second, we mark ALL iclogs STATE_IOERROR, - * and none should ever be attempted to be written to disk - * again. + * split log writes, on the second, we shut down the file system and + * no iclogs should ever be attempted to be written to disk again. */ - if (iclog->ic_state == XLOG_STATE_SYNCING) + if (!XLOG_FORCED_SHUTDOWN(log)) { + ASSERT(iclog->ic_state == XLOG_STATE_SYNCING); iclog->ic_state = XLOG_STATE_DONE_SYNC; - else - ASSERT(iclog->ic_state == XLOG_STATE_IOERROR); + } /* * Someone could be sleeping prior to writing out the next @@ -2943,9 +2909,8 @@ xlog_state_done_syncing( */ wake_up_all(&iclog->ic_write_wait); spin_unlock(&log->l_icloglock); - xlog_state_do_callback(log, aborted); /* also cleans log */ -} /* xlog_state_done_syncing */ - + xlog_state_do_callback(log); /* also cleans log */ +} /* * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must @@ -3152,11 +3117,12 @@ xlog_ungrant_log_space( } /* - * This routine will mark the current iclog in the ring as WANT_SYNC - * and move the current iclog pointer to the next iclog in the ring. - * When this routine is called from xlog_state_get_iclog_space(), the - * exact size of the iclog has not yet been determined. All we know is - * that every data block. We have run out of space in this log record. + * Mark the current iclog in the ring as WANT_SYNC and move the current iclog + * pointer to the next iclog in the ring. + * + * When called from xlog_state_get_iclog_space(), the exact size of the iclog + * has not yet been determined, all we know is that we have run out of space in + * the current iclog. */ STATIC void xlog_state_switch_iclogs( @@ -3165,6 +3131,8 @@ xlog_state_switch_iclogs( int eventual_size) { ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + assert_spin_locked(&log->l_icloglock); + if (!eventual_size) eventual_size = iclog->ic_offset; iclog->ic_state = XLOG_STATE_WANT_SYNC; @@ -3259,9 +3227,6 @@ xfs_log_force( * previous iclog and go to sleep. */ iclog = iclog->ic_prev; - if (iclog->ic_state == XLOG_STATE_ACTIVE || - iclog->ic_state == XLOG_STATE_DIRTY) - goto out_unlock; } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { if (atomic_read(&iclog->ic_refcnt) == 0) { /* @@ -3277,8 +3242,7 @@ xfs_log_force( if (xlog_state_release_iclog(log, iclog)) goto out_error; - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn || - iclog->ic_state == XLOG_STATE_DIRTY) + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) goto out_unlock; } else { /* @@ -3298,17 +3262,8 @@ xfs_log_force( ; } - if (!(flags & XFS_LOG_SYNC)) - goto out_unlock; - - if (iclog->ic_state == XLOG_STATE_IOERROR) - goto out_error; - XFS_STATS_INC(mp, xs_log_force_sleep); - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); - if (iclog->ic_state == XLOG_STATE_IOERROR) - return -EIO; - return 0; - + if (flags & XFS_LOG_SYNC) + return xlog_wait_on_iclog(iclog); out_unlock: spin_unlock(&log->l_icloglock); return 0; @@ -3339,9 +3294,6 @@ __xfs_log_force_lsn( goto out_unlock; } - if (iclog->ic_state == XLOG_STATE_DIRTY) - goto out_unlock; - if (iclog->ic_state == XLOG_STATE_ACTIVE) { /* * We sleep here if we haven't already slept (e.g. this is the @@ -3375,20 +3327,8 @@ __xfs_log_force_lsn( *log_flushed = 1; } - if (!(flags & XFS_LOG_SYNC) || - (iclog->ic_state == XLOG_STATE_ACTIVE || - iclog->ic_state == XLOG_STATE_DIRTY)) - goto out_unlock; - - if (iclog->ic_state == XLOG_STATE_IOERROR) - goto out_error; - - XFS_STATS_INC(mp, xs_log_force_sleep); - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); - if (iclog->ic_state == XLOG_STATE_IOERROR) - return -EIO; - return 0; - + if (flags & XFS_LOG_SYNC) + return xlog_wait_on_iclog(iclog); out_unlock: spin_unlock(&log->l_icloglock); return 0; @@ -3434,26 +3374,6 @@ xfs_log_force_lsn( return ret; } -/* - * Called when we want to mark the current iclog as being ready to sync to - * disk. - */ -STATIC void -xlog_state_want_sync( - struct xlog *log, - struct xlog_in_core *iclog) -{ - assert_spin_locked(&log->l_icloglock); - - if (iclog->ic_state == XLOG_STATE_ACTIVE) { - xlog_state_switch_iclogs(log, iclog, 0); - } else { - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); - } -} - - /***************************************************************************** * * TICKET functions @@ -3937,7 +3857,7 @@ xfs_log_force_umount( spin_lock(&log->l_cilp->xc_push_lock); wake_up_all(&log->l_cilp->xc_commit_wait); spin_unlock(&log->l_cilp->xc_push_lock); - xlog_state_do_callback(log, true); + xlog_state_do_callback(log); /* return non-zero if log IOERROR transition had already happened */ return retval; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 84e06805160f..cc77cc36560a 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -121,8 +121,7 @@ void xfs_log_mount_cancel(struct xfs_mount *); xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); void xfs_log_space_wake(struct xfs_mount *mp); -int xfs_log_release_iclog(struct xfs_mount *mp, - struct xlog_in_core *iclog); +void xfs_log_release_iclog(struct xlog_in_core *iclog); int xfs_log_reserve(struct xfs_mount *mp, int length, int count, @@ -138,7 +137,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket); void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, xfs_lsn_t *commit_lsn, bool regrant); -void xlog_cil_process_committed(struct list_head *list, bool aborted); +void xlog_cil_process_committed(struct list_head *list); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); void xfs_log_work_queue(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 48435cf2aa16..64cc0bf2ab3b 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -574,10 +574,10 @@ xlog_discard_busy_extents( */ static void xlog_cil_committed( - struct xfs_cil_ctx *ctx, - bool abort) + struct xfs_cil_ctx *ctx) { struct xfs_mount *mp = ctx->cil->xc_log->l_mp; + bool abort = XLOG_FORCED_SHUTDOWN(ctx->cil->xc_log); /* * If the I/O failed, we're aborting the commit and already shutdown. @@ -613,37 +613,38 @@ xlog_cil_committed( void xlog_cil_process_committed( - struct list_head *list, - bool aborted) + struct list_head *list) { struct xfs_cil_ctx *ctx; while ((ctx = list_first_entry_or_null(list, struct xfs_cil_ctx, iclog_entry))) { list_del(&ctx->iclog_entry); - xlog_cil_committed(ctx, aborted); + xlog_cil_committed(ctx); } } /* - * Push the Committed Item List to the log. If @push_seq flag is zero, then it - * is a background flush and so we can chose to ignore it. Otherwise, if the - * current sequence is the same as @push_seq we need to do a flush. If - * @push_seq is less than the current sequence, then it has already been + * Push the Committed Item List to the log. + * + * If the current sequence is the same as xc_push_seq we need to do a flush. If + * xc_push_seq is less than the current sequence, then it has already been * flushed and we don't need to do anything - the caller will wait for it to * complete if necessary. * - * @push_seq is a value rather than a flag because that allows us to do an - * unlocked check of the sequence number for a match. Hence we can allows log - * forces to run racily and not issue pushes for the same sequence twice. If we - * get a race between multiple pushes for the same sequence they will block on - * the first one and then abort, hence avoiding needless pushes. + * xc_push_seq is checked unlocked against the sequence number for a match. + * Hence we can allow log forces to run racily and not issue pushes for the + * same sequence twice. If we get a race between multiple pushes for the same + * sequence they will block on the first one and then abort, hence avoiding + * needless pushes. */ -STATIC int -xlog_cil_push( - struct xlog *log) +static void +xlog_cil_push_work( + struct work_struct *work) { - struct xfs_cil *cil = log->l_cilp; + struct xfs_cil *cil = + container_of(work, struct xfs_cil, xc_push_work); + struct xlog *log = cil->xc_log; struct xfs_log_vec *lv; struct xfs_cil_ctx *ctx; struct xfs_cil_ctx *new_ctx; @@ -657,9 +658,6 @@ xlog_cil_push( xfs_lsn_t commit_lsn; xfs_lsn_t push_seq; - if (!cil) - return 0; - new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS); new_ctx->ticket = xlog_cil_ticket_alloc(log); @@ -867,28 +865,20 @@ restart: spin_unlock(&cil->xc_push_lock); /* release the hounds! */ - return xfs_log_release_iclog(log->l_mp, commit_iclog); + xfs_log_release_iclog(commit_iclog); + return; out_skip: up_write(&cil->xc_ctx_lock); xfs_log_ticket_put(new_ctx->ticket); kmem_free(new_ctx); - return 0; + return; out_abort_free_ticket: xfs_log_ticket_put(tic); out_abort: - xlog_cil_committed(ctx, true); - return -EIO; -} - -static void -xlog_cil_push_work( - struct work_struct *work) -{ - struct xfs_cil *cil = container_of(work, struct xfs_cil, - xc_push_work); - xlog_cil_push(cil->xc_log); + ASSERT(XLOG_FORCED_SHUTDOWN(log)); + xlog_cil_committed(ctx); } /* diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b192c5a9f9fd..2b0aec37e73e 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -402,7 +402,8 @@ struct xlog { #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE)) -#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) +#define XLOG_FORCED_SHUTDOWN(log) \ + (unlikely((log)->l_flags & XLOG_IO_ERROR)) /* common routines */ extern int @@ -525,12 +526,6 @@ xlog_cil_force(struct xlog *log) } /* - * Unmount record type is used as a pseudo transaction type for the ticket. - * It's value must be outside the range of XFS_TRANS_* values. - */ -#define XLOG_UNMOUNT_REC_TYPE (-1U) - -/* * Wrapper function for waiting on a wait queue serialised against wakeups * by a spinlock. This matches the semantics of all the wait queues used in the * log code. diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 25cfc85dbaa7..11c3502b07b1 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2869,8 +2869,8 @@ xfs_recover_inode_owner_change( return -ENOMEM; /* instantiate the inode */ + ASSERT(dip->di_version >= 3); xfs_inode_from_disk(ip, dip); - ASSERT(ip->i_d.di_version >= 3); error = xfs_iformat_fork(ip, dip); if (error) @@ -2997,7 +2997,7 @@ xlog_recover_inode_pass2( * superblock flag to determine whether we need to look at di_flushiter * to skip replay when the on disk inode is newer than the log one */ - if (!xfs_sb_version_hascrc(&mp->m_sb) && + if (!xfs_sb_version_has_v3inode(&mp->m_sb) && ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { /* * Deal with the wrap case, DI_MAX_FLUSH is less @@ -3068,7 +3068,7 @@ xlog_recover_inode_pass2( error = -EFSCORRUPTED; goto out_release; } - isize = xfs_log_dinode_size(ldip->di_version); + isize = xfs_log_dinode_size(mp); if (unlikely(item->ri_buf[1].i_len > isize)) { XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", XFS_ERRLEVEL_LOW, mp, ldip, @@ -4947,7 +4947,7 @@ xlog_recover_clear_agi_bucket( if (error) goto out_abort; - agi = XFS_BUF_TO_AGI(agibp); + agi = agibp->b_addr; agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket); @@ -5083,7 +5083,7 @@ xlog_recover_process_iunlinks( * buffer reference though, so that it stays pinned in memory * while we need the buffer. */ - agi = XFS_BUF_TO_AGI(agibp); + agi = agibp->b_addr; xfs_buf_unlock(agibp); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { @@ -5636,7 +5636,7 @@ xlog_do_recover( /* Convert superblock from on-disk format */ sbp = &mp->m_sb; - xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); + xfs_sb_from_disk(sbp, bp->b_addr); xfs_buf_relse(bp); /* re-initialise in-core superblock and geometry structures */ @@ -5809,7 +5809,6 @@ xlog_recover_check_summary( struct xlog *log) { xfs_mount_t *mp; - xfs_agf_t *agfp; xfs_buf_t *agfbp; xfs_buf_t *agibp; xfs_agnumber_t agno; @@ -5829,7 +5828,8 @@ xlog_recover_check_summary( xfs_alert(mp, "%s agf read failed agno %d error %d", __func__, agno, error); } else { - agfp = XFS_BUF_TO_AGF(agfbp); + struct xfs_agf *agfp = agfbp->b_addr; + freeblks += be32_to_cpu(agfp->agf_freeblks) + be32_to_cpu(agfp->agf_flcount); xfs_buf_relse(agfbp); @@ -5840,7 +5840,7 @@ xlog_recover_check_summary( xfs_alert(mp, "%s agi read failed agno %d error %d", __func__, agno, error); } else { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); + struct xfs_agi *agi = agibp->b_addr; itotal += be32_to_cpu(agi->agi_count); ifree += be32_to_cpu(agi->agi_freecount); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 56efe140c923..c5513e5a226a 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -310,7 +310,7 @@ reread: /* * Initialize the mount structure from the superblock. */ - xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); + xfs_sb_from_disk(sbp, bp->b_addr); /* * If we haven't validated the superblock, do so now before we try diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 0b0909657bad..cabdb755adae 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -326,16 +326,16 @@ xfs_qm_dqattach_locked( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) { - error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, - doalloc, &ip->i_udquot); + error = xfs_qm_dqattach_one(ip, i_uid_read(VFS_I(ip)), + XFS_DQ_USER, doalloc, &ip->i_udquot); if (error) goto done; ASSERT(ip->i_udquot); } if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) { - error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, - doalloc, &ip->i_gdquot); + error = xfs_qm_dqattach_one(ip, i_gid_read(VFS_I(ip)), + XFS_DQ_GROUP, doalloc, &ip->i_gdquot); if (error) goto done; ASSERT(ip->i_gdquot); @@ -871,12 +871,20 @@ xfs_qm_reset_dqcounts( ddq->d_bcount = 0; ddq->d_icount = 0; ddq->d_rtbcount = 0; - ddq->d_btimer = 0; - ddq->d_itimer = 0; - ddq->d_rtbtimer = 0; - ddq->d_bwarns = 0; - ddq->d_iwarns = 0; - ddq->d_rtbwarns = 0; + + /* + * dquot id 0 stores the default grace period and the maximum + * warning limit that were set by the administrator, so we + * should not reset them. + */ + if (ddq->d_id != 0) { + ddq->d_btimer = 0; + ddq->d_itimer = 0; + ddq->d_rtbtimer = 0; + ddq->d_bwarns = 0; + ddq->d_iwarns = 0; + ddq->d_rtbwarns = 0; + } if (xfs_sb_version_hascrc(&mp->m_sb)) { xfs_update_cksum((char *)&dqb[j], @@ -1613,8 +1621,8 @@ xfs_qm_dqfree_one( int xfs_qm_vop_dqalloc( struct xfs_inode *ip, - xfs_dqid_t uid, - xfs_dqid_t gid, + kuid_t uid, + kgid_t gid, prid_t prid, uint flags, struct xfs_dquot **O_udqpp, @@ -1622,6 +1630,8 @@ xfs_qm_vop_dqalloc( struct xfs_dquot **O_pdqpp) { struct xfs_mount *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + struct user_namespace *user_ns = inode->i_sb->s_user_ns; struct xfs_dquot *uq = NULL; struct xfs_dquot *gq = NULL; struct xfs_dquot *pq = NULL; @@ -1635,7 +1645,7 @@ xfs_qm_vop_dqalloc( xfs_ilock(ip, lockflags); if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) - gid = ip->i_d.di_gid; + gid = inode->i_gid; /* * Attach the dquot(s) to this inode, doing a dquot allocation @@ -1650,7 +1660,7 @@ xfs_qm_vop_dqalloc( } if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { - if (ip->i_d.di_uid != uid) { + if (!uid_eq(inode->i_uid, uid)) { /* * What we need is the dquot that has this uid, and * if we send the inode to dqget, the uid of the inode @@ -1661,7 +1671,8 @@ xfs_qm_vop_dqalloc( * holding ilock. */ xfs_iunlock(ip, lockflags); - error = xfs_qm_dqget(mp, uid, XFS_DQ_USER, true, &uq); + error = xfs_qm_dqget(mp, from_kuid(user_ns, uid), + XFS_DQ_USER, true, &uq); if (error) { ASSERT(error != -ENOENT); return error; @@ -1682,9 +1693,10 @@ xfs_qm_vop_dqalloc( } } if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { - if (ip->i_d.di_gid != gid) { + if (!gid_eq(inode->i_gid, gid)) { xfs_iunlock(ip, lockflags); - error = xfs_qm_dqget(mp, gid, XFS_DQ_GROUP, true, &gq); + error = xfs_qm_dqget(mp, from_kgid(user_ns, gid), + XFS_DQ_GROUP, true, &gq); if (error) { ASSERT(error != -ENOENT); goto error_rele; @@ -1810,7 +1822,7 @@ xfs_qm_vop_chown_reserve( XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; if (XFS_IS_UQUOTA_ON(mp) && udqp && - ip->i_d.di_uid != be32_to_cpu(udqp->q_core.d_id)) { + i_uid_read(VFS_I(ip)) != be32_to_cpu(udqp->q_core.d_id)) { udq_delblks = udqp; /* * If there are delayed allocation blocks, then we have to @@ -1823,7 +1835,7 @@ xfs_qm_vop_chown_reserve( } } if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp && - ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) { + i_gid_read(VFS_I(ip)) != be32_to_cpu(gdqp->q_core.d_id)) { gdq_delblks = gdqp; if (delblks) { ASSERT(ip->i_gdquot); @@ -1920,14 +1932,15 @@ xfs_qm_vop_create_dqattach( if (udqp && XFS_IS_UQUOTA_ON(mp)) { ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); + ASSERT(i_uid_read(VFS_I(ip)) == be32_to_cpu(udqp->q_core.d_id)); ip->i_udquot = xfs_qm_dqhold(udqp); xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } if (gdqp && XFS_IS_GQUOTA_ON(mp)) { ASSERT(ip->i_gdquot == NULL); - ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); + ASSERT(i_gid_read(VFS_I(ip)) == be32_to_cpu(gdqp->q_core.d_id)); + ip->i_gdquot = xfs_qm_dqhold(gdqp); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); } diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 1ea82764bf89..5d5ac65aa1cc 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -29,8 +29,6 @@ xfs_qm_log_quotaoff( int error; struct xfs_qoff_logitem *qoffi; - *qoffstartp = NULL; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp); if (error) goto out; @@ -62,7 +60,7 @@ out: STATIC int xfs_qm_log_quotaoff_end( struct xfs_mount *mp, - struct xfs_qoff_logitem *startqoff, + struct xfs_qoff_logitem **startqoff, uint flags) { struct xfs_trans *tp; @@ -73,9 +71,10 @@ xfs_qm_log_quotaoff_end( if (error) return error; - qoffi = xfs_trans_get_qoff_item(tp, startqoff, + qoffi = xfs_trans_get_qoff_item(tp, *startqoff, flags & XFS_ALL_QUOTA_ACCT); xfs_trans_log_quotaoff_item(tp, qoffi); + *startqoff = NULL; /* * We have to make sure that the transaction is secure on disk before we @@ -103,7 +102,7 @@ xfs_qm_scall_quotaoff( uint dqtype; int error; uint inactivate_flags; - struct xfs_qoff_logitem *qoffstart; + struct xfs_qoff_logitem *qoffstart = NULL; /* * No file system can have quotas enabled on disk but not in core. @@ -228,7 +227,7 @@ xfs_qm_scall_quotaoff( * So, we have QUOTAOFF start and end logitems; the start * logitem won't get overwritten until the end logitem appears... */ - error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); + error = xfs_qm_log_quotaoff_end(mp, &qoffstart, flags); if (error) { /* We're screwed now. Shutdown is the only option. */ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); @@ -261,6 +260,8 @@ xfs_qm_scall_quotaoff( } out_unlock: + if (error && qoffstart) + xfs_qm_qoff_logitem_relse(qoffstart); mutex_unlock(&q->qi_quotaofflock); return error; } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index efe42ae7a2f3..aa8fc1f55fbd 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -86,7 +86,7 @@ extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, struct xfs_mount *, struct xfs_dquot *, struct xfs_dquot *, struct xfs_dquot *, int64_t, long, uint); -extern int xfs_qm_vop_dqalloc(struct xfs_inode *, xfs_dqid_t, xfs_dqid_t, +extern int xfs_qm_vop_dqalloc(struct xfs_inode *, kuid_t, kgid_t, prid_t, uint, struct xfs_dquot **, struct xfs_dquot **, struct xfs_dquot **); extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, @@ -109,7 +109,7 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *); #else static inline int -xfs_qm_vop_dqalloc(struct xfs_inode *ip, xfs_dqid_t uid, xfs_dqid_t gid, +xfs_qm_vop_dqalloc(struct xfs_inode *ip, kuid_t kuid, kgid_t kgid, prid_t prid, uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp, struct xfs_dquot **pdqp) { diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 113883c4f202..f70f1255220b 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -57,13 +57,13 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) /* Loop over all stats groups */ for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { - len += snprintf(buf + len, PATH_MAX - len, "%s", + len += scnprintf(buf + len, PATH_MAX - len, "%s", xstats[i].desc); /* inner loop does each group */ for (; j < xstats[i].endpoint; j++) - len += snprintf(buf + len, PATH_MAX - len, " %u", + len += scnprintf(buf + len, PATH_MAX - len, " %u", counter_val(stats, j)); - len += snprintf(buf + len, PATH_MAX - len, "\n"); + len += scnprintf(buf + len, PATH_MAX - len, "\n"); } /* extra precision counters */ for_each_possible_cpu(i) { @@ -72,9 +72,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes; } - len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", + len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); - len += snprintf(buf + len, PATH_MAX-len, "debug %u\n", + len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n", #if defined(DEBUG) 1); #else diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index d762d42ed0ff..fa0fa3c70f1a 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -182,9 +182,7 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, - xfs_kuid_to_uid(current_fsuid()), - xfs_kgid_to_gid(current_fsgid()), prid, + error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -194,7 +192,7 @@ xfs_symlink( * The symlink will fit into the inode data fork? * There can't be any attributes so we get the whole variable part. */ - if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version)) + if (pathlen <= XFS_LITINO(mp)) fs_blocks = 0; else fs_blocks = xfs_symlink_blocks(mp, pathlen); diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index bc85b89f88ca..120398a37c2a 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -6,6 +6,7 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" +#include "xfs_bit.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -27,6 +28,7 @@ #include "xfs_log_recover.h" #include "xfs_filestream.h" #include "xfs_fsmap.h" +#include "xfs_btree_staging.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e242988f57fb..efc7751550d9 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -35,6 +35,12 @@ struct xfs_icreate_log; struct xfs_owner_info; struct xfs_trans_res; struct xfs_inobt_rec_incore; +union xfs_btree_ptr; + +#define XFS_ATTR_FILTER_FLAGS \ + { XFS_ATTR_ROOT, "ROOT" }, \ + { XFS_ATTR_SECURE, "SECURE" }, \ + { XFS_ATTR_INCOMPLETE, "INCOMPLETE" } DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), @@ -45,39 +51,39 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class, __field(u32, hashval) __field(u32, blkno) __field(u32, offset) - __field(void *, alist) + __field(void *, buffer) __field(int, bufsize) __field(int, count) __field(int, firstu) __field(int, dupcnt) - __field(int, flags) + __field(unsigned int, attr_filter) ), TP_fast_assign( __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; __entry->ino = ctx->dp->i_ino; - __entry->hashval = ctx->cursor->hashval; - __entry->blkno = ctx->cursor->blkno; - __entry->offset = ctx->cursor->offset; - __entry->alist = ctx->alist; + __entry->hashval = ctx->cursor.hashval; + __entry->blkno = ctx->cursor.blkno; + __entry->offset = ctx->cursor.offset; + __entry->buffer = ctx->buffer; __entry->bufsize = ctx->bufsize; __entry->count = ctx->count; __entry->firstu = ctx->firstu; - __entry->flags = ctx->flags; + __entry->attr_filter = ctx->attr_filter; ), TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist %p size %u count %u firstu %u flags %d %s", + "buffer %p size %u count %u firstu %u filter %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->hashval, __entry->blkno, __entry->offset, __entry->dupcnt, - __entry->alist, + __entry->buffer, __entry->bufsize, __entry->count, __entry->firstu, - __entry->flags, - __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) + __print_flags(__entry->attr_filter, "|", + XFS_ATTR_FILTER_FLAGS) ) ) @@ -169,31 +175,31 @@ TRACE_EVENT(xfs_attr_list_node_descend, __field(u32, hashval) __field(u32, blkno) __field(u32, offset) - __field(void *, alist) + __field(void *, buffer) __field(int, bufsize) __field(int, count) __field(int, firstu) __field(int, dupcnt) - __field(int, flags) + __field(unsigned int, attr_filter) __field(u32, bt_hashval) __field(u32, bt_before) ), TP_fast_assign( __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; __entry->ino = ctx->dp->i_ino; - __entry->hashval = ctx->cursor->hashval; - __entry->blkno = ctx->cursor->blkno; - __entry->offset = ctx->cursor->offset; - __entry->alist = ctx->alist; + __entry->hashval = ctx->cursor.hashval; + __entry->blkno = ctx->cursor.blkno; + __entry->offset = ctx->cursor.offset; + __entry->buffer = ctx->buffer; __entry->bufsize = ctx->bufsize; __entry->count = ctx->count; __entry->firstu = ctx->firstu; - __entry->flags = ctx->flags; + __entry->attr_filter = ctx->attr_filter; __entry->bt_hashval = be32_to_cpu(btree->hashval); __entry->bt_before = be32_to_cpu(btree->before); ), TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist %p size %u count %u firstu %u flags %d %s " + "buffer %p size %u count %u firstu %u filter %s " "node hashval %u, node before %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, @@ -201,12 +207,12 @@ TRACE_EVENT(xfs_attr_list_node_descend, __entry->blkno, __entry->offset, __entry->dupcnt, - __entry->alist, + __entry->buffer, __entry->bufsize, __entry->count, __entry->firstu, - __entry->flags, - __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS), + __print_flags(__entry->attr_filter, "|", + XFS_ATTR_FILTER_FLAGS), __entry->bt_hashval, __entry->bt_before) ); @@ -1701,7 +1707,8 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __field(int, namelen) __field(int, valuelen) __field(xfs_dahash_t, hashval) - __field(int, flags) + __field(unsigned int, attr_filter) + __field(unsigned int, attr_flags) __field(int, op_flags) ), TP_fast_assign( @@ -1712,11 +1719,12 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __entry->namelen = args->namelen; __entry->valuelen = args->valuelen; __entry->hashval = args->hashval; - __entry->flags = args->flags; + __entry->attr_filter = args->attr_filter; + __entry->attr_flags = args->attr_flags; __entry->op_flags = args->op_flags; ), TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d valuelen %d " - "hashval 0x%x flags %s op_flags %s", + "hashval 0x%x filter %s flags %s op_flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->namelen, @@ -1724,7 +1732,11 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __entry->namelen, __entry->valuelen, __entry->hashval, - __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS), + __print_flags(__entry->attr_filter, "|", + XFS_ATTR_FILTER_FLAGS), + __print_flags(__entry->attr_flags, "|", + { XATTR_CREATE, "CREATE" }, + { XATTR_REPLACE, "REPLACE" }), __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) ) @@ -3594,6 +3606,151 @@ TRACE_EVENT(xfs_check_new_dalign, __entry->calc_rootino) ) +TRACE_EVENT(xfs_btree_commit_afakeroot, + TP_PROTO(struct xfs_btree_cur *cur), + TP_ARGS(cur), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_btnum_t, btnum) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(unsigned int, levels) + __field(unsigned int, blocks) + ), + TP_fast_assign( + __entry->dev = cur->bc_mp->m_super->s_dev; + __entry->btnum = cur->bc_btnum; + __entry->agno = cur->bc_ag.agno; + __entry->agbno = cur->bc_ag.afake->af_root; + __entry->levels = cur->bc_ag.afake->af_levels; + __entry->blocks = cur->bc_ag.afake->af_blocks; + ), + TP_printk("dev %d:%d btree %s ag %u levels %u blocks %u root %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->agno, + __entry->levels, + __entry->blocks, + __entry->agbno) +) + +TRACE_EVENT(xfs_btree_commit_ifakeroot, + TP_PROTO(struct xfs_btree_cur *cur), + TP_ARGS(cur), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_btnum_t, btnum) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, agino) + __field(unsigned int, levels) + __field(unsigned int, blocks) + __field(int, whichfork) + ), + TP_fast_assign( + __entry->dev = cur->bc_mp->m_super->s_dev; + __entry->btnum = cur->bc_btnum; + __entry->agno = XFS_INO_TO_AGNO(cur->bc_mp, + cur->bc_ino.ip->i_ino); + __entry->agino = XFS_INO_TO_AGINO(cur->bc_mp, + cur->bc_ino.ip->i_ino); + __entry->levels = cur->bc_ino.ifake->if_levels; + __entry->blocks = cur->bc_ino.ifake->if_blocks; + __entry->whichfork = cur->bc_ino.whichfork; + ), + TP_printk("dev %d:%d btree %s ag %u agino %u whichfork %s levels %u blocks %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->agno, + __entry->agino, + __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data", + __entry->levels, + __entry->blocks) +) + +TRACE_EVENT(xfs_btree_bload_level_geometry, + TP_PROTO(struct xfs_btree_cur *cur, unsigned int level, + uint64_t nr_this_level, unsigned int nr_per_block, + unsigned int desired_npb, uint64_t blocks, + uint64_t blocks_with_extra), + TP_ARGS(cur, level, nr_this_level, nr_per_block, desired_npb, blocks, + blocks_with_extra), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_btnum_t, btnum) + __field(unsigned int, level) + __field(unsigned int, nlevels) + __field(uint64_t, nr_this_level) + __field(unsigned int, nr_per_block) + __field(unsigned int, desired_npb) + __field(unsigned long long, blocks) + __field(unsigned long long, blocks_with_extra) + ), + TP_fast_assign( + __entry->dev = cur->bc_mp->m_super->s_dev; + __entry->btnum = cur->bc_btnum; + __entry->level = level; + __entry->nlevels = cur->bc_nlevels; + __entry->nr_this_level = nr_this_level; + __entry->nr_per_block = nr_per_block; + __entry->desired_npb = desired_npb; + __entry->blocks = blocks; + __entry->blocks_with_extra = blocks_with_extra; + ), + TP_printk("dev %d:%d btree %s level %u/%u nr_this_level %llu nr_per_block %u desired_npb %u blocks %llu blocks_with_extra %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->level, + __entry->nlevels, + __entry->nr_this_level, + __entry->nr_per_block, + __entry->desired_npb, + __entry->blocks, + __entry->blocks_with_extra) +) + +TRACE_EVENT(xfs_btree_bload_block, + TP_PROTO(struct xfs_btree_cur *cur, unsigned int level, + uint64_t block_idx, uint64_t nr_blocks, + union xfs_btree_ptr *ptr, unsigned int nr_records), + TP_ARGS(cur, level, block_idx, nr_blocks, ptr, nr_records), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_btnum_t, btnum) + __field(unsigned int, level) + __field(unsigned long long, block_idx) + __field(unsigned long long, nr_blocks) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(unsigned int, nr_records) + ), + TP_fast_assign( + __entry->dev = cur->bc_mp->m_super->s_dev; + __entry->btnum = cur->bc_btnum; + __entry->level = level; + __entry->block_idx = block_idx; + __entry->nr_blocks = nr_blocks; + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + xfs_fsblock_t fsb = be64_to_cpu(ptr->l); + + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsb); + __entry->agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsb); + } else { + __entry->agno = cur->bc_ag.agno; + __entry->agbno = be32_to_cpu(ptr->s); + } + __entry->nr_records = nr_records; + ), + TP_printk("dev %d:%d btree %s level %u block %llu/%llu fsb (%u/%u) recs %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->level, + __entry->block_idx, + __entry->nr_blocks, + __entry->agno, + __entry->agbno, + __entry->nr_records) +) + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 3b208f9a865c..1adc6bc53a56 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -306,6 +306,11 @@ xfs_trans_alloc( * * Note the zero-length reservation; this transaction MUST be cancelled * without any dirty data. + * + * Callers should obtain freeze protection to avoid two conflicts with fs + * freezing: (1) having active transactions trip the m_active_trans ASSERTs; + * and (2) grabbing buffers at the same time that freeze is trying to drain + * the buffer LRU list. */ int xfs_trans_alloc_empty( @@ -450,7 +455,7 @@ xfs_trans_apply_sb_deltas( int whole = 0; bp = xfs_trans_getsb(tp, tp->t_mountp); - sbp = XFS_BUF_TO_SBP(bp); + sbp = bp->b_addr; /* * Check that superblock mods match the mods made to AGF counters. diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 00cc5b8734be..2ef0dfbfb303 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -32,6 +32,7 @@ STATIC void xfs_ail_check( struct xfs_ail *ailp, struct xfs_log_item *lip) + __must_hold(&ailp->ail_lock) { struct xfs_log_item *prev_lip; struct xfs_log_item *next_lip; @@ -529,8 +530,9 @@ xfsaild( { struct xfs_ail *ailp = data; long tout = 0; /* milliseconds */ + unsigned int noreclaim_flag; - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); set_freezable(); while (1) { @@ -601,6 +603,7 @@ xfsaild( tout = xfsaild_push(ailp); } + memalloc_noreclaim_restore(noreclaim_flag); return 0; } diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index b0fedb543f97..fc5d7276026e 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -12,53 +12,30 @@ #include "xfs_inode.h" #include "xfs_attr.h" #include "xfs_acl.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include <linux/posix_acl_xattr.h> -#include <linux/xattr.h> static int xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *value, size_t size) { - int xflags = handler->flags; - struct xfs_inode *ip = XFS_I(inode); - int error, asize = size; - size_t namelen = strlen(name); - - /* Convert Linux syscall to XFS internal ATTR flags */ - if (!size) { - xflags |= ATTR_KERNOVAL; - value = NULL; - } + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = handler->flags, + .name = name, + .namelen = strlen(name), + .value = value, + .valuelen = size, + }; + int error; - error = xfs_attr_get(ip, name, namelen, (unsigned char **)&value, - &asize, xflags); + error = xfs_attr_get(&args); if (error) return error; - return asize; -} - -void -xfs_forget_acl( - struct inode *inode, - const char *name, - int xflags) -{ - /* - * Invalidate any cached ACLs if the user has bypassed the ACL - * interface. We don't validate the content whatsoever so it is caller - * responsibility to provide data in valid format and ensure i_mode is - * consistent. - */ - if (xflags & ATTR_ROOT) { -#ifdef CONFIG_XFS_POSIX_ACL - if (!strcmp(name, SGI_ACL_FILE)) - forget_cached_acl(inode, ACL_TYPE_ACCESS); - else if (!strcmp(name, SGI_ACL_DEFAULT)) - forget_cached_acl(inode, ACL_TYPE_DEFAULT); -#endif - } + return args.valuelen; } static int @@ -66,25 +43,20 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { - int xflags = handler->flags; - struct xfs_inode *ip = XFS_I(inode); + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = handler->flags, + .attr_flags = flags, + .name = name, + .namelen = strlen(name), + .value = (void *)value, + .valuelen = size, + }; int error; - size_t namelen = strlen(name); - - /* Convert Linux syscall to XFS internal ATTR flags */ - if (flags & XATTR_CREATE) - xflags |= ATTR_CREATE; - if (flags & XATTR_REPLACE) - xflags |= ATTR_REPLACE; - - if (value) - error = xfs_attr_set(ip, name, namelen, (void *)value, size, - xflags); - else - error = xfs_attr_remove(ip, name, namelen, xflags); - if (!error) - xfs_forget_acl(inode, name, xflags); + error = xfs_attr_set(&args); + if (!error && (handler->flags & XFS_ATTR_ROOT)) + xfs_forget_acl(inode, name); return error; } @@ -97,14 +69,14 @@ static const struct xattr_handler xfs_xattr_user_handler = { static const struct xattr_handler xfs_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .flags = ATTR_ROOT, + .flags = XFS_ATTR_ROOT, .get = xfs_xattr_get, .set = xfs_xattr_set, }; static const struct xattr_handler xfs_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .flags = ATTR_SECURE, + .flags = XFS_ATTR_SECURE, .get = xfs_xattr_get, .set = xfs_xattr_set, }; @@ -134,7 +106,7 @@ __xfs_xattr_put_listent( if (context->count < 0 || context->seen_enough) return; - if (!context->alist) + if (!context->buffer) goto compute_size; arraytop = context->count + prefix_len + namelen + 1; @@ -143,7 +115,7 @@ __xfs_xattr_put_listent( context->seen_enough = 1; return; } - offset = (char *)context->alist + context->count; + offset = context->buffer + context->count; strncpy(offset, prefix, prefix_len); offset += prefix_len; strncpy(offset, (char *)name, namelen); /* real name */ @@ -218,7 +190,6 @@ xfs_vn_listxattr( size_t size) { struct xfs_attr_list_context context; - struct attrlist_cursor_kern cursor = { 0 }; struct inode *inode = d_inode(dentry); int error; @@ -227,14 +198,13 @@ xfs_vn_listxattr( */ memset(&context, 0, sizeof(context)); context.dp = XFS_I(inode); - context.cursor = &cursor; context.resynch = 1; - context.alist = size ? data : NULL; + context.buffer = size ? data : NULL; context.bufsize = size; context.firstu = context.bufsize; context.put_listent = xfs_xattr_put_listent; - error = xfs_attr_list_int(&context); + error = xfs_attr_list(&context); if (error) return error; if (context.count < 0) |