diff options
Diffstat (limited to 'fs/f2fs/f2fs.h')
| -rw-r--r-- | fs/f2fs/f2fs.h | 4011 |
1 files changed, 3105 insertions, 906 deletions
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 94a88b233e98..20edbb99b814 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1,100 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/f2fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H +#include <linux/uio.h> #include <linux/types.h> #include <linux/page-flags.h> -#include <linux/buffer_head.h> #include <linux/slab.h> #include <linux/crc32.h> #include <linux/magic.h> #include <linux/kobject.h> #include <linux/sched.h> +#include <linux/cred.h> +#include <linux/sched/mm.h> #include <linux/vmalloc.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/quotaops.h> -#ifdef CONFIG_F2FS_FS_ENCRYPTION -#include <linux/fscrypt_supp.h> -#else -#include <linux/fscrypt_notsupp.h> -#endif -#include <crypto/hash.h> +#include <linux/part_stat.h> +#include <linux/rw_hint.h> + +#include <linux/fscrypt.h> +#include <linux/fsverity.h> + +struct pagevec; #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) #else #define f2fs_bug_on(sbi, condition) \ do { \ - if (unlikely(condition)) { \ - WARN_ON(1); \ + if (WARN_ON(condition)) \ set_sbi_flag(sbi, SBI_NEED_FSCK); \ - } \ } while (0) #endif -#ifdef CONFIG_F2FS_FAULT_INJECTION enum { FAULT_KMALLOC, + FAULT_KVMALLOC, FAULT_PAGE_ALLOC, + FAULT_PAGE_GET, + FAULT_ALLOC_BIO, /* it's obsolete due to bio_alloc() will never fail */ FAULT_ALLOC_NID, FAULT_ORPHAN, FAULT_BLOCK, FAULT_DIR_DEPTH, FAULT_EVICT_INODE, FAULT_TRUNCATE, - FAULT_IO, + FAULT_READ_IO, FAULT_CHECKPOINT, + FAULT_DISCARD, + FAULT_WRITE_IO, + FAULT_SLAB_ALLOC, + FAULT_DQUOT_INIT, + FAULT_LOCK_OP, + FAULT_BLKADDR_VALIDITY, + FAULT_BLKADDR_CONSISTENCE, + FAULT_NO_SEGMENT, + FAULT_INCONSISTENT_FOOTER, + FAULT_TIMEOUT, + FAULT_VMALLOC, FAULT_MAX, }; +/* indicate which option to update */ +enum fault_option { + FAULT_RATE = 1, /* only update fault rate */ + FAULT_TYPE = 2, /* only update fault type */ + FAULT_ALL = 4, /* reset all fault injection options/stats */ +}; + +#ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info { atomic_t inject_ops; - unsigned int inject_rate; + int inject_rate; unsigned int inject_type; + /* Used to account total count of injection for each type */ + unsigned int inject_count[FAULT_MAX]; }; -extern char *fault_name[FAULT_MAX]; -#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) +extern const char *f2fs_fault_name[FAULT_MAX]; +#define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type)) + +/* maximum retry count for injected failure */ +#define DEFAULT_FAILURE_RETRY_COUNT 8 +#else +#define DEFAULT_FAILURE_RETRY_COUNT 1 #endif /* * For mount options */ -#define F2FS_MOUNT_BG_GC 0x00000001 -#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 -#define F2FS_MOUNT_DISCARD 0x00000004 -#define F2FS_MOUNT_NOHEAP 0x00000008 -#define F2FS_MOUNT_XATTR_USER 0x00000010 -#define F2FS_MOUNT_POSIX_ACL 0x00000020 -#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 -#define F2FS_MOUNT_INLINE_XATTR 0x00000080 -#define F2FS_MOUNT_INLINE_DATA 0x00000100 -#define F2FS_MOUNT_INLINE_DENTRY 0x00000200 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 -#define F2FS_MOUNT_NOBARRIER 0x00000800 -#define F2FS_MOUNT_FASTBOOT 0x00001000 -#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 -#define F2FS_MOUNT_FORCE_FG_GC 0x00004000 -#define F2FS_MOUNT_DATA_FLUSH 0x00008000 -#define F2FS_MOUNT_FAULT_INJECTION 0x00010000 -#define F2FS_MOUNT_ADAPTIVE 0x00020000 -#define F2FS_MOUNT_LFS 0x00040000 -#define F2FS_MOUNT_USRQUOTA 0x00080000 -#define F2FS_MOUNT_GRPQUOTA 0x00100000 - -#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option) +enum f2fs_mount_opt { + F2FS_MOUNT_DISABLE_ROLL_FORWARD, + F2FS_MOUNT_DISCARD, + F2FS_MOUNT_NOHEAP, + F2FS_MOUNT_XATTR_USER, + F2FS_MOUNT_POSIX_ACL, + F2FS_MOUNT_DISABLE_EXT_IDENTIFY, + F2FS_MOUNT_INLINE_XATTR, + F2FS_MOUNT_INLINE_DATA, + F2FS_MOUNT_INLINE_DENTRY, + F2FS_MOUNT_FLUSH_MERGE, + F2FS_MOUNT_NOBARRIER, + F2FS_MOUNT_FASTBOOT, + F2FS_MOUNT_READ_EXTENT_CACHE, + F2FS_MOUNT_DATA_FLUSH, + F2FS_MOUNT_FAULT_INJECTION, + F2FS_MOUNT_USRQUOTA, + F2FS_MOUNT_GRPQUOTA, + F2FS_MOUNT_PRJQUOTA, + F2FS_MOUNT_QUOTA, + F2FS_MOUNT_INLINE_XATTR_SIZE, + F2FS_MOUNT_RESERVE_ROOT, + F2FS_MOUNT_DISABLE_CHECKPOINT, + F2FS_MOUNT_NORECOVERY, + F2FS_MOUNT_ATGC, + F2FS_MOUNT_MERGE_CHECKPOINT, + F2FS_MOUNT_GC_MERGE, + F2FS_MOUNT_COMPRESS_CACHE, + F2FS_MOUNT_AGE_EXTENT_CACHE, + F2FS_MOUNT_NAT_BITS, + F2FS_MOUNT_INLINECRYPT, + /* + * Some f2fs environments expect to be able to pass the "lazytime" option + * string rather than using the MS_LAZYTIME flag, so this must remain. + */ + F2FS_MOUNT_LAZYTIME, + F2FS_MOUNT_RESERVE_NODE, +}; + +#define F2FS_OPTION(sbi) ((sbi)->mount_opt) +#define clear_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt &= ~BIT(F2FS_MOUNT_##option)) +#define set_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt |= BIT(F2FS_MOUNT_##option)) +#define test_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt & BIT(F2FS_MOUNT_##option)) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -106,19 +153,114 @@ typedef u32 block_t; /* */ typedef u32 nid_t; +#define COMPRESS_EXT_NUM 16 + +enum blkzone_allocation_policy { + BLKZONE_ALLOC_PRIOR_SEQ, /* Prioritize writing to sequential zones */ + BLKZONE_ALLOC_ONLY_SEQ, /* Only allow writing to sequential zones */ + BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */ +}; + +enum bggc_io_aware_policy { + AWARE_ALL_IO, /* skip background GC if there is any kind of pending IO */ + AWARE_READ_IO, /* skip background GC if there is pending read IO */ + AWARE_NONE, /* don't aware IO for background GC */ +}; + +enum device_allocation_policy { + ALLOCATE_FORWARD_NOHINT, + ALLOCATE_FORWARD_WITHIN_HINT, + ALLOCATE_FORWARD_FROM_HINT, +}; + +/* + * An implementation of an rwsem that is explicitly unfair to readers. This + * prevents priority inversion when a low-priority reader acquires the read lock + * while sleeping on the write lock but the write lock is needed by + * higher-priority clients. + */ + +struct f2fs_rwsem { + struct rw_semaphore internal_rwsem; +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wait_queue_head_t read_waiters; +#endif +}; + struct f2fs_mount_info { - unsigned int opt; + unsigned long long opt; + block_t root_reserved_blocks; /* root reserved blocks */ + block_t root_reserved_nodes; /* root reserved nodes */ + kuid_t s_resuid; /* reserved blocks for uid */ + kgid_t s_resgid; /* reserved blocks for gid */ + int active_logs; /* # of active logs */ + int inline_xattr_size; /* inline xattr size */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + struct f2fs_fault_info fault_info; /* For fault injection */ +#endif +#ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ +#endif + /* For which write hints are passed down to block layer */ + int alloc_mode; /* segment allocation policy */ + int fsync_mode; /* fsync policy */ + int fs_mode; /* fs mode: LFS or ADAPTIVE */ + int bggc_mode; /* bggc mode: off, on or sync */ + int memory_mode; /* memory mode */ + int errors; /* errors parameter */ + int discard_unit; /* + * discard command's offset/size should + * be aligned to this unit: block, + * segment or section + */ + struct fscrypt_dummy_policy dummy_enc_policy; /* test dummy encryption */ + block_t unusable_cap_perc; /* percentage for cap */ + block_t unusable_cap; /* Amount of space allowed to be + * unusable when disabling checkpoint + */ + + /* For compression */ + unsigned char compress_algorithm; /* algorithm type */ + unsigned char compress_log_size; /* cluster log size */ + unsigned char compress_level; /* compress level */ + bool compress_chksum; /* compressed data chksum */ + unsigned char compress_ext_cnt; /* extension count */ + unsigned char nocompress_ext_cnt; /* nocompress extension count */ + int compress_mode; /* compression mode */ + unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ + unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ + unsigned int lookup_mode; }; -#define F2FS_FEATURE_ENCRYPT 0x0001 -#define F2FS_FEATURE_BLKZONED 0x0002 +#define F2FS_FEATURE_ENCRYPT 0x00000001 +#define F2FS_FEATURE_BLKZONED 0x00000002 +#define F2FS_FEATURE_ATOMIC_WRITE 0x00000004 +#define F2FS_FEATURE_EXTRA_ATTR 0x00000008 +#define F2FS_FEATURE_PRJQUOTA 0x00000010 +#define F2FS_FEATURE_INODE_CHKSUM 0x00000020 +#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x00000040 +#define F2FS_FEATURE_QUOTA_INO 0x00000080 +#define F2FS_FEATURE_INODE_CRTIME 0x00000100 +#define F2FS_FEATURE_LOST_FOUND 0x00000200 +#define F2FS_FEATURE_VERITY 0x00000400 +#define F2FS_FEATURE_SB_CHKSUM 0x00000800 +#define F2FS_FEATURE_CASEFOLD 0x00001000 +#define F2FS_FEATURE_COMPRESSION 0x00002000 +#define F2FS_FEATURE_RO 0x00004000 +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 +#define F2FS_FEATURE_PACKED_SSA 0x00010000 + +#define __F2FS_HAS_FEATURE(raw_super, mask) \ + ((raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask) -#define F2FS_HAS_FEATURE(sb, mask) \ - ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) -#define F2FS_SET_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)) -#define F2FS_CLEAR_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) +/* + * Default values for user and/or group using reserved blocks + */ +#define F2FS_DEF_RESUID 0 +#define F2FS_DEF_RESGID 0 /* * For checkpoint manager @@ -134,34 +276,78 @@ enum { #define CP_RECOVERY 0x00000008 #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 - -#define DEF_BATCHED_TRIM_SECTIONS 2048 -#define BATCHED_TRIM_SEGMENTS(sbi) \ - (GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections)) -#define BATCHED_TRIM_BLOCKS(sbi) \ - (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) -#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) -#define DISCARD_ISSUE_RATE 8 +#define CP_PAUSE 0x00000040 +#define CP_RESIZE 0x00000080 + +#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ +#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ +#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ +#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ +#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_INTERVAL 5 /* 5 secs */ +#define DEF_ENABLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */ +#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */ + +enum cp_time { + CP_TIME_START, /* begin */ + CP_TIME_LOCK, /* after cp_global_sem */ + CP_TIME_OP_LOCK, /* after block_operation */ + CP_TIME_FLUSH_META, /* after flush sit/nat */ + CP_TIME_SYNC_META, /* after sync_meta_pages */ + CP_TIME_SYNC_CP_META, /* after sync cp meta pages */ + CP_TIME_WAIT_DIRTY_META,/* after wait on dirty meta */ + CP_TIME_WAIT_CP_DATA, /* after wait on cp data */ + CP_TIME_FLUSH_DEVICE, /* after flush device cache */ + CP_TIME_WAIT_LAST_CP, /* after wait on last cp pack */ + CP_TIME_END, /* after unblock_operation */ + CP_TIME_MAX, +}; + +/* time cost stats of checkpoint */ +struct cp_stats { + ktime_t times[CP_TIME_MAX]; +}; struct cp_control { int reason; __u64 trim_start; __u64 trim_end; __u64 trim_minlen; - __u64 trimmed; + struct cp_stats stats; +}; + +enum f2fs_cp_phase { + CP_PHASE_START_BLOCK_OPS, + CP_PHASE_FINISH_BLOCK_OPS, + CP_PHASE_FINISH_CHECKPOINT, }; /* - * For CP/NAT/SIT/SSA readahead + * indicate meta/data type */ enum { META_CP, META_NAT, META_SIT, META_SSA, + META_MAX, META_POR, + DATA_GENERIC, /* check range only */ + DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */ + DATA_GENERIC_ENHANCE_READ, /* + * strong check on range and segment + * bitmap but no warning due to race + * condition of read on truncated area + * by extent_cache + */ + DATA_GENERIC_ENHANCE_UPDATE, /* + * strong check on range and segment + * bitmap for update case + */ + META_GENERIC, }; /* for the list of ino */ @@ -169,12 +355,16 @@ enum { ORPHAN_INO, /* for orphan ino list */ APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ + TRANS_DIR_INO, /* for transactions dir ino list */ + XATTR_DIR_INO, /* for xattr updated dir ino list */ + FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; struct ino_entry { - struct list_head list; /* list head */ - nid_t ino; /* inode number */ + struct list_head list; /* list head */ + nid_t ino; /* inode number */ + unsigned int dirty_device; /* dirty device bitmap */ }; /* for the list of inodes to be GCed */ @@ -183,6 +373,38 @@ struct inode_entry { struct inode *inode; /* vfs inode pointer */ }; +struct fsync_node_entry { + struct list_head list; /* list head */ + struct folio *folio; /* warm node folio pointer */ + unsigned int seq_id; /* sequence id */ +}; + +struct ckpt_req { + struct completion wait; /* completion for checkpoint done */ + struct llist_node llnode; /* llist_node to be linked in wait queue */ + int ret; /* return code of checkpoint */ + union { + ktime_t queue_time; /* request queued time */ + ktime_t delta_time; /* time in queue */ + }; +}; + +struct ckpt_req_control { + struct task_struct *f2fs_issue_ckpt; /* checkpoint task */ + int ckpt_thread_ioprio; /* checkpoint merge thread ioprio */ + wait_queue_head_t ckpt_wait_queue; /* waiting queue for wake-up */ + atomic_t issued_ckpt; /* # of actually issued ckpts */ + atomic_t total_ckpt; /* # of total ckpts */ + atomic_t queued_ckpt; /* # of queued ckpts */ + struct llist_head issue_list; /* list for command issue */ + spinlock_t stat_lock; /* lock for below checkpoint time stats */ + unsigned int cur_time; /* cur wait time in msec for currently issued checkpoint */ + unsigned int peak_time; /* peak wait time in msec until now */ +}; + +/* a time threshold that checkpoint was blocked for, unit: ms */ +#define CP_LONG_LATENCY_THRESHOLD 5000 + /* for the bitmap indicate blocks to be discarded */ struct discard_entry { struct list_head list; /* list head */ @@ -190,15 +412,25 @@ struct discard_entry { unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; +/* minimum discard granularity, unit: block count */ +#define MIN_DISCARD_GRANULARITY 1 +/* default discard granularity of inner discard thread, unit: block count */ +#define DEFAULT_DISCARD_GRANULARITY 16 +/* default maximum discard granularity of ordered discard, unit: block count */ +#define DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY 16 +/* default interval of periodical discard submission */ +#define DEFAULT_DISCARD_INTERVAL (msecs_to_jiffies(20)) + /* max discard pend list number */ #define MAX_PLIST_NUM 512 #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ - (MAX_PLIST_NUM - 1) : (blk_num - 1)) + (MAX_PLIST_NUM - 1) : ((blk_num) - 1)) enum { - D_PREP, - D_SUBMIT, - D_DONE, + D_PREP, /* initial */ + D_PARTIAL, /* partially submitted */ + D_SUBMIT, /* all submitted */ + D_DONE, /* finished */ }; struct discard_info { @@ -209,21 +441,44 @@ struct discard_info { struct discard_cmd { struct rb_node rb_node; /* rb node located in rb-tree */ - union { - struct { - block_t lstart; /* logical start address */ - block_t len; /* length */ - block_t start; /* actual start address in dev */ - }; - struct discard_info di; /* discard info */ - - }; + struct discard_info di; /* discard info */ struct list_head list; /* command list */ - struct completion wait; /* compleation */ + struct completion wait; /* completion */ struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ + unsigned char queued; /* queued discard */ int error; /* bio error */ + spinlock_t lock; /* for state/bio_ref updating */ + unsigned short bio_ref; /* bio reference count */ +}; + +enum { + DPOLICY_BG, + DPOLICY_FORCE, + DPOLICY_FSTRIM, + DPOLICY_UMOUNT, + MAX_DPOLICY, +}; + +enum { + DPOLICY_IO_AWARE_DISABLE, /* force to not be aware of IO */ + DPOLICY_IO_AWARE_ENABLE, /* force to be aware of IO */ + DPOLICY_IO_AWARE_MAX, +}; + +struct discard_policy { + int type; /* type of discard */ + unsigned int min_interval; /* used for candidates exist */ + unsigned int mid_interval; /* used for device busy */ + unsigned int max_interval; /* used for candidates not exist */ + unsigned int max_requests; /* # of discards issued per round */ + unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ + bool io_aware; /* issue discard in idle time */ + bool sync; /* submit discard with REQ_SYNC flag */ + bool ordered; /* issue discard by lba order */ + bool timeout; /* discard timeout for put_super */ + unsigned int granularity; /* discard granularity */ }; struct discard_cmd_control { @@ -231,15 +486,28 @@ struct discard_cmd_control { struct list_head entry_list; /* 4KB discard entry list */ struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ struct list_head wait_list; /* store on-flushing entries */ + struct list_head fstrim_list; /* in-flight discard from fstrim */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ struct mutex cmd_lock; unsigned int nr_discards; /* # of discards in the list */ unsigned int max_discards; /* max. discards to be issued */ + unsigned int max_discard_request; /* max. discard request per round */ + unsigned int min_discard_issue_time; /* min. interval between discard issue */ + unsigned int mid_discard_issue_time; /* mid. interval between discard issue */ + unsigned int max_discard_issue_time; /* max. interval between discard issue */ + unsigned int discard_io_aware_gran; /* minimum discard granularity not be aware of I/O */ + unsigned int discard_urgent_util; /* utilization which issue discard proactively */ + unsigned int discard_granularity; /* discard granularity */ + unsigned int max_ordered_discard; /* maximum discard granularity issued by lba order */ + unsigned int discard_io_aware; /* io_aware policy */ unsigned int undiscard_blks; /* # of undiscard blocks */ + unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ - atomic_t issing_discard; /* # of issing discard */ + atomic_t queued_discard; /* # of queued discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ - struct rb_root root; /* root of discard rb-tree */ + struct rb_root_cached root; /* root of discard rb-tree */ + bool rbtree_check; /* config for consistence check */ + bool discard_wake; /* to wake up discard thread */ }; /* for the list of fsync inodes, used only during recovery */ @@ -285,86 +553,75 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, return size <= MAX_SIT_JENTRIES(journal); } -/* - * ioctl commands - */ -#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS -#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION - -#define F2FS_IOCTL_MAGIC 0xf5 -#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) -#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) -#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) -#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) -#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) -#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) -#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) -#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ - struct f2fs_defragment) -#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ - struct f2fs_move_range) -#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ - struct f2fs_flush_device) -#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ - struct f2fs_gc_range) - -#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY -#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY -#define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT +/* for inline stuff */ +#define DEF_INLINE_RESERVED_SIZE 1 +static inline int get_extra_isize(struct inode *inode); +static inline int get_inline_xattr_addrs(struct inode *inode); +#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ + (CUR_ADDRS_PER_INODE(inode) - \ + get_inline_xattr_addrs(inode) - \ + DEF_INLINE_RESERVED_SIZE)) + +/* for inline dir */ +#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + BITS_PER_BYTE + 1)) +#define INLINE_DENTRY_BITMAP_SIZE(inode) \ + DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE) +#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + NR_INLINE_DENTRY(inode) + \ + INLINE_DENTRY_BITMAP_SIZE(inode))) /* - * should be same as XFS_IOC_GOINGDOWN. - * Flags for going down operation used by FS_IOC_GOINGDOWN - */ -#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ -#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ -#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ -#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ -#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ - -#if defined(__KERNEL__) && defined(CONFIG_COMPAT) -/* - * ioctl commands in 32 bit emulation + * For INODE and NODE manager */ -#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION -#endif +/* for directory operations */ -struct f2fs_gc_range { - u32 sync; - u64 start; - u64 len; -}; +struct f2fs_filename { + /* + * The filename the user specified. This is NULL for some + * filesystem-internal operations, e.g. converting an inline directory + * to a non-inline one, or roll-forward recovering an encrypted dentry. + */ + const struct qstr *usr_fname; -struct f2fs_defragment { - u64 start; - u64 len; -}; + /* + * The on-disk filename. For encrypted directories, this is encrypted. + * This may be NULL for lookups in an encrypted dir without the key. + */ + struct fscrypt_str disk_name; -struct f2fs_move_range { - u32 dst_fd; /* destination fd */ - u64 pos_in; /* start position in src_fd */ - u64 pos_out; /* start position in dst_fd */ - u64 len; /* size to move */ -}; + /* The dirhash of this filename */ + f2fs_hash_t hash; -struct f2fs_flush_device { - u32 dev_num; /* device number to flush */ - u32 segments; /* # of segments to flush */ +#ifdef CONFIG_FS_ENCRYPTION + /* + * For lookups in encrypted directories: either the buffer backing + * disk_name, or a buffer that holds the decoded no-key name. + */ + struct fscrypt_str crypto_buf; +#endif +#if IS_ENABLED(CONFIG_UNICODE) + /* + * For casefolded directories: the casefolded name, but it's left NULL + * if the original name is not valid Unicode, if the original name is + * "." or "..", if the directory is both casefolded and encrypted and + * its encryption key is unavailable, or if the filesystem is doing an + * internal operation where usr_fname is also NULL. In all these cases + * we fall back to treating the name as an opaque byte sequence. + */ + struct qstr cf_name; +#endif }; -/* - * For INODE and NODE manager - */ -/* for directory operations */ struct f2fs_dentry_ptr { struct inode *inode; - const void *bitmap; + void *bitmap; struct f2fs_dir_entry *dentry; __u8 (*filename)[F2FS_SLOT_LEN]; int max; + int nr_bitmap; }; static inline void make_dentry_ptr_block(struct inode *inode, @@ -372,19 +629,26 @@ static inline void make_dentry_ptr_block(struct inode *inode, { d->inode = inode; d->max = NR_DENTRY_IN_BLOCK; - d->bitmap = &t->dentry_bitmap; + d->nr_bitmap = SIZE_OF_DENTRY_BITMAP; + d->bitmap = t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; } static inline void make_dentry_ptr_inline(struct inode *inode, - struct f2fs_dentry_ptr *d, struct f2fs_inline_dentry *t) + struct f2fs_dentry_ptr *d, void *t) { + int entry_cnt = NR_INLINE_DENTRY(inode); + int bitmap_size = INLINE_DENTRY_BITMAP_SIZE(inode); + int reserved_size = INLINE_RESERVED_SIZE(inode); + d->inode = inode; - d->max = NR_INLINE_DENTRY; - d->bitmap = &t->dentry_bitmap; - d->dentry = t->dentry; - d->filename = t->filename; + d->max = entry_cnt; + d->nr_bitmap = bitmap_size; + d->bitmap = t; + d->dentry = t + bitmap_size + reserved_size; + d->filename = t + bitmap_size + reserved_size + + SIZE_OF_DIR_ENTRY * entry_cnt; } /* @@ -403,82 +667,152 @@ enum { */ }; +#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO or flush count */ + +/* IO/non-IO congestion wait timeout value, default: 1ms */ +#define DEFAULT_SCHEDULE_TIMEOUT (msecs_to_jiffies(1)) + +/* timeout value injected, default: 1000ms */ +#define DEFAULT_FAULT_TIMEOUT (msecs_to_jiffies(1000)) + +/* maximum retry quota flush count */ +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 + +/* maximum retry of EIO'ed page */ +#define MAX_RETRY_PAGE_EIO 100 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ -/* vector size for gang look-up from extent cache that consists of radix tree */ -#define EXT_TREE_VEC_SIZE 64 +/* dirty segments threshold for triggering CP */ +#define DEFAULT_DIRTY_THRESHOLD 4 + +#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS +#define RECOVERY_MIN_RA_BLOCKS 1 + +#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */ /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ /* number of extent info in extent cache we try to shrink */ -#define EXTENT_CACHE_SHRINK_NUMBER 128 +#define READ_EXTENT_CACHE_SHRINK_NUMBER 128 -struct rb_entry { - struct rb_node rb_node; /* rb node located in rb-tree */ - unsigned int ofs; /* start offset of the entry */ - unsigned int len; /* length of the entry */ +/* number of age extent info in extent cache we try to shrink */ +#define AGE_EXTENT_CACHE_SHRINK_NUMBER 128 +#define LAST_AGE_WEIGHT 30 +#define SAME_AGE_REGION 1024 + +/* + * Define data block with age less than 1GB as hot data + * define data block with age less than 10GB but more than 1GB as warm data + */ +#define DEF_HOT_DATA_AGE_THRESHOLD 262144 +#define DEF_WARM_DATA_AGE_THRESHOLD 2621440 + +/* default max read extent count per inode */ +#define DEF_MAX_READ_EXTENT_COUNT 10240 + +/* extent cache type */ +enum extent_type { + EX_READ, + EX_BLOCK_AGE, + NR_EXTENT_CACHES, }; +/* + * Reserved value to mark invalid age extents, hence valid block range + * from 0 to ULLONG_MAX-1 + */ +#define F2FS_EXTENT_AGE_INVALID ULLONG_MAX + struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ - u32 blk; /* start block address of the extent */ -}; - -struct extent_node { - struct rb_node rb_node; union { + /* read extent_cache */ struct { - unsigned int fofs; - unsigned int len; - u32 blk; + /* start block address of the extent */ + block_t blk; +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* physical extent length of compressed blocks */ + unsigned int c_len; +#endif + }; + /* block age extent_cache */ + struct { + /* block age of the extent */ + unsigned long long age; + /* last total blocks allocated */ + unsigned long long last_blocks; }; - struct extent_info ei; /* extent info */ - }; +}; + +struct extent_node { + struct rb_node rb_node; /* rb node located in rb-tree */ + struct extent_info ei; /* extent info */ struct list_head list; /* node in global extent list of sbi */ struct extent_tree *et; /* extent tree pointer */ }; struct extent_tree { nid_t ino; /* inode number */ - struct rb_root root; /* root of extent info rb-tree */ + enum extent_type type; /* keep the extent tree type */ + struct rb_root_cached root; /* root of extent info rb-tree */ struct extent_node *cached_en; /* recently accessed extent node */ - struct extent_info largest; /* largested extent info */ struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ + bool largest_updated; /* largest extent updated */ + struct extent_info largest; /* largest cached extent for EX_READ */ +}; + +struct extent_tree_info { + struct radix_tree_root extent_tree_root;/* cache extent cache entries */ + struct mutex extent_tree_lock; /* locking extent radix tree */ + struct list_head extent_list; /* lru list for shrinker */ + spinlock_t extent_lock; /* locking extent lru list */ + atomic_t total_ext_tree; /* extent tree count */ + struct list_head zombie_list; /* extent zombie tree list */ + atomic_t total_zombie_tree; /* extent zombie tree count */ + atomic_t total_ext_node; /* extent info count */ }; /* - * This structure is taken from ext4_map_blocks. - * - * Note that, however, f2fs uses NEW and MAPPED flags for f2fs_map_blocks(). + * State of block returned by f2fs_map_blocks. */ -#define F2FS_MAP_NEW (1 << BH_New) -#define F2FS_MAP_MAPPED (1 << BH_Mapped) -#define F2FS_MAP_UNWRITTEN (1 << BH_Unwritten) +#define F2FS_MAP_NEW (1U << 0) +#define F2FS_MAP_MAPPED (1U << 1) +#define F2FS_MAP_DELALLOC (1U << 2) #define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED |\ - F2FS_MAP_UNWRITTEN) + F2FS_MAP_DELALLOC) struct f2fs_map_blocks { + struct block_device *m_bdev; /* for multi-device dio */ block_t m_pblk; block_t m_lblk; unsigned int m_len; unsigned int m_flags; + unsigned long m_last_pblk; /* last allocated block, only used for DIO in LFS mode */ pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ + pgoff_t *m_next_extent; /* point to next possible extent */ + int m_seg_type; + bool m_may_create; /* indicate it is from write path */ + bool m_multidev_dio; /* indicate it allows multi-device dio */ }; /* for flag in get_data_block */ -#define F2FS_GET_BLOCK_READ 0 -#define F2FS_GET_BLOCK_DIO 1 -#define F2FS_GET_BLOCK_FIEMAP 2 -#define F2FS_GET_BLOCK_BMAP 3 -#define F2FS_GET_BLOCK_PRE_DIO 4 -#define F2FS_GET_BLOCK_PRE_AIO 5 +enum { + F2FS_GET_BLOCK_DEFAULT, + F2FS_GET_BLOCK_FIEMAP, + F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_DIO, + F2FS_GET_BLOCK_PRE_DIO, + F2FS_GET_BLOCK_PRE_AIO, + F2FS_GET_BLOCK_PRECACHE, +}; /* * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. @@ -488,58 +822,164 @@ struct f2fs_map_blocks { #define FADVISE_ENCRYPT_BIT 0x04 #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 +#define FADVISE_HOT_BIT 0x20 +#define FADVISE_VERITY_BIT 0x40 +#define FADVISE_TRUNC_BIT 0x80 + +#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) -#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) #define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) -#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) #define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) + +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) #define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) + #define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT) #define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT) -#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) + #define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT) #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) + #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) +#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) +#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) +#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) + +#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT) +#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT) + +#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT) +#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT) +#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT) + #define DEF_DIR_LEVEL 0 +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_AUTO_RECOVER, /* indicate inode is recoverable */ + FI_DIRTY_DIR, /* indicate directory has dirty pages */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ + FI_FREE_NID, /* free allocated nide */ + FI_NO_EXTENT, /* not to use the extent cache */ + FI_INLINE_XATTR, /* used for inline xattr */ + FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ + FI_APPEND_WRITE, /* inode has appended data */ + FI_UPDATE_WRITE, /* inode has in-place-update data */ + FI_NEED_IPU, /* used for ipu per file */ + FI_ATOMIC_FILE, /* indicate atomic file */ + FI_DATA_EXIST, /* indicate data exists */ + FI_SKIP_WRITES, /* should skip data page writeback */ + FI_OPU_WRITE, /* used for opu per file */ + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */ + FI_HOT_DATA, /* indicate file is hot */ + FI_EXTRA_ATTR, /* indicate file has extra attribute */ + FI_PROJ_INHERIT, /* indicate file inherits projectid */ + FI_PIN_FILE, /* indicate file should not be gced */ + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */ + FI_MMAP_FILE, /* indicate file was mmapped */ + FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */ + FI_COMPRESS_RELEASED, /* compressed blocks were released */ + FI_ALIGNED_WRITE, /* enable aligned write */ + FI_COW_FILE, /* indicate COW file */ + FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */ + FI_ATOMIC_REPLACE, /* indicate atomic replace */ + FI_OPENED_FILE, /* indicate file has been opened */ + FI_DONATE_FINISHED, /* indicate page donation of file has been finished */ + FI_MAX, /* max flag, never be used */ +}; + struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ unsigned char i_dir_level; /* use for dentry level for large dir */ - unsigned int i_current_depth; /* use only in directory structure */ + union { + unsigned int i_current_depth; /* only for directory depth */ + unsigned short i_gc_failures; /* for gc failure statistic */ + }; unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */ /* Use below internally in f2fs*/ - unsigned long flags; /* use to pass per-file flags */ - struct rw_semaphore i_sem; /* protect fi info */ + unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ + unsigned int ioprio_hint; /* hint for IO priority */ + struct f2fs_rwsem i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ struct task_struct *task; /* lookup and create consistency */ + struct task_struct *cp_task; /* separate cp/wb IO stats*/ + struct task_struct *wb_task; /* indicate inode is in context of writeback */ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ + spinlock_t i_size_lock; /* protect last_disk_size */ #ifdef CONFIG_QUOTA - struct dquot *i_dquot[MAXQUOTAS]; + struct dquot __rcu *i_dquot[MAXQUOTAS]; /* quota space reservation, managed internally by quota code */ qsize_t i_reserved_quota; #endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ - struct list_head inmem_pages; /* inmemory pages managed by f2fs */ - struct mutex inmem_lock; /* lock for inmemory pages */ - struct extent_tree *extent_tree; /* cached extent_tree entry */ - struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ - struct rw_semaphore i_mmap_sem; + + /* linked in global inode list for cache donation */ + struct list_head gdonate_list; + pgoff_t donate_start, donate_end; /* inclusive */ + atomic_t open_count; /* # of open files */ + + struct task_struct *atomic_write_task; /* store atomic write task */ + struct extent_tree *extent_tree[NR_EXTENT_CACHES]; + /* cached extent_tree entry */ + union { + struct inode *cow_inode; /* copy-on-write inode for atomic write */ + struct inode *atomic_inode; + /* point to atomic_inode, available only for cow_inode */ + }; + + /* avoid racing between foreground op and gc */ + struct f2fs_rwsem i_gc_rwsem[2]; + struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */ + + int i_extra_isize; /* size of extra space located in i_addr */ + kprojid_t i_projid; /* id for project quota */ + int i_inline_xattr_size; /* inline xattr size */ + struct timespec64 i_crtime; /* inode creation time */ + struct timespec64 i_disk_time[3];/* inode disk times */ + + /* for file compress */ + atomic_t i_compr_blocks; /* # of compressed blocks */ + unsigned char i_compress_algorithm; /* algorithm type */ + unsigned char i_log_cluster_size; /* log of cluster size */ + unsigned char i_compress_level; /* compress level (lz4hc,zstd) */ + unsigned char i_compress_flag; /* compress flag */ + unsigned int i_cluster_size; /* cluster size */ + atomic_t writeback; /* count # of writeback thread */ + + unsigned int atomic_write_cnt; + loff_t original_i_size; /* original i_size before atomic write */ +#ifdef CONFIG_FS_ENCRYPTION + struct fscrypt_inode_info *i_crypt_info; /* filesystem encryption info */ +#endif +#ifdef CONFIG_FS_VERITY + struct fsverity_info *i_verity_info; /* filesystem verity info */ +#endif }; -static inline void get_extent_info(struct extent_info *ext, +static inline void get_read_extent_info(struct extent_info *ext, struct f2fs_extent *i_ext) { ext->fofs = le32_to_cpu(i_ext->fofs); @@ -547,7 +987,7 @@ static inline void get_extent_info(struct extent_info *ext, ext->len = le32_to_cpu(i_ext->len); } -static inline void set_raw_extent(struct extent_info *ext, +static inline void set_raw_read_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { i_ext->fofs = cpu_to_le32(ext->fofs); @@ -555,65 +995,39 @@ static inline void set_raw_extent(struct extent_info *ext, i_ext->len = cpu_to_le32(ext->len); } -static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, - u32 blk, unsigned int len) -{ - ei->fofs = fofs; - ei->blk = blk; - ei->len = len; -} - static inline bool __is_discard_mergeable(struct discard_info *back, - struct discard_info *front) + struct discard_info *front, unsigned int max_len) { - return back->lstart + back->len == front->lstart; + return (back->lstart + back->len == front->lstart) && + (back->len + front->len <= max_len); } static inline bool __is_discard_back_mergeable(struct discard_info *cur, - struct discard_info *back) + struct discard_info *back, unsigned int max_len) { - return __is_discard_mergeable(back, cur); + return __is_discard_mergeable(back, cur, max_len); } static inline bool __is_discard_front_mergeable(struct discard_info *cur, - struct discard_info *front) -{ - return __is_discard_mergeable(cur, front); -} - -static inline bool __is_extent_mergeable(struct extent_info *back, - struct extent_info *front) -{ - return (back->fofs + back->len == front->fofs && - back->blk + back->len == front->blk); -} - -static inline bool __is_back_mergeable(struct extent_info *cur, - struct extent_info *back) -{ - return __is_extent_mergeable(back, cur); -} - -static inline bool __is_front_mergeable(struct extent_info *cur, - struct extent_info *front) + struct discard_info *front, unsigned int max_len) { - return __is_extent_mergeable(cur, front); + return __is_discard_mergeable(cur, front, max_len); } -extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct inode *inode, - struct extent_tree *et, struct extent_node *en) -{ - if (en->ei.len > et->largest.len) { - et->largest = en->ei; - f2fs_mark_inode_dirty_sync(inode, true); - } -} +/* + * For free nid management + */ +enum nid_state { + FREE_NID, /* newly added to free nid list */ + PREALLOC_NID, /* it is preallocated */ + MAX_NID_STATE, +}; -enum nid_list { - FREE_NID_LIST, - ALLOC_NID_LIST, - MAX_NID_LIST, +enum nat_state { + TOTAL_NAT, + DIRTY_NAT, + RECLAIMABLE_NAT, + MAX_NAT_STATE, }; struct f2fs_nm_info { @@ -621,6 +1035,7 @@ struct f2fs_nm_info { nid_t max_nid; /* maximum possible node ids */ nid_t available_nids; /* # of available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ + nid_t max_rf_node_blocks; /* max # of nodes for recovery */ unsigned int ram_thresh; /* control the memory footprint */ unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */ unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */ @@ -628,19 +1043,19 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct f2fs_rwsem nat_tree_lock; /* protect nat entry tree */ struct list_head nat_entries; /* cached nat entry list (clean) */ - unsigned int nat_cnt; /* the # of cached nat entries */ - unsigned int dirty_nat_cnt; /* total num of nat entries in set */ + spinlock_t nat_list_lock; /* protect clean nat entry list */ + unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ unsigned int nat_blocks; /* # of nat blocks */ /* free node ids management */ struct radix_tree_root free_nid_root;/* root of the free_nid cache */ - struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */ - unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ + struct list_head free_nid_list; /* list for free nids excluding preallocated nids */ + unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ - unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; + unsigned char **free_nid_bitmap; unsigned char *nat_block_bitmap; unsigned short *free_nid_count; /* free nid count of NAT block */ @@ -664,11 +1079,11 @@ struct f2fs_nm_info { */ struct dnode_of_data { struct inode *inode; /* vfs inode pointer */ - struct page *inode_page; /* its inode page, NULL is possible */ - struct page *node_page; /* cached direct node page */ + struct folio *inode_folio; /* its inode folio, NULL is possible */ + struct folio *node_folio; /* cached direct node folio */ nid_t nid; /* node id of the direct node block */ unsigned int ofs_in_node; /* data offset in the node page */ - bool inode_page_locked; /* inode page is locked or not */ + bool inode_folio_locked; /* inode folio is locked or not */ bool node_changed; /* is node block changed */ char cur_level; /* level of hole node page */ char max_level; /* level of current page located */ @@ -676,12 +1091,12 @@ struct dnode_of_data { }; static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, - struct page *ipage, struct page *npage, nid_t nid) + struct folio *ifolio, struct folio *nfolio, nid_t nid) { memset(dn, 0, sizeof(*dn)); dn->inode = inode; - dn->inode_page = ipage; - dn->node_page = npage; + dn->inode_folio = ifolio; + dn->node_folio = nfolio; dn->nid = nid; } @@ -700,21 +1115,29 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, */ #define NR_CURSEG_DATA_TYPE (3) #define NR_CURSEG_NODE_TYPE (3) -#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) +#define NR_CURSEG_INMEM_TYPE (2) +#define NR_CURSEG_RO_TYPE (2) +#define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) +#define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE) -enum { +enum log_type { CURSEG_HOT_DATA = 0, /* directory entry blocks */ CURSEG_WARM_DATA, /* data blocks */ CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ CURSEG_HOT_NODE, /* direct node blocks of directory files */ CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ - NO_CHECK_TYPE, + NR_PERSISTENT_LOG, /* number of persistent log */ + CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG, + /* pinned file that needs consecutive block address */ + CURSEG_ALL_DATA_ATGC, /* SSR alloctor in hot/warm/cold data area */ + NO_CHECK_TYPE, /* number of persistent & inmem log */ }; struct flush_cmd { struct completion wait; struct llist_node llnode; + nid_t ino; int ret; }; @@ -722,7 +1145,7 @@ struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ atomic_t issued_flush; /* # of issued flushes */ - atomic_t issing_flush; /* # of issing flushes */ + atomic_t queued_flush; /* # of queued flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -733,6 +1156,8 @@ struct f2fs_sm_info { struct dirty_seglist_info *dirty_info; /* dirty segment information */ struct curseg_info *curseg_array; /* active segment information */ + struct f2fs_rwsem curseg_lock; /* for preventing curseg change */ + block_t seg0_blkaddr; /* block address of 0'th segment */ block_t main_blkaddr; /* start block address of main area */ block_t ssa_blkaddr; /* start block address of SSA area */ @@ -745,15 +1170,14 @@ struct f2fs_sm_info { /* a threshold to reclaim prefree segments */ unsigned int rec_prefree_segments; - /* for batched trimming */ - unsigned int trim_sections; /* # of sections to trim */ - struct list_head sit_entry_set; /* sit entry set list */ unsigned int ipu_policy; /* in-place-update policy */ unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int min_fsync_blocks; /* threshold for fsync */ + unsigned int min_seq_blocks; /* threshold for sequential blocks */ unsigned int min_hot_blocks; /* threshold for hot block allocation */ + unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */ /* for flush command control */ struct flush_cmd_control *fcc_info; @@ -771,16 +1195,22 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(folio, f) \ + (f || f2fs_is_cp_guaranteed(folio) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, + F2FS_DIRTY_QDATA, F2FS_DIRTY_NODES, F2FS_DIRTY_META, - F2FS_INMEM_PAGES, F2FS_DIRTY_IMETA, F2FS_WB_CP_DATA, F2FS_WB_DATA, + F2FS_RD_DATA, + F2FS_RD_NODE, + F2FS_RD_META, + F2FS_DIO_WRITE, + F2FS_DIO_READ, NR_COUNT_TYPE, }; @@ -796,17 +1226,14 @@ enum count_type { * ... Only can be used with META. */ #define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type)) +#define PAGE_TYPE_ON_MAIN(type) ((type) == DATA || (type) == NODE) enum page_type { - DATA, - NODE, + DATA = 0, + NODE = 1, /* should not change this */ META, NR_PAGE_TYPE, META_FLUSH, - INMEM, /* the below types are used by tracepoints only. */ - INMEM_DROP, - INMEM_INVALIDATE, - INMEM_REVOKE, - IPU, + IPU, /* the below types are used by tracepoints only. */ OPU, }; @@ -823,20 +1250,92 @@ enum need_lock_type { LOCK_RETRY, }; +enum cp_reason_type { + CP_NO_NEEDED, + CP_NON_REGULAR, + CP_COMPRESSED, + CP_HARDLINK, + CP_SB_NEED_CP, + CP_WRONG_PINO, + CP_NO_SPC_ROLL, + CP_NODE_NEED_CP, + CP_FASTBOOT_MODE, + CP_SPEC_LOG_NUM, + CP_RECOVER_DIR, + CP_XATTR_DIR, +}; + +enum iostat_type { + /* WRITE IO */ + APP_DIRECT_IO, /* app direct write IOs */ + APP_BUFFERED_IO, /* app buffered write IOs */ + APP_WRITE_IO, /* app write IOs */ + APP_MAPPED_IO, /* app mapped IOs */ + APP_BUFFERED_CDATA_IO, /* app buffered write IOs on compressed file */ + APP_MAPPED_CDATA_IO, /* app mapped write IOs on compressed file */ + FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ + FS_CDATA_IO, /* data IOs from kworker/fsync/reclaimer on compressed file */ + FS_NODE_IO, /* node IOs from kworker/fsync/reclaimer */ + FS_META_IO, /* meta IOs from kworker/reclaimer */ + FS_GC_DATA_IO, /* data IOs from forground gc */ + FS_GC_NODE_IO, /* node IOs from forground gc */ + FS_CP_DATA_IO, /* data IOs from checkpoint */ + FS_CP_NODE_IO, /* node IOs from checkpoint */ + FS_CP_META_IO, /* meta IOs from checkpoint */ + + /* READ IO */ + APP_DIRECT_READ_IO, /* app direct read IOs */ + APP_BUFFERED_READ_IO, /* app buffered read IOs */ + APP_READ_IO, /* app read IOs */ + APP_MAPPED_READ_IO, /* app mapped read IOs */ + APP_BUFFERED_CDATA_READ_IO, /* app buffered read IOs on compressed file */ + APP_MAPPED_CDATA_READ_IO, /* app mapped read IOs on compressed file */ + FS_DATA_READ_IO, /* data read IOs */ + FS_GDATA_READ_IO, /* data read IOs from background gc */ + FS_CDATA_READ_IO, /* compressed data read IOs */ + FS_NODE_READ_IO, /* node read IOs */ + FS_META_READ_IO, /* meta read IOs */ + + /* other */ + FS_DISCARD_IO, /* discard */ + FS_FLUSH_IO, /* flush */ + FS_ZONE_RESET_IO, /* zone reset */ + NR_IO_TYPE, +}; + struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ + nid_t ino; /* inode number */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ enum temp_type temp; /* contains HOT/WARM/COLD */ - int op; /* contains REQ_OP_ */ - int op_flags; /* req_flag_bits */ + enum req_op op; /* contains REQ_OP_ */ + blk_opf_t op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ - struct page *page; /* page to be written */ + union { + struct page *page; /* page to be written */ + struct folio *folio; + }; struct page *encrypted_page; /* encrypted page */ + struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ - bool submitted; /* indicate IO submission */ - int need_lock; /* indicate we need to lock cp_rwsem */ - bool in_list; /* indicate fio is in io_list */ + unsigned int compr_blocks; /* # of compressed block addresses */ + unsigned int need_lock:8; /* indicate we need to lock cp_rwsem */ + unsigned int version:8; /* version of the node */ + unsigned int submitted:1; /* indicate IO submission */ + unsigned int in_list:1; /* indicate fio is in io_list */ + unsigned int is_por:1; /* indicate IO is from recovery or not */ + unsigned int encrypted:1; /* indicate file is encrypted */ + unsigned int meta_gc:1; /* require meta inode GC */ + enum iostat_type io_type; /* io type */ + struct writeback_control *io_wbc; /* writeback control */ + struct bio **bio; /* bio for ipu */ + sector_t *last_block; /* last block number in bio */ +}; + +struct bio_entry { + struct bio *bio; + struct list_head list; }; #define is_read_io(rw) ((rw) == READ) @@ -845,22 +1344,30 @@ struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ - struct rw_semaphore io_rwsem; /* blocking op for bio */ +#ifdef CONFIG_BLK_DEV_ZONED + struct completion zone_wait; /* condition value for the previous open zone to close */ + struct bio *zone_pending_bio; /* pending bio for the previous zone */ + void *bi_private; /* previous bi_private for pending bio */ +#endif + struct f2fs_rwsem io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ + struct list_head bio_list; /* bio entry list head */ + struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) #define RDEV(i) (raw_super->devs[i]) struct f2fs_dev_info { + struct file *bdev_file; struct block_device *bdev; - char path[MAX_PATH_LEN]; + char path[MAX_PATH_LEN + 1]; unsigned int total_segments; block_t start_blk; block_t end_blk; #ifdef CONFIG_BLK_DEV_ZONED - unsigned int nr_blkz; /* Total number of zones */ - u8 *blkz_type; /* Array of zones type */ + unsigned int nr_blkz; /* Total number of zones */ + unsigned long *blkz_seq; /* Bitmap indicating sequential zones */ #endif }; @@ -868,6 +1375,7 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ + DONATE_INODE, /* for all inode to donate pages */ NR_INODE_TYPE, }; @@ -879,7 +1387,32 @@ struct inode_management { unsigned long ino_num; /* number of entries */ }; -/* For s_flag in struct f2fs_sb_info */ +/* for GC_AT */ +struct atgc_management { + bool atgc_enabled; /* ATGC is enabled or not */ + struct rb_root_cached root; /* root of victim rb-tree */ + struct list_head victim_list; /* linked with all victim entries */ + unsigned int victim_count; /* victim count in rb-tree */ + unsigned int candidate_ratio; /* candidate ratio */ + unsigned int max_candidate_count; /* max candidate count */ + unsigned int age_weight; /* age weight, vblock_weight = 100 - age_weight */ + unsigned long long age_threshold; /* age threshold */ +}; + +struct f2fs_gc_control { + unsigned int victim_segno; /* target victim segment number */ + int init_gc_type; /* FG_GC or BG_GC */ + bool no_bg_gc; /* check the space and stop bg_gc */ + bool should_migrate_blocks; /* should migrate blocks */ + bool err_gc_skipped; /* return EAGAIN if GC skipped */ + bool one_time; /* require one time GC in one migration unit */ + unsigned int nr_free_secs; /* # of free sections to do GC */ +}; + +/* + * For s_flag in struct f2fs_sb_info + * Modification on enum should be synchronized with s_flag array + */ enum { SBI_IS_DIRTY, /* dirty flag for checkpoint */ SBI_IS_CLOSE, /* specify unmounting */ @@ -887,24 +1420,272 @@ enum { SBI_POR_DOING, /* recovery is doing or not */ SBI_NEED_SB_WRITE, /* need to recover superblock */ SBI_NEED_CP, /* need to checkpoint */ + SBI_IS_SHUTDOWN, /* shutdown by ioctl */ + SBI_IS_RECOVERED, /* recovered orphan/data */ + SBI_CP_DISABLED, /* CP was disabled last mount */ + SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */ + SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ + SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ + SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ + SBI_IS_RESIZEFS, /* resizefs is in process */ + SBI_IS_FREEZING, /* freezefs is in process */ + SBI_IS_WRITABLE, /* remove ro mountoption transiently */ + MAX_SBI_FLAG, }; enum { CP_TIME, REQ_TIME, + DISCARD_TIME, + GC_TIME, + DISABLE_TIME, + ENABLE_TIME, + UMOUNT_DISCARD_TIMEOUT, MAX_TIME, }; +/* Note that you need to keep synchronization with this gc_mode_names array */ +enum { + GC_NORMAL, + GC_IDLE_CB, + GC_IDLE_GREEDY, + GC_IDLE_AT, + GC_URGENT_HIGH, + GC_URGENT_LOW, + GC_URGENT_MID, + MAX_GC_MODE, +}; + +enum { + BGGC_MODE_ON, /* background gc is on */ + BGGC_MODE_OFF, /* background gc is off */ + BGGC_MODE_SYNC, /* + * background gc is on, migrating blocks + * like foreground gc + */ +}; + +enum { + FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ + FS_MODE_LFS, /* use lfs allocation only */ + FS_MODE_FRAGMENT_SEG, /* segment fragmentation mode */ + FS_MODE_FRAGMENT_BLK, /* block fragmentation mode */ +}; + +enum { + ALLOC_MODE_DEFAULT, /* stay default */ + ALLOC_MODE_REUSE, /* reuse segments as much as possible */ +}; + +enum fsync_mode { + FSYNC_MODE_POSIX, /* fsync follows posix semantics */ + FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */ + FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ +}; + +enum { + COMPR_MODE_FS, /* + * automatically compress compression + * enabled files + */ + COMPR_MODE_USER, /* + * automatical compression is disabled. + * user can control the file compression + * using ioctls + */ +}; + +enum { + DISCARD_UNIT_BLOCK, /* basic discard unit is block */ + DISCARD_UNIT_SEGMENT, /* basic discard unit is segment */ + DISCARD_UNIT_SECTION, /* basic discard unit is section */ +}; + +enum { + MEMORY_MODE_NORMAL, /* memory mode for normal devices */ + MEMORY_MODE_LOW, /* memory mode for low memory devices */ +}; + +enum errors_option { + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */ + MOUNT_ERRORS_CONTINUE, /* continue on errors */ + MOUNT_ERRORS_PANIC, /* panic on errors */ +}; + +enum { + BACKGROUND, + FOREGROUND, + MAX_CALL_TYPE, + TOTAL_CALL = FOREGROUND, +}; + +enum f2fs_lookup_mode { + LOOKUP_PERF, + LOOKUP_COMPAT, + LOOKUP_AUTO, +}; + +static inline int f2fs_test_bit(unsigned int nr, char *addr); +static inline void f2fs_set_bit(unsigned int nr, char *addr); +static inline void f2fs_clear_bit(unsigned int nr, char *addr); + +/* + * Layout of f2fs page.private: + * + * Layout A: lowest bit should be 1 + * | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... | + * bit 0 PAGE_PRIVATE_NOT_POINTER + * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION + * bit 2 PAGE_PRIVATE_INLINE_INODE + * bit 3 PAGE_PRIVATE_REF_RESOURCE + * bit 4 PAGE_PRIVATE_ATOMIC_WRITE + * bit 5- f2fs private data + * + * Layout B: lowest bit should be 0 + * page.private is a wrapped pointer. + */ +enum { + PAGE_PRIVATE_NOT_POINTER, /* private contains non-pointer data */ + PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ + PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ + PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ + PAGE_PRIVATE_ATOMIC_WRITE, /* data page from atomic write path */ + PAGE_PRIVATE_MAX +}; + +/* For compression */ +enum compress_algorithm_type { + COMPRESS_LZO, + COMPRESS_LZ4, + COMPRESS_ZSTD, + COMPRESS_LZORLE, + COMPRESS_MAX, +}; + +enum compress_flag { + COMPRESS_CHKSUM, + COMPRESS_MAX_FLAG, +}; + +#define COMPRESS_WATERMARK 20 +#define COMPRESS_PERCENT 20 + +#define COMPRESS_DATA_RESERVED_SIZE 4 +struct compress_data { + __le32 clen; /* compressed data size */ + __le32 chksum; /* compressed data checksum */ + __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ + u8 cdata[]; /* compressed data */ +}; + +#define COMPRESS_HEADER_SIZE (sizeof(struct compress_data)) + +#define F2FS_COMPRESSED_PAGE_MAGIC 0xF5F2C000 + +#define F2FS_ZSTD_DEFAULT_CLEVEL 1 + +#define COMPRESS_LEVEL_OFFSET 8 + +/* compress context */ +struct compress_ctx { + struct inode *inode; /* inode the context belong to */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + struct page **cpages; /* pages store compressed data in cluster */ + unsigned int nr_cpages; /* total page number in cpages */ + unsigned int valid_nr_cpages; /* valid page number in cpages */ + void *rbuf; /* virtual mapped address on rpages */ + struct compress_data *cbuf; /* virtual mapped address on cpages */ + size_t rlen; /* valid data length in rbuf */ + size_t clen; /* valid data length in cbuf */ + void *private; /* payload buffer for specified compression algorithm */ + void *private2; /* extra payload buffer */ +}; + +/* compress context for write IO path */ +struct compress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + atomic_t pending_pages; /* in-flight compressed page count */ +}; + +/* Context for decompressing one cluster on the read IO path */ +struct decompress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ + struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + struct page **cpages; /* pages store compressed data in cluster */ + unsigned int nr_cpages; /* total page number in cpages */ + struct page **tpages; /* temp pages to pad holes in cluster */ + void *rbuf; /* virtual mapped address on rpages */ + struct compress_data *cbuf; /* virtual mapped address on cpages */ + size_t rlen; /* valid data length in rbuf */ + size_t clen; /* valid data length in cbuf */ + + /* + * The number of compressed pages remaining to be read in this cluster. + * This is initially nr_cpages. It is decremented by 1 each time a page + * has been read (or failed to be read). When it reaches 0, the cluster + * is decompressed (or an error is reported). + * + * If an error occurs before all the pages have been submitted for I/O, + * then this will never reach 0. In this case the I/O submitter is + * responsible for calling f2fs_decompress_end_io() instead. + */ + atomic_t remaining_pages; + + /* + * Number of references to this decompress_io_ctx. + * + * One reference is held for I/O completion. This reference is dropped + * after the pagecache pages are updated and unlocked -- either after + * decompression (and verity if enabled), or after an error. + * + * In addition, each compressed page holds a reference while it is in a + * bio. These references are necessary prevent compressed pages from + * being freed while they are still in a bio. + */ + refcount_t refcnt; + + bool failed; /* IO error occurred before decompression? */ + bool need_verity; /* need fs-verity verification after decompression? */ + unsigned char compress_algorithm; /* backup algorithm type */ + void *private; /* payload buffer for specified decompression algorithm */ + void *private2; /* extra payload buffer */ + struct work_struct verity_work; /* work to verify the decompressed pages */ + struct work_struct free_work; /* work for late free this structure itself */ +}; + +#define NULL_CLUSTER ((unsigned int)(~0)) +#define MIN_COMPRESS_LOG_SIZE 2 +#define MAX_COMPRESS_LOG_SIZE 8 +#define MAX_COMPRESS_WINDOW_SIZE(log_size) ((PAGE_SIZE) << (log_size)) + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ + struct f2fs_rwsem sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ + struct mutex writepages; /* mutex for writepages() */ #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ - unsigned int log_blocks_per_blkz; /* log2 F2FS blocks per zone */ + unsigned int unusable_blocks_per_sec; /* unusable blocks per section */ + unsigned int max_open_zones; /* max open zone resources of the zoned device */ + /* For adjust the priority writing position of data in zone UFS */ + unsigned int blkzone_alloc_policy; #endif /* for node-related operations */ @@ -916,25 +1697,33 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; - /* bio ordering for NODE/DATA */ - int write_io_size_bits; /* Write IO size bits */ - mempool_t *write_io_dummy; /* Dummy pages */ + /* keep migration IO order for LFS mode */ + struct f2fs_rwsem io_order_lock; + pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */ + int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ - struct mutex cp_mutex; /* checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ - struct rw_semaphore node_write; /* locking node writes */ - struct rw_semaphore node_change; /* locking node change */ + struct f2fs_rwsem cp_global_sem; /* checkpoint procedure lock */ + struct f2fs_rwsem cp_rwsem; /* blocking FS operations */ + struct f2fs_rwsem node_write; /* locking node writes */ + struct f2fs_rwsem node_change; /* locking node change */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ + struct ckpt_req_control cprc_info; /* for checkpoint request control */ + struct cp_stats cp_stats; /* for time stat of checkpoint */ + struct f2fs_rwsem cp_enable_rwsem; /* block cache/dio write */ - struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ + + spinlock_t fsync_node_lock; /* for node entry lock */ + struct list_head fsync_node_list; /* node list head */ + unsigned int fsync_seg_id; /* sequence id */ + unsigned int fsync_node_num; /* number of node entries */ /* for orphan inode, use 0'th array */ unsigned int max_orphans; /* max orphan inodes */ @@ -942,16 +1731,20 @@ struct f2fs_sb_info { /* for inode management */ struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */ spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */ + struct mutex flush_lock; /* for flush exclusion */ /* for extent tree cache */ - struct radix_tree_root extent_tree_root;/* cache extent cache entries */ - struct mutex extent_tree_lock; /* locking extent radix tree */ - struct list_head extent_list; /* lru list for shrinker */ - spinlock_t extent_lock; /* locking extent lru list */ - atomic_t total_ext_tree; /* extent tree count */ - struct list_head zombie_list; /* extent zombie tree list */ - atomic_t total_zombie_tree; /* extent zombie tree count */ - atomic_t total_ext_node; /* extent info count */ + struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; + atomic64_t allocated_data_blocks; /* for block age extent_cache */ + unsigned int max_read_extent_count; /* max read extent count per inode */ + + /* The threshold used for hot and warm data seperation*/ + unsigned int hot_data_age_threshold; + unsigned int warm_data_age_threshold; + unsigned int last_age_weight; + + /* control donate caches */ + unsigned int donate_files; /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -967,25 +1760,33 @@ struct f2fs_sb_info { unsigned int total_sections; /* total section count */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ - loff_t max_file_blocks; /* max block index of file */ - int active_logs; /* # of active logs */ int dir_level; /* directory level */ + bool readdir_ra; /* readahead inode in readdir */ + u64 max_io_bytes; /* max io bytes to merge IOs */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ + block_t current_reserved_blocks; /* current reserved blocks */ + + /* Additional tracking for no checkpoint mode */ + block_t unusable_block_count; /* # of blocks saved by last cp */ - u32 s_next_generation; /* for NFS support */ + unsigned int nquota_files; /* # of quota sysfile */ + struct f2fs_rwsem quota_sem; /* blocking cp for flags */ + struct task_struct *umount_lock_holder; /* s_umount lock holder */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* # of node block writes as roll forward recovery */ + struct percpu_counter rf_node_block_count; /* writeback control */ - atomic_t wb_sync_req; /* count # of WB_SYNC threads */ + atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */ /* valid inode count */ struct percpu_counter total_valid_inode_count; @@ -993,15 +1794,35 @@ struct f2fs_sb_info { struct f2fs_mount_info mount_opt; /* mount options */ /* for cleaning operations */ - struct mutex gc_mutex; /* mutex for GC */ + struct f2fs_rwsem gc_lock; /* + * semaphore for GC, avoid + * race between GC and GC or CP + */ struct f2fs_gc_kthread *gc_thread; /* GC thread */ + struct atgc_management am; /* atgc management */ unsigned int cur_victim_sec; /* current victim section num */ + unsigned int gc_mode; /* current GC state */ + unsigned int next_victim_seg[2]; /* next segment in victim section */ + spinlock_t gc_remaining_trials_lock; + /* remaining trial count for GC_URGENT_* and GC_IDLE_* */ + unsigned int gc_remaining_trials; + + /* for skip statistic */ + unsigned long long skipped_gc_rwsem; /* FG_GC only */ - /* threshold for converting bg victims for fg */ - u64 fggc_threshold; + /* free sections reserved for pinned file */ + unsigned int reserved_pin_section; + + /* threshold for gc trials on pinned files */ + unsigned short gc_pin_file_threshold; + struct f2fs_rwsem pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; + /* migration granularity of garbage collection, unit: segment */ + unsigned int migration_granularity; + /* migration window granularity of garbage collection, unit: segment */ + unsigned int migration_window_granularity; /* * for stat information. @@ -1009,57 +1830,170 @@ struct f2fs_sb_info { */ #ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ + atomic_t meta_count[META_MAX]; /* # of meta blocks */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ - atomic64_t total_hit_ext; /* # of lookup extent cache */ - atomic64_t read_hit_rbtree; /* # of hit rbtree extent node */ - atomic64_t read_hit_largest; /* # of hit largest extent node */ - atomic64_t read_hit_cached; /* # of hit cached extent node */ + /* # of lookup extent cache */ + atomic64_t total_hit_ext[NR_EXTENT_CACHES]; + /* # of hit rbtree extent node */ + atomic64_t read_hit_rbtree[NR_EXTENT_CACHES]; + /* # of hit cached extent node */ + atomic64_t read_hit_cached[NR_EXTENT_CACHES]; + /* # of hit largest extent node in read extent cache */ + atomic64_t read_hit_largest; atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ - atomic_t aw_cnt; /* # of atomic writes */ - atomic_t vw_cnt; /* # of volatile writes */ + atomic_t compr_inode; /* # of compressed inodes */ + atomic64_t compr_blocks; /* # of compressed blocks */ + atomic_t swapfile_inode; /* # of swapfile inodes */ + atomic_t atomic_files; /* # of opened atomic file */ atomic_t max_aw_cnt; /* max # of atomic writes */ - atomic_t max_vw_cnt; /* max # of volatile writes */ - int bg_gc; /* background gc calls */ + unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ + unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ + atomic_t cp_call_count[MAX_CALL_TYPE]; /* # of cp call */ #endif spinlock_t stat_lock; /* lock for stat operations */ - /* For sysfs suppport */ - struct kobject s_kobj; + /* to attach REQ_META|REQ_FUA flags */ + unsigned int data_io_flag; + unsigned int node_io_flag; + + /* For sysfs support */ + struct kobject s_kobj; /* /sys/fs/f2fs/<devname> */ struct completion s_kobj_unregister; + struct kobject s_stat_kobj; /* /sys/fs/f2fs/<devname>/stat */ + struct completion s_stat_kobj_unregister; + + struct kobject s_feature_list_kobj; /* /sys/fs/f2fs/<devname>/feature_list */ + struct completion s_feature_list_kobj_unregister; + /* For shrinker support */ struct list_head s_list; - int s_ndevs; /* number of devices */ - struct f2fs_dev_info *devs; /* for device list */ struct mutex umount_mutex; unsigned int shrinker_run_no; + /* For multi devices */ + int s_ndevs; /* number of devices */ + struct f2fs_dev_info *devs; /* for device list */ + unsigned int dirty_device; /* for checkpoint data flush */ + spinlock_t dev_lock; /* protect dirty_device */ + bool aligned_blksize; /* all devices has the same logical blksize */ + unsigned int first_seq_zone_segno; /* first segno in sequential zone */ + unsigned int bggc_io_aware; /* For adjust the BG_GC priority when pending IO */ + unsigned int allocate_section_hint; /* the boundary position between devices */ + unsigned int allocate_section_policy; /* determine the section writing priority */ + /* For write statistics */ u64 sectors_written_start; u64 kbytes_written; - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; + /* Precomputed FS UUID checksum for seeding other checksums */ + __u32 s_chksum_seed; - /* For fault injection */ -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info fault_info; + struct workqueue_struct *post_read_wq; /* post read workqueue */ + + /* + * If we are in irq context, let's update error information into + * on-disk superblock in the work. + */ + struct work_struct s_error_work; + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */ + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */ + spinlock_t error_lock; /* protect errors/stop_reason array */ + bool error_dirty; /* errors of sb is dirty */ + + /* For reclaimed segs statistics per each GC mode */ + unsigned int gc_segment_mode; /* GC state for reclaimed segments */ + unsigned int gc_reclaimed_segs[MAX_GC_MODE]; /* Reclaimed segs for each mode */ + + unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */ + + int max_fragment_chunk; /* max chunk size for block fragmentation mode */ + int max_fragment_hole; /* max hole size for block fragmentation mode */ + + /* For atomic write statistics */ + atomic64_t current_atomic_write; + s64 peak_atomic_write; + u64 committed_atomic_block; + u64 revoked_atomic_block; + + /* carve out reserved_blocks from total blocks */ + bool carve_out; + +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct kmem_cache *page_array_slab; /* page array entry */ + unsigned int page_array_slab_size; /* default page array slab size */ + + /* For runtime compression statistics */ + u64 compr_written_block; + u64 compr_saved_block; + u32 compr_new_inode; + + /* For compressed block cache */ + struct inode *compress_inode; /* cache compressed blocks */ + unsigned int compress_percent; /* cache page percentage */ + unsigned int compress_watermark; /* cache page watermark */ + atomic_t compress_page_hit; /* cache hit count */ +#endif + +#ifdef CONFIG_F2FS_IOSTAT + /* For app/fs IO statistics */ + spinlock_t iostat_lock; + unsigned long long iostat_count[NR_IO_TYPE]; + unsigned long long iostat_bytes[NR_IO_TYPE]; + unsigned long long prev_iostat_bytes[NR_IO_TYPE]; + bool iostat_enable; + unsigned long iostat_next_period; + unsigned int iostat_period_ms; + + /* For io latency related statistics info in one iostat period */ + spinlock_t iostat_lat_lock; + struct iostat_lat_info *iostat_io_lat; #endif }; +/* Definitions to access f2fs_sb_info */ +#define SEGS_TO_BLKS(sbi, segs) \ + ((segs) << (sbi)->log_blocks_per_seg) +#define BLKS_TO_SEGS(sbi, blks) \ + ((blks) >> (sbi)->log_blocks_per_seg) + +#define BLKS_PER_SEG(sbi) ((sbi)->blocks_per_seg) +#define BLKS_PER_SEC(sbi) (SEGS_TO_BLKS(sbi, (sbi)->segs_per_sec)) +#define SEGS_PER_SEC(sbi) ((sbi)->segs_per_sec) + +__printf(3, 4) +void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...); + +#define f2fs_err(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_notice(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_NOTICE fmt, ##__VA_ARGS__) +#define f2fs_info(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_INFO fmt, ##__VA_ARGS__) +#define f2fs_debug(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_DEBUG fmt, ##__VA_ARGS__) + +#define f2fs_err_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_info_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_INFO fmt, ##__VA_ARGS__) + #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, fault_name[type], \ - __func__, __builtin_return_address(0)) -static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) +#define time_to_inject(sbi, type) __time_to_inject(sbi, type, __func__, \ + __builtin_return_address(0)) +static inline bool __time_to_inject(struct f2fs_sb_info *sbi, int type, + const char *func, const char *parent_func) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (!ffi->inject_rate) return false; @@ -1070,71 +2004,82 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) atomic_inc(&ffi->inject_ops); if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { atomic_set(&ffi->inject_ops, 0); + ffi->inject_count[type]++; + f2fs_info_ratelimited(sbi, "inject %s in %s of %pS", + f2fs_fault_name[type], func, parent_func); return true; } return false; } +#else +static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) +{ + return false; +} #endif -/* For write statistics. Suppose sector size is 512 bytes, - * and the return value is in kbytes. s is of struct f2fs_sb_info. +/* + * Test if the mounted volume is a multi-device volume. + * - For a single regular disk volume, sbi->s_ndevs is 0. + * - For a single zoned disk volume, sbi->s_ndevs is 1. + * - For a multi-device volume, sbi->s_ndevs is always 2 or more. */ -#define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) - \ - (s)->sectors_written_start) >> 1) +static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) +{ + return sbi->s_ndevs > 1; +} static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { - sbi->last_time[type] = jiffies; + unsigned long now = jiffies; + + sbi->last_time[type] = now; + + /* DISCARD_TIME and GC_TIME are based on REQ_TIME */ + if (type == REQ_TIME) { + sbi->last_time[DISCARD_TIME] = now; + sbi->last_time[GC_TIME] = now; + } } static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) { - struct timespec ts = {sbi->interval_time[type], 0}; - unsigned long interval = timespec_to_jiffies(&ts); + unsigned long interval = sbi->interval_time[type] * HZ; return time_after(jiffies, sbi->last_time[type] + interval); } -static inline bool is_idle(struct f2fs_sb_info *sbi) +static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, + int type) { - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->root_rl; + unsigned long interval = sbi->interval_time[type] * HZ; + unsigned int wait_ms = 0; + long delta; - if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) - return 0; + delta = (sbi->last_time[type] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); - return f2fs_time_over(sbi, REQ_TIME); + return wait_ms; } /* * Inline functions */ -static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, - unsigned int length) +static inline u32 __f2fs_crc32(u32 crc, const void *address, + unsigned int length) { - SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver); - u32 *ctx = (u32 *)shash_desc_ctx(shash); - u32 retval; - int err; - - shash->tfm = sbi->s_chksum_driver; - shash->flags = 0; - *ctx = F2FS_SUPER_MAGIC; - - err = crypto_shash_update(shash, address, length); - BUG_ON(err); + return crc32(crc, address, length); +} - retval = *ctx; - barrier_data(ctx); - return retval; +static inline u32 f2fs_crc32(const void *address, unsigned int length) +{ + return __f2fs_crc32(F2FS_SUPER_MAGIC, address, length); } -static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, - void *buf, size_t buf_size) +static inline u32 f2fs_chksum(u32 crc, const void *address, unsigned int length) { - return f2fs_crc32(sbi, buf, buf_size) == blk_crc; + return __f2fs_crc32(crc, address, length); } static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) @@ -1157,9 +2102,9 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) return F2FS_I_SB(mapping->host); } -static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) +static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio) { - return F2FS_M_SB(page->mapping); + return F2FS_M_SB(folio->mapping); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) @@ -1167,19 +2112,29 @@ static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) return (struct f2fs_super_block *)(sbi->raw_super); } +static inline struct f2fs_super_block *F2FS_SUPER_BLOCK(struct folio *folio, + pgoff_t index) +{ + pgoff_t idx_in_folio = index % folio_nr_pages(folio); + + return (struct f2fs_super_block *) + (page_address(folio_page(folio, idx_in_folio)) + + F2FS_SUPER_OFFSET); +} + static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) { return (struct f2fs_checkpoint *)(sbi->ckpt); } -static inline struct f2fs_node *F2FS_NODE(struct page *page) +static inline struct f2fs_node *F2FS_NODE(const struct folio *folio) { - return (struct f2fs_node *)page_address(page); + return (struct f2fs_node *)folio_address(folio); } -static inline struct f2fs_inode *F2FS_INODE(struct page *page) +static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio) { - return &((struct f2fs_node *)page_address(page))->i; + return &((struct f2fs_node *)folio_address(folio))->i; } static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) @@ -1217,6 +2172,16 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) return sbi->node_inode->i_mapping; } +static inline bool is_meta_folio(struct folio *folio) +{ + return folio->mapping == META_MAPPING(F2FS_F_SB(folio)); +} + +static inline bool is_node_folio(struct folio *folio) +{ + return folio->mapping == NODE_MAPPING(F2FS_F_SB(folio)); +} + static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) { return test_bit(type, &sbi->s_flag); @@ -1237,6 +2202,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) return le64_to_cpu(cp->checkpoint_ver); } +static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type) +{ + if (type < F2FS_MAX_QUOTAS) + return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]); + return 0; +} + static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp) { size_t crc_offset = le32_to_cpu(cp->checksum_offset); @@ -1291,19 +2263,104 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +#define init_f2fs_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_f2fs_rwsem((sem), #sem, &__key); \ +} while (0) + +static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem, + const char *sem_name, struct lock_class_key *key) +{ + __init_rwsem(&sem->internal_rwsem, sem_name, key); +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + init_waitqueue_head(&sem->read_waiters); +#endif +} + +static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem) +{ + return rwsem_is_locked(&sem->internal_rwsem); +} + +static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem) +{ + return rwsem_is_contended(&sem->internal_rwsem); +} + +static inline void f2fs_down_read(struct f2fs_rwsem *sem) +{ +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem)); +#else + down_read(&sem->internal_rwsem); +#endif +} + +static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem) +{ + return down_read_trylock(&sem->internal_rwsem); +} + +static inline void f2fs_up_read(struct f2fs_rwsem *sem) +{ + up_read(&sem->internal_rwsem); +} + +static inline void f2fs_down_write(struct f2fs_rwsem *sem) +{ + down_write(&sem->internal_rwsem); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_read_nested(&sem->internal_rwsem, subclass); +} + +static inline void f2fs_down_write_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_write_nested(&sem->internal_rwsem, subclass); +} +#else +#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) +#define f2fs_down_write_nested(sem, subclass) f2fs_down_write(sem) +#endif + +static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) +{ + return down_write_trylock(&sem->internal_rwsem); +} + +static inline void f2fs_up_write(struct f2fs_rwsem *sem) +{ + up_write(&sem->internal_rwsem); +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wake_up_all(&sem->read_waiters); +#endif +} + static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { unsigned long flags; + unsigned char *nat_bits; - set_sbi_flag(sbi, SBI_NEED_FSCK); + /* + * In order to re-enable nat_bits we need to call fsck.f2fs by + * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost, + * so let's rely on regular fsck or unclean shutdown. + */ if (lock) spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); - kfree(NM_I(sbi)->nat_bits); + nat_bits = NM_I(sbi)->nat_bits; NM_I(sbi)->nat_bits = NULL; if (lock) spin_unlock_irqrestore(&sbi->cp_lock, flags); + + kvfree(nat_bits); } static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, @@ -1316,27 +2373,29 @@ static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + f2fs_down_read(&sbi->cp_rwsem); } static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) { - return down_read_trylock(&sbi->cp_rwsem); + if (time_to_inject(sbi, FAULT_LOCK_OP)) + return 0; + return f2fs_down_read_trylock(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + f2fs_up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write(&sbi->cp_rwsem); + f2fs_down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - up_write(&sbi->cp_rwsem); + f2fs_up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) @@ -1362,18 +2421,6 @@ static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) } /* - * Check whether the given nid is within node id range. - */ -static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) -{ - if (unlikely(nid < F2FS_ROOT_INO(sbi))) - return -EINVAL; - if (unlikely(nid >= NM_I(sbi)->max_nid)) - return -EINVAL; - return 0; -} - -/* * Check whether the inode has blocks or not */ static inline int F2FS_HAS_BLOCKS(struct inode *inode) @@ -1388,11 +2435,49 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } +static inline bool __allow_reserved_root(struct f2fs_sb_info *sbi, + struct inode *inode, bool cap) +{ + if (!inode) + return true; + if (IS_NOQUOTA(inode)) + return true; + if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid())) + return true; + if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) && + in_group_p(F2FS_OPTION(sbi).s_resgid)) + return true; + if (cap && capable(CAP_SYS_RESOURCE)) + return true; + return false; +} + +static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, bool cap) +{ + block_t avail_user_block_count; + + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks; + + if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_root(sbi, inode, cap)) + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; + + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; + else + avail_user_block_count = 0; + } + + return avail_user_block_count; +} + static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t *count) + struct inode *inode, blkcnt_t *count, bool partial) { - blkcnt_t diff = 0, release = 0; + long long diff = 0, release = 0; block_t avail_user_block_count; int ret; @@ -1400,13 +2485,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (ret) return ret; -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); release = *count; - goto enospc; + goto release_quota; } -#endif + /* * let's increase this in prior to actual block count change in order * for f2fs_sync_file to avoid data races when deciding checkpoint. @@ -1414,31 +2497,130 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); spin_lock(&sbi->stat_lock); - sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; - if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { - diff = sbi->total_valid_block_count - avail_user_block_count; + + avail_user_block_count = get_available_block_count(sbi, inode, true); + diff = (long long)sbi->total_valid_block_count + *count - + avail_user_block_count; + if (unlikely(diff > 0)) { + if (!partial) { + spin_unlock(&sbi->stat_lock); + release = *count; + goto enospc; + } + if (diff > *count) + diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count = avail_user_block_count; if (!*count) { spin_unlock(&sbi->stat_lock); - percpu_counter_sub(&sbi->alloc_valid_block_count, diff); goto enospc; } } + sbi->total_valid_block_count += (block_t)(*count); + spin_unlock(&sbi->stat_lock); - if (release) + if (unlikely(release)) { + percpu_counter_sub(&sbi->alloc_valid_block_count, release); dquot_release_reservation_block(inode, release); + } f2fs_i_blocks_write(inode, *count, true, true); return 0; enospc: + percpu_counter_sub(&sbi->alloc_valid_block_count, release); +release_quota: dquot_release_reservation_block(inode, release); return -ENOSPC; } +#define PAGE_PRIVATE_GET_FUNC(name, flagname) \ +static inline bool folio_test_f2fs_##name(const struct folio *folio) \ +{ \ + unsigned long priv = (unsigned long)folio->private; \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + return (priv & v) == v; \ +} \ +static inline bool page_private_##name(struct page *page) \ +{ \ + return PagePrivate(page) && \ + test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \ + test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ +} + +#define PAGE_PRIVATE_SET_FUNC(name, flagname) \ +static inline void folio_set_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + if (!folio->private) \ + folio_attach_private(folio, (void *)v); \ + else { \ + v |= (unsigned long)folio->private; \ + folio->private = (void *)v; \ + } \ +} \ +static inline void set_page_private_##name(struct page *page) \ +{ \ + if (!PagePrivate(page)) \ + attach_page_private(page, (void *)0); \ + set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \ + set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ +} + +#define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \ +static inline void folio_clear_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (unsigned long)folio->private; \ + \ + v &= ~(1UL << PAGE_PRIVATE_##flagname); \ + if (v == (1UL << PAGE_PRIVATE_NOT_POINTER)) \ + folio_detach_private(folio); \ + else \ + folio->private = (void *)v; \ +} \ +static inline void clear_page_private_##name(struct page *page) \ +{ \ + clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ + if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) \ + detach_page_private(page); \ +} + +PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); +PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE); + +PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); +PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE); + +PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); +PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); + +static inline unsigned long folio_get_f2fs_data(struct folio *folio) +{ + unsigned long data = (unsigned long)folio->private; + + if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data)) + return 0; + return data >> PAGE_PRIVATE_MAX; +} + +static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data) +{ + data = (1UL << PAGE_PRIVATE_NOT_POINTER) | (data << PAGE_PRIVATE_MAX); + + if (!folio_test_private(folio)) + folio_attach_private(folio, (void *)data); + else + folio->private = (void *)((unsigned long)folio->private | data); +} + static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) @@ -1446,10 +2628,27 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK; spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(sbi, inode->i_blocks < sectors); - sbi->total_valid_block_count -= (block_t)count; + if (unlikely(sbi->total_valid_block_count < count)) { + f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u", + sbi->total_valid_block_count, inode->i_ino, count); + sbi->total_valid_block_count = 0; + set_sbi_flag(sbi, SBI_NEED_FSCK); + } else { + sbi->total_valid_block_count -= count; + } + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); + if (unlikely(inode->i_blocks < sectors)) { + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks, + (unsigned long long)sectors); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return; + } f2fs_i_blocks_write(inode, count, false, true); } @@ -1457,11 +2656,12 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { atomic_inc(&sbi->nr_pages[count_type]); - if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA) - return; - - set_sbi_flag(sbi, SBI_IS_DIRTY); + if (count_type == F2FS_DIRTY_DENTS || + count_type == F2FS_DIRTY_NODES || + count_type == F2FS_DIRTY_META || + count_type == F2FS_DIRTY_QDATA || + count_type == F2FS_DIRTY_IMETA) + set_sbi_flag(sbi, SBI_IS_DIRTY); } static inline void inode_inc_dirty_pages(struct inode *inode) @@ -1469,6 +2669,8 @@ static inline void inode_inc_dirty_pages(struct inode *inode) atomic_inc(&F2FS_I(inode)->dirty_pages); inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); + if (IS_NOQUOTA(inode)) + inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); } static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1485,6 +2687,30 @@ static inline void inode_dec_dirty_pages(struct inode *inode) atomic_dec(&F2FS_I(inode)->dirty_pages); dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); + if (IS_NOQUOTA(inode)) + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); +} + +static inline void inc_atomic_write_cnt(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + u64 current_write; + + fi->atomic_write_cnt++; + atomic64_inc(&sbi->current_atomic_write); + current_write = atomic64_read(&sbi->current_atomic_write); + if (current_write > sbi->peak_atomic_write) + sbi->peak_atomic_write = current_write; +} + +static inline void release_atomic_write_cnt(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + + atomic64_sub(fi->atomic_write_cnt, &sbi->current_atomic_write); + fi->atomic_write_cnt = 0; } static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) @@ -1499,11 +2725,8 @@ static inline int get_dirty_pages(struct inode *inode) static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) { - unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; - unsigned int segs = (get_pages(sbi, block_type) + pages_per_sec - 1) >> - sbi->log_blocks_per_seg; - - return segs / sbi->segs_per_sec; + return div_u64(get_pages(sbi, block_type) + BLKS_PER_SEC(sbi) - 1, + BLKS_PER_SEC(sbi)); } static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) @@ -1537,17 +2760,28 @@ static inline block_t __cp_payload(struct f2fs_sb_info *sbi) static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + void *tmp_ptr = &ckpt->sit_nat_version_bitmap; int offset; + if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { + offset = (flag == SIT_BITMAP) ? + le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; + /* + * if large_nat_bitmap feature is enabled, leave checksum + * protection for all nat/sit bitmaps. + */ + return tmp_ptr + offset + sizeof(__le32); + } + if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) - return &ckpt->sit_nat_version_bitmap; + return tmp_ptr; else return (unsigned char *)ckpt + F2FS_BLKSIZE; } else { offset = (flag == NAT_BITMAP) ? le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + return tmp_ptr + offset; } } @@ -1556,7 +2790,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); if (sbi->cur_cp_pack == 2) - start_addr += sbi->blocks_per_seg; + start_addr += BLKS_PER_SEG(sbi); return start_addr; } @@ -1565,7 +2799,7 @@ static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi) block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); if (sbi->cur_cp_pack == 1) - start_addr += sbi->blocks_per_seg; + start_addr += BLKS_PER_SEG(sbi); return start_addr; } @@ -1579,30 +2813,47 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); } +extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { block_t valid_block_count; - unsigned int valid_node_count; - bool quota = inode && !is_inode; + unsigned int valid_node_count, avail_user_node_count; + unsigned int avail_user_block_count; + int err; - if (quota) { - int ret = dquot_reserve_block(inode, 1); - if (ret) - return ret; + if (is_inode) { + if (inode) { + err = dquot_alloc_inode(inode); + if (err) + return err; + } + } else { + err = dquot_reserve_block(inode, 1); + if (err) + return err; } + if (time_to_inject(sbi, FAULT_BLOCK)) + goto enospc; + spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count + sbi->reserved_blocks > - sbi->user_block_count)) { + avail_user_block_count = get_available_block_count(sbi, inode, + test_opt(sbi, RESERVE_NODE)); + + if (unlikely(valid_block_count > avail_user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } + avail_user_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + if (test_opt(sbi, RESERVE_NODE) && + !__allow_reserved_root(sbi, inode, true)) + avail_user_node_count -= F2FS_OPTION(sbi).root_reserved_nodes; valid_node_count = sbi->total_valid_node_count + 1; - if (unlikely(valid_node_count > sbi->total_node_count)) { + if (unlikely(valid_node_count > avail_user_node_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } @@ -1622,8 +2873,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return 0; enospc: - if (quota) + if (is_inode) { + if (inode) + dquot_free_inode(inode); + } else { dquot_release_reservation_block(inode, 1); + } return -ENOSPC; } @@ -1632,17 +2887,35 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi, !sbi->total_valid_block_count); - f2fs_bug_on(sbi, !sbi->total_valid_node_count); - f2fs_bug_on(sbi, !is_inode && !inode->i_blocks); + if (unlikely(!sbi->total_valid_block_count || + !sbi->total_valid_node_count)) { + f2fs_warn(sbi, "dec_valid_node_count: inconsistent block counts, total_valid_block:%u, total_valid_node:%u", + sbi->total_valid_block_count, + sbi->total_valid_node_count); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } else { + sbi->total_valid_block_count--; + sbi->total_valid_node_count--; + } - sbi->total_valid_node_count--; - sbi->total_valid_block_count--; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks++; spin_unlock(&sbi->stat_lock); - if (!is_inode) + if (is_inode) { + dquot_free_inode(inode); + } else { + if (unlikely(inode->i_blocks == 0)) { + f2fs_warn(sbi, "dec_valid_node_count: inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return; + } f2fs_i_blocks_write(inode, 1, false, true); + } } static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) @@ -1665,55 +2938,75 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi) return percpu_counter_sum_positive(&sbi->total_valid_inode_count); } -static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, - pgoff_t index, bool for_write) +static inline struct folio *f2fs_grab_cache_folio(struct address_space *mapping, + pgoff_t index, bool for_write) { -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct page *page = find_lock_page(mapping, index); + struct folio *folio; + unsigned int flags; - if (page) - return page; + if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) { + fgf_t fgf_flags; - if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); - return NULL; + if (!for_write) + fgf_flags = FGP_LOCK | FGP_ACCESSED; + else + fgf_flags = FGP_LOCK; + folio = __filemap_get_folio(mapping, index, fgf_flags, 0); + if (!IS_ERR(folio)) + return folio; + + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) + return ERR_PTR(-ENOMEM); } -#endif + if (!for_write) - return grab_cache_page(mapping, index); - return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + return filemap_grab_folio(mapping, index); + + flags = memalloc_nofs_save(); + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + memalloc_nofs_restore(flags); + + return folio; } -static inline void f2fs_copy_page(struct page *src, struct page *dst) +static inline struct folio *f2fs_filemap_get_folio( + struct address_space *mapping, pgoff_t index, + fgf_t fgp_flags, gfp_t gfp_mask) { - char *src_kaddr = kmap(src); - char *dst_kaddr = kmap(dst); + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) + return ERR_PTR(-ENOMEM); - memcpy(dst_kaddr, src_kaddr, PAGE_SIZE); - kunmap(dst); - kunmap(src); + return __filemap_get_folio(mapping, index, fgp_flags, gfp_mask); } -static inline void f2fs_put_page(struct page *page, int unlock) +static inline void f2fs_folio_put(struct folio *folio, bool unlock) { - if (!page) + if (IS_ERR_OR_NULL(folio)) return; if (unlock) { - f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page)); - unlock_page(page); + f2fs_bug_on(F2FS_F_SB(folio), !folio_test_locked(folio)); + folio_unlock(folio); } - put_page(page); + folio_put(folio); +} + +static inline void f2fs_put_page(struct page *page, bool unlock) +{ + if (!page) + return; + f2fs_folio_put(page_folio(page), unlock); } static inline void f2fs_put_dnode(struct dnode_of_data *dn) { - if (dn->node_page) - f2fs_put_page(dn->node_page, 1); - if (dn->inode_page && dn->node_page != dn->inode_page) - f2fs_put_page(dn->inode_page, 0); - dn->node_page = NULL; - dn->inode_page = NULL; + if (dn->node_folio) + f2fs_folio_put(dn->node_folio, true); + if (dn->inode_folio && dn->node_folio != dn->inode_folio) + f2fs_folio_put(dn->inode_folio, false); + dn->node_folio = NULL; + dn->inode_folio = NULL; } static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, @@ -1722,7 +3015,7 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL); } -static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, +static inline void *f2fs_kmem_cache_alloc_nofail(struct kmem_cache *cachep, gfp_t flags) { void *entry; @@ -1733,15 +3026,66 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline struct bio *f2fs_bio_alloc(int npages) +static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, + gfp_t flags, bool nofail, struct f2fs_sb_info *sbi) { - struct bio *bio; + if (nofail) + return f2fs_kmem_cache_alloc_nofail(cachep, flags); - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; + if (time_to_inject(sbi, FAULT_SLAB_ALLOC)) + return NULL; + + return kmem_cache_alloc(cachep, flags); +} + +static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) +{ + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || + get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || + get_pages(sbi, F2FS_WB_CP_DATA) || + get_pages(sbi, F2FS_DIO_READ) || + get_pages(sbi, F2FS_DIO_WRITE)) + return true; + + if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info && + atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) + return true; + + if (SM_I(sbi) && SM_I(sbi)->fcc_info && + atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + return true; + return false; +} + +static inline bool is_inflight_read_io(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_DIO_READ); +} + +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + bool zoned_gc = (type == GC_TIME && + F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_BLKZONED)); + + if (sbi->gc_mode == GC_URGENT_HIGH) + return true; + + if (sbi->bggc_io_aware == AWARE_READ_IO && is_inflight_read_io(sbi)) + return false; + if (sbi->bggc_io_aware == AWARE_ALL_IO && is_inflight_io(sbi, type)) + return false; + + if (sbi->gc_mode == GC_URGENT_MID) + return true; + + if (sbi->gc_mode == GC_URGENT_LOW && + (type == DISCARD_TIME || type == GC_TIME)) + return true; + + if (zoned_gc) + return true; + + return f2fs_time_over(sbi, type); } static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, @@ -1753,27 +3097,51 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) -static inline bool IS_INODE(struct page *page) +static inline bool IS_INODE(const struct folio *folio) { - struct f2fs_node *p = F2FS_NODE(page); + struct f2fs_node *p = F2FS_NODE(folio); return RAW_IS_INODE(p); } +static inline int offset_in_addr(struct f2fs_inode *i) +{ + return (i->i_inline & F2FS_EXTRA_ATTR) ? + (le16_to_cpu(i->i_extra_isize) / sizeof(__le32)) : 0; +} + static inline __le32 *blkaddr_in_node(struct f2fs_node *node) { return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; } -static inline block_t datablock_addr(struct page *node_page, - unsigned int offset) +static inline int f2fs_has_extra_attr(struct inode *inode); +static inline unsigned int get_dnode_base(struct inode *inode, + struct folio *node_folio) +{ + if (!IS_INODE(node_folio)) + return 0; + + return inode ? get_extra_isize(inode) : + offset_in_addr(&F2FS_NODE(node_folio)->i); +} + +static inline __le32 *get_dnode_addr(struct inode *inode, + struct folio *node_folio) { - struct f2fs_node *raw_node; - __le32 *addr_array; + return blkaddr_in_node(F2FS_NODE(node_folio)) + + get_dnode_base(inode, node_folio); +} - raw_node = F2FS_NODE(node_page); - addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[offset]); +static inline block_t data_blkaddr(struct inode *inode, + struct folio *node_folio, unsigned int offset) +{ + return le32_to_cpu(*(get_dnode_addr(inode, node_folio) + offset)); +} + +static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) +{ + return data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node); } static inline int f2fs_test_bit(unsigned int nr, char *addr) @@ -1781,7 +3149,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) int mask; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); return mask & *addr; } @@ -1790,7 +3158,7 @@ static inline void f2fs_set_bit(unsigned int nr, char *addr) int mask; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); *addr |= mask; } @@ -1799,7 +3167,7 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr) int mask; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); *addr &= ~mask; } @@ -1809,7 +3177,7 @@ static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) int ret; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); ret = mask & *addr; *addr |= mask; return ret; @@ -1821,7 +3189,7 @@ static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr) int ret; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); ret = mask & *addr; *addr &= ~mask; return ret; @@ -1832,39 +3200,51 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) int mask; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); *addr ^= mask; } -/* used for f2fs_inode_info->flags */ -enum { - FI_NEW_INODE, /* indicate newly allocated inode */ - FI_DIRTY_INODE, /* indicate inode is dirty or not */ - FI_AUTO_RECOVER, /* indicate inode is recoverable */ - FI_DIRTY_DIR, /* indicate directory has dirty pages */ - FI_INC_LINK, /* need to increment i_nlink */ - FI_ACL_MODE, /* indicate acl mode */ - FI_NO_ALLOC, /* should not allocate any blocks */ - FI_FREE_NID, /* free allocated nide */ - FI_NO_EXTENT, /* not to use the extent cache */ - FI_INLINE_XATTR, /* used for inline xattr */ - FI_INLINE_DATA, /* used for inline data*/ - FI_INLINE_DENTRY, /* used for inline dentry */ - FI_APPEND_WRITE, /* inode has appended data */ - FI_UPDATE_WRITE, /* inode has in-place-update data */ - FI_NEED_IPU, /* used for ipu per file */ - FI_ATOMIC_FILE, /* indicate atomic file */ - FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ - FI_VOLATILE_FILE, /* indicate volatile file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ - FI_DROP_CACHE, /* drop dirty page cache */ - FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ - FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ - FI_HOT_DATA, /* indicate file is hot */ -}; +/* + * On-disk inode flags (f2fs_inode::i_flags) + */ +#define F2FS_COMPR_FL 0x00000004 /* Compress file */ +#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ +#define F2FS_NOCOMP_FL 0x00000400 /* Don't compress */ +#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ +#define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ + +#define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) + +/* Flags that should be inherited by new inodes from their parent. */ +#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ + F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ + F2FS_CASEFOLD_FL) + +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ + F2FS_CASEFOLD_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) + +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) + +static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & F2FS_REG_FLMASK; + else + return flags & F2FS_OTHER_FLMASK; +} static inline void __mark_inode_dirty_flag(struct inode *inode, int flag, bool set) @@ -1873,33 +3253,40 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_XATTR: case FI_INLINE_DATA: case FI_INLINE_DENTRY: + case FI_NEW_INODE: if (set) return; + fallthrough; case FI_DATA_EXIST: - case FI_INLINE_DOTS: + case FI_PIN_FILE: + case FI_COMPRESS_RELEASED: f2fs_mark_inode_dirty_sync(inode, true); } } static inline void set_inode_flag(struct inode *inode, int flag) { - if (!test_bit(flag, &F2FS_I(inode)->flags)) - set_bit(flag, &F2FS_I(inode)->flags); + set_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, true); } static inline int is_inode_flag_set(struct inode *inode, int flag) { - return test_bit(flag, &F2FS_I(inode)->flags); + return test_bit(flag, F2FS_I(inode)->flags); } static inline void clear_inode_flag(struct inode *inode, int flag) { - if (test_bit(flag, &F2FS_I(inode)->flags)) - clear_bit(flag, &F2FS_I(inode)->flags); + clear_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, false); } +static inline bool f2fs_verity_in_progress(struct inode *inode) +{ + return IS_ENABLED(CONFIG_FS_VERITY) && + is_inode_flag_set(inode, FI_VERITY_IN_PROGRESS); +} + static inline void set_acl_inode(struct inode *inode, umode_t mode) { F2FS_I(inode)->i_acl_mode = mode; @@ -1937,6 +3324,8 @@ static inline void f2fs_i_blocks_write(struct inode *inode, set_inode_flag(inode, FI_AUTO_RECOVER); } +static inline bool f2fs_is_atomic_file(struct inode *inode); + static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) { bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); @@ -1946,6 +3335,10 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) return; i_size_write(inode, i_size); + + if (f2fs_is_atomic_file(inode)) + return; + f2fs_mark_inode_dirty_sync(inode, true); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); @@ -1957,6 +3350,13 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) f2fs_mark_inode_dirty_sync(inode, true); } +static inline void f2fs_i_gc_failures_write(struct inode *inode, + unsigned int count) +{ + F2FS_I(inode)->i_gc_failures = count; + f2fs_mark_inode_dirty_sync(inode, true); +} + static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) { F2FS_I(inode)->i_xattr_nid = xnid; @@ -1974,15 +3374,19 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) struct f2fs_inode_info *fi = F2FS_I(inode); if (ri->i_inline & F2FS_INLINE_XATTR) - set_bit(FI_INLINE_XATTR, &fi->flags); + set_bit(FI_INLINE_XATTR, fi->flags); if (ri->i_inline & F2FS_INLINE_DATA) - set_bit(FI_INLINE_DATA, &fi->flags); + set_bit(FI_INLINE_DATA, fi->flags); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_bit(FI_INLINE_DENTRY, &fi->flags); + set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) - set_bit(FI_DATA_EXIST, &fi->flags); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, &fi->flags); + set_bit(FI_DATA_EXIST, fi->flags); + if (ri->i_inline & F2FS_EXTRA_ATTR) + set_bit(FI_EXTRA_ATTR, fi->flags); + if (ri->i_inline & F2FS_PIN_FILE) + set_bit(FI_PIN_FILE, fi->flags); + if (ri->i_inline & F2FS_COMPRESS_RELEASED) + set_bit(FI_COMPRESS_RELEASED, fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -1997,8 +3401,17 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(inode, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; + if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) + ri->i_inline |= F2FS_EXTRA_ATTR; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + ri->i_inline |= F2FS_PIN_FILE; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) + ri->i_inline |= F2FS_COMPRESS_RELEASED; +} + +static inline int f2fs_has_extra_attr(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_EXTRA_ATTR); } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -2006,29 +3419,59 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_XATTR); } -static inline unsigned int addrs_per_inode(struct inode *inode) +static inline int f2fs_compressed_file(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; - return DEF_ADDRS_PER_INODE; + return S_ISREG(inode->i_mode) && + is_inode_flag_set(inode, FI_COMPRESSED_FILE); +} + +static inline bool f2fs_need_compress_data(struct inode *inode) +{ + int compress_mode = F2FS_OPTION(F2FS_I_SB(inode)).compress_mode; + + if (!f2fs_compressed_file(inode)) + return false; + + if (compress_mode == COMPR_MODE_FS) + return true; + else if (compress_mode == COMPR_MODE_USER && + is_inode_flag_set(inode, FI_ENABLE_COMPRESS)) + return true; + + return false; } -static inline void *inline_xattr_addr(struct page *page) +static inline unsigned int addrs_per_page(struct inode *inode, + bool is_inode) { - struct f2fs_inode *ri = F2FS_INODE(page); + unsigned int addrs = is_inode ? (CUR_ADDRS_PER_INODE(inode) - + get_inline_xattr_addrs(inode)) : DEF_ADDRS_PER_BLOCK; + + if (f2fs_compressed_file(inode)) + return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); + return addrs; +} + +static inline +void *inline_xattr_addr(struct inode *inode, const struct folio *folio) +{ + struct f2fs_inode *ri = F2FS_INODE(folio); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - - F2FS_INLINE_XATTR_ADDRS]); + get_inline_xattr_addrs(inode)]); } static inline int inline_xattr_size(struct inode *inode) { if (f2fs_has_inline_xattr(inode)) - return F2FS_INLINE_XATTR_ADDRS << 2; - else - return 0; + return get_inline_xattr_addrs(inode) * sizeof(__le32); + return 0; } +/* + * Notice: check inline_data flag without inode page lock is unsafe. + * It could change at any time by f2fs_convert_inline_folio(). + */ static inline int f2fs_has_inline_data(struct inode *inode) { return is_inode_flag_set(inode, FI_INLINE_DATA); @@ -2039,41 +3482,31 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(inode, FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_INLINE_DOTS); -} - -static inline bool f2fs_is_atomic_file(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_ATOMIC_FILE); -} - -static inline bool f2fs_is_commit_atomic_write(struct inode *inode) +static inline int f2fs_is_mmap_file(struct inode *inode) { - return is_inode_flag_set(inode, FI_ATOMIC_COMMIT); + return is_inode_flag_set(inode, FI_MMAP_FILE); } -static inline bool f2fs_is_volatile_file(struct inode *inode) +static inline bool f2fs_is_pinned_file(struct inode *inode) { - return is_inode_flag_set(inode, FI_VOLATILE_FILE); + return is_inode_flag_set(inode, FI_PIN_FILE); } -static inline bool f2fs_is_first_block_written(struct inode *inode) +static inline bool f2fs_is_atomic_file(struct inode *inode) { - return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); + return is_inode_flag_set(inode, FI_ATOMIC_FILE); } -static inline bool f2fs_is_drop_cache(struct inode *inode) +static inline bool f2fs_is_cow_file(struct inode *inode) { - return is_inode_flag_set(inode, FI_DROP_CACHE); + return is_inode_flag_set(inode, FI_COW_FILE); } -static inline void *inline_data_addr(struct page *page) +static inline void *inline_data_addr(struct inode *inode, struct folio *folio) { - struct f2fs_inode *ri = F2FS_INODE(page); + __le32 *addr = get_dnode_addr(inode, folio); - return (void *)&(ri->i_addr[1]); + return (void *)(addr + DEF_INLINE_RESERVED_SIZE); } static inline int f2fs_has_inline_dentry(struct inode *inode) @@ -2081,12 +3514,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DENTRY); } -static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) -{ - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); -} - static inline int is_file(struct inode *inode, int type) { return F2FS_I(inode)->i_advise & type; @@ -2094,21 +3521,41 @@ static inline int is_file(struct inode *inode, int type) static inline void set_file(struct inode *inode, int type) { + if (is_file(inode, type)) + return; F2FS_I(inode)->i_advise |= type; f2fs_mark_inode_dirty_sync(inode, true); } static inline void clear_file(struct inode *inode, int type) { + if (!is_file(inode, type)) + return; F2FS_I(inode)->i_advise &= ~type; f2fs_mark_inode_dirty_sync(inode, true); } +static inline bool f2fs_is_time_consistent(struct inode *inode) +{ + struct timespec64 ts = inode_get_atime(inode); + + if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &ts)) + return false; + ts = inode_get_ctime(inode); + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ts)) + return false; + ts = inode_get_mtime(inode); + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &ts)) + return false; + return true; +} + static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) { + bool ret; + if (dsync) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool ret; spin_lock(&sbi->inode_lock[DIRTY_META]); ret = list_empty(&F2FS_I(inode)->gdirty_list); @@ -2117,14 +3564,22 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) } if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) || file_keep_isize(inode) || - i_size_read(inode) & PAGE_MASK) + i_size_read(inode) & ~PAGE_MASK) return false; - return F2FS_I(inode)->last_disk_size == i_size_read(inode); + + if (!f2fs_is_time_consistent(inode)) + return false; + + spin_lock(&F2FS_I(inode)->i_size_lock); + ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); + spin_unlock(&F2FS_I(inode)->i_size_lock); + + return ret; } -static inline int f2fs_readonly(struct super_block *sb) +static inline bool f2fs_readonly(struct super_block *sb) { - return sb->s_flags & MS_RDONLY; + return sb_rdonly(sb); } static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) @@ -2132,121 +3587,222 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); } -static inline bool is_dot_dotdot(const struct qstr *str) +static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) { - if (str->len == 1 && str->name[0] == '.') - return true; + if (time_to_inject(sbi, FAULT_KMALLOC)) + return NULL; - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; + return kmalloc(size, flags); +} - return false; +static inline void *f2fs_getname(struct f2fs_sb_info *sbi) +{ + if (time_to_inject(sbi, FAULT_KMALLOC)) + return NULL; + + return __getname(); } -static inline bool f2fs_may_extent_tree(struct inode *inode) +static inline void f2fs_putname(char *buf) { - if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; + __putname(buf); +} - return S_ISREG(inode->i_mode); +static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ + return f2fs_kmalloc(sbi, size, flags | __GFP_ZERO); } -static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, +static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_KMALLOC)) { - f2fs_show_injection_info(FAULT_KMALLOC); + if (time_to_inject(sbi, FAULT_KVMALLOC)) return NULL; - } -#endif - return kmalloc(size, flags); + + return kvmalloc(size, flags); } -#define get_inode_mode(i) \ +static inline void *f2fs_kvzalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ + return f2fs_kvmalloc(sbi, size, flags | __GFP_ZERO); +} + +static inline void *f2fs_vmalloc(struct f2fs_sb_info *sbi, size_t size) +{ + if (time_to_inject(sbi, FAULT_VMALLOC)) + return NULL; + + return vmalloc(size); +} + +static inline int get_extra_isize(struct inode *inode) +{ + return F2FS_I(inode)->i_extra_isize / sizeof(__le32); +} + +static inline int get_inline_xattr_addrs(struct inode *inode) +{ + return F2FS_I(inode)->i_inline_xattr_size; +} + +#define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) +#define F2FS_MIN_EXTRA_ATTR_SIZE (sizeof(__le32)) + +#define F2FS_TOTAL_EXTRA_ATTR_SIZE \ + (offsetof(struct f2fs_inode, i_extra_end) - \ + offsetof(struct f2fs_inode, i_extra_isize)) \ + +#define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr)) +#define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \ + ((offsetof(typeof(*(f2fs_inode)), field) + \ + sizeof((f2fs_inode)->field)) \ + <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ + +#define __is_large_section(sbi) (SEGS_PER_SEC(sbi) > 1) + +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) + +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); +static inline void verify_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) + f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.", + blkaddr, type); +} + +static inline bool __is_valid_data_blkaddr(block_t blkaddr) +{ + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR || + blkaddr == COMPRESS_ADDR) + return false; + return true; +} + /* * file.c */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); -void truncate_data_blocks(struct dnode_of_data *dn); -int truncate_blocks(struct inode *inode, u64 from, bool lock); +int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock); +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); -int f2fs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags); -int f2fs_setattr(struct dentry *dentry, struct iattr *attr); -int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); -int truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags); +int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr); +int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); +void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly, bool need_lock); +int f2fs_precache_extents(struct inode *inode); +int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); +int f2fs_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid); +int f2fs_pin_file_control(struct inode *inode, bool inc); /* * inode.c */ void f2fs_set_inode_flags(struct inode *inode); +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio); +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); -int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); -int update_inode(struct inode *inode, struct page *node_page); -int update_inode_page(struct inode *inode); +int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); +void f2fs_update_inode(struct inode *inode, struct folio *node_folio); +void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); +void f2fs_remove_donate_inode(struct inode *inode); void f2fs_evict_inode(struct inode *inode); -void handle_failed_inode(struct inode *inode); +void f2fs_handle_failed_inode(struct inode *inode); /* * namei.c */ +int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); +int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, + struct inode **new_inode); /* * dir.c */ -void set_de_type(struct f2fs_dir_entry *de, umode_t mode); -unsigned char get_de_type(struct f2fs_dir_entry *de); -struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, - f2fs_hash_t namehash, int *max_slots, - struct f2fs_dentry_ptr *d); +#if IS_ENABLED(CONFIG_UNICODE) +int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname); +void f2fs_free_casefolded_name(struct f2fs_filename *fname); +#else +static inline int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname) +{ + return 0; +} + +static inline void f2fs_free_casefolded_name(struct f2fs_filename *fname) +{ +} +#endif /* CONFIG_UNICODE */ + +int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct f2fs_filename *fname); +int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct f2fs_filename *fname); +void f2fs_free_filename(struct f2fs_filename *fname); +struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, + const struct f2fs_filename *fname, int *max_slots, + bool use_hash); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); -void do_make_empty_dir(struct inode *inode, struct inode *parent, +void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d); -struct page *init_inode_metadata(struct inode *inode, struct inode *dir, - const struct qstr *new_name, - const struct qstr *orig_name, struct page *dpage); -void update_parent_metadata(struct inode *dir, struct inode *inode, +struct folio *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, + const struct f2fs_filename *fname, struct folio *dfolio); +void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth); -int room_for_filename(const void *bitmap, int slots, int max_slots); +int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots); void f2fs_drop_nlink(struct inode *dir, struct inode *inode); struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page); + const struct f2fs_filename *fname, struct folio **res_folio); struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - const struct qstr *child, struct page **res_page); -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p); + const struct qstr *child, struct folio **res_folio); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct folio **f); ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, - struct page **page); + struct folio **folio); void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, - struct page *page, struct inode *inode); + struct folio *folio, struct inode *inode); +bool f2fs_has_enough_room(struct inode *dir, struct folio *ifolio, + const struct f2fs_filename *fname); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, - const struct qstr *name, f2fs_hash_t name_hash, + const struct fscrypt_str *name, f2fs_hash_t name_hash, unsigned int bit_pos); -int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, +int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); -int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, +int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); -int __f2fs_add_link(struct inode *dir, const struct qstr *name, +int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); -void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, struct inode *dir, struct inode *inode); -int f2fs_do_tmpfile(struct inode *inode, struct inode *dir); +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, + struct f2fs_filename *fname); bool f2fs_empty_dir(struct inode *dir); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { - return __f2fs_add_link(d_inode(dentry->d_parent), &dentry->d_name, + if (fscrypt_is_nokey_name(dentry)) + return -ENOKEY; + return f2fs_do_add_link(d_inode(dentry->d_parent), &dentry->d_name, inode, inode->i_ino, inode->i_mode); } @@ -2255,247 +3811,403 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) */ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); +int f2fs_dquot_initialize(struct inode *inode); +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); +int f2fs_do_quota_sync(struct super_block *sb, int type); +loff_t max_file_blocks(struct inode *inode); +void f2fs_quota_off_umount(struct super_block *sb); +void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason); +void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); -extern __printf(3, 4) -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); -int sanity_check_ckpt(struct f2fs_sb_info *sbi); +int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, - struct fscrypt_name *fname); +void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname); /* * node.c */ -struct dnode_of_data; struct node_info; - -bool available_free_memory(struct f2fs_sb_info *sbi, int type); -int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); -bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); -bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); -void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni); -pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); -int truncate_inode_blocks(struct inode *inode, pgoff_t from); -int truncate_xattr_node(struct inode *inode, struct page *page); -int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); -int remove_inode_page(struct inode *inode); -struct page *new_inode_page(struct inode *inode); -struct page *new_node_page(struct dnode_of_data *dn, - unsigned int ofs, struct page *ipage); -void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); -struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); -struct page *get_node_page_ra(struct page *parent, int start); -void move_node_page(struct page *node_page, int gc_type); -int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, - struct writeback_control *wbc, bool atomic); -int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc); -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); -bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); -void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); -void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); -int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); -void recover_inline_xattr(struct inode *inode, struct page *page); -int recover_xattr_data(struct inode *inode, struct page *page, - block_t blkaddr); -int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); -int restore_node_summary(struct f2fs_sb_info *sbi, +enum node_type; + +int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio); +void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio); +void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); +int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, + struct node_info *ni, bool checkpoint_context); +pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); +int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); +int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); +int f2fs_truncate_xattr_node(struct inode *inode); +int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, + unsigned int seq_id); +int f2fs_remove_inode_page(struct inode *inode); +struct folio *f2fs_new_inode_folio(struct inode *inode); +struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs); +void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); +struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid, + enum node_type node_type); +struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino); +struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid); +int f2fs_move_node_folio(struct folio *node_folio, int gc_type); +void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); +int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, + struct writeback_control *wbc, bool atomic, + unsigned int *seq_id); +int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, + struct writeback_control *wbc, + bool do_balance, enum iostat_type io_type); +int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); +bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); +void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); +void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); +int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); +int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio); +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio); +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio); +int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int build_node_manager(struct f2fs_sb_info *sbi); -void destroy_node_manager(struct f2fs_sb_info *sbi); -int __init create_node_manager_caches(void); -void destroy_node_manager_caches(void); +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_build_node_manager(struct f2fs_sb_info *sbi); +void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); +int __init f2fs_create_node_manager_caches(void); +void f2fs_destroy_node_manager_caches(void); /* * segment.c */ -void register_inmem_page(struct inode *inode, struct page *page); -void drop_inmem_pages(struct inode *inode); -void drop_inmem_page(struct inode *inode, struct page *page); -int commit_inmem_pages(struct inode *inode); +bool f2fs_need_SSR(struct f2fs_sb_info *sbi); +int f2fs_commit_atomic_write(struct inode *inode); +void f2fs_abort_atomic_write(struct inode *inode, bool clean); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); -void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); -int f2fs_issue_flush(struct f2fs_sb_info *sbi); -int create_flush_cmd_control(struct f2fs_sb_info *sbi); -void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); -void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); -bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); -void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); -void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); -void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); -void release_discard_addrs(struct f2fs_sb_info *sbi); -int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); -void allocate_new_segments(struct f2fs_sb_info *sbi); +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg); +int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); +int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); +void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len); +bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); +int f2fs_start_discard_thread(struct f2fs_sb_info *sbi); +void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); +void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); +void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, + struct cp_control *cpc); +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); +void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); +int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); +int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi); +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end); +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); +int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); -bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); -struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); -void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page); -void write_node_page(unsigned int nid, struct f2fs_io_info *fio); -void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); -int rewrite_data_page(struct f2fs_io_info *fio); -void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, + struct cp_control *cpc); +struct folio *f2fs_get_sum_folio(struct f2fs_sb_info *sbi, unsigned int segno); +void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, + block_t blk_addr); +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, + enum iostat_type io_type); +void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio); +void f2fs_outplace_write_data(struct dnode_of_data *dn, + struct f2fs_io_info *fio); +int f2fs_inplace_write_data(struct f2fs_io_info *fio); +void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, - bool recover_curseg, bool recover_newaddr); + bool recover_curseg, bool recover_newaddr, + bool from_gc); void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr); -void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, + enum log_type seg_type); +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio, bool add_list); -void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered); -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, - block_t blkaddr); -void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); -void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); -int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, + struct f2fs_io_info *fio); +void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, + block_t blkaddr, unsigned int blkcnt); +void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, + bool ordered, bool locked); +#define f2fs_wait_on_page_writeback(page, type, ordered, locked) \ + f2fs_folio_wait_writeback(page_folio(page), type, ordered, locked) +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len); +void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc); -void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int build_segment_manager(struct f2fs_sb_info *sbi); -void destroy_segment_manager(struct f2fs_sb_info *sbi); -int __init create_segment_manager_caches(void); -void destroy_segment_manager_caches(void); +void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi); +int f2fs_build_segment_manager(struct f2fs_sb_info *sbi); +void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); +int __init f2fs_create_segment_manager_caches(void); +void f2fs_destroy_segment_manager_caches(void); +int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint); +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp); +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); +unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, + unsigned int segno); +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno); + +static inline struct inode *fio_inode(struct f2fs_io_info *fio) +{ + return fio->folio->mapping->host; +} + +#define DEF_FRAGMENT_SIZE 4 +#define MIN_FRAGMENT_SIZE 1 +#define MAX_FRAGMENT_SIZE 512 + +static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG || + F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK; +} /* * checkpoint.c */ -void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); -struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); -bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); -int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, +void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, + unsigned char reason); +void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi); +struct folio *f2fs_grab_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_meta_folio_retry(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_tmp_folio(struct f2fs_sb_info *sbi, pgoff_t index); +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); +int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); -void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); -long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, - long nr_to_write); -void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); -void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); -void release_ino_entry(struct f2fs_sb_info *sbi, bool all); -bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); -int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); -int acquire_orphan_inode(struct f2fs_sb_info *sbi); -void release_orphan_inode(struct f2fs_sb_info *sbi); -void add_orphan_inode(struct inode *inode); -void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); -int recover_orphan_inodes(struct f2fs_sb_info *sbi); -int get_valid_checkpoint(struct f2fs_sb_info *sbi); -void update_dirty_page(struct inode *inode, struct page *page); -void remove_dirty_inode(struct inode *inode); -int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); -int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); -void init_ino_entry_info(struct f2fs_sb_info *sbi); -int __init create_checkpoint_caches(void); -void destroy_checkpoint_caches(void); +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, + unsigned int ra_blocks); +long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, + long nr_to_write, enum iostat_type io_type); +void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all); +bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); +void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type); +bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type); +int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi); +void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi); +void f2fs_add_orphan_inode(struct inode *inode); +void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi); +int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); +void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio); +void f2fs_remove_dirty_inode(struct inode *inode); +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type, + bool from_cp); +void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type); +u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi); +int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); +int __init f2fs_create_checkpoint_caches(void); +void f2fs_destroy_checkpoint_caches(void); +int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi); +int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi); +void f2fs_stop_ckpt_thread(struct f2fs_sb_info *sbi); +void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); /* * data.c */ +int __init f2fs_init_bioset(void); +void f2fs_destroy_bioset(void); +bool f2fs_is_cp_guaranteed(const struct folio *folio); +int f2fs_init_bio_entry_cache(void); +void f2fs_destroy_bio_entry_cache(void); +void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, + enum page_type type); +int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type); + struct inode *inode, struct folio *folio, + nid_t ino, enum page_type type); +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct folio *folio); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); -int f2fs_submit_page_write(struct f2fs_io_info *fio); +int f2fs_merge_page_bio(struct f2fs_io_info *fio); +void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, - block_t blk_addr, struct bio *bio); + block_t blk_addr, sector_t *sector); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); -void set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); -int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); -int reserve_new_block(struct dnode_of_data *dn); -int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); +int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); +int f2fs_reserve_new_block(struct dnode_of_data *dn); +int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); -struct page *get_read_data_page(struct inode *inode, pgoff_t index, - int op_flags, bool for_write); -struct page *find_data_page(struct inode *inode, pgoff_t index); -struct page *get_lock_data_page(struct inode *inode, pgoff_t index, +struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index, + blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs); +struct folio *f2fs_find_data_folio(struct inode *inode, pgoff_t index, + pgoff_t *next_pgofs); +struct folio *f2fs_get_lock_data_folio(struct inode *inode, pgoff_t index, bool for_write); -struct page *get_new_data_page(struct inode *inode, - struct page *ipage, pgoff_t index, bool new_i_size); -int do_write_data_page(struct f2fs_io_info *fio); -int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, - int create, int flag); +struct folio *f2fs_get_new_data_folio(struct inode *inode, + struct folio *ifolio, pgoff_t index, bool new_i_size); +int f2fs_do_write_data_page(struct f2fs_io_info *fio); +int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); -void f2fs_set_page_dirty_nobuffers(struct page *page); -void f2fs_invalidate_page(struct page *page, unsigned int offset, - unsigned int length); -int f2fs_release_page(struct page *page, gfp_t wait); -#ifdef CONFIG_MIGRATION -int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode); -#endif +int f2fs_encrypt_one_page(struct f2fs_io_info *fio); +bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); +bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); +int f2fs_write_single_data_page(struct folio *folio, int *submitted, + struct bio **bio, sector_t *last_block, + struct writeback_control *wbc, + enum iostat_type io_type, + int compr_blocks, bool allow_balance); +void f2fs_write_failed(struct inode *inode, loff_t to); +void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); +bool f2fs_release_folio(struct folio *folio, gfp_t wait); +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); +void f2fs_clear_page_cache_dirty_tag(struct folio *folio); +int f2fs_init_post_read_processing(void); +void f2fs_destroy_post_read_processing(void); +int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); +void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); +extern const struct iomap_ops f2fs_iomap_ops; /* * gc.c */ -int start_gc_thread(struct f2fs_sb_info *sbi); -void stop_gc_thread(struct f2fs_sb_info *sbi); -block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, - unsigned int segno); -void build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_start_gc_thread(struct f2fs_sb_info *sbi); +void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); +block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); +int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); +void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections); +int f2fs_resize_fs(struct file *filp, __u64 block_count); +int __init f2fs_create_garbage_collection_cache(void); +void f2fs_destroy_garbage_collection_cache(void); +/* victim selection function for cleaning and SSR */ +int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, + int gc_type, int type, char alloc_mode, + unsigned long long age, bool one_time); /* * recovery.c */ -int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); -bool space_for_roll_forward(struct f2fs_sb_info *sbi); +int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); +bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi); +int __init f2fs_create_recovery_cache(void); +void f2fs_destroy_recovery_cache(void); /* * debug.c */ #ifdef CONFIG_F2FS_STAT_FS +enum { + DEVSTAT_INUSE, + DEVSTAT_DIRTY, + DEVSTAT_FULL, + DEVSTAT_FREE, + DEVSTAT_PREFREE, + DEVSTAT_MAX, +}; + +struct f2fs_dev_stats { + unsigned int devstats[2][DEVSTAT_MAX]; /* 0: segs, 1: secs */ +}; + struct f2fs_stat_info { struct list_head stat_list; struct f2fs_sb_info *sbi; int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; int main_area_segs, main_area_sections, main_area_zones; - unsigned long long hit_largest, hit_cached, hit_rbtree; - unsigned long long hit_total, total_ext; - int ext_tree, zombie_tree, ext_node; - int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; - int inmem_pages; + unsigned long long hit_cached[NR_EXTENT_CACHES]; + unsigned long long hit_rbtree[NR_EXTENT_CACHES]; + unsigned long long total_ext[NR_EXTENT_CACHES]; + unsigned long long hit_total[NR_EXTENT_CACHES]; + int ext_tree[NR_EXTENT_CACHES]; + int zombie_tree[NR_EXTENT_CACHES]; + int ext_node[NR_EXTENT_CACHES]; + /* to count memory footprint */ + unsigned long long ext_mem[NR_EXTENT_CACHES]; + /* for read extent cache */ + unsigned long long hit_largest; + /* for block age extent cache */ + unsigned long long allocated_data_blocks; + int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; + int ndirty_data, ndirty_qdata; unsigned int ndirty_dirs, ndirty_files, ndirty_all; + unsigned int nquota_files, ndonate_files; int nats, dirty_nats, sits, dirty_sits; int free_nids, avail_nids, alloc_nids; int total_count, utilization; - int bg_gc, nr_wb_cp_data, nr_wb_data; - int nr_flushing, nr_flushed, nr_discarding, nr_discarded; + int nr_wb_cp_data, nr_wb_data; + int nr_rd_data, nr_rd_node, nr_rd_meta; + int nr_dio_read, nr_dio_write; + unsigned int io_skip_bggc, other_skip_bggc; + int nr_flushing, nr_flushed, flush_list_empty; + int nr_discarding, nr_discarded; int nr_discard_cmd; unsigned int undiscard_blks; + int nr_issued_ckpt, nr_total_ckpt, nr_queued_ckpt; + unsigned int cur_ckpt_time, peak_ckpt_time; int inline_xattr, inline_inode, inline_dir, append, update, orphans; - int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; + int compr_inode, swapfile_inode; + unsigned long long compr_blocks; + int aw_cnt, max_aw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; int rsvd_segs, overp_segs; - int dirty_count, node_pages, meta_pages; - int prefree_count, call_count, cp_count, bg_cp_count; - int tot_segs, node_segs, data_segs, free_segs, free_secs; - int bg_node_segs, bg_data_segs; + int dirty_count, node_pages, meta_pages, compress_pages; + int compress_page_hit; + int prefree_count, free_segs, free_secs; + int cp_call_count[MAX_CALL_TYPE], cp_count; + int gc_call_count[MAX_CALL_TYPE]; + int gc_segs[2][2]; + int gc_secs[2][2]; int tot_blks, data_blks, node_blks; int bg_data_blks, bg_node_blks; + int blkoff[NR_CURSEG_TYPE]; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; + unsigned int dirty_seg[NR_CURSEG_TYPE]; + unsigned int full_seg[NR_CURSEG_TYPE]; + unsigned int valid_blks[NR_CURSEG_TYPE]; + unsigned int meta_count[META_MAX]; unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; unsigned long long base_mem, cache_mem, page_mem; + struct f2fs_dev_stats *dev_stats; }; static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) @@ -2503,16 +4215,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) return (struct f2fs_stat_info *)sbi->stat_info; } -#define stat_inc_cp_count(si) ((si)->cp_count++) -#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) -#define stat_inc_call_count(si) ((si)->call_count++) -#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_inc_cp_call_count(sbi, foreground) \ + atomic_inc(&sbi->cp_call_count[(foreground)]) +#define stat_inc_cp_count(sbi) (F2FS_STAT(sbi)->cp_count++) +#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) +#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) -#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) -#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree)) +#define stat_inc_total_hit(sbi, type) (atomic64_inc(&(sbi)->total_hit_ext[type])) +#define stat_inc_rbtree_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_rbtree[type])) #define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest)) -#define stat_inc_cached_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_cached)) +#define stat_inc_cached_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_cached[type])) #define stat_inc_inline_xattr(inode) \ do { \ if (f2fs_has_inline_xattr(inode)) \ @@ -2543,46 +4256,58 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_compr_inode(inode) \ + do { \ + if (f2fs_compressed_file(inode)) \ + (atomic_inc(&F2FS_I_SB(inode)->compr_inode)); \ + } while (0) +#define stat_dec_compr_inode(inode) \ + do { \ + if (f2fs_compressed_file(inode)) \ + (atomic_dec(&F2FS_I_SB(inode)->compr_inode)); \ + } while (0) +#define stat_add_compr_blocks(inode, blocks) \ + (atomic64_add(blocks, &F2FS_I_SB(inode)->compr_blocks)) +#define stat_sub_compr_blocks(inode, blocks) \ + (atomic64_sub(blocks, &F2FS_I_SB(inode)->compr_blocks)) +#define stat_inc_swapfile_inode(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->swapfile_inode)) +#define stat_dec_swapfile_inode(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->swapfile_inode)) +#define stat_inc_atomic_inode(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->atomic_files)) +#define stat_dec_atomic_inode(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->atomic_files)) +#define stat_inc_meta_count(sbi, blkaddr) \ + do { \ + if (blkaddr < SIT_I(sbi)->sit_base_addr) \ + atomic_inc(&(sbi)->meta_count[META_CP]); \ + else if (blkaddr < NM_I(sbi)->nat_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SIT]); \ + else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_NAT]); \ + else if (blkaddr < SM_I(sbi)->main_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SSA]); \ + } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) -#define stat_inc_atomic_write(inode) \ - (atomic_inc(&F2FS_I_SB(inode)->aw_cnt)) -#define stat_dec_atomic_write(inode) \ - (atomic_dec(&F2FS_I_SB(inode)->aw_cnt)) #define stat_update_max_atomic_write(inode) \ do { \ - int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \ + int cur = atomic_read(&F2FS_I_SB(inode)->atomic_files); \ int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \ if (cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ } while (0) -#define stat_inc_volatile_write(inode) \ - (atomic_inc(&F2FS_I_SB(inode)->vw_cnt)) -#define stat_dec_volatile_write(inode) \ - (atomic_dec(&F2FS_I_SB(inode)->vw_cnt)) -#define stat_update_max_volatile_write(inode) \ - do { \ - int cur = atomic_read(&F2FS_I_SB(inode)->vw_cnt); \ - int max = atomic_read(&F2FS_I_SB(inode)->max_vw_cnt); \ - if (cur > max) \ - atomic_set(&F2FS_I_SB(inode)->max_vw_cnt, cur); \ - } while (0) -#define stat_inc_seg_count(sbi, type, gc_type) \ - do { \ - struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - si->tot_segs++; \ - if ((type) == SUM_TYPE_DATA) { \ - si->data_segs++; \ - si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ - } else { \ - si->node_segs++; \ - si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \ - } \ - } while (0) +#define stat_inc_gc_call_count(sbi, foreground) \ + (F2FS_STAT(sbi)->gc_call_count[(foreground)]++) +#define stat_inc_gc_sec_count(sbi, type, gc_type) \ + (F2FS_STAT(sbi)->gc_secs[(type)][(gc_type)]++) +#define stat_inc_gc_seg_count(sbi, type, gc_type) \ + (F2FS_STAT(sbi)->gc_segs[(type)][(gc_type)]++) #define stat_inc_tot_blk_count(si, blks) \ ((si)->tot_blks += (blks)) @@ -2605,43 +4330,51 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); -int __init f2fs_create_root_stats(void); +void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); +void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #else -#define stat_inc_cp_count(si) do { } while (0) -#define stat_inc_bg_cp_count(si) do { } while (0) -#define stat_inc_call_count(si) do { } while (0) -#define stat_inc_bggc_count(si) do { } while (0) +#define stat_inc_cp_call_count(sbi, foreground) do { } while (0) +#define stat_inc_cp_count(sbi) do { } while (0) +#define stat_io_skip_bggc_count(sbi) do { } while (0) +#define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) -#define stat_inc_total_hit(sb) do { } while (0) -#define stat_inc_rbtree_node_hit(sb) do { } while (0) +#define stat_inc_total_hit(sbi, type) do { } while (0) +#define stat_inc_rbtree_node_hit(sbi, type) do { } while (0) #define stat_inc_largest_node_hit(sbi) do { } while (0) -#define stat_inc_cached_node_hit(sbi) do { } while (0) +#define stat_inc_cached_node_hit(sbi, type) do { } while (0) #define stat_inc_inline_xattr(inode) do { } while (0) #define stat_dec_inline_xattr(inode) do { } while (0) #define stat_inc_inline_inode(inode) do { } while (0) #define stat_dec_inline_inode(inode) do { } while (0) #define stat_inc_inline_dir(inode) do { } while (0) #define stat_dec_inline_dir(inode) do { } while (0) -#define stat_inc_atomic_write(inode) do { } while (0) -#define stat_dec_atomic_write(inode) do { } while (0) +#define stat_inc_compr_inode(inode) do { } while (0) +#define stat_dec_compr_inode(inode) do { } while (0) +#define stat_add_compr_blocks(inode, blocks) do { } while (0) +#define stat_sub_compr_blocks(inode, blocks) do { } while (0) +#define stat_inc_swapfile_inode(inode) do { } while (0) +#define stat_dec_swapfile_inode(inode) do { } while (0) +#define stat_inc_atomic_inode(inode) do { } while (0) +#define stat_dec_atomic_inode(inode) do { } while (0) #define stat_update_max_atomic_write(inode) do { } while (0) -#define stat_inc_volatile_write(inode) do { } while (0) -#define stat_dec_volatile_write(inode) do { } while (0) -#define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_meta_count(sbi, blkaddr) do { } while (0) #define stat_inc_seg_type(sbi, curseg) do { } while (0) #define stat_inc_block_count(sbi, curseg) do { } while (0) #define stat_inc_inplace_blocks(sbi) do { } while (0) -#define stat_inc_seg_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_gc_call_count(sbi, foreground) do { } while (0) +#define stat_inc_gc_sec_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_gc_seg_count(sbi, type, gc_type) do { } while (0) #define stat_inc_tot_blk_count(si, blks) do { } while (0) #define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0) #define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } -static inline int __init f2fs_create_root_stats(void) { return 0; } +static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } +static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; @@ -2654,29 +4387,32 @@ extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; extern const struct inode_operations f2fs_encrypted_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; -extern struct kmem_cache *inode_entry_slab; +extern struct kmem_cache *f2fs_inode_entry_slab; /* * inline.c */ bool f2fs_may_inline_data(struct inode *inode); +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio); bool f2fs_may_inline_dentry(struct inode *inode); -void read_inline_data(struct page *page, struct page *ipage); -void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); -int f2fs_read_inline_data(struct inode *inode, struct page *page); -int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); +void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio); +void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio, + u64 from); +int f2fs_read_inline_data(struct inode *inode, struct folio *folio); +int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio); int f2fs_convert_inline_inode(struct inode *inode); -int f2fs_write_inline_data(struct inode *inode, struct page *page); -bool recover_inline_data(struct inode *inode, struct page *npage); -struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page); -int make_empty_inline_dir(struct inode *inode, struct inode *parent, - struct page *ipage); -int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, +int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); +int f2fs_write_inline_data(struct inode *inode, struct folio *folio); +int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio); +struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, + const struct f2fs_filename *fname, struct folio **res_folio, + bool use_hash); +int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct folio *ifolio); +int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); -void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *dir, struct inode *inode); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, + struct folio *folio, struct inode *dir, struct inode *inode); bool f2fs_empty_inline_dir(struct inode *dir); int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr); @@ -2691,121 +4427,584 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, struct shrink_control *sc); unsigned long f2fs_shrink_scan(struct shrinker *shrink, struct shrink_control *sc); +unsigned int f2fs_donate_files(void); +void f2fs_reclaim_caches(unsigned int reclaim_caches_kb); void f2fs_join_shrinker(struct f2fs_sb_info *sbi); void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -struct rb_entry *__lookup_rb_tree(struct rb_root *root, - struct rb_entry *cached_re, unsigned int ofs); -struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, - struct rb_root *root, struct rb_node **parent, - unsigned int ofs); -struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, - struct rb_entry *cached_re, unsigned int ofs, - struct rb_entry **prev_entry, struct rb_entry **next_entry, - struct rb_node ***insert_p, struct rb_node **insert_parent, - bool force); -bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root *root); -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio); +void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); -unsigned int f2fs_destroy_extent_node(struct inode *inode); +void f2fs_destroy_extent_node(struct inode *inode); void f2fs_destroy_extent_tree(struct inode *inode); -bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, +void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi); +int __init f2fs_create_extent_cache(void); +void f2fs_destroy_extent_cache(void); + +/* read extent cache ops */ +void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio); +bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei); -void f2fs_update_extent_cache(struct dnode_of_data *dn); -void f2fs_update_extent_cache_range(struct dnode_of_data *dn, +bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index, + block_t *blkaddr); +void f2fs_update_read_extent_cache(struct dnode_of_data *dn); +void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, pgoff_t fofs, block_t blkaddr, unsigned int len); -void init_extent_cache_info(struct f2fs_sb_info *sbi); -int __init create_extent_cache(void); -void destroy_extent_cache(void); +unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, + int nr_shrink); + +/* block age extent cache ops */ +void f2fs_init_age_extent_tree(struct inode *inode); +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_age_extent_cache(struct dnode_of_data *dn); +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, unsigned int len); +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, + int nr_shrink); /* * sysfs.c */ -int __init f2fs_register_sysfs(void); -void f2fs_unregister_sysfs(void); -int f2fs_init_sysfs(struct f2fs_sb_info *sbi); -void f2fs_exit_sysfs(struct f2fs_sb_info *sbi); +#define MIN_RA_MUL 2 +#define MAX_RA_MUL 256 + +int __init f2fs_init_sysfs(void); +void f2fs_exit_sysfs(void); +int f2fs_register_sysfs(struct f2fs_sb_info *sbi); +void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi); + +/* verity.c */ +extern const struct fsverity_operations f2fs_verityops; /* * crypto support */ -static inline bool f2fs_encrypted_inode(struct inode *inode) +static inline bool f2fs_encrypted_file(struct inode *inode) { - return file_is_encrypt(inode); + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); } static inline void f2fs_set_encrypted_inode(struct inode *inode) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION file_set_encrypt(inode); + f2fs_set_inode_flags(inode); #endif } -static inline bool f2fs_bio_encrypted(struct bio *bio) +/* + * Returns true if the reads of the inode's data need to undergo some + * postprocessing step, like decryption or authenticity verification. + */ +static inline bool f2fs_post_read_required(struct inode *inode) { - return bio->bi_private != NULL; + return f2fs_encrypted_file(inode) || fsverity_active(inode) || + f2fs_compressed_file(inode); } -static inline int f2fs_sb_has_crypto(struct super_block *sb) +static inline bool f2fs_used_in_atomic_write(struct inode *inode) { - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); + return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode); } -static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb) +static inline bool f2fs_meta_inode_gc_required(struct inode *inode) { - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED); + return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode); +} + +/* + * compress.c + */ +#ifdef CONFIG_F2FS_FS_COMPRESSION +enum cluster_check_type { + CLUSTER_IS_COMPR, /* check only if compressed cluster */ + CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ + CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ +}; +bool f2fs_is_compressed_page(struct folio *folio); +struct folio *f2fs_compress_control_folio(struct folio *folio); +int f2fs_prepare_compress_overwrite(struct inode *inode, + struct page **pagep, pgoff_t index, void **fsdata); +bool f2fs_compress_write_end(struct inode *inode, void *fsdata, + pgoff_t index, unsigned copied); +int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio); +bool f2fs_is_compress_backend_ready(struct inode *inode); +bool f2fs_is_compress_level_valid(int alg, int lvl); +int __init f2fs_init_compress_mempool(void); +void f2fs_destroy_compress_mempool(void); +void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, + block_t blkaddr, bool in_task); +bool f2fs_cluster_is_empty(struct compress_ctx *cc); +bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, + int index, int nr_pages, bool uptodate); +bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio); +int f2fs_write_multi_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type); +int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index); +void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, + pgoff_t fofs, block_t blkaddr, + unsigned int llen, unsigned int c_len); +int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, + unsigned nr_pages, sector_t *last_block_in_bio, + struct readahead_control *rac, bool for_write); +struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task); +void f2fs_put_folio_dic(struct folio *folio, bool in_task); +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node); +int f2fs_init_compress_ctx(struct compress_ctx *cc); +void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); +void f2fs_init_compress_info(struct f2fs_sb_info *sbi); +int f2fs_init_compress_inode(struct f2fs_sb_info *sbi); +void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi); +int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi); +void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); +int __init f2fs_init_compress_cache(void); +void f2fs_destroy_compress_cache(void); +struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len); +bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, + block_t blkaddr); +void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); +#define inc_compr_inode_stat(inode) \ + do { \ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \ + sbi->compr_new_inode++; \ + } while (0) +#define add_compr_block_stat(inode, blocks) \ + do { \ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \ + int diff = F2FS_I(inode)->i_cluster_size - blocks; \ + sbi->compr_written_block += blocks; \ + sbi->compr_saved_block += diff; \ + } while (0) +#else +static inline bool f2fs_is_compressed_page(struct folio *folio) { return false; } +static inline bool f2fs_is_compress_backend_ready(struct inode *inode) +{ + if (!f2fs_compressed_file(inode)) + return true; + /* not support compression */ + return false; +} +static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; } +static inline struct folio *f2fs_compress_control_folio(struct folio *folio) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} +static inline int __init f2fs_init_compress_mempool(void) { return 0; } +static inline void f2fs_destroy_compress_mempool(void) { } +static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic, + bool in_task) { } +static inline void f2fs_end_read_compressed_page(struct folio *folio, + bool failed, block_t blkaddr, bool in_task) +{ + WARN_ON_ONCE(1); +} +static inline void f2fs_put_folio_dic(struct folio *folio, bool in_task) +{ + WARN_ON_ONCE(1); +} +static inline unsigned int f2fs_cluster_blocks_are_contiguous( + struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; } +static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; } +static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } +static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } +static inline int __init f2fs_init_compress_cache(void) { return 0; } +static inline void f2fs_destroy_compress_cache(void) { } +static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { } +static inline bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, + struct folio *folio, block_t blkaddr) { return false; } +static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, + nid_t ino) { } +#define inc_compr_inode_stat(inode) do { } while (0) +static inline int f2fs_is_compressed_cluster( + struct inode *inode, + pgoff_t index) { return 0; } +static inline bool f2fs_is_sparse_cluster( + struct inode *inode, + pgoff_t index) { return true; } +static inline void f2fs_update_read_extent_tree_range_compressed( + struct inode *inode, + pgoff_t fofs, block_t blkaddr, + unsigned int llen, unsigned int c_len) { } +#endif + +static inline int set_compress_context(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + + fi->i_compress_algorithm = F2FS_OPTION(sbi).compress_algorithm; + fi->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size; + fi->i_compress_flag = F2FS_OPTION(sbi).compress_chksum ? + BIT(COMPRESS_CHKSUM) : 0; + fi->i_cluster_size = BIT(fi->i_log_cluster_size); + if ((fi->i_compress_algorithm == COMPRESS_LZ4 || + fi->i_compress_algorithm == COMPRESS_ZSTD) && + F2FS_OPTION(sbi).compress_level) + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; + fi->i_flags |= F2FS_COMPR_FL; + set_inode_flag(inode, FI_COMPRESSED_FILE); + stat_inc_compr_inode(inode); + inc_compr_inode_stat(inode); + f2fs_mark_inode_dirty_sync(inode, true); + return 0; +#else + return -EOPNOTSUPP; +#endif } +static inline bool f2fs_disable_compressed_file(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + f2fs_down_write(&fi->i_sem); + + if (!f2fs_compressed_file(inode)) { + f2fs_up_write(&fi->i_sem); + return true; + } + if (f2fs_is_mmap_file(inode) || atomic_read(&fi->writeback) || + (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&fi->i_sem); + return false; + } + + fi->i_flags &= ~F2FS_COMPR_FL; + stat_dec_compr_inode(inode); + clear_inode_flag(inode, FI_COMPRESSED_FILE); + f2fs_mark_inode_dirty_sync(inode, true); + + f2fs_up_write(&fi->i_sem); + return true; +} + +#define F2FS_FEATURE_FUNCS(name, flagname) \ +static inline bool f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ +{ \ + return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \ +} + +F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); +F2FS_FEATURE_FUNCS(blkzoned, BLKZONED); +F2FS_FEATURE_FUNCS(extra_attr, EXTRA_ATTR); +F2FS_FEATURE_FUNCS(project_quota, PRJQUOTA); +F2FS_FEATURE_FUNCS(inode_chksum, INODE_CHKSUM); +F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); +F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); +F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(verity, VERITY); +F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); +F2FS_FEATURE_FUNCS(casefold, CASEFOLD); +F2FS_FEATURE_FUNCS(compression, COMPRESSION); +F2FS_FEATURE_FUNCS(readonly, RO); +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); +F2FS_FEATURE_FUNCS(packed_ssa, PACKED_SSA); + #ifdef CONFIG_BLK_DEV_ZONED -static inline int get_blkz_type(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkaddr) +static inline bool f2fs_zone_is_seq(struct f2fs_sb_info *sbi, int devi, + unsigned int zone) +{ + return test_bit(zone, FDEV(devi).blkz_seq); +} + +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, + block_t blkaddr) +{ + return f2fs_zone_is_seq(sbi, devi, blkaddr / sbi->blocks_per_blkz); +} +#endif + +static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi, + struct block_device *bdev) { - unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz; int i; + if (!f2fs_is_multi_device(sbi)) + return 0; + for (i = 0; i < sbi->s_ndevs; i++) if (FDEV(i).bdev == bdev) - return FDEV(i).blkz_type[zno]; - return -EINVAL; + return i; + + WARN_ON(1); + return -1; } + +static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) +{ + return f2fs_sb_has_blkzoned(sbi); +} + +static inline bool f2fs_bdev_support_discard(struct block_device *bdev) +{ + return bdev_max_discard_sectors(bdev) || bdev_is_zoned(bdev); +} + +static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) +{ + int i; + + if (!f2fs_is_multi_device(sbi)) + return f2fs_bdev_support_discard(sbi->sb->s_bdev); + + for (i = 0; i < sbi->s_ndevs; i++) + if (f2fs_bdev_support_discard(FDEV(i).bdev)) + return true; + return false; +} + +static inline unsigned int f2fs_hw_discard_granularity(struct f2fs_sb_info *sbi) +{ + int i = 1; + unsigned int discard_granularity = bdev_discard_granularity(sbi->sb->s_bdev); + + if (f2fs_is_multi_device(sbi)) + for (; i < sbi->s_ndevs && !bdev_is_zoned(FDEV(i).bdev); i++) + discard_granularity = max_t(unsigned int, discard_granularity, + bdev_discard_granularity(FDEV(i).bdev)); + return discard_granularity; +} + +static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) +{ + return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) || + f2fs_hw_should_discard(sbi); +} + +static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi) +{ + int i; + + if (!f2fs_is_multi_device(sbi)) + return bdev_read_only(sbi->sb->s_bdev); + + for (i = 0; i < sbi->s_ndevs; i++) + if (bdev_read_only(FDEV(i).bdev)) + return true; + return false; +} + +static inline bool f2fs_dev_is_readonly(struct f2fs_sb_info *sbi) +{ + return f2fs_sb_has_readonly(sbi) || f2fs_hw_is_readonly(sbi); +} + +static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; +} + +static inline bool f2fs_is_sequential_zone_area(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + if (f2fs_sb_has_blkzoned(sbi)) { +#ifdef CONFIG_BLK_DEV_ZONED + int devi = f2fs_target_device_index(sbi, blkaddr); + + if (!bdev_is_zoned(FDEV(devi).bdev)) + return false; + + if (f2fs_is_multi_device(sbi)) { + if (blkaddr < FDEV(devi).start_blk || + blkaddr > FDEV(devi).end_blk) { + f2fs_err(sbi, "Invalid block %x", blkaddr); + return false; + } + blkaddr -= FDEV(devi).start_blk; + } + + return f2fs_blkz_is_seq(sbi, devi, blkaddr); +#else + return false; #endif + } + return false; +} -static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) { - struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; +} - return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb); +static inline bool f2fs_may_compress(struct inode *inode) +{ + if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || + f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode) || + f2fs_is_mmap_file(inode)) + return false; + return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); } -static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) +static inline void f2fs_i_compr_blocks_update(struct inode *inode, + u64 blocks, bool add) { - clear_opt(sbi, ADAPTIVE); - clear_opt(sbi, LFS); + struct f2fs_inode_info *fi = F2FS_I(inode); + int diff = fi->i_cluster_size - blocks; - switch (mt) { - case F2FS_MOUNT_ADAPTIVE: - set_opt(sbi, ADAPTIVE); - break; - case F2FS_MOUNT_LFS: - set_opt(sbi, LFS); - break; + /* don't update i_compr_blocks if saved blocks were released */ + if (!add && !atomic_read(&fi->i_compr_blocks)) + return; + + if (add) { + atomic_add(diff, &fi->i_compr_blocks); + stat_add_compr_blocks(inode, diff); + } else { + atomic_sub(diff, &fi->i_compr_blocks); + stat_sub_compr_blocks(inode, diff); } + f2fs_mark_inode_dirty_sync(inode, true); } -static inline bool f2fs_may_encrypt(struct inode *inode) +static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi, + int flag) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION - umode_t mode = inode->i_mode; + if (!f2fs_is_multi_device(sbi)) + return false; + if (flag != F2FS_GET_BLOCK_DIO) + return false; + return sbi->aligned_blksize; +} - return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); +static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) +{ + return fsverity_active(inode) && + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); +} + +#ifdef CONFIG_F2FS_FAULT_INJECTION +extern int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, + unsigned long type, enum fault_option fo); #else +static inline int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, + unsigned long rate, unsigned long type, + enum fault_option fo) +{ return 0; -#endif } +#endif +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sbi)) + return true; + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + return true; #endif + return false; +} + +static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK; +} + +static inline void __f2fs_schedule_timeout(long timeout, bool io) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + if (io) + io_schedule_timeout(timeout); + else + schedule_timeout(timeout); +} + +#define f2fs_io_schedule_timeout(timeout) \ + __f2fs_schedule_timeout(timeout, true) +#define f2fs_schedule_timeout(timeout) \ + __f2fs_schedule_timeout(timeout, false) + +static inline void f2fs_io_schedule_timeout_killable(long timeout) +{ + while (timeout) { + if (fatal_signal_pending(current)) + return; + set_current_state(TASK_UNINTERRUPTIBLE); + io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); + if (timeout <= DEFAULT_SCHEDULE_TIMEOUT) + return; + timeout -= DEFAULT_SCHEDULE_TIMEOUT; + } +} + +static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, + struct folio *folio, enum page_type type) +{ + pgoff_t ofs = folio->index; + + if (unlikely(f2fs_cp_error(sbi))) + return; + + if (ofs == sbi->page_eio_ofs[type]) { + if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO) + set_ckpt_flags(sbi, CP_ERROR_FLAG); + } else { + sbi->page_eio_ofs[type] = ofs; + sbi->page_eio_cnt[type] = 0; + } +} + +static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi) +{ + return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb); +} + +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int cnt) +{ + bool need_submit = false; + int i = 0; + + do { + struct folio *folio; + + folio = filemap_get_folio(META_MAPPING(sbi), blkaddr + i); + if (!IS_ERR(folio)) { + if (folio_test_writeback(folio)) + need_submit = true; + f2fs_folio_put(folio, false); + } + } while (++i < cnt && !need_submit); + + if (need_submit) + f2fs_submit_merged_write_cond(sbi, sbi->meta_inode, + NULL, 0, DATA); + + truncate_inode_pages_range(META_MAPPING(sbi), + F2FS_BLK_TO_BYTES((loff_t)blkaddr), + F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1))); +} + +static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) +{ + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); + f2fs_invalidate_compress_pages_range(sbi, blkaddr, len); +} + +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + +#endif /* _LINUX_F2FS_H */ |
