summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 09:41:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 09:41:22 -0700
commit87045e6546078dae215d1bd3b2bc82b3ada3ca77 (patch)
treef3d816b9834ca959514f6e399fb9f505871d2729 /include
parent9c849ce86e0fa93a218614eac562ace44053d7ce (diff)
parent0d977e0eba234e01a60bdde27314dc21374201b3 (diff)
Merge tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "The highlights of this round are integrations with fs-verity and idmapped mounts, the rest is usual mix of minor improvements, speedups and cleanups. There are some patches outside of btrfs, namely updating some VFS interfaces, all straightforward and acked. Features: - fs-verity support, using standard ioctls, backward compatible with read-only limitation on inodes with previously enabled fs-verity - idmapped mount support - make mount with rescue=ibadroots more tolerant to partially damaged trees - allow raid0 on a single device and raid10 on two devices, degenerate cases but might be useful as an intermediate step during conversion to other profiles - zoned mode block group auto reclaim can be disabled via sysfs knob Performance improvements: - continue readahead of node siblings even if target node is in memory, could speed up full send (on sample test +11%) - batching of delayed items can speed up creating many files - fsync/tree-log speedups - avoid unnecessary work (gains +2% throughput, -2% run time on sample load) - reduced lock contention on renames (on dbench +4% throughput, up to -30% latency) Fixes: - various zoned mode fixes - preemptive flushing threshold tuning, avoid excessive work on almost full filesystems Core: - continued subpage support, preparation for implementing remaining features like compression and defragmentation; with some limitations, write is now enabled on 64K page systems with 4K sectors, still considered experimental - no readahead on compressed reads - inline extents disabled - disabled raid56 profile conversion and mount - improved flushing logic, fixing early ENOSPC on some workloads - inode flags have been internally split to read-only and read-write incompat bit parts, used by fs-verity - new tree items for fs-verity - descriptor item - Merkle tree item - inode operations extended to be namespace-aware - cleanups and refactoring Generic code changes: - fs: new export filemap_fdatawrite_wbc - fs: removed sync_inode - block: bio_trim argument type fixups - vfs: add namespace-aware lookup" * tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (114 commits) btrfs: reset replace target device to allocation state on close btrfs: zoned: fix ordered extent boundary calculation btrfs: do not do preemptive flushing if the majority is global rsv btrfs: reduce the preemptive flushing threshold to 90% btrfs: tree-log: check btrfs_lookup_data_extent return value btrfs: avoid unnecessarily logging directories that had no changes btrfs: allow idmapped mount btrfs: handle ACLs on idmapped mounts btrfs: allow idmapped INO_LOOKUP_USER ioctl btrfs: allow idmapped SUBVOL_SETFLAGS ioctl btrfs: allow idmapped SET_RECEIVED_SUBVOL ioctls btrfs: relax restrictions for SNAP_DESTROY_V2 with subvolids btrfs: allow idmapped SNAP_DESTROY ioctls btrfs: allow idmapped SNAP_CREATE/SUBVOL_CREATE ioctls btrfs: check whether fsgid/fsuid are mapped during subvolume creation btrfs: allow idmapped permission inode op btrfs: allow idmapped setattr inode op btrfs: allow idmapped tmpfile inode op btrfs: allow idmapped symlink inode op btrfs: allow idmapped mkdir inode op ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blk_types.h1
-rw-r--r--include/linux/fs.h3
-rw-r--r--include/linux/namei.h1
-rw-r--r--include/trace/events/btrfs.h21
-rw-r--r--include/uapi/linux/btrfs.h1
-rw-r--r--include/uapi/linux/btrfs_tree.h35
7 files changed, 58 insertions, 6 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3d67d0fbc868..00952e92eae1 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -374,7 +374,7 @@ static inline void bip_set_seed(struct bio_integrity_payload *bip,
#endif /* CONFIG_BLK_DEV_INTEGRITY */
-extern void bio_trim(struct bio *bio, int offset, int size);
+void bio_trim(struct bio *bio, sector_t offset, sector_t size);
extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 9e392daa1d7f..be622b5a21ed 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -277,6 +277,7 @@ struct bio {
};
#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
+#define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT)
/*
* bio flags
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c58c2611a195..2c2bcfb12fef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2500,7 +2500,6 @@ static inline void file_accessed(struct file *file)
extern int file_modified(struct file *file);
-int sync_inode(struct inode *inode, struct writeback_control *wbc);
int sync_inode_metadata(struct inode *inode, int wait);
struct file_system_type {
@@ -2852,6 +2851,8 @@ extern int filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end);
extern int filemap_check_errors(struct address_space *mapping);
extern void __filemap_set_wb_err(struct address_space *mapping, int err);
+int filemap_fdatawrite_wbc(struct address_space *mapping,
+ struct writeback_control *wbc);
static inline int filemap_write_and_wait(struct address_space *mapping)
{
diff --git a/include/linux/namei.h b/include/linux/namei.h
index be9a2b349ca7..e89329bb3134 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -68,6 +68,7 @@ extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);
+struct dentry *lookup_one(struct user_namespace *, const char *, struct dentry *, int);
extern int follow_down_one(struct path *);
extern int follow_down(struct path *);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index b671b1f2ce0f..8f58fd95efc7 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -94,6 +94,7 @@ struct btrfs_space_info;
EM( FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS") \
EM( FLUSH_DELALLOC, "FLUSH_DELALLOC") \
EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \
+ EM( FLUSH_DELALLOC_FULL, "FLUSH_DELALLOC_FULL") \
EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \
EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \
EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
@@ -2037,7 +2038,7 @@ TRACE_EVENT(btrfs_convert_extent_bit,
);
DECLARE_EVENT_CLASS(btrfs_dump_space_info,
- TP_PROTO(const struct btrfs_fs_info *fs_info,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
const struct btrfs_space_info *sinfo),
TP_ARGS(fs_info, sinfo),
@@ -2057,6 +2058,8 @@ DECLARE_EVENT_CLASS(btrfs_dump_space_info,
__field( u64, delayed_refs_reserved )
__field( u64, delayed_reserved )
__field( u64, free_chunk_space )
+ __field( u64, delalloc_bytes )
+ __field( u64, ordered_bytes )
),
TP_fast_assign_btrfs(fs_info,
@@ -2074,6 +2077,8 @@ DECLARE_EVENT_CLASS(btrfs_dump_space_info,
__entry->delayed_refs_reserved = fs_info->delayed_refs_rsv.reserved;
__entry->delayed_reserved = fs_info->delayed_block_rsv.reserved;
__entry->free_chunk_space = atomic64_read(&fs_info->free_chunk_space);
+ __entry->delalloc_bytes = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
+ __entry->ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
),
TP_printk_btrfs("flags=%s total_bytes=%llu bytes_used=%llu "
@@ -2081,7 +2086,8 @@ DECLARE_EVENT_CLASS(btrfs_dump_space_info,
"bytes_may_use=%llu bytes_readonly=%llu "
"reclaim_size=%llu clamp=%d global_reserved=%llu "
"trans_reserved=%llu delayed_refs_reserved=%llu "
- "delayed_reserved=%llu chunk_free_space=%llu",
+ "delayed_reserved=%llu chunk_free_space=%llu "
+ "delalloc_bytes=%llu ordered_bytes=%llu",
__print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS),
__entry->total_bytes, __entry->bytes_used,
__entry->bytes_pinned, __entry->bytes_reserved,
@@ -2089,11 +2095,18 @@ DECLARE_EVENT_CLASS(btrfs_dump_space_info,
__entry->reclaim_size, __entry->clamp,
__entry->global_reserved, __entry->trans_reserved,
__entry->delayed_refs_reserved,
- __entry->delayed_reserved, __entry->free_chunk_space)
+ __entry->delayed_reserved, __entry->free_chunk_space,
+ __entry->delalloc_bytes, __entry->ordered_bytes)
);
DEFINE_EVENT(btrfs_dump_space_info, btrfs_done_preemptive_reclaim,
- TP_PROTO(const struct btrfs_fs_info *fs_info,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo),
+ TP_ARGS(fs_info, sinfo)
+);
+
+DEFINE_EVENT(btrfs_dump_space_info, btrfs_fail_all_tickets,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
const struct btrfs_space_info *sinfo),
TP_ARGS(fs_info, sinfo)
);
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 22cd037123fa..d7d3cfead056 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -288,6 +288,7 @@ struct btrfs_ioctl_fs_info_args {
* first mount when booting older kernel versions.
*/
#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
+#define BTRFS_FEATURE_COMPAT_RO_VERITY (1ULL << 2)
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index ccdb40fe40dc..e1c4c732aaba 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -118,6 +118,29 @@
#define BTRFS_INODE_REF_KEY 12
#define BTRFS_INODE_EXTREF_KEY 13
#define BTRFS_XATTR_ITEM_KEY 24
+
+/*
+ * fs verity items are stored under two different key types on disk.
+ * The descriptor items:
+ * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
+ *
+ * At offset 0, we store a btrfs_verity_descriptor_item which tracks the size
+ * of the descriptor item and some extra data for encryption.
+ * Starting at offset 1, these hold the generic fs verity descriptor. The
+ * latter are opaque to btrfs, we just read and write them as a blob for the
+ * higher level verity code. The most common descriptor size is 256 bytes.
+ *
+ * The merkle tree items:
+ * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
+ *
+ * These also start at offset 0, and correspond to the merkle tree bytes. When
+ * fsverity asks for page 0 of the merkle tree, we pull up one page starting at
+ * offset 0 for this key type. These are also opaque to btrfs, we're blindly
+ * storing whatever fsverity sends down.
+ */
+#define BTRFS_VERITY_DESC_ITEM_KEY 36
+#define BTRFS_VERITY_MERKLE_ITEM_KEY 37
+
#define BTRFS_ORPHAN_ITEM_KEY 48
/* reserve 2-15 close to the inode for later flexibility */
@@ -991,4 +1014,16 @@ struct btrfs_qgroup_limit_item {
__le64 rsv_excl;
} __attribute__ ((__packed__));
+struct btrfs_verity_descriptor_item {
+ /* Size of the verity descriptor in bytes */
+ __le64 size;
+ /*
+ * When we implement support for fscrypt, we will need to encrypt the
+ * Merkle tree for encrypted verity files. These 128 bits are for the
+ * eventual storage of an fscrypt initialization vector.
+ */
+ __le64 reserved[2];
+ __u8 encryption;
+} __attribute__ ((__packed__));
+
#endif /* _BTRFS_CTREE_H_ */