9 files changed, 109 insertions, 80 deletions
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 97fe3ebf11a2..afd7e50d073d 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -169,7 +169,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
 		unsigned long *total_in, unsigned long *total_out);
 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
 int zstd_decompress(struct list_head *ws, const u8 *data_in,
-		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
+		struct page *dest_page, unsigned long start_byte, size_t srclen,
 		size_t destlen);
 void zstd_init_workspace_manager(void);
 void zstd_cleanup_workspace_manager(void);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b2eeee236e80..8b4bef05e222 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -909,7 +909,7 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb,
 	 * will not race with any other ebs.
 	 */
 	if (folio->mapping)
-		lockdep_assert_held(&folio->mapping->private_lock);
+		lockdep_assert_held(&folio->mapping->i_private_lock);
 
 	if (fs_info->nodesize >= PAGE_SIZE) {
 		if (!folio_test_private(folio))
@@ -1784,16 +1784,16 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
 		 * Take private lock to ensure the subpage won't be detached
 		 * in the meantime.
 		 */
-		spin_lock(&page->mapping->private_lock);
+		spin_lock(&page->mapping->i_private_lock);
 		if (!folio_test_private(folio)) {
-			spin_unlock(&page->mapping->private_lock);
+			spin_unlock(&page->mapping->i_private_lock);
 			break;
 		}
 		spin_lock_irqsave(&subpage->lock, flags);
 		if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
 			      subpage->bitmaps)) {
 			spin_unlock_irqrestore(&subpage->lock, flags);
-			spin_unlock(&page->mapping->private_lock);
+			spin_unlock(&page->mapping->i_private_lock);
 			bit_start++;
 			continue;
 		}
@@ -1807,7 +1807,7 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
 		 */
 		eb = find_extent_buffer_nolock(fs_info, start);
 		spin_unlock_irqrestore(&subpage->lock, flags);
-		spin_unlock(&page->mapping->private_lock);
+		spin_unlock(&page->mapping->i_private_lock);
 
 		/*
 		 * The eb has already reached 0 refs thus find_extent_buffer()
@@ -1860,9 +1860,9 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 	if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
 		return submit_eb_subpage(page, wbc);
 
-	spin_lock(&mapping->private_lock);
+	spin_lock(&mapping->i_private_lock);
 	if (!folio_test_private(folio)) {
-		spin_unlock(&mapping->private_lock);
+		spin_unlock(&mapping->i_private_lock);
 		return 0;
 	}
 
@@ -1873,16 +1873,16 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 	 * crashing the machine for something we can survive anyway.
 	 */
 	if (WARN_ON(!eb)) {
-		spin_unlock(&mapping->private_lock);
+		spin_unlock(&mapping->i_private_lock);
 		return 0;
 	}
 
 	if (eb == ctx->eb) {
-		spin_unlock(&mapping->private_lock);
+		spin_unlock(&mapping->i_private_lock);
 		return 0;
 	}
 	ret = atomic_inc_not_zero(&eb->refs);
-	spin_unlock(&mapping->private_lock);
+	spin_unlock(&mapping->i_private_lock);
 	if (!ret)
 		return 0;
 
@@ -3218,7 +3218,7 @@ static bool folio_range_has_eb(struct btrfs_fs_info *fs_info, struct folio *foli
 {
 	struct btrfs_subpage *subpage;
 
-	lockdep_assert_held(&folio->mapping->private_lock);
+	lockdep_assert_held(&folio->mapping->i_private_lock);
 
 	if (folio_test_private(folio)) {
 		subpage = folio_get_private(folio);
@@ -3241,14 +3241,14 @@ static void detach_extent_buffer_folio(struct extent_buffer *eb, struct folio *f
 
 	/*
 	 * For mapped eb, we're going to change the folio private, which should
-	 * be done under the private_lock.
+	 * be done under the i_private_lock.
 	 */
 	if (mapped)
-		spin_lock(&folio->mapping->private_lock);
+		spin_lock(&folio->mapping->i_private_lock);
 
 	if (!folio_test_private(folio)) {
 		if (mapped)
-			spin_unlock(&folio->mapping->private_lock);
+			spin_unlock(&folio->mapping->i_private_lock);
 		return;
 	}
 
@@ -3268,7 +3268,7 @@ static void detach_extent_buffer_folio(struct extent_buffer *eb, struct folio *f
 			folio_detach_private(folio);
 		}
 		if (mapped)
-			spin_unlock(&folio->mapping->private_lock);
+			spin_unlock(&folio->mapping->i_private_lock);
 		return;
 	}
 
@@ -3291,7 +3291,7 @@ static void detach_extent_buffer_folio(struct extent_buffer *eb, struct folio *f
 	if (!folio_range_has_eb(fs_info, folio))
 		btrfs_detach_subpage(fs_info, folio);
 
-	spin_unlock(&folio->mapping->private_lock);
+	spin_unlock(&folio->mapping->i_private_lock);
 }
 
 /* Release all pages attached to the extent buffer */
@@ -3726,7 +3726,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 
 	/*
 	 * Preallocate folio private for subpage case, so that we won't
-	 * allocate memory with private_lock nor page lock hold.
+	 * allocate memory with i_private_lock nor page lock hold.
 	 *
 	 * The memory will be freed by attach_extent_buffer_page() or freed
 	 * manually if we exit earlier.
@@ -3789,7 +3789,7 @@ reallocate:
 		 * and free the allocated page.
 		 */
 		folio = eb->folios[i];
-		spin_lock(&mapping->private_lock);
+		spin_lock(&mapping->i_private_lock);
 		/* Should not fail, as we have preallocated the memory */
 		ret = attach_extent_buffer_folio(eb, folio, prealloc);
 		ASSERT(!ret);
@@ -3803,7 +3803,7 @@ reallocate:
 		 * Thus needs no special handling in error path.
 		 */
 		btrfs_folio_inc_eb_refs(fs_info, folio);
-		spin_unlock(&mapping->private_lock);
+		spin_unlock(&mapping->i_private_lock);
 
 		WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
 
@@ -4895,12 +4895,12 @@ static int try_release_subpage_extent_buffer(struct page *page)
 	 * Finally to check if we have cleared folio private, as if we have
 	 * released all ebs in the page, the folio private should be cleared now.
 	 */
-	spin_lock(&page->mapping->private_lock);
+	spin_lock(&page->mapping->i_private_lock);
 	if (!folio_test_private(page_folio(page)))
 		ret = 1;
 	else
 		ret = 0;
-	spin_unlock(&page->mapping->private_lock);
+	spin_unlock(&page->mapping->i_private_lock);
 	return ret;
 
 }
@@ -4917,9 +4917,9 @@ int try_release_extent_buffer(struct page *page)
 	 * We need to make sure nobody is changing folio private, as we rely on
 	 * folio private as the pointer to extent buffer.
 	 */
-	spin_lock(&page->mapping->private_lock);
+	spin_lock(&page->mapping->i_private_lock);
 	if (!folio_test_private(folio)) {
-		spin_unlock(&page->mapping->private_lock);
+		spin_unlock(&page->mapping->i_private_lock);
 		return 1;
 	}
 
@@ -4934,10 +4934,10 @@ int try_release_extent_buffer(struct page *page)
 	spin_lock(&eb->refs_lock);
 	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
 		spin_unlock(&eb->refs_lock);
-		spin_unlock(&page->mapping->private_lock);
+		spin_unlock(&page->mapping->i_private_lock);
 		return 0;
 	}
-	spin_unlock(&page->mapping->private_lock);
+	spin_unlock(&page->mapping->i_private_lock);
 
 	/*
 	 * If tree ref isn't set then we know the ref on this eb is a real ref,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a3248c47012f..4795738d5785 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -11019,7 +11019,7 @@ static const struct address_space_operations btrfs_aops = {
 	.release_folio	= btrfs_release_folio,
 	.migrate_folio	= btrfs_migrate_folio,
 	.dirty_folio	= filemap_dirty_folio,
-	.error_remove_page = generic_error_remove_page,
+	.error_remove_folio = generic_error_remove_folio,
 	.swap_activate	= btrfs_swap_activate,
 	.swap_deactivate = btrfs_swap_deactivate,
 };
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b004e3b75311..9d1eac15e09e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1293,6 +1293,15 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
 			 * are limited to own subvolumes only
 			 */
 			ret = -EPERM;
+		} else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) {
+			/*
+			 * Snapshots must be made with the src_inode referring
+			 * to the subvolume inode, otherwise the permission
+			 * checking above is useless because we may have
+			 * permission on a lower directory but not the subvol
+			 * itself.
+			 */
+			ret = -EINVAL;
 		} else {
 			ret = btrfs_mksnapshot(&file->f_path, idmap,
 					       name, namelen,
@@ -4536,29 +4545,29 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
 	if (ret < 0)
 		goto out_acct;
 
-	file_start_write(file);
-
 	if (iov_iter_count(&iter) == 0) {
 		ret = 0;
-		goto out_end_write;
+		goto out_iov;
 	}
 	pos = args.offset;
 	ret = rw_verify_area(WRITE, file, &pos, args.len);
 	if (ret < 0)
-		goto out_end_write;
+		goto out_iov;
 
 	init_sync_kiocb(&kiocb, file);
 	ret = kiocb_set_rw_flags(&kiocb, 0);
 	if (ret)
-		goto out_end_write;
+		goto out_iov;
 	kiocb.ki_pos = pos;
 
+	file_start_write(file);
+
 	ret = btrfs_do_write_iter(&kiocb, &iter, &args);
 	if (ret > 0)
 		fsnotify_modify(file);
 
-out_end_write:
 	file_end_write(file);
+out_iov:
 	kfree(iov);
 out_acct:
 	if (ret > 0)
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 277dd6d312ee..0e49dab8dad2 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -197,7 +197,7 @@ void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *
 		return;
 
 	ASSERT(folio_test_private(folio) && folio->mapping);
-	lockdep_assert_held(&folio->mapping->private_lock);
+	lockdep_assert_held(&folio->mapping->i_private_lock);
 
 	subpage = folio_get_private(folio);
 	atomic_inc(&subpage->eb_refs);
@@ -211,7 +211,7 @@ void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *
 		return;
 
 	ASSERT(folio_test_private(folio) && folio->mapping);
-	lockdep_assert_held(&folio->mapping->private_lock);
+	lockdep_assert_held(&folio->mapping->i_private_lock);
 
 	subpage = folio_get_private(folio);
 	ASSERT(atomic_read(&subpage->eb_refs));
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f192f8fe0ce6..101f786963d4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -259,6 +259,12 @@ static const struct fs_parameter_spec btrfs_fs_parameters[] = {
 	{}
 };
 
+/* No support for restricting writes to btrfs devices yet... */
+static inline blk_mode_t btrfs_open_mode(struct fs_context *fc)
+{
+	return sb_open_mode(fc->sb_flags) & ~BLK_OPEN_RESTRICT_WRITES;
+}
+
 static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct btrfs_fs_context *ctx = fc->fs_private;
@@ -295,7 +301,7 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		break;
 	case Opt_device: {
 		struct btrfs_device *device;
-		blk_mode_t mode = sb_open_mode(fc->sb_flags);
+		blk_mode_t mode = btrfs_open_mode(fc);
 
 		mutex_lock(&uuid_mutex);
 		device = btrfs_scan_one_device(param->string, mode, false);
@@ -1794,7 +1800,7 @@ static int btrfs_get_tree_super(struct fs_context *fc)
 	struct block_device *bdev;
 	struct btrfs_device *device;
 	struct super_block *sb;
-	blk_mode_t mode = sb_open_mode(fc->sb_flags);
+	blk_mode_t mode = btrfs_open_mode(fc);
 	int ret;
 
 	btrfs_ctx_to_info(fs_info, ctx);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index a7f885ae56d3..5f750fa53a2b 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -578,26 +578,12 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 
 	kvfree(zones);
 
-	switch (bdev_zoned_model(bdev)) {
-	case BLK_ZONED_HM:
+	if (bdev_is_zoned(bdev)) {
 		model = "host-managed zoned";
 		emulated = "";
-		break;
-	case BLK_ZONED_HA:
-		model = "host-aware zoned";
-		emulated = "";
-		break;
-	case BLK_ZONED_NONE:
+	} else {
 		model = "regular";
 		emulated = "emulated ";
-		break;
-	default:
-		/* Just in case */
-		btrfs_err_in_rcu(fs_info, "zoned: unsupported model %d on %s",
-				 bdev_zoned_model(bdev),
-				 rcu_str_deref(device->name));
-		ret = -EOPNOTSUPP;
-		goto out_free_zone_info;
 	}
 
 	btrfs_info_in_rcu(fs_info,
@@ -609,9 +595,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 
 out:
 	kvfree(zones);
-out_free_zone_info:
 	btrfs_destroy_dev_zone_info(device);
-
 	return ret;
 }
 
@@ -688,8 +672,7 @@ static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info)
 	struct btrfs_device *device;
 
 	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
-		if (device->bdev &&
-		    bdev_zoned_model(device->bdev) == BLK_ZONED_HM) {
+		if (device->bdev && bdev_is_zoned(device->bdev)) {
 			btrfs_err(fs_info,
 				"zoned: mode not enabled but zoned device found: %pg",
 				device->bdev);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index f24a5ffb7807..f573bda496fb 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -320,7 +320,7 @@ static inline bool btrfs_check_device_zone_type(const struct btrfs_fs_info *fs_i
 	}
 
 	/* Do not allow Host Managed zoned device. */
-	return bdev_zoned_model(bdev) != BLK_ZONED_HM;
+	return !bdev_is_zoned(bdev);
 }
 
 static inline bool btrfs_check_super_location(struct btrfs_device *device, u64 pos)
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 346c46d88d07..0d66db8bc1d4 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -20,7 +20,6 @@
 #include "misc.h"
 #include "compression.h"
 #include "ctree.h"
-#include "super.h"
 
 #define ZSTD_BTRFS_MAX_WINDOWLOG 17
 #define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
@@ -619,48 +618,80 @@ done:
 }
 
 int zstd_decompress(struct list_head *ws, const u8 *data_in,
-		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
+		struct page *dest_page, unsigned long start_byte, size_t srclen,
 		size_t destlen)
 {
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
-	struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
-	const u32 sectorsize = fs_info->sectorsize;
 	zstd_dstream *stream;
 	int ret = 0;
-	unsigned long to_copy = 0;
+	size_t ret2;
+	unsigned long total_out = 0;
+	unsigned long pg_offset = 0;
 
 	stream = zstd_init_dstream(
 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
 	if (!stream) {
 		pr_warn("BTRFS: zstd_init_dstream failed\n");
+		ret = -EIO;
 		goto finish;
 	}
 
+	destlen = min_t(size_t, destlen, PAGE_SIZE);
+
 	workspace->in_buf.src = data_in;
 	workspace->in_buf.pos = 0;
 	workspace->in_buf.size = srclen;
 
 	workspace->out_buf.dst = workspace->buf;
 	workspace->out_buf.pos = 0;
-	workspace->out_buf.size = sectorsize;
-
-	/*
-	 * Since both input and output buffers should not exceed one sector,
-	 * one call should end the decompression.
-	 */
-	ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
-	if (zstd_is_error(ret)) {
-		pr_warn_ratelimited("BTRFS: zstd_decompress_stream return %d\n",
-				    zstd_get_error_code(ret));
-		goto finish;
+	workspace->out_buf.size = PAGE_SIZE;
+
+	ret2 = 1;
+	while (pg_offset < destlen
+	       && workspace->in_buf.pos < workspace->in_buf.size) {
+		unsigned long buf_start;
+		unsigned long buf_offset;
+		unsigned long bytes;
+
+		/* Check if the frame is over and we still need more input */
+		if (ret2 == 0) {
+			pr_debug("BTRFS: zstd_decompress_stream ended early\n");
+			ret = -EIO;
+			goto finish;
+		}
+		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
+				&workspace->in_buf);
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
+					zstd_get_error_code(ret2));
+			ret = -EIO;
+			goto finish;
+		}
+
+		buf_start = total_out;
+		total_out += workspace->out_buf.pos;
+		workspace->out_buf.pos = 0;
+
+		if (total_out <= start_byte)
+			continue;
+
+		if (total_out > start_byte && buf_start < start_byte)
+			buf_offset = start_byte - buf_start;
+		else
+			buf_offset = 0;
+
+		bytes = min_t(unsigned long, destlen - pg_offset,
+				workspace->out_buf.size - buf_offset);
+
+		memcpy_to_page(dest_page, pg_offset,
+			       workspace->out_buf.dst + buf_offset, bytes);
+
+		pg_offset += bytes;
 	}
-	to_copy = workspace->out_buf.pos;
-	memcpy_to_page(dest_page, dest_pgoff + to_copy, workspace->out_buf.dst, to_copy);
+	ret = 0;
 finish:
-	/* Error or early end. */
-	if (unlikely(to_copy < destlen)) {
-		ret = -EIO;
-		memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy);
+	if (pg_offset < destlen) {
+		memzero_page(dest_page, pg_offset, destlen - pg_offset);
 	}
 	return ret;
 }