author     Linus Torvalds <torvalds@linux-foundation.org>   2022-12-13 19:29:45 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-12-13 19:29:45 -0800
commit     e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch)
tree       f7ed7753a2e66486a4ffe0fbbf98404ec4ba2212 /fs
parent     7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff)
parent     c45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff)
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - More userfaultfd work from Peter Xu
 - Several convert-to-folios series from Sidhartha Kumar and Huang Ying
 - Some filemap cleanups from Vishal Moola
 - David Hildenbrand added the ability to selftest anon memory COW handling
 - Some cpuset simplifications from Liu Shixin
 - Addition of vmalloc tracing support by Uladzislau Rezki
 - Some pagecache folioifications and simplifications from Matthew Wilcox
 - A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use it
 - Miguel Ojeda contributed some cleanups for our use of the __no_sanitize_thread__ gcc keyword. This series should have been in the non-MM tree, my bad
 - Naoya Horiguchi improved the interaction between memory poisoning and memory section removal for huge pages
 - DAMON cleanups and tuneups from SeongJae Park
 - Tony Luck fixed the handling of COW faults against poisoned pages
 - Peter Xu utilized the PTE marker code for handling swapin errors
 - Hugh Dickins reworked compound page mapcount handling, simplifying it and making it more efficient
 - Removal of the autonuma savedwrite infrastructure from Nadav Amit and David Hildenbrand
 - zram support for multiple compression streams from Sergey Senozhatsky
 - David Hildenbrand reworked the GUP code's R/O long-term pinning so that drivers no longer need to use the FOLL_FORCE workaround which didn't work very well anyway
 - Mel Gorman altered the page allocator so that local IRQs can remain enabled during per-cpu page allocations
 - Vishal Moola removed the try_to_release_page() wrapper
 - Stefan Roesch added some per-BDI sysfs tunables which are used to prevent network block devices from dirtying excessive amounts of pagecache
 - David Hildenbrand did some cleanup and repair work on KSM COW breaking
 - Nhat Pham and Johannes Weiner have implemented writeback in zswap's zsmalloc backend
 - Brian Foster has fixed a longstanding corner-case oddity in file[map]_write_and_wait_range()
 - sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang Chen
 - Shiyang Ruan has done some work on fsdax, to make its reflink mode work better under xfstests. Better, but still not perfect
 - Christoph Hellwig has removed the .writepage() method from several filesystems. They only need .writepages()
 - Yosry Ahmed wrote a series which fixes the memcg reclaim target beancounting
 - David Hildenbrand has fixed some of our MM selftests for 32-bit machines
 - Many singleton patches, as usual

* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
  mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
  mm: mmu_gather: allow more than one batch of delayed rmaps
  mm: fix typo in struct pglist_data code comment
  kmsan: fix memcpy tests
  mm: add cond_resched() in swapin_walk_pmd_entry()
  mm: do not show fs mm pc for VM_LOCKONFAULT pages
  selftests/vm: ksm_functional_tests: fixes for 32bit
  selftests/vm: cow: fix compile warning on 32bit
  selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
  mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
  mm,thp,rmap: fix races between updates of subpages_mapcount
  mm: memcg: fix swapcached stat accounting
  mm: add nodes= arg to memory.reclaim
  mm: disable top-tier fallback to reclaim on proactive reclaim
  selftests: cgroup: make sure reclaim target memcg is unprotected
  selftests: cgroup: refactor proactive reclaim code to reclaim_until()
  mm: memcg: fix stale protection of reclaim target memcg
  mm/mmap: properly unaccount memory on mas_preallocate() failure
  omfs: remove ->writepage
  jfs: remove ->writepage
  ...
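Several of the fs/ hunks below implement the ".writepage() removal" item above: exfat, fat, hfs, hfsplus, hpfs, jfs and omfs already provide ->writepages, so ordinary writeback goes through do_writepages(), and each conversion also wires up .migrate_folio = buffer_migrate_folio so page migration no longer has to fall back to writing pages out one at a time. A minimal sketch of the resulting pattern for a buffer_head-based filesystem ("myfs" and its get_block helper are made-up names, not part of this diff):

	#include <linux/fs.h>
	#include <linux/mpage.h>
	#include <linux/buffer_head.h>

	/* Illustrative stub: a real filesystem maps iblock to a disk block here. */
	static int myfs_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create)
	{
		return -EIO;
	}

	static int myfs_writepages(struct address_space *mapping,
				   struct writeback_control *wbc)
	{
		/* Bulk writeback; no per-page ->writepage method required. */
		return mpage_writepages(mapping, wbc, myfs_get_block);
	}

	static const struct address_space_operations myfs_aops = {
		.dirty_folio		= block_dirty_folio,
		.invalidate_folio	= block_invalidate_folio,
		.writepages		= myfs_writepages,
		/* Migration moves buffer_head pages instead of writing them out. */
		.migrate_folio		= buffer_migrate_folio,
	};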
Diffstat (limited to 'fs')
-rw-r--r--   fs/dax.c                221
-rw-r--r--   fs/exfat/inode.c          9
-rw-r--r--   fs/ext4/move_extent.c    52
-rw-r--r--   fs/fat/inode.c            9
-rw-r--r--   fs/fuse/dev.c            55
-rw-r--r--   fs/hfs/inode.c            2
-rw-r--r--   fs/hfsplus/inode.c        2
-rw-r--r--   fs/hpfs/file.c            9
-rw-r--r--   fs/hugetlbfs/inode.c     28
-rw-r--r--   fs/jfs/inode.c            7
-rw-r--r--   fs/omfs/file.c            7
-rw-r--r--   fs/proc/kcore.c          33
-rw-r--r--   fs/proc/task_mmu.c       16
-rw-r--r--   fs/xfs/xfs_ioctl.c        4
-rw-r--r--   fs/xfs/xfs_iomap.c        6
-rw-r--r--   fs/xfs/xfs_iops.c         4
-rw-r--r--   fs/xfs/xfs_reflink.c      8
17 files changed, 265 insertions, 207 deletions
diff --git a/fs/dax.c b/fs/dax.c
index 1c6867810cbd..c48a3a93ab29 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -334,35 +334,41 @@ static unsigned long dax_end_pfn(void *entry)
for (pfn = dax_to_pfn(entry); \
pfn < dax_end_pfn(entry); pfn++)
-static inline bool dax_mapping_is_cow(struct address_space *mapping)
+static inline bool dax_page_is_shared(struct page *page)
{
- return (unsigned long)mapping == PAGE_MAPPING_DAX_COW;
+ return page->mapping == PAGE_MAPPING_DAX_SHARED;
}
/*
- * Set the page->mapping with FS_DAX_MAPPING_COW flag, increase the refcount.
+ * Set the page->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the
+ * refcount.
*/
-static inline void dax_mapping_set_cow(struct page *page)
+static inline void dax_page_share_get(struct page *page)
{
- if ((uintptr_t)page->mapping != PAGE_MAPPING_DAX_COW) {
+ if (page->mapping != PAGE_MAPPING_DAX_SHARED) {
/*
* Reset the index if the page was already mapped
* regularly before.
*/
if (page->mapping)
- page->index = 1;
- page->mapping = (void *)PAGE_MAPPING_DAX_COW;
+ page->share = 1;
+ page->mapping = PAGE_MAPPING_DAX_SHARED;
}
- page->index++;
+ page->share++;
+}
+
+static inline unsigned long dax_page_share_put(struct page *page)
+{
+ return --page->share;
}
/*
- * When it is called in dax_insert_entry(), the cow flag will indicate that
+ * When it is called in dax_insert_entry(), the shared flag will indicate that
* whether this entry is shared by multiple files. If so, set the page->mapping
- * FS_DAX_MAPPING_COW, and use page->index as refcount.
+ * PAGE_MAPPING_DAX_SHARED, and use page->share as refcount.
*/
static void dax_associate_entry(void *entry, struct address_space *mapping,
- struct vm_area_struct *vma, unsigned long address, bool cow)
+ struct vm_area_struct *vma, unsigned long address, bool shared)
{
unsigned long size = dax_entry_size(entry), pfn, index;
int i = 0;
@@ -374,8 +380,8 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
for_each_mapped_pfn(entry, pfn) {
struct page *page = pfn_to_page(pfn);
- if (cow) {
- dax_mapping_set_cow(page);
+ if (shared) {
+ dax_page_share_get(page);
} else {
WARN_ON_ONCE(page->mapping);
page->mapping = mapping;
@@ -396,9 +402,9 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
struct page *page = pfn_to_page(pfn);
WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
- if (dax_mapping_is_cow(page->mapping)) {
- /* keep the CoW flag if this page is still shared */
- if (page->index-- > 0)
+ if (dax_page_is_shared(page)) {
+ /* keep the shared flag if this page is still shared */
+ if (dax_page_share_put(page) > 0)
continue;
} else
WARN_ON_ONCE(page->mapping && page->mapping != mapping);
@@ -840,12 +846,6 @@ static bool dax_fault_is_synchronous(const struct iomap_iter *iter,
(iter->iomap.flags & IOMAP_F_DIRTY);
}
-static bool dax_fault_is_cow(const struct iomap_iter *iter)
-{
- return (iter->flags & IOMAP_WRITE) &&
- (iter->iomap.flags & IOMAP_F_SHARED);
-}
-
/*
* By this point grab_mapping_entry() has ensured that we have a locked entry
* of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -859,13 +859,14 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
void *new_entry = dax_make_entry(pfn, flags);
- bool dirty = !dax_fault_is_synchronous(iter, vmf->vma);
- bool cow = dax_fault_is_cow(iter);
+ bool write = iter->flags & IOMAP_WRITE;
+ bool dirty = write && !dax_fault_is_synchronous(iter, vmf->vma);
+ bool shared = iter->iomap.flags & IOMAP_F_SHARED;
if (dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
+ if (shared || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
unsigned long index = xas->xa_index;
/* we are replacing a zero page with block mapping */
if (dax_is_pmd_entry(entry))
@@ -877,12 +878,12 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
xas_reset(xas);
xas_lock_irq(xas);
- if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+ if (shared || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
void *old;
dax_disassociate_entry(entry, mapping, false);
dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address,
- cow);
+ shared);
/*
* Only swap our new entry into the page cache if the current
* entry is a zero page or an empty entry. If a normal PTE or
@@ -902,7 +903,7 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
if (dirty)
xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
- if (cow)
+ if (write && shared)
xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
xas_unlock_irq(xas);
@@ -1086,7 +1087,8 @@ out:
}
/**
- * dax_iomap_cow_copy - Copy the data from source to destination before write
+ * dax_iomap_copy_around - Prepare for an unaligned write to a shared/cow page
+ * by copying the data before and after the range to be written.
* @pos: address to do copy from.
* @length: size of copy operation.
* @align_size: aligned w.r.t align_size (either PMD_SIZE or PAGE_SIZE)
@@ -1095,35 +1097,50 @@ out:
*
* This can be called from two places. Either during DAX write fault (page
* aligned), to copy the length size data to daddr. Or, while doing normal DAX
- * write operation, dax_iomap_actor() might call this to do the copy of either
+ * write operation, dax_iomap_iter() might call this to do the copy of either
* start or end unaligned address. In the latter case the rest of the copy of
- * aligned ranges is taken care by dax_iomap_actor() itself.
+ * aligned ranges is taken care by dax_iomap_iter() itself.
+ * If the srcmap contains invalid data, such as HOLE and UNWRITTEN, zero the
+ * area to make sure no old data remains.
*/
-static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size,
+static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
const struct iomap *srcmap, void *daddr)
{
loff_t head_off = pos & (align_size - 1);
size_t size = ALIGN(head_off + length, align_size);
loff_t end = pos + length;
loff_t pg_end = round_up(end, align_size);
+ /* copy_all is usually in page fault case */
bool copy_all = head_off == 0 && end == pg_end;
+ /* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */
+ bool zero_edge = srcmap->flags & IOMAP_F_SHARED ||
+ srcmap->type == IOMAP_UNWRITTEN;
void *saddr = 0;
int ret = 0;
- ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
- if (ret)
- return ret;
+ if (!zero_edge) {
+ ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
+ if (ret)
+ return ret;
+ }
if (copy_all) {
- ret = copy_mc_to_kernel(daddr, saddr, length);
- return ret ? -EIO : 0;
+ if (zero_edge)
+ memset(daddr, 0, size);
+ else
+ ret = copy_mc_to_kernel(daddr, saddr, length);
+ goto out;
}
/* Copy the head part of the range */
if (head_off) {
- ret = copy_mc_to_kernel(daddr, saddr, head_off);
- if (ret)
- return -EIO;
+ if (zero_edge)
+ memset(daddr, 0, head_off);
+ else {
+ ret = copy_mc_to_kernel(daddr, saddr, head_off);
+ if (ret)
+ return -EIO;
+ }
}
/* Copy the tail part of the range */
@@ -1131,12 +1148,19 @@ static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size,
loff_t tail_off = head_off + length;
loff_t tail_len = pg_end - end;
- ret = copy_mc_to_kernel(daddr + tail_off, saddr + tail_off,
- tail_len);
- if (ret)
- return -EIO;
+ if (zero_edge)
+ memset(daddr + tail_off, 0, tail_len);
+ else {
+ ret = copy_mc_to_kernel(daddr + tail_off,
+ saddr + tail_off, tail_len);
+ if (ret)
+ return -EIO;
+ }
}
- return 0;
+out:
+ if (zero_edge)
+ dax_flush(srcmap->dax_dev, daddr, size);
+ return ret ? -EIO : 0;
}
/*
@@ -1221,6 +1245,58 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
}
#endif /* CONFIG_FS_DAX_PMD */
+static s64 dax_unshare_iter(struct iomap_iter *iter)
+{
+ struct iomap *iomap = &iter->iomap;
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
+ loff_t pos = iter->pos;
+ loff_t length = iomap_length(iter);
+ int id = 0;
+ s64 ret = 0;
+ void *daddr = NULL, *saddr = NULL;
+
+ /* don't bother with blocks that are not shared to start with */
+ if (!(iomap->flags & IOMAP_F_SHARED))
+ return length;
+ /* don't bother with holes or unwritten extents */
+ if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
+ return length;
+
+ id = dax_read_lock();
+ ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL);
+ if (ret < 0)
+ goto out_unlock;
+
+ ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL);
+ if (ret < 0)
+ goto out_unlock;
+
+ ret = copy_mc_to_kernel(daddr, saddr, length);
+ if (ret)
+ ret = -EIO;
+
+out_unlock:
+ dax_read_unlock(id);
+ return ret;
+}
+
+int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
+ const struct iomap_ops *ops)
+{
+ struct iomap_iter iter = {
+ .inode = inode,
+ .pos = pos,
+ .len = len,
+ .flags = IOMAP_WRITE | IOMAP_UNSHARE | IOMAP_DAX,
+ };
+ int ret;
+
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = dax_unshare_iter(&iter);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dax_file_unshare);
+
static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
{
const struct iomap *iomap = &iter->iomap;
@@ -1235,13 +1311,10 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
if (ret < 0)
return ret;
memset(kaddr + offset, 0, size);
- if (srcmap->addr != iomap->addr) {
- ret = dax_iomap_cow_copy(pos, size, PAGE_SIZE, srcmap,
- kaddr);
- if (ret < 0)
- return ret;
- dax_flush(iomap->dax_dev, kaddr, PAGE_SIZE);
- } else
+ if (iomap->flags & IOMAP_F_SHARED)
+ ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
+ kaddr);
+ else
dax_flush(iomap->dax_dev, kaddr + offset, size);
return ret;
}
@@ -1258,6 +1331,15 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
return length;
+ /*
+ * invalidate the pages whose sharing state is to be changed
+ * because of CoW.
+ */
+ if (iomap->flags & IOMAP_F_SHARED)
+ invalidate_inode_pages2_range(iter->inode->i_mapping,
+ pos >> PAGE_SHIFT,
+ (pos + length - 1) >> PAGE_SHIFT);
+
do {
unsigned offset = offset_in_page(pos);
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
@@ -1318,12 +1400,13 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
struct iov_iter *iter)
{
const struct iomap *iomap = &iomi->iomap;
- const struct iomap *srcmap = &iomi->srcmap;
+ const struct iomap *srcmap = iomap_iter_srcmap(iomi);
loff_t length = iomap_length(iomi);
loff_t pos = iomi->pos;
struct dax_device *dax_dev = iomap->dax_dev;
loff_t end = pos + length, done = 0;
bool write = iov_iter_rw(iter) == WRITE;
+ bool cow = write && iomap->flags & IOMAP_F_SHARED;
ssize_t ret = 0;
size_t xfer;
int id;
@@ -1350,7 +1433,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
* into page tables. We have to tear down these mappings so that data
* written by write(2) is visible in mmap.
*/
- if (iomap->flags & IOMAP_F_NEW) {
+ if (iomap->flags & IOMAP_F_NEW || cow) {
invalidate_inode_pages2_range(iomi->inode->i_mapping,
pos >> PAGE_SHIFT,
(end - 1) >> PAGE_SHIFT);
@@ -1384,10 +1467,9 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
break;
}
- if (write &&
- srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
- ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap,
- kaddr);
+ if (cow) {
+ ret = dax_iomap_copy_around(pos, length, PAGE_SIZE,
+ srcmap, kaddr);
if (ret)
break;
}
@@ -1532,7 +1614,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
struct xa_state *xas, void **entry, bool pmd)
{
const struct iomap *iomap = &iter->iomap;
- const struct iomap *srcmap = &iter->srcmap;
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
bool write = iter->flags & IOMAP_WRITE;
@@ -1563,9 +1645,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags);
- if (write &&
- srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
- err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr);
+ if (write && iomap->flags & IOMAP_F_SHARED) {
+ err = dax_iomap_copy_around(pos, size, size, srcmap, kaddr);
if (err)
return dax_fault_return(err);
}
@@ -1936,15 +2017,15 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
.len = len,
.flags = IOMAP_DAX,
};
- int ret;
+ int ret, compared = 0;
- while ((ret = iomap_iter(&src_iter, ops)) > 0) {
- while ((ret = iomap_iter(&dst_iter, ops)) > 0) {
- dst_iter.processed = dax_range_compare_iter(&src_iter,
- &dst_iter, len, same);
- }
- if (ret <= 0)
- src_iter.processed = ret;
+ while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
+ (ret = iomap_iter(&dst_iter, ops)) > 0) {
+ compared = dax_range_compare_iter(&src_iter, &dst_iter, len,
+ same);
+ if (compared < 0)
+ return ret;
+ src_iter.processed = dst_iter.processed = compared;
}
return ret;
}
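To make the head/tail arithmetic of the dax_iomap_copy_around() hunk above concrete, here is a small user-space replay of the same offset computations (illustration only, not kernel code) for an unaligned 300-byte write at file position 4100 with PAGE_SIZE (4096) alignment:

	#include <stdio.h>
	#include <stdint.h>

	#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

	int main(void)
	{
		uint64_t align_size = 4096;		/* PAGE_SIZE case */
		uint64_t pos = 4100, length = 300;	/* unaligned write */

		uint64_t head_off = pos & (align_size - 1);		  /* 4    */
		uint64_t size = ALIGN_UP(head_off + length, align_size);  /* 4096 */
		uint64_t end = pos + length;				  /* 4400 */
		uint64_t pg_end = ALIGN_UP(end, align_size);		  /* 8192 */
		uint64_t tail_off = head_off + length;			  /* 304  */
		uint64_t tail_len = pg_end - end;			  /* 3792 */

		/*
		 * The kernel function copies (or, for holes/unwritten extents,
		 * zeroes) bytes [0, head_off) and [tail_off, tail_off + tail_len)
		 * of the destination page; the caller then writes the new data
		 * into the middle of that range.
		 */
		printf("head_off=%llu size=%llu tail_off=%llu tail_len=%llu\n",
		       (unsigned long long)head_off, (unsigned long long)size,
		       (unsigned long long)tail_off, (unsigned long long)tail_len);
		return 0;
	}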
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 5590a1e83126..eac95bcd9a8a 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -345,11 +345,6 @@ static void exfat_readahead(struct readahead_control *rac)
mpage_readahead(rac, exfat_get_block);
}
-static int exfat_writepage(struct page *page, struct writeback_control *wbc)
-{
- return block_write_full_page(page, exfat_get_block, wbc);
-}
-
static int exfat_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -473,12 +468,12 @@ static const struct address_space_operations exfat_aops = {
.invalidate_folio = block_invalidate_folio,
.read_folio = exfat_read_folio,
.readahead = exfat_readahead,
- .writepage = exfat_writepage,
.writepages = exfat_writepages,
.write_begin = exfat_write_begin,
.write_end = exfat_write_end,
.direct_IO = exfat_direct_IO,
- .bmap = exfat_aop_bmap
+ .bmap = exfat_aop_bmap,
+ .migrate_folio = buffer_migrate_folio,
};
static inline unsigned long exfat_hash(loff_t i_pos)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 044e34cd835c..8dbb87edf24c 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -253,6 +253,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
{
struct inode *orig_inode = file_inode(o_filp);
struct page *pagep[2] = {NULL, NULL};
+ struct folio *folio[2] = {NULL, NULL};
handle_t *handle;
ext4_lblk_t orig_blk_offset, donor_blk_offset;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
@@ -313,6 +314,13 @@ again:
* hold page's lock, if it is still the case data copy is not
* necessary, just swap data blocks between orig and donor.
*/
+ folio[0] = page_folio(pagep[0]);
+ folio[1] = page_folio(pagep[1]);
+
+ VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]);
+ VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]);
+ VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]);
+
if (unwritten) {
ext4_double_down_write_data_sem(orig_inode, donor_inode);
/* If any of extents in range became initialized we have to
@@ -331,10 +339,10 @@ again:
ext4_double_up_write_data_sem(orig_inode, donor_inode);
goto data_copy;
}
- if ((page_has_private(pagep[0]) &&
- !try_to_release_page(pagep[0], 0)) ||
- (page_has_private(pagep[1]) &&
- !try_to_release_page(pagep[1], 0))) {
+ if ((folio_has_private(folio[0]) &&
+ !filemap_release_folio(folio[0], 0)) ||
+ (folio_has_private(folio[1]) &&
+ !filemap_release_folio(folio[1], 0))) {
*err = -EBUSY;
goto drop_data_sem;
}
@@ -344,19 +352,21 @@ again:
block_len_in_page, 1, err);
drop_data_sem:
ext4_double_up_write_data_sem(orig_inode, donor_inode);
- goto unlock_pages;
+ goto unlock_folios;
}
data_copy:
- *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
+ *err = mext_page_mkuptodate(&folio[0]->page, from, from + replaced_size);
if (*err)
- goto unlock_pages;
+ goto unlock_folios;
/* At this point all buffers in range are uptodate, old mapping layout
* is no longer required, try to drop it now. */
- if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
- (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
+ if ((folio_has_private(folio[0]) &&
+ !filemap_release_folio(folio[0], 0)) ||
+ (folio_has_private(folio[1]) &&
+ !filemap_release_folio(folio[1], 0))) {
*err = -EBUSY;
- goto unlock_pages;
+ goto unlock_folios;
}
ext4_double_down_write_data_sem(orig_inode, donor_inode);
replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
@@ -369,13 +379,13 @@ data_copy:
replaced_size =
block_len_in_page << orig_inode->i_blkbits;
} else
- goto unlock_pages;
+ goto unlock_folios;
}
/* Perform all necessary steps similar write_begin()/write_end()
* but keeping in mind that i_size will not change */
- if (!page_has_buffers(pagep[0]))
- create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
- bh = page_buffers(pagep[0]);
+ if (!folio_buffers(folio[0]))
+ create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0);
+ bh = folio_buffers(folio[0]);
for (i = 0; i < data_offset_in_page; i++)
bh = bh->b_this_page;
for (i = 0; i < block_len_in_page; i++) {
@@ -385,7 +395,7 @@ data_copy:
bh = bh->b_this_page;
}
if (!*err)
- *err = block_commit_write(pagep[0], from, from + replaced_size);
+ *err = block_commit_write(&folio[0]->page, from, from + replaced_size);
if (unlikely(*err < 0))
goto repair_branches;
@@ -395,11 +405,11 @@ data_copy:
*err = ext4_jbd2_inode_add_write(handle, orig_inode,
(loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
-unlock_pages:
- unlock_page(pagep[0]);
- put_page(pagep[0]);
- unlock_page(pagep[1]);
- put_page(pagep[1]);
+unlock_folios:
+ folio_unlock(folio[0]);
+ folio_put(folio[0]);
+ folio_unlock(folio[1]);
+ folio_put(folio[1]);
stop_journal:
ext4_journal_stop(handle);
if (*err == -ENOSPC &&
@@ -430,7 +440,7 @@ repair_branches:
*err = -EIO;
}
replaced_count = 0;
- goto unlock_pages;
+ goto unlock_folios;
}
/**
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 1cbcc4608dc7..d99b8549ec8f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -194,11 +194,6 @@ static int fat_get_block(struct inode *inode, sector_t iblock,
return 0;
}
-static int fat_writepage(struct page *page, struct writeback_control *wbc)
-{
- return block_write_full_page(page, fat_get_block, wbc);
-}
-
static int fat_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -346,12 +341,12 @@ static const struct address_space_operations fat_aops = {
.invalidate_folio = block_invalidate_folio,
.read_folio = fat_read_folio,
.readahead = fat_readahead,
- .writepage = fat_writepage,
.writepages = fat_writepages,
.write_begin = fat_write_begin,
.write_end = fat_write_end,
.direct_IO = fat_direct_IO,
- .bmap = _fat_bmap
+ .bmap = _fat_bmap,
+ .migrate_folio = buffer_migrate_folio,
};
/*
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c73d9c4132f6..e8b60ce72c9a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -764,11 +764,11 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
return ncpy;
}
-static int fuse_check_page(struct page *page)
+static int fuse_check_folio(struct folio *folio)
{
- if (page_mapcount(page) ||
- page->mapping != NULL ||
- (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
+ if (folio_mapped(folio) ||
+ folio->mapping != NULL ||
+ (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
1 << PG_uptodate |
@@ -778,7 +778,7 @@ static int fuse_check_page(struct page *page)
1 << PG_reclaim |
1 << PG_waiters |
LRU_GEN_MASK | LRU_REFS_MASK))) {
- dump_page(page, "fuse: trying to steal weird page");
+ dump_page(&folio->page, "fuse: trying to steal weird page");
return 1;
}
return 0;
@@ -787,11 +787,11 @@ static int fuse_check_page(struct page *page)
static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
int err;
- struct page *oldpage = *pagep;
- struct page *newpage;
+ struct folio *oldfolio = page_folio(*pagep);
+ struct folio *newfolio;
struct pipe_buffer *buf = cs->pipebufs;
- get_page(oldpage);
+ folio_get(oldfolio);
err = unlock_request(cs->req);
if (err)
goto out_put_old;
@@ -814,35 +814,36 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (!pipe_buf_try_steal(cs->pipe, buf))
goto out_fallback;
- newpage = buf->page;
+ newfolio = page_folio(buf->page);
- if (!PageUptodate(newpage))
- SetPageUptodate(newpage);
+ if (!folio_test_uptodate(newfolio))
+ folio_mark_uptodate(newfolio);
- ClearPageMappedToDisk(newpage);
+ folio_clear_mappedtodisk(newfolio);
- if (fuse_check_page(newpage) != 0)
+ if (fuse_check_folio(newfolio) != 0)
goto out_fallback_unlock;
/*
* This is a new and locked page, it shouldn't be mapped or
* have any special flags on it
*/
- if (WARN_ON(page_mapped(oldpage)))
+ if (WARN_ON(folio_mapped(oldfolio)))
goto out_fallback_unlock;
- if (WARN_ON(page_has_private(oldpage)))
+ if (WARN_ON(folio_has_private(oldfolio)))
goto out_fallback_unlock;
- if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
+ if (WARN_ON(folio_test_dirty(oldfolio) ||
+ folio_test_writeback(oldfolio)))
goto out_fallback_unlock;
- if (WARN_ON(PageMlocked(oldpage)))
+ if (WARN_ON(folio_test_mlocked(oldfolio)))
goto out_fallback_unlock;
- replace_page_cache_page(oldpage, newpage);
+ replace_page_cache_folio(oldfolio, newfolio);
- get_page(newpage);
+ folio_get(newfolio);
if (!(buf->flags & PIPE_BUF_FLAG_LRU))
- lru_cache_add(newpage);
+ folio_add_lru(newfolio);
/*
* Release while we have extra ref on stolen page. Otherwise
@@ -855,28 +856,28 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (test_bit(FR_ABORTED, &cs->req->flags))
err = -ENOENT;
else
- *pagep = newpage;
+ *pagep = &newfolio->page;
spin_unlock(&cs->req->waitq.lock);
if (err) {
- unlock_page(newpage);
- put_page(newpage);
+ folio_unlock(newfolio);
+ folio_put(newfolio);
goto out_put_old;
}
- unlock_page(oldpage);
+ folio_unlock(oldfolio);
/* Drop ref for ap->pages[] array */
- put_page(oldpage);
+ folio_put(oldfolio);
cs->len = 0;
err = 0;
out_put_old:
/* Drop ref obtained in this function */
- put_page(oldpage);
+ folio_put(oldfolio);
return err;
out_fallback_unlock:
- unlock_page(newpage);
+ folio_unlock(newfolio);
out_fallback:
cs->pg = buf->page;
cs->offset = buf->offset;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a0746be3c1de..9c329a365e75 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -173,12 +173,12 @@ const struct address_space_operations hfs_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
.read_folio = hfs_read_folio,
- .writepage = hfs_writepage,
.write_begin = hfs_write_begin,
.write_end = generic_write_end,
.bmap = hfs_bmap,
.direct_IO = hfs_direct_IO,
.writepages = hfs_writepages,
+ .migrate_folio = buffer_migrate_folio,
};
/*
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b675581aa9d0..840577a0c1e7 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -170,12 +170,12 @@ const struct address_space_operations hfsplus_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
.read_folio = hfsplus_read_folio,
- .writepage = hfsplus_writepage,
.write_begin = hfsplus_write_begin,
.write_end = generic_write_end,
.bmap = hfsplus_bmap,
.direct_IO = hfsplus_direct_IO,
.writepages = hfsplus_writepages,
+ .migrate_folio = buffer_migrate_folio,
};
const struct dentry_operations hfsplus_dentry_operations = {
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index f7547a62c81f..88952d4a631e 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -163,11 +163,6 @@ static int hpfs_read_folio(struct file *file, struct folio *folio)
return mpage_read_folio(folio, hpfs_get_block);
}
-static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- return block_write_full_page(page, hpfs_get_block, wbc);
-}
-
static void hpfs_readahead(struct readahead_control *rac)
{
mpage_readahead(rac, hpfs_get_block);
@@ -248,12 +243,12 @@ const struct address_space_operations hpfs_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
.read_folio = hpfs_read_folio,
- .writepage = hpfs_writepage,
.readahead = hpfs_readahead,
.writepages = hpfs_writepages,
.write_begin = hpfs_write_begin,
.write_end = hpfs_write_end,
- .bmap = _hpfs_bmap
+ .bmap = _hpfs_bmap,
+ .migrate_folio = buffer_migrate_folio,
};
const struct file_operations hpfs_file_ops =
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index df7772335dc0..790d2727141a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -370,11 +370,11 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
return -EINVAL;
}
-static void hugetlb_delete_from_page_cache(struct page *page)
+static void hugetlb_delete_from_page_cache(struct folio *folio)
{
- ClearPageDirty(page);
- ClearPageUptodate(page);
- delete_from_page_cache(page);
+ folio_clear_dirty(folio);
+ folio_clear_uptodate(folio);
+ filemap_remove_folio(folio);
}
/*
@@ -580,8 +580,8 @@ static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
* map could fail. Correspondingly, the subpool and global
* reserve usage count can need to be adjusted.
*/
- VM_BUG_ON(HPageRestoreReserve(&folio->page));
- hugetlb_delete_from_page_cache(&folio->page);
+ VM_BUG_ON_FOLIO(folio_test_hugetlb_restore_reserve(folio), folio);
+ hugetlb_delete_from_page_cache(folio);
ret = true;
if (!truncate_op) {
if (unlikely(hugetlb_unreserve_pages(inode, index,
@@ -1097,10 +1097,10 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
- if (hugetlb_page_subpool(&src->page)) {
- hugetlb_set_page_subpool(&dst->page,
- hugetlb_page_subpool(&src->page));
- hugetlb_set_page_subpool(&src->page, NULL);
+ if (hugetlb_folio_subpool(src)) {
+ hugetlb_set_folio_subpool(dst,
+ hugetlb_folio_subpool(src));
+ hugetlb_set_folio_subpool(src, NULL);
}
if (mode != MIGRATE_SYNC_NO_COPY)
@@ -1279,7 +1279,7 @@ static const struct address_space_operations hugetlbfs_aops = {
static void init_once(void *foo)
{
- struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
+ struct hugetlbfs_inode_info *ei = foo;
inode_init_once(&ei->vfs_inode);
}
@@ -1377,7 +1377,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
case Opt_size:
/* memparse() will accept a K/M/G without a digit */
- if (!isdigit(param->string[0]))
+ if (!param->string || !isdigit(param->string[0]))
goto bad_val;
ctx->max_size_opt = memparse(param->string, &rest);
ctx->max_val_type = SIZE_STD;
@@ -1387,7 +1387,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
case Opt_nr_inodes:
/* memparse() will accept a K/M/G without a digit */
- if (!isdigit(param->string[0]))
+ if (!param->string || !isdigit(param->string[0]))
goto bad_val;
ctx->nr_inodes = memparse(param->string, &rest);
return 0;
@@ -1403,7 +1403,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
case Opt_min_size:
/* memparse() will accept a K/M/G without a digit */
- if (!isdigit(param->string[0]))
+ if (!param->string || !isdigit(param->string[0]))
goto bad_val;
ctx->min_size_opt = memparse(param->string, &rest);
ctx->min_val_type = SIZE_STD;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index d1ec920aa030..8ac10e396050 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -264,11 +264,6 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
return rc;
}
-static int jfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- return block_write_full_page(page, jfs_get_block, wbc);
-}
-
static int jfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -355,12 +350,12 @@ const struct address_space_operations jfs_aops = {
.invalidate_folio = block_invalidate_folio,
.read_folio = jfs_read_folio,
.readahead = jfs_readahead,
- .writepage = jfs_writepage,
.writepages = jfs_writepages,
.write_begin = jfs_write_begin,
.write_end = jfs_write_end,
.bmap = jfs_bmap,
.direct_IO = jfs_direct_IO,
+ .migrate_folio = buffer_migrate_folio,
};
/*
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index fa7fe2393ff6..3a5b4b88a583 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -294,11 +294,6 @@ static void omfs_readahead(struct readahead_control *rac)
mpage_readahead(rac, omfs_get_block);
}
-static int omfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- return block_write_full_page(page, omfs_get_block, wbc);
-}
-
static int
omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
@@ -375,10 +370,10 @@ const struct address_space_operations omfs_aops = {
.invalidate_folio = block_invalidate_folio,
.read_folio = omfs_read_folio,
.readahead = omfs_readahead,
- .writepage = omfs_writepage,
.writepages = omfs_writepages,
.write_begin = omfs_write_begin,
.write_end = generic_write_end,
.bmap = omfs_bmap,
+ .migrate_folio = buffer_migrate_folio,
};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index dff921f7ca33..71157ee35c1a 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -18,7 +18,6 @@
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
-#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
@@ -541,25 +540,17 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
fallthrough;
case KCORE_VMEMMAP:
case KCORE_TEXT:
- if (kern_addr_valid(start)) {
- /*
- * Using bounce buffer to bypass the
- * hardened user copy kernel text checks.
- */
- if (copy_from_kernel_nofault(buf, (void *)start,
- tsz)) {
- if (clear_user(buffer, tsz)) {
- ret = -EFAULT;
- goto out;
- }
- } else {
- if (copy_to_user(buffer, buf, tsz)) {
- ret = -EFAULT;
- goto out;
- }
+ /*
+ * Using bounce buffer to bypass the
+ * hardened user copy kernel text checks.
+ */
+ if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
}
} else {
- if (clear_user(buffer, tsz)) {
+ if (copy_to_user(buffer, buf, tsz)) {
ret = -EFAULT;
goto out;
}
@@ -638,10 +629,6 @@ static int __meminit kcore_callback(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block kcore_callback_nb __meminitdata = {
- .notifier_call = kcore_callback,
- .priority = 0,
-};
static struct kcore_list kcore_vmalloc;
@@ -694,7 +681,7 @@ static int __init proc_kcore_init(void)
add_modules_range();
/* Store direct-map area from physical memory map */
kcore_update_ram();
- register_hotmemory_notifier(&kcore_callback_nb);
+ hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI);
return 0;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8a74cdcc9af0..e35a0398db63 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -274,6 +274,7 @@ static void show_vma_header_prefix(struct seq_file *m,
static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
+ struct anon_vma_name *anon_name = NULL;
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
vm_flags_t flags = vma->vm_flags;
@@ -293,6 +294,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
start = vma->vm_start;
end = vma->vm_end;
show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
+ if (mm)
+ anon_name = anon_vma_name(vma);
/*
* Print the dentry name for named mappings, and a
@@ -300,7 +303,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
*/
if (file) {
seq_pad(m, ' ');
- seq_file_path(m, file, "\n");
+ /*
+ * If user named this anon shared memory via
+ * prctl(PR_SET_VMA ..., use the provided name.
+ */
+ if (anon_name)
+ seq_printf(m, "[anon_shmem:%s]", anon_name->name);
+ else
+ seq_file_path(m, file, "\n");
goto done;
}
@@ -312,8 +322,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
name = arch_vma_name(vma);
if (!name) {
- struct anon_vma_name *anon_name;
-
if (!mm) {
name = "[vdso]";
goto done;
@@ -330,7 +338,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
goto done;
}
- anon_name = anon_vma_name(vma);
if (anon_name) {
seq_pad(m, ' ');
seq_printf(m, "[anon:%s]", anon_name->name);
@@ -667,6 +674,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_RAND_READ)] = "rr",
[ilog2(VM_DONTCOPY)] = "dc",
[ilog2(VM_DONTEXPAND)] = "de",
+ [ilog2(VM_LOCKONFAULT)] = "lf",
[ilog2(VM_ACCOUNT)] = "ac",
[ilog2(VM_NORESERVE)] = "nr",
[ilog2(VM_HUGETLB)] = "ht",
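The fs/proc/task_mmu.c change above makes /proc/<pid>/maps report "[anon_shmem:<name>]" for shared anonymous memory that user space has named, instead of the backing file's path. A minimal user-space sketch of attaching such a name (the PR_SET_VMA fallback defines are only needed with older uapi headers; the kernel must be built with CONFIG_ANON_VMA_NAME):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/mman.h>
	#include <sys/prctl.h>

	#ifndef PR_SET_VMA
	#define PR_SET_VMA		0x53564d41
	#define PR_SET_VMA_ANON_NAME	0
	#endif

	int main(void)
	{
		size_t len = 2 * 1024 * 1024;
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		/* Name the shared anonymous mapping; shown as [anon_shmem:demo]. */
		if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
			  (unsigned long)p, len, (unsigned long)"demo"))
			perror("prctl");

		printf("inspect /proc/%d/maps\n", getpid());
		pause();
		return 0;
	}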
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 1f783e979629..13f1b2add390 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1138,10 +1138,6 @@ xfs_ioctl_setattr_xflags(
if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
- /* Don't allow us to set DAX mode for a reflinked file for now. */
- if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip))
- return -EINVAL;
-
/* diflags2 only valid for v3 inodes. */
i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
if (i_flags2 && !xfs_has_v3inodes(mp))
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 07da03976ec1..d9401d0300ad 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1215,7 +1215,7 @@ xfs_read_iomap_begin(
return error;
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
- if (!error && (flags & IOMAP_REPORT))
+ if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode)))
error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
xfs_iunlock(ip, lockmode);
@@ -1370,7 +1370,7 @@ xfs_zero_range(
if (IS_DAX(inode))
return dax_zero_range(inode, pos, len, did_zero,
- &xfs_direct_write_iomap_ops);
+ &xfs_dax_write_iomap_ops);
return iomap_zero_range(inode, pos, len, did_zero,
&xfs_buffered_write_iomap_ops);
}
@@ -1385,7 +1385,7 @@ xfs_truncate_page(
if (IS_DAX(inode))
return dax_truncate_page(inode, pos, did_zero,
- &xfs_direct_write_iomap_ops);
+ &xfs_dax_write_iomap_ops);
return iomap_truncate_page(inode, pos, did_zero,
&xfs_buffered_write_iomap_ops);
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 712238305bc3..515318dfbc38 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1187,10 +1187,6 @@ xfs_inode_supports_dax(
if (!S_ISREG(VFS_I(ip)->i_mode))
return false;
- /* Only supported on non-reflinked files. */
- if (xfs_is_reflink_inode(ip))
- return false;
-
/* Block size must match page size */
if (mp->m_sb.sb_blocksize != PAGE_SIZE)
return false;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 93bdd25680bc..fe46bce8cae6 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1693,8 +1693,12 @@ xfs_reflink_unshare(
inode_dio_wait(inode);
- error = iomap_file_unshare(inode, offset, len,
- &xfs_buffered_write_iomap_ops);
+ if (IS_DAX(inode))
+ error = dax_file_unshare(inode, offset, len,
+ &xfs_dax_write_iomap_ops);
+ else
+ error = iomap_file_unshare(inode, offset, len,
+ &xfs_buffered_write_iomap_ops);
if (error)
goto out;