summaryrefslogtreecommitdiff
path: root/include/linux/dax.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/dax.h')
-rw-r--r--include/linux/dax.h330
1 files changed, 239 insertions, 91 deletions
diff --git a/include/linux/dax.h b/include/linux/dax.h
index df97b7af7e2c..9d624f4d9df6 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -1,13 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_DAX_H
#define _LINUX_DAX_H
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/radix-tree.h>
-#include <asm/pgtable.h>
-struct iomap_ops;
+typedef unsigned long dax_entry_t;
+
struct dax_device;
+struct gendisk;
+struct iomap_ops;
+struct iomap_iter;
+struct iomap;
+
+enum dax_access_mode {
+ DAX_ACCESS,
+ DAX_RECOVERY_WRITE,
+};
+
struct dax_operations {
/*
* direct_access: translate a device-relative
@@ -15,152 +26,289 @@ struct dax_operations {
* number of pages available for DAX at that pfn.
*/
long (*direct_access)(struct dax_device *, pgoff_t, long,
- void **, pfn_t *);
- /* copy_from_iter: required operation for fs-dax direct-i/o */
- size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
- struct iov_iter *);
- /* flush: optional driver-specific cache management after writes */
- void (*flush)(struct dax_device *, pgoff_t, void *, size_t);
+ enum dax_access_mode, void **, unsigned long *);
+ /* zero_page_range: required operation. Zero page range */
+ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
+ /*
+ * recovery_write: recover a poisoned range by DAX device driver
+ * capable of clearing poison.
+ */
+ size_t (*recovery_write)(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *iter);
};
-extern struct attribute_group dax_attribute_group;
+struct dax_holder_operations {
+ /*
+ * notify_failure - notify memory failure into inner holder device
+ * @dax_dev: the dax device which contains the holder
+ * @offset: offset on this dax device where memory failure occurs
+ * @len: length of this memory failure event
+ * @flags: action flags for memory failure handler
+ */
+ int (*notify_failure)(struct dax_device *dax_dev, u64 offset,
+ u64 len, int mf_flags);
+};
#if IS_ENABLED(CONFIG_DAX)
-struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
+void *dax_holder(struct dax_device *dax_dev);
void put_dax(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void dax_write_cache(struct dax_device *dax_dev, bool wc);
+bool dax_write_cache_enabled(struct dax_device *dax_dev);
+bool dax_synchronous(struct dax_device *dax_dev);
+void set_dax_nocache(struct dax_device *dax_dev);
+void set_dax_nomc(struct dax_device *dax_dev);
+void set_dax_synchronous(struct dax_device *dax_dev);
+size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i);
+/*
+ * Check if given mapping is supported by the file / underlying device.
+ */
+static inline bool daxdev_mapping_supported(vm_flags_t vm_flags,
+ const struct inode *inode,
+ struct dax_device *dax_dev)
+{
+ if (!(vm_flags & VM_SYNC))
+ return true;
+ if (!IS_DAX(inode))
+ return false;
+ return dax_synchronous(dax_dev);
+}
#else
-static inline struct dax_device *dax_get_by_host(const char *host)
+static inline void *dax_holder(struct dax_device *dax_dev)
{
return NULL;
}
-
+static inline struct dax_device *alloc_dax(void *private,
+ const struct dax_operations *ops)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
static inline void put_dax(struct dax_device *dax_dev)
{
}
+static inline void kill_dax(struct dax_device *dax_dev)
+{
+}
+static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
+{
+}
+static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
+{
+ return false;
+}
+static inline bool dax_synchronous(struct dax_device *dax_dev)
+{
+ return true;
+}
+static inline void set_dax_nocache(struct dax_device *dax_dev)
+{
+}
+static inline void set_dax_nomc(struct dax_device *dax_dev)
+{
+}
+static inline void set_dax_synchronous(struct dax_device *dax_dev)
+{
+}
+static inline bool daxdev_mapping_supported(vm_flags_t vm_flags,
+ const struct inode *inode,
+ struct dax_device *dax_dev)
+{
+ return !(vm_flags & VM_SYNC);
+}
+static inline size_t dax_recovery_write(struct dax_device *dax_dev,
+ pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
+{
+ return 0;
+}
#endif
-int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
-#if IS_ENABLED(CONFIG_FS_DAX)
-int __bdev_dax_supported(struct super_block *sb, int blocksize);
-static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+struct writeback_control;
+#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
+int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
+void dax_remove_host(struct gendisk *disk);
+struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off,
+ void *holder, const struct dax_holder_operations *ops);
+void fs_put_dax(struct dax_device *dax_dev, void *holder);
+#else
+static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
+{
+ return 0;
+}
+static inline void dax_remove_host(struct gendisk *disk)
+{
+}
+static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
+ u64 *start_off, void *holder,
+ const struct dax_holder_operations *ops)
+{
+ return NULL;
+}
+static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
{
- return __bdev_dax_supported(sb, blocksize);
}
+#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
-static inline struct dax_device *fs_dax_get_by_host(const char *host)
+#if IS_ENABLED(CONFIG_FS_DAX)
+int dax_writeback_mapping_range(struct address_space *mapping,
+ struct dax_device *dax_dev, struct writeback_control *wbc);
+
+struct page *dax_layout_busy_page(struct address_space *mapping);
+struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
+dax_entry_t dax_lock_folio(struct folio *folio);
+void dax_unlock_folio(struct folio *folio, dax_entry_t cookie);
+dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
+ unsigned long index, struct page **page);
+void dax_unlock_mapping_entry(struct address_space *mapping,
+ unsigned long index, dax_entry_t cookie);
+#else
+static inline struct page *dax_layout_busy_page(struct address_space *mapping)
{
- return dax_get_by_host(host);
+ return NULL;
}
-static inline void fs_put_dax(struct dax_device *dax_dev)
+static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages)
{
- put_dax(dax_dev);
+ return NULL;
}
-#else
-static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+ struct dax_device *dax_dev, struct writeback_control *wbc)
{
return -EOPNOTSUPP;
}
-static inline struct dax_device *fs_dax_get_by_host(const char *host)
+static inline dax_entry_t dax_lock_folio(struct folio *folio)
{
- return NULL;
+ if (IS_DAX(folio->mapping->host))
+ return ~0UL;
+ return 0;
}
-static inline void fs_put_dax(struct dax_device *dax_dev)
+static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie)
+{
+}
+
+static inline dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
+ unsigned long index, struct page **page)
+{
+ return 0;
+}
+
+static inline void dax_unlock_mapping_entry(struct address_space *mapping,
+ unsigned long index, dax_entry_t cookie)
{
}
#endif
+int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
+ const struct iomap_ops *ops);
+int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
+ const struct iomap_ops *ops);
+int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
+ const struct iomap_ops *ops);
+
+static inline bool dax_page_is_idle(struct page *page)
+{
+ return page && page_ref_count(page) == 0;
+}
+
+#if IS_ENABLED(CONFIG_DAX)
int dax_read_lock(void);
void dax_read_unlock(int id);
-struct dax_device *alloc_dax(void *private, const char *host,
- const struct dax_operations *ops);
-bool dax_alive(struct dax_device *dax_dev);
-void kill_dax(struct dax_device *dax_dev);
-void *dax_get_private(struct dax_device *dax_dev);
-long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
- void **kaddr, pfn_t *pfn);
-size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
- size_t bytes, struct iov_iter *i);
-void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
- size_t size);
-void dax_write_cache(struct dax_device *dax_dev, bool wc);
-bool dax_write_cache_enabled(struct dax_device *dax_dev);
+#else
+static inline int dax_read_lock(void)
+{
+ return 0;
+}
-/*
- * We use lowest available bit in exceptional entry for locking, one bit for
- * the entry size (PMD) and two more to tell us if the entry is a huge zero
- * page (HZP) or an empty entry that is just used for locking. In total four
- * special bits.
- *
- * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and
- * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
- * block allocation.
- */
-#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
-#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
-#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_HZP (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
+static inline void dax_read_unlock(int id)
+{
+}
+#endif /* CONFIG_DAX */
-static inline unsigned long dax_radix_sector(void *entry)
+#if !IS_ENABLED(CONFIG_FS_DAX)
+static inline int __must_check dax_break_layout(struct inode *inode,
+ loff_t start, loff_t end, void (cb)(struct inode *))
{
- return (unsigned long)entry >> RADIX_DAX_SHIFT;
+ return 0;
}
-static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+static inline void dax_break_layout_final(struct inode *inode)
{
- return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
- ((unsigned long)sector << RADIX_DAX_SHIFT) |
- RADIX_DAX_ENTRY_LOCK);
}
+#endif
+
+bool dax_alive(struct dax_device *dax_dev);
+void *dax_get_private(struct dax_device *dax_dev);
+long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
+ enum dax_access_mode mode, void **kaddr, unsigned long *pfn);
+size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+ size_t bytes, struct iov_iter *i);
+size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+ size_t bytes, struct iov_iter *i);
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ size_t nr_pages);
+int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len,
+ int mf_flags);
+void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops);
-int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
- const struct iomap_ops *ops);
+vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
+ unsigned long *pfnp, int *errp,
+ const struct iomap_ops *ops);
+vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
+ unsigned int order, unsigned long pfn);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+void dax_delete_mapping_range(struct address_space *mapping,
+ loff_t start, loff_t end);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
- pgoff_t index, void *entry, bool wake_all);
-
-#ifdef CONFIG_FS_DAX
-int __dax_zero_page_range(struct block_device *bdev,
- struct dax_device *dax_dev, sector_t sector,
- unsigned int offset, unsigned int length);
-#else
-static inline int __dax_zero_page_range(struct block_device *bdev,
- struct dax_device *dax_dev, sector_t sector,
- unsigned int offset, unsigned int length)
+int __must_check dax_break_layout(struct inode *inode, loff_t start,
+ loff_t end, void (cb)(struct inode *));
+static inline int __must_check dax_break_layout_inode(struct inode *inode,
+ void (cb)(struct inode *))
{
- return -ENXIO;
+ return dax_break_layout(inode, 0, LLONG_MAX, cb);
}
-#endif
-
-#ifdef CONFIG_FS_DAX_PMD
-static inline unsigned int dax_radix_order(void *entry)
+void dax_break_layout_final(struct inode *inode);
+int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same,
+ const struct iomap_ops *ops);
+int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *len, unsigned int remap_flags,
+ const struct iomap_ops *ops);
+static inline bool dax_mapping(struct address_space *mapping)
{
- if ((unsigned long)entry & RADIX_DAX_PMD)
- return PMD_SHIFT - PAGE_SHIFT;
- return 0;
+ return mapping->host && IS_DAX(mapping->host);
}
-#else
-static inline unsigned int dax_radix_order(void *entry)
+
+/*
+ * Due to dax's memory and block duo personalities, hwpoison reporting
+ * takes into consideration which personality is presently visible.
+ * When dax acts like a block device, such as in block IO, an encounter of
+ * dax hwpoison is reported as -EIO.
+ * When dax acts like memory, such as in page fault, a detection of hwpoison
+ * is reported as -EHWPOISON which leads to VM_FAULT_HWPOISON.
+ */
+static inline int dax_mem2blk_err(int err)
{
- return 0;
+ return (err == -EHWPOISON) ? -EIO : err;
}
-#endif
-int dax_pfn_mkwrite(struct vm_fault *vmf);
-static inline bool dax_mapping(struct address_space *mapping)
+#ifdef CONFIG_DEV_DAX_HMEM_DEVICES
+void hmem_register_resource(int target_nid, struct resource *r);
+#else
+static inline void hmem_register_resource(int target_nid, struct resource *r)
{
- return mapping->host && IS_DAX(mapping->host);
}
+#endif
-struct writeback_control;
-int dax_writeback_mapping_range(struct address_space *mapping,
- struct block_device *bdev, struct writeback_control *wbc);
+typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
+ const struct resource *res);
+int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
#endif