Diffstat (limited to 'include/linux/dax.h')
| -rw-r--r-- | include/linux/dax.h | 330 |
1 file changed, 239 insertions, 91 deletions
diff --git a/include/linux/dax.h b/include/linux/dax.h
index df97b7af7e2c..9d624f4d9df6 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -1,13 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_DAX_H
 #define _LINUX_DAX_H

 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/radix-tree.h>
-#include <asm/pgtable.h>

-struct iomap_ops;
+typedef unsigned long dax_entry_t;
+
 struct dax_device;
+struct gendisk;
+struct iomap_ops;
+struct iomap_iter;
+struct iomap;
+
+enum dax_access_mode {
+	DAX_ACCESS,
+	DAX_RECOVERY_WRITE,
+};
+
 struct dax_operations {
 	/*
 	 * direct_access: translate a device-relative
@@ -15,152 +26,289 @@ struct dax_operations {
 	 * number of pages available for DAX at that pfn.
 	 */
 	long (*direct_access)(struct dax_device *, pgoff_t, long,
-			void **, pfn_t *);
-	/* copy_from_iter: required operation for fs-dax direct-i/o */
-	size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
-			struct iov_iter *);
-	/* flush: optional driver-specific cache management after writes */
-	void (*flush)(struct dax_device *, pgoff_t, void *, size_t);
+			enum dax_access_mode, void **, unsigned long *);
+	/* zero_page_range: required operation. Zero page range */
+	int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
+	/*
+	 * recovery_write: recover a poisoned range by DAX device driver
+	 * capable of clearing poison.
+	 */
+	size_t (*recovery_write)(struct dax_device *dax_dev, pgoff_t pgoff,
+			void *addr, size_t bytes, struct iov_iter *iter);
 };

-extern struct attribute_group dax_attribute_group;
+struct dax_holder_operations {
+	/*
+	 * notify_failure - notify memory failure into inner holder device
+	 * @dax_dev: the dax device which contains the holder
+	 * @offset: offset on this dax device where memory failure occurs
+	 * @len: length of this memory failure event
+	 * @flags: action flags for memory failure handler
+	 */
+	int (*notify_failure)(struct dax_device *dax_dev, u64 offset,
+			u64 len, int mf_flags);
+};

 #if IS_ENABLED(CONFIG_DAX)
-struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
+void *dax_holder(struct dax_device *dax_dev);
 void put_dax(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void dax_write_cache(struct dax_device *dax_dev, bool wc);
+bool dax_write_cache_enabled(struct dax_device *dax_dev);
+bool dax_synchronous(struct dax_device *dax_dev);
+void set_dax_nocache(struct dax_device *dax_dev);
+void set_dax_nomc(struct dax_device *dax_dev);
+void set_dax_synchronous(struct dax_device *dax_dev);
+size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i);
+/*
+ * Check if given mapping is supported by the file / underlying device.
+ */
+static inline bool daxdev_mapping_supported(vm_flags_t vm_flags,
+					    const struct inode *inode,
+					    struct dax_device *dax_dev)
+{
+	if (!(vm_flags & VM_SYNC))
+		return true;
+	if (!IS_DAX(inode))
+		return false;
+	return dax_synchronous(dax_dev);
+}
 #else
-static inline struct dax_device *dax_get_by_host(const char *host)
+static inline void *dax_holder(struct dax_device *dax_dev)
 {
 	return NULL;
 }
-
+static inline struct dax_device *alloc_dax(void *private,
+		const struct dax_operations *ops)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 static inline void put_dax(struct dax_device *dax_dev)
 {
 }
+static inline void kill_dax(struct dax_device *dax_dev)
+{
+}
+static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
+{
+}
+static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
+{
+	return false;
+}
+static inline bool dax_synchronous(struct dax_device *dax_dev)
+{
+	return true;
+}
+static inline void set_dax_nocache(struct dax_device *dax_dev)
+{
+}
+static inline void set_dax_nomc(struct dax_device *dax_dev)
+{
+}
+static inline void set_dax_synchronous(struct dax_device *dax_dev)
+{
+}
+static inline bool daxdev_mapping_supported(vm_flags_t vm_flags,
+					    const struct inode *inode,
+					    struct dax_device *dax_dev)
+{
+	return !(vm_flags & VM_SYNC);
+}
+static inline size_t dax_recovery_write(struct dax_device *dax_dev,
+		pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
+{
+	return 0;
+}
 #endif

-int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
-#if IS_ENABLED(CONFIG_FS_DAX)
-int __bdev_dax_supported(struct super_block *sb, int blocksize);
-static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+struct writeback_control;
+#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
+int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
+void dax_remove_host(struct gendisk *disk);
+struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off,
+		void *holder, const struct dax_holder_operations *ops);
+void fs_put_dax(struct dax_device *dax_dev, void *holder);
+#else
+static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
+{
+	return 0;
+}
+static inline void dax_remove_host(struct gendisk *disk)
+{
+}
+static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
+		u64 *start_off, void *holder,
+		const struct dax_holder_operations *ops)
+{
+	return NULL;
+}
+static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
 {
-	return __bdev_dax_supported(sb, blocksize);
 }
+#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */

-static inline struct dax_device *fs_dax_get_by_host(const char *host)
+#if IS_ENABLED(CONFIG_FS_DAX)
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct dax_device *dax_dev, struct writeback_control *wbc);
+
+struct page *dax_layout_busy_page(struct address_space *mapping);
+struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
+dax_entry_t dax_lock_folio(struct folio *folio);
+void dax_unlock_folio(struct folio *folio, dax_entry_t cookie);
+dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
+		unsigned long index, struct page **page);
+void dax_unlock_mapping_entry(struct address_space *mapping,
+		unsigned long index, dax_entry_t cookie);
+#else
+static inline struct page *dax_layout_busy_page(struct address_space *mapping)
 {
-	return dax_get_by_host(host);
+	return NULL;
 }

-static inline void fs_put_dax(struct dax_device *dax_dev)
+static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages)
 {
-	put_dax(dax_dev);
+	return NULL;
 }
-#else
-static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+		struct dax_device *dax_dev, struct writeback_control *wbc)
 {
 	return -EOPNOTSUPP;
 }

-static inline struct dax_device *fs_dax_get_by_host(const char *host)
+static inline dax_entry_t dax_lock_folio(struct folio *folio)
 {
-	return NULL;
+	if (IS_DAX(folio->mapping->host))
+		return ~0UL;
+	return 0;
 }

-static inline void fs_put_dax(struct dax_device *dax_dev)
+static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie)
+{
+}
+
+static inline dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
+		unsigned long index, struct page **page)
+{
+	return 0;
+}
+
+static inline void dax_unlock_mapping_entry(struct address_space *mapping,
+		unsigned long index, dax_entry_t cookie)
 {
 }
 #endif

+int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
+		const struct iomap_ops *ops);
+int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
+		const struct iomap_ops *ops);
+int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
+		const struct iomap_ops *ops);
+
+static inline bool dax_page_is_idle(struct page *page)
+{
+	return page && page_ref_count(page) == 0;
+}
+
+#if IS_ENABLED(CONFIG_DAX)
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *alloc_dax(void *private, const char *host,
-		const struct dax_operations *ops);
-bool dax_alive(struct dax_device *dax_dev);
-void kill_dax(struct dax_device *dax_dev);
-void *dax_get_private(struct dax_device *dax_dev);
-long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
-		void **kaddr, pfn_t *pfn);
-size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
-		size_t bytes, struct iov_iter *i);
-void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
-		size_t size);
-void dax_write_cache(struct dax_device *dax_dev, bool wc);
-bool dax_write_cache_enabled(struct dax_device *dax_dev);
+#else
+static inline int dax_read_lock(void)
+{
+	return 0;
+}

-/*
- * We use lowest available bit in exceptional entry for locking, one bit for
- * the entry size (PMD) and two more to tell us if the entry is a huge zero
- * page (HZP) or an empty entry that is just used for locking. In total four
- * special bits.
- *
- * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and
- * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
- * block allocation.
- */
-#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
-#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
-#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_HZP (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
+static inline void dax_read_unlock(int id)
+{
+}
+#endif /* CONFIG_DAX */

-static inline unsigned long dax_radix_sector(void *entry)
+#if !IS_ENABLED(CONFIG_FS_DAX)
+static inline int __must_check dax_break_layout(struct inode *inode,
+		loff_t start, loff_t end, void (cb)(struct inode *))
 {
-	return (unsigned long)entry >> RADIX_DAX_SHIFT;
+	return 0;
 }

-static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+static inline void dax_break_layout_final(struct inode *inode)
 {
-	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-			((unsigned long)sector << RADIX_DAX_SHIFT) |
-			RADIX_DAX_ENTRY_LOCK);
 }
+#endif
+
+bool dax_alive(struct dax_device *dax_dev);
+void *dax_get_private(struct dax_device *dax_dev);
+long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
+		enum dax_access_mode mode, void **kaddr, unsigned long *pfn);
+size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+		size_t bytes, struct iov_iter *i);
+size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+		size_t bytes, struct iov_iter *i);
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+		size_t nr_pages);
+int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len,
+		int mf_flags);
+void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);

 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops);
-int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
-		const struct iomap_ops *ops);
+vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
+		unsigned long *pfnp, int *errp,
+		const struct iomap_ops *ops);
+vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
+		unsigned int order, unsigned long pfn);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+void dax_delete_mapping_range(struct address_space *mapping,
+		loff_t start, loff_t end);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 		pgoff_t index);
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-		pgoff_t index, void *entry, bool wake_all);
-
-#ifdef CONFIG_FS_DAX
-int __dax_zero_page_range(struct block_device *bdev,
-		struct dax_device *dax_dev, sector_t sector,
-		unsigned int offset, unsigned int length);
-#else
-static inline int __dax_zero_page_range(struct block_device *bdev,
-		struct dax_device *dax_dev, sector_t sector,
-		unsigned int offset, unsigned int length)
+int __must_check dax_break_layout(struct inode *inode, loff_t start,
+		loff_t end, void (cb)(struct inode *));
+static inline int __must_check dax_break_layout_inode(struct inode *inode,
+		void (cb)(struct inode *))
 {
-	return -ENXIO;
+	return dax_break_layout(inode, 0, LLONG_MAX, cb);
 }
-#endif
-
-#ifdef CONFIG_FS_DAX_PMD
-static inline unsigned int dax_radix_order(void *entry)
+void dax_break_layout_final(struct inode *inode);
+int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+		struct inode *dest, loff_t destoff,
+		loff_t len, bool *is_same,
+		const struct iomap_ops *ops);
+int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+		struct file *file_out, loff_t pos_out,
+		loff_t *len, unsigned int remap_flags,
+		const struct iomap_ops *ops);
+static inline bool dax_mapping(struct address_space *mapping)
 {
-	if ((unsigned long)entry & RADIX_DAX_PMD)
-		return PMD_SHIFT - PAGE_SHIFT;
-	return 0;
+	return mapping->host && IS_DAX(mapping->host);
 }
-#else
-static inline unsigned int dax_radix_order(void *entry)
+
+/*
+ * Due to dax's memory and block duo personalities, hwpoison reporting
+ * takes into consideration which personality is presently visible.
+ * When dax acts like a block device, such as in block IO, an encounter of
+ * dax hwpoison is reported as -EIO.
+ * When dax acts like memory, such as in page fault, a detection of hwpoison
+ * is reported as -EHWPOISON which leads to VM_FAULT_HWPOISON.
+ */
+static inline int dax_mem2blk_err(int err)
 {
-	return 0;
+	return (err == -EHWPOISON) ? -EIO : err;
 }
-#endif

-int dax_pfn_mkwrite(struct vm_fault *vmf);
-static inline bool dax_mapping(struct address_space *mapping)
+#ifdef CONFIG_DEV_DAX_HMEM_DEVICES
+void hmem_register_resource(int target_nid, struct resource *r);
+#else
+static inline void hmem_register_resource(int target_nid, struct resource *r)
 {
-	return mapping->host && IS_DAX(mapping->host);
 }
+#endif

-struct writeback_control;
-int dax_writeback_mapping_range(struct address_space *mapping,
-		struct block_device *bdev, struct writeback_control *wbc);
+typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
+		const struct resource *res);
+int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
 #endif
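A usage sketch may help readers of this change. The rework above retires dax_get_by_host() in favor of an explicit holder model: a filesystem claims the DAX device behind its block device with fs_dax_get_by_bdev(), passing a holder cookie plus dax_holder_operations so the driver can report media failure back through ->notify_failure(). The following sketch is illustrative only and not part of this commit; the myfs_* names and the failure handling are hypothetical, and only the dax_* calls come from the header as patched above.

#include <linux/dax.h>
#include <linux/blkdev.h>

struct myfs_sb_info {			/* hypothetical per-fs state */
	struct dax_device *dax_dev;
	u64 dax_part_off;
};

/* Called by the device driver when poison is found under our data. */
static int myfs_notify_failure(struct dax_device *dax_dev, u64 offset,
			       u64 len, int mf_flags)
{
	struct myfs_sb_info *sbi = dax_holder(dax_dev);

	/*
	 * Hypothetical response: a real filesystem would map the device
	 * range back to file pages and unmap or kill them.
	 */
	pr_warn("myfs: poison at %llu (+%llu), flags %#x, sb %p\n",
		offset, len, mf_flags, sbi);
	return -EOPNOTSUPP;
}

static const struct dax_holder_operations myfs_dax_holder_ops = {
	.notify_failure = myfs_notify_failure,
};

static int myfs_setup_dax(struct myfs_sb_info *sbi, struct block_device *bdev)
{
	/* NULL means no usable DAX device sits behind this bdev. */
	sbi->dax_dev = fs_dax_get_by_bdev(bdev, &sbi->dax_part_off,
					  sbi, &myfs_dax_holder_ops);
	if (!sbi->dax_dev)
		return -EOPNOTSUPP;
	return 0;
}

static void myfs_release_dax(struct myfs_sb_info *sbi)
{
	/* Must pass the same holder cookie used at registration. */
	fs_put_dax(sbi->dax_dev, sbi);
}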
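The direct_access rework is what enables poison recovery: dax_direct_access() now takes an access mode, and a DAX_ACCESS mapping of poisoned media fails with -EHWPOISON, after which a writer may remap with DAX_RECOVERY_WRITE and hand the user data to dax_recovery_write() so a driver capable of clearing poison can do so while copying. The sketch below is modeled on the kernel's dax_iomap_rw() write path but is not part of this commit; myfs_dax_write_page() is a hypothetical helper and assumes bytes does not extend past the single mapped page.

/* Copy at most one page from @iter into the device at @pgoff. */
static ssize_t myfs_dax_write_page(struct dax_device *dax_dev, pgoff_t pgoff,
				   struct iov_iter *iter, size_t bytes)
{
	bool recovery = false;
	void *kaddr;
	size_t xfer;
	long map_len;
	int id;

	id = dax_read_lock();
	map_len = dax_direct_access(dax_dev, pgoff, 1, DAX_ACCESS,
				    &kaddr, NULL);
	if (map_len == -EHWPOISON) {
		/* Poisoned: remap for recovery and let the driver fix it. */
		map_len = dax_direct_access(dax_dev, pgoff, 1,
					    DAX_RECOVERY_WRITE, &kaddr, NULL);
		recovery = true;
	}
	if (map_len < 0) {
		dax_read_unlock(id);
		/* Block-style I/O reports poison as -EIO, not -EHWPOISON. */
		return dax_mem2blk_err(map_len);
	}

	if (recovery)
		xfer = dax_recovery_write(dax_dev, pgoff, kaddr, bytes, iter);
	else
		xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, bytes, iter);
	dax_read_unlock(id);

	return xfer ? xfer : -EIO;
}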
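Finally, daxdev_mapping_supported() gives ->mmap implementations a single check for MAP_SYNC: VM_SYNC is only honored when the inode is in DAX mode and the device supports synchronous faults. A minimal sketch, assuming a hypothetical myfs_dax_dev() accessor and myfs_dax_vm_ops:

static int myfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(file);

	/* Refuse MAP_SYNC unless DAX and synchronous faults are available. */
	if (!daxdev_mapping_supported(vma->vm_flags, inode,
				      myfs_dax_dev(inode)))
		return -EOPNOTSUPP;

	file_accessed(file);
	vma->vm_ops = &myfs_dax_vm_ops;	/* hypothetical vm_operations_struct */
	return 0;
}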
