summaryrefslogtreecommitdiff
path: root/include/linux/dax.h
blob: b463502b16e17fbc08ff3a982a9597f5d70cb06b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_DAX_H
#define _LINUX_DAX_H

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/radix-tree.h>

/*
 * Cookie type: returned by the dax_lock_*() helpers declared in this header
 * and handed back to the matching dax_unlock_*() call.
 */
typedef unsigned long dax_entry_t;

/* Forward declarations; this header never needs the layout of these types. */
struct dax_device;
struct gendisk;
struct iomap_ops;
struct iomap_iter;
struct iomap;

/*
 * Access mode selector: the "mode" argument of dax_direct_access() and of
 * the ->direct_access() operation in struct dax_operations.
 */
enum dax_access_mode {
	/* NOTE(review): presumably the ordinary access path - confirm */
	DAX_ACCESS,
	/* NOTE(review): presumably paired with ->recovery_write() - confirm */
	DAX_RECOVERY_WRITE,
};

/*
 * Per-device operation table; a pointer to one is passed to alloc_dax().
 */
struct dax_operations {
	/*
	 * direct_access: translate a device-relative logical page offset
	 * into an absolute physical pfn.  Returns the number of pages
	 * available for DAX at that pfn.
	 */
	long (*direct_access)(struct dax_device *dax_dev, pgoff_t pgoff,
			long nr_pages, enum dax_access_mode mode,
			void **kaddr, pfn_t *pfn);
	/*
	 * dax_supported: validate whether this device is usable as an
	 * fsdax backing device.
	 */
	bool (*dax_supported)(struct dax_device *, struct block_device *, int,
			sector_t, sector_t);
	/* zero_page_range: required operation; zero a range of pages. */
	int (*zero_page_range)(struct dax_device *dax_dev, pgoff_t pgoff,
			size_t nr_pages);
	/*
	 * recovery_write: recover a poisoned range; provided by DAX device
	 * drivers that are capable of clearing poison.
	 */
	size_t (*recovery_write)(struct dax_device *dax_dev, pgoff_t pgoff,
			void *addr, size_t bytes, struct iov_iter *iter);
};

/*
 * Callbacks for the holder of a dax_device; a pointer to this table is
 * passed to fs_dax_get_by_bdev() together with the holder itself.
 */
struct dax_holder_operations {
	/*
	 * notify_failure - notify memory failure into inner holder device
	 * @dax_dev: the dax device which contains the holder
	 * @offset: offset on this dax device where memory failure occurs
	 * @len: length of this memory failure event
	 * @mf_flags: action flags for memory failure handler
	 */
	int (*notify_failure)(struct dax_device *dax_dev, u64 offset,
			u64 len, int mf_flags);
};

#if IS_ENABLED(CONFIG_DAX)
/*
 * dax_device interface available when CONFIG_DAX is enabled.  The #else
 * branch of this conditional provides inline stubs for the same names.
 */
struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
void *dax_holder(struct dax_device *dax_dev);
void put_dax(struct dax_device *dax_dev);
void kill_dax(struct dax_device *dax_dev);
void dax_write_cache(struct dax_device *dax_dev, bool wc);
bool dax_write_cache_enabled(struct dax_device *dax_dev);
bool dax_synchronous(struct dax_device *dax_dev);
void set_dax_synchronous(struct dax_device *dax_dev);
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i);
/*
 * Decide whether @vma may be mapped on top of @dax_dev.  Only VM_SYNC
 * mappings are restricted: they require the backing file's inode to be a
 * DAX inode and @dax_dev to report itself synchronous.
 */
static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
					     struct dax_device *dax_dev)
{
	if (vma->vm_flags & VM_SYNC) {
		/* A synchronous mapping needs both a DAX inode ... */
		if (IS_DAX(file_inode(vma->vm_file)))
			/* ... and a device that is itself synchronous. */
			return dax_synchronous(dax_dev);
		return false;
	}

	/* Mappings without VM_SYNC are always acceptable. */
	return true;
}
#else
/*
 * CONFIG_DAX disabled: inline stubs for the dax_device interface.
 * dax_holder() and alloc_dax() return NULL, dax_write_cache_enabled()
 * returns false, dax_synchronous() returns true, and the remaining
 * functions do nothing.
 */
static inline void *dax_holder(struct dax_device *dax_dev)
{
	return NULL;
}
static inline struct dax_device *alloc_dax(void *private,
		const struct dax_operations *ops)
{
	/*
	 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
	 * NULL is an error or expected.
	 */
	return NULL;
}
static inline void put_dax(struct dax_device *dax_dev)
{
}
static inline void kill_dax(struct dax_device *dax_dev)
{
}
static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
{
}
static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
{
	return false;
}
static inline bool dax_synchronous(struct dax_device *dax_dev)
{
	return true;
}
static inline void set_dax_synchronous(struct dax_device *dax_dev)
{
}
/*
 * Stub used when CONFIG_DAX is disabled: a mapping is accepted exactly when
 * it does not carry VM_SYNC; @dax_dev is not consulted.
 */
static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
				struct dax_device *dax_dev)
{
	if (vma->vm_flags & VM_SYNC)
		return false;
	return true;
}
/* Stub used when CONFIG_DAX is disabled: ignores its arguments, returns 0. */
static inline size_t dax_recovery_write(struct dax_device *dax_dev,
		pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
{
	return 0;
}
#endif

/*
 * Declared regardless of CONFIG_DAX; this header provides no inline stubs
 * for these two.
 */
void set_dax_nocache(struct dax_device *dax_dev);
void set_dax_nomc(struct dax_device *dax_dev);

/* Only referenced through pointers by the declarations that follow. */
struct writeback_control;
#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
/*
 * Helpers relating a dax_device to a gendisk / block_device.  Declared only
 * when both CONFIG_BLOCK and CONFIG_FS_DAX are set; the #else branch holds
 * inline stubs.
 *
 * fs_dax_get_by_bdev() takes a holder and its dax_holder_operations, and
 * fs_put_dax() takes the same kind of holder pointer back.
 */
int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
void dax_remove_host(struct gendisk *disk);
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off,
		void *holder, const struct dax_holder_operations *ops);
void fs_put_dax(struct dax_device *dax_dev, void *holder);
#else
/*
 * Stubs for builds without CONFIG_BLOCK or CONFIG_FS_DAX: dax_add_host()
 * returns 0, fs_dax_get_by_bdev() returns NULL, and the remaining functions
 * do nothing.
 */
static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
{
	return 0;
}
static inline void dax_remove_host(struct gendisk *disk)
{
}
static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
		u64 *start_off, void *holder,
		const struct dax_holder_operations *ops)
{
	return NULL;
}
static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
{
}
#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */

#if IS_ENABLED(CONFIG_FS_DAX)
/*
 * Filesystem-DAX helpers operating on an address_space or folio, declared
 * when CONFIG_FS_DAX is enabled; the #else branch holds inline stubs.
 *
 * Each dax_lock_*() function returns a dax_entry_t cookie which the matching
 * dax_unlock_*() function takes back.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct dax_device *dax_dev, struct writeback_control *wbc);

struct page *dax_layout_busy_page(struct address_space *mapping);
struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
dax_entry_t dax_lock_folio(struct folio *folio);
void dax_unlock_folio(struct folio *folio, dax_entry_t cookie);
dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
		unsigned long index, struct page **page);
void dax_unlock_mapping_entry(struct address_space *mapping,
		unsigned long index, dax_entry_t cookie);
#else
/* Stub used when CONFIG_FS_DAX is disabled: always returns NULL. */
static inline struct page *dax_layout_busy_page(struct address_space *mapping)
{
	return NULL;
}

static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages)
{
	return NULL;
}

/* Stub used when CONFIG_FS_DAX is disabled: always returns -EOPNOTSUPP. */
static inline int dax_writeback_mapping_range(struct address_space *mapping,
		struct dax_device *dax_dev, struct writeback_control *wbc)
{
	return -EOPNOTSUPP;
}

/*
 * Stub used when CONFIG_FS_DAX is disabled.  Returns ~0UL when the folio's
 * mapping belongs to an inode for which IS_DAX() holds, and 0 otherwise.
 */
static inline dax_entry_t dax_lock_folio(struct folio *folio)
{
	return IS_DAX(folio->mapping->host) ? ~0UL : 0;
}

/*
 * Remaining lock/unlock stubs for builds without CONFIG_FS_DAX: the unlock
 * functions do nothing and dax_lock_mapping_entry() returns 0 without
 * touching *page.
 */
static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie)
{
}

static inline dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
		unsigned long index, struct page **page)
{
	return 0;
}

static inline void dax_unlock_mapping_entry(struct address_space *mapping,
		unsigned long index, dax_entry_t cookie)
{
}
#endif

/*
 * Inode range helpers.  Each takes a caller-supplied iomap_ops table and is
 * declared unconditionally (outside any CONFIG_* conditional in this header).
 * dax_zero_range() and dax_truncate_page() additionally take a bool
 * *did_zero out-parameter.
 */
int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
		const struct iomap_ops *ops);
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
		const struct iomap_ops *ops);
int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
		const struct iomap_ops *ops);

/*
 * dax_read_lock() returns an int; dax_read_unlock() takes an int id.
 * NOTE(review): presumably the value returned by dax_read_lock() is what
 * must be passed to dax_read_unlock() - confirm against the implementation.
 *
 * Without CONFIG_DAX the lock stub returns 0 and the unlock stub is a no-op.
 */
#if IS_ENABLED(CONFIG_DAX)
int dax_read_lock(void);
void dax_read_unlock(int id);
#else
static inline int dax_read_lock(void)
{
	return 0;
}

static inline void dax_read_unlock(int id)
{
}
#endif /* CONFIG_DAX */
/*
 * Operations on a dax_device, declared unconditionally.
 *
 * dax_direct_access(), dax_zero_page_range() and dax_holder_notify_failure()
 * have the same argument lists as the ->direct_access(), ->zero_page_range()
 * and ->notify_failure() callbacks declared earlier in this header.
 * NOTE(review): presumably they dispatch to those callbacks - confirm.
 */
bool dax_alive(struct dax_device *dax_dev);
void *dax_get_private(struct dax_device *dax_dev);
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
		enum dax_access_mode mode, void **kaddr, pfn_t *pfn);
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i);
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
			size_t nr_pages);
int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len,
		int mf_flags);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);

/*
 * I/O, page-fault, mapping-entry and remap/dedupe entry points, declared
 * unconditionally.  Those that take an iomap_ops table receive it from the
 * caller; the fault handlers return vm_fault_t.
 */
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops);
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
		    pfn_t *pfnp, int *errp, const struct iomap_ops *ops);
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
		unsigned int order, pfn_t pfn);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index);
int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
				  struct inode *dest, loff_t destoff,
				  loff_t len, bool *is_same,
				  const struct iomap_ops *ops);
int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in,
			      struct file *file_out, loff_t pos_out,
			      loff_t *len, unsigned int remap_flags,
			      const struct iomap_ops *ops);
/*
 * Report whether @mapping is a DAX mapping: it must have a host inode, and
 * IS_DAX() must hold for that inode.
 */
static inline bool dax_mapping(struct address_space *mapping)
{
	/* A mapping without a host inode is never DAX. */
	if (!mapping->host)
		return false;
	return IS_DAX(mapping->host);
}

/*
 * DAX has two personalities, memory and block device, and hwpoison is
 * reported according to whichever one is presently visible:
 *  - acting like a block device (e.g. block IO), hwpoison is reported
 *    as -EIO;
 *  - acting like memory (e.g. page fault), hwpoison is reported as
 *    -EHWPOISON, which leads to VM_FAULT_HWPOISON.
 *
 * This helper converts the memory-side error into its block-side form and
 * returns every other value unchanged.
 */
static inline int dax_mem2blk_err(int err)
{
	if (err == -EHWPOISON)
		return -EIO;
	return err;
}

/*
 * hmem_register_resource() takes a target node id and a resource.  It is a
 * real function only with CONFIG_DEV_DAX_HMEM_DEVICES; otherwise the inline
 * stub ignores both arguments and does nothing.
 */
#ifdef CONFIG_DEV_DAX_HMEM_DEVICES
void hmem_register_resource(int target_nid, struct resource *r);
#else
static inline void hmem_register_resource(int target_nid, struct resource *r)
{
}
#endif

/*
 * walk_hmem_fn is the callback type accepted by walk_hmem_resources(); it
 * receives a device, a target node id and a read-only resource and returns
 * an int.
 * NOTE(review): presumably @fn is invoked once per registered hmem resource
 * and a non-zero return stops the walk - confirm against the implementation.
 */
typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
			    const struct resource *res);
int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
#endif