summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-04-08 21:03:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-04-08 21:03:40 -0700
commit9b06860d7c1f1f4cb7d70f92e47dfa4a91bd5007 (patch)
tree120882e574394ce3b11bd491533613b4488fae45 /include
parent0906d8b975ff713cfb55328e4f3bf6de5967415e (diff)
parentf6d2b802f80d0ca89ee1f51c1781b3f79cdb25d5 (diff)
Merge tag 'libnvdimm-for-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm and dax updates from Dan Williams: "There were multiple touches outside of drivers/nvdimm/ this round to add cross arch compatibility to the devm_memremap_pages() interface, enhance numa information for persistent memory ranges, and add a zero_page_range() dax operation. This cycle I switched from the patchwork api to Konstantin's b4 script for collecting tags (from x86, PowerPC, filesystem, and device-mapper folks), and everything looks to have gone ok there. This has all appeared in -next with no reported issues. Summary: - Add support for region alignment configuration and enforcement to fix compatibility across architectures and PowerPC page size configurations. - Introduce 'zero_page_range' as a dax operation. This facilitates filesystem-dax operation without a block-device. - Introduce phys_to_target_node() to facilitate drivers that want to know resulting numa node if a given reserved address range was onlined. - Advertise a persistence-domain for of_pmem and papr_scm. The persistence domain indicates where cpu-store cycles need to reach in the platform-memory subsystem before the platform will consider them power-fail protected. - Promote numa_map_to_online_node() to a cross-kernel generic facility. - Save x86 numa information to allow for node-id lookups for reserved memory ranges, deploy that capability for the e820-pmem driver. - Pick up some miscellaneous minor fixes, that missed v5.6-final, including a some smatch reports in the ioctl path and some unit test compilation fixups. - Fixup some flexible-array declarations" * tag 'libnvdimm-for-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (29 commits) dax: Move mandatory ->zero_page_range() check in alloc_dax() dax,iomap: Add helper dax_iomap_zero() to zero a range dax: Use new dax zero page method for zeroing a page dm,dax: Add dax zero_page_range operation s390,dcssblk,dax: Add dax zero_page_range operation to dcssblk driver dax, pmem: Add a dax operation zero_page_range pmem: Add functions for reading/writing page to/from pmem libnvdimm: Update persistence domain value for of_pmem and papr_scm device tools/test/nvdimm: Fix out of tree build libnvdimm/region: Fix build error libnvdimm/region: Replace zero-length array with flexible-array member libnvdimm/label: Replace zero-length array with flexible-array member ACPI: NFIT: Replace zero-length array with flexible-array member libnvdimm/region: Introduce an 'align' attribute libnvdimm/region: Introduce NDD_LABELING libnvdimm/namespace: Enforce memremap_compat_align() libnvdimm/pfn: Prevent raw mode fallback if pfn-infoblock valid libnvdimm: Out of bounds read in __nd_ioctl() acpi/nfit: improve bounds checking for 'func' mm/memremap_pages: Introduce memremap_compat_align() ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/acpi.h23
-rw-r--r--include/linux/dax.h21
-rw-r--r--include/linux/device-mapper.h3
-rw-r--r--include/linux/io.h2
-rw-r--r--include/linux/libnvdimm.h2
-rw-r--r--include/linux/memremap.h8
-rw-r--r--include/linux/mmzone.h1
-rw-r--r--include/linux/numa.h30
8 files changed, 72 insertions, 18 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 9f70b7807d53..d661cd0ee64d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -416,9 +416,30 @@ extern void acpi_osi_setup(char *str);
extern bool acpi_osi_is_win8(void);
#ifdef CONFIG_ACPI_NUMA
-int acpi_map_pxm_to_online_node(int pxm);
int acpi_map_pxm_to_node(int pxm);
int acpi_get_node(acpi_handle handle);
+
+/**
+ * acpi_map_pxm_to_online_node - Map proximity ID to online node
+ * @pxm: ACPI proximity ID
+ *
+ * This is similar to acpi_map_pxm_to_node(), but always returns an online
+ * node. When the mapped node from a given proximity ID is offline, it
+ * looks up the node distance table and returns the nearest online node.
+ *
+ * ACPI device drivers, which are called after the NUMA initialization has
+ * completed in the kernel, can call this interface to obtain their device
+ * NUMA topology from ACPI tables. Such drivers do not have to deal with
+ * offline nodes. A node may be offline when a device proximity ID is
+ * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
+ * "numa=off" on x86.
+ */
+static inline int acpi_map_pxm_to_online_node(int pxm)
+{
+ int node = acpi_map_pxm_to_node(pxm);
+
+ return numa_map_to_online_node(node);
+}
#else
static inline int acpi_map_pxm_to_online_node(int pxm)
{
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 328c2dbb4409..d7af5d243f24 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -13,6 +13,7 @@
typedef unsigned long dax_entry_t;
struct iomap_ops;
+struct iomap;
struct dax_device;
struct dax_operations {
/*
@@ -34,6 +35,8 @@ struct dax_operations {
/* copy_to_iter: required operation for fs-dax direct-i/o */
size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *);
+ /* zero_page_range: required operation. Zero page range */
+ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
};
extern struct attribute_group dax_attribute_group;
@@ -199,6 +202,8 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ size_t nr_pages);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
@@ -210,20 +215,8 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
-
-#ifdef CONFIG_FS_DAX
-int __dax_zero_page_range(struct block_device *bdev,
- struct dax_device *dax_dev, sector_t sector,
- unsigned int offset, unsigned int length);
-#else
-static inline int __dax_zero_page_range(struct block_device *bdev,
- struct dax_device *dax_dev, sector_t sector,
- unsigned int offset, unsigned int length)
-{
- return -ENXIO;
-}
-#endif
-
+int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
+ struct iomap *iomap);
static inline bool dax_mapping(struct address_space *mapping)
{
return mapping->host && IS_DAX(mapping->host);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 475668c69dbc..af48d9da3916 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -141,6 +141,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn);
typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i);
+typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
+ size_t nr_pages);
#define PAGE_SECTORS (PAGE_SIZE / 512)
void dm_error(const char *message);
@@ -195,6 +197,7 @@ struct target_type {
dm_dax_direct_access_fn direct_access;
dm_dax_copy_iter_fn dax_copy_from_iter;
dm_dax_copy_iter_fn dax_copy_to_iter;
+ dm_dax_zero_page_range_fn dax_zero_page_range;
/* For internal device-mapper use. */
struct list_head list;
diff --git a/include/linux/io.h b/include/linux/io.h
index b1c44bb4b2d7..8394c56babc2 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -77,8 +77,6 @@ void *devm_memremap(struct device *dev, resource_size_t offset,
size_t size, unsigned long flags);
void devm_memunmap(struct device *dev, void *addr);
-void *__devm_memremap_pages(struct device *dev, struct resource *res);
-
#ifdef CONFIG_PCI
/*
* The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 9df091bd30ba..18da4059be09 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -37,6 +37,8 @@ enum {
NDD_WORK_PENDING = 4,
/* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */
NDD_NOBLK = 5,
+ /* dimm supports namespace labels */
+ NDD_LABELING = 6,
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 8b37c4c9222c..5f5b2df06e61 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -134,6 +134,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
+unsigned long memremap_compat_align(void);
#else
static inline void *devm_memremap_pages(struct device *dev,
struct dev_pagemap *pgmap)
@@ -167,6 +168,12 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
unsigned long nr_pfns)
{
}
+
+/* when memremap_pages() is disabled all archs can remap a single page */
+static inline unsigned long memremap_compat_align(void)
+{
+ return PAGE_SIZE;
+}
#endif /* CONFIG_ZONE_DEVICE */
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
@@ -174,4 +181,5 @@ static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
if (pgmap)
percpu_ref_put(pgmap->ref);
}
+
#endif /* _LINUX_MEMREMAP_H_ */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e9892bf9eba9..1b9de7d220fb 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1127,6 +1127,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
#define SUBSECTION_SHIFT 21
+#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
diff --git a/include/linux/numa.h b/include/linux/numa.h
index 110b0e5d0fb0..a42df804679e 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NUMA_H
#define _LINUX_NUMA_H
-
+#include <linux/types.h>
#ifdef CONFIG_NODES_SHIFT
#define NODES_SHIFT CONFIG_NODES_SHIFT
@@ -13,4 +13,32 @@
#define NUMA_NO_NODE (-1)
+/* optionally keep NUMA memory info available post init */
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+#define __initdata_or_meminfo
+#else
+#define __initdata_or_meminfo __initdata
+#endif
+
+#ifdef CONFIG_NUMA
+/* Generic implementation available */
+int numa_map_to_online_node(int node);
+
+/*
+ * Optional architecture specific implementation, users need a "depends
+ * on $ARCH"
+ */
+int phys_to_target_node(phys_addr_t addr);
+#else
+static inline int numa_map_to_online_node(int node)
+{
+ return NUMA_NO_NODE;
+}
+
+static inline int phys_to_target_node(phys_addr_t addr)
+{
+ return NUMA_NO_NODE;
+}
+#endif
+
#endif /* _LINUX_NUMA_H */