summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-04-08 21:03:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-04-08 21:03:40 -0700
commit9b06860d7c1f1f4cb7d70f92e47dfa4a91bd5007 (patch)
tree120882e574394ce3b11bd491533613b4488fae45 /arch
parent0906d8b975ff713cfb55328e4f3bf6de5967415e (diff)
parentf6d2b802f80d0ca89ee1f51c1781b3f79cdb25d5 (diff)
Merge tag 'libnvdimm-for-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm and dax updates from Dan Williams: "There were multiple touches outside of drivers/nvdimm/ this round to add cross arch compatibility to the devm_memremap_pages() interface, enhance numa information for persistent memory ranges, and add a zero_page_range() dax operation. This cycle I switched from the patchwork api to Konstantin's b4 script for collecting tags (from x86, PowerPC, filesystem, and device-mapper folks), and everything looks to have gone ok there. This has all appeared in -next with no reported issues. Summary: - Add support for region alignment configuration and enforcement to fix compatibility across architectures and PowerPC page size configurations. - Introduce 'zero_page_range' as a dax operation. This facilitates filesystem-dax operation without a block-device. - Introduce phys_to_target_node() to facilitate drivers that want to know resulting numa node if a given reserved address range was onlined. - Advertise a persistence-domain for of_pmem and papr_scm. The persistence domain indicates where cpu-store cycles need to reach in the platform-memory subsystem before the platform will consider them power-fail protected. - Promote numa_map_to_online_node() to a cross-kernel generic facility. - Save x86 numa information to allow for node-id lookups for reserved memory ranges, deploy that capability for the e820-pmem driver. - Pick up some miscellaneous minor fixes, that missed v5.6-final, including a some smatch reports in the ioctl path and some unit test compilation fixups. - Fixup some flexible-array declarations" * tag 'libnvdimm-for-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (29 commits) dax: Move mandatory ->zero_page_range() check in alloc_dax() dax,iomap: Add helper dax_iomap_zero() to zero a range dax: Use new dax zero page method for zeroing a page dm,dax: Add dax zero_page_range operation s390,dcssblk,dax: Add dax zero_page_range operation to dcssblk driver dax, pmem: Add a dax operation zero_page_range pmem: Add functions for reading/writing page to/from pmem libnvdimm: Update persistence domain value for of_pmem and papr_scm device tools/test/nvdimm: Fix out of tree build libnvdimm/region: Fix build error libnvdimm/region: Replace zero-length array with flexible-array member libnvdimm/label: Replace zero-length array with flexible-array member ACPI: NFIT: Replace zero-length array with flexible-array member libnvdimm/region: Introduce an 'align' attribute libnvdimm/region: Introduce NDD_LABELING libnvdimm/namespace: Enforce memremap_compat_align() libnvdimm/pfn: Prevent raw mode fallback if pfn-infoblock valid libnvdimm: Out of bounds read in __nd_ioctl() acpi/nfit: improve bounds checking for 'func' mm/memremap_pages: Introduce memremap_compat_align() ...
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/mm/ioremap.c21
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c27
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/mm/numa.c67
5 files changed, 80 insertions, 37 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5fc45364e86e..a1a1c15a5023 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -122,6 +122,7 @@ config PPC
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
+ select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
index fc669643ce6a..b1a0aebe8c48 100644
--- a/arch/powerpc/mm/ioremap.c
+++ b/arch/powerpc/mm/ioremap.c
@@ -2,6 +2,7 @@
#include <linux/io.h>
#include <linux/slab.h>
+#include <linux/mmzone.h>
#include <linux/vmalloc.h>
#include <asm/io-workarounds.h>
@@ -97,3 +98,23 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
return NULL;
}
+
+#ifdef CONFIG_ZONE_DEVICE
+/*
+ * Override the generic version in mm/memremap.c.
+ *
+ * With hash translation, the direct-map range is mapped with just one
+ * page size selected by htab_init_page_sizes(). Consult
+ * mmu_psize_defs[] to determine the minimum page size alignment.
+*/
+unsigned long memremap_compat_align(void)
+{
+ unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
+
+ if (radix_enabled())
+ return SUBSECTION_SIZE;
+ return max(SUBSECTION_SIZE, 1UL << shift);
+
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index e4606100e286..f35592423380 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -286,25 +286,6 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
-static inline int papr_scm_node(int node)
-{
- int min_dist = INT_MAX, dist;
- int nid, min_node;
-
- if ((node == NUMA_NO_NODE) || node_online(node))
- return node;
-
- min_node = first_online_node;
- for_each_online_node(nid) {
- dist = node_distance(node, nid);
- if (dist < min_dist) {
- min_dist = dist;
- min_node = nid;
- }
- }
- return min_node;
-}
-
static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
{
struct device *dev = &p->pdev->dev;
@@ -329,7 +310,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
}
dimm_flags = 0;
- set_bit(NDD_ALIASING, &dimm_flags);
+ set_bit(NDD_LABELING, &dimm_flags);
p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
@@ -350,7 +331,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
memset(&ndr_desc, 0, sizeof(ndr_desc));
target_nid = dev_to_node(&p->pdev->dev);
- online_nid = papr_scm_node(target_nid);
+ online_nid = numa_map_to_online_node(target_nid);
ndr_desc.numa_node = online_nid;
ndr_desc.target_node = target_nid;
ndr_desc.res = &p->res;
@@ -362,8 +343,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
if (p->is_volatile)
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
- else
+ else {
+ set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+ }
if (!p->region) {
dev_err(dev, "Error registering region %pR from %pOF\n",
ndr_desc.res, p->dn);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8d078642b4be..0163d764ade4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1661,6 +1661,7 @@ config X86_PMEM_LEGACY
depends on PHYS_ADDR_T_64BIT
depends on BLK_DEV
select X86_PMEM_LEGACY_DEVICE
+ select NUMA_KEEP_MEMINFO if NUMA
select LIBNVDIMM
help
Treat memory marked using the non-standard e820 type of 12 as used
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 99f7a68738f0..59ba008504dc 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata;
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
-static struct numa_meminfo numa_meminfo
-#ifndef CONFIG_MEMORY_HOTPLUG
-__initdata
-#endif
-;
+static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
+static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
static int numa_distance_cnt;
static u8 *numa_distance;
@@ -169,6 +166,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
}
/**
+ * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
+ * @dst: numa_meminfo to append block to
+ * @idx: Index of memblk to remove
+ * @src: numa_meminfo to remove memblk from
+ */
+static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
+ struct numa_meminfo *src)
+{
+ dst->blk[dst->nr_blks++] = src->blk[idx];
+ numa_remove_memblk_from(idx, src);
+}
+
+/**
* numa_add_memblk - Add one numa_memblk to numa_meminfo
* @nid: NUMA node ID of the new memblk
* @start: Start address of the new memblk
@@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
for (i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
- /* make sure all blocks are inside the limits */
+ /* move / save reserved memory ranges */
+ if (!memblock_overlaps_region(&memblock.memory,
+ bi->start, bi->end - bi->start)) {
+ numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
+ continue;
+ }
+
+ /* make sure all non-reserved blocks are inside the limits */
bi->start = max(bi->start, low);
bi->end = min(bi->end, high);
- /* and there's no empty or non-exist block */
- if (bi->start >= bi->end ||
- !memblock_overlaps_region(&memblock.memory,
- bi->start, bi->end - bi->start))
+ /* and there's no empty block */
+ if (bi->start >= bi->end)
numa_remove_memblk_from(i--, mi);
}
@@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node);
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#ifdef CONFIG_MEMORY_HOTPLUG
-int memory_add_physaddr_to_nid(u64 start)
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
{
- struct numa_meminfo *mi = &numa_meminfo;
- int nid = mi->blk[0].nid;
int i;
for (i = 0; i < mi->nr_blks; i++)
if (mi->blk[i].start <= start && mi->blk[i].end > start)
- nid = mi->blk[i].nid;
+ return mi->blk[i].nid;
+ return NUMA_NO_NODE;
+}
+
+int phys_to_target_node(phys_addr_t start)
+{
+ int nid = meminfo_to_nid(&numa_meminfo, start);
+
+ /*
+ * Prefer online nodes, but if reserved memory might be
+ * hot-added continue the search with reserved ranges.
+ */
+ if (nid != NUMA_NO_NODE)
+ return nid;
+
+ return meminfo_to_nid(&numa_reserved_meminfo, start);
+}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+ int nid = meminfo_to_nid(&numa_meminfo, start);
+
+ if (nid == NUMA_NO_NODE)
+ nid = numa_meminfo.blk[0].nid;
return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);