summaryrefslogtreecommitdiff
path: root/arch/x86/mm/numa_64.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/numa_64.c')
-rw-r--r--arch/x86/mm/numa_64.c67
1 files changed, 52 insertions, 15 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 16b82ad34b96..c5066d519e5d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,13 +31,15 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
struct memnode memnode;
+#ifdef CONFIG_SMP
int x86_cpu_to_node_map_init[NR_CPUS] = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
void *x86_cpu_to_node_map_early_ptr;
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+#endif
DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
@@ -58,7 +60,7 @@ unsigned long __initdata nodemap_size;
* -1 if node overlap or lost ram (shift too big)
*/
static int __init populate_memnodemap(const struct bootnode *nodes,
- int numnodes, int shift)
+ int numnodes, int shift, int *nodeids)
{
unsigned long addr, end;
int i, res = -1;
@@ -74,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
do {
if (memnodemap[addr >> shift] != NUMA_NO_NODE)
return -1;
- memnodemap[addr >> shift] = i;
+
+ if (!nodeids)
+ memnodemap[addr >> shift] = i;
+ else
+ memnodemap[addr >> shift] = nodeids[i];
+
addr += (1UL << shift);
} while (addr < end);
res = 1;
@@ -137,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
return i;
}
-int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
+int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
+ int *nodeids)
{
int shift;
@@ -147,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
shift);
- if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+ if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
printk(KERN_INFO "Your memory is not aligned you need to "
"rebuild your kernel with a bigger NODEMAPSIZE "
"shift=%d\n", shift);
@@ -188,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
unsigned long bootmap_start, nodedata_phys;
void *bootmap;
const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+ int nid;
start = round_up(start, ZONE_ALIGN);
@@ -210,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
- /* Find a place for the bootmem map */
+ /*
+ * Find a place for the bootmem map
+ * nodedata_phys could be on other nodes by alloc_bootmem,
+ * so need to sure bootmap_start not to be small, otherwise
+ * early_node_mem will get that with find_e820_area instead
+ * of alloc_bootmem, that could clash with reserved range
+ */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+ nid = phys_to_nid(nodedata_phys);
+ if (nid == nodeid)
+ bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+ else
+ bootmap_start = round_up(start, PAGE_SIZE);
/*
* SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
* to use that to align to PAGE_SIZE
@@ -237,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
free_bootmem_with_active_regions(nodeid, end);
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
- BOOTMEM_DEFAULT);
- reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
- bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+ /*
+ * convert early reserve to bootmem reserve earlier
+ * otherwise early_node_mem could use early reserved mem
+ * on previous node
+ */
+ early_res_to_bootmem(start, end);
+
+ /*
+ * in some case early_node_mem could use alloc_bootmem
+ * to get range on other node, don't reserve that again
+ */
+ if (nid != nodeid)
+ printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
+ else
+ reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
+ pgdat_size, BOOTMEM_DEFAULT);
+ nid = phys_to_nid(bootmap_start);
+ if (nid != nodeid)
+ printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
+ else
+ reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+ bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
#ifdef CONFIG_ACPI_NUMA
srat_reserve_add_area(nodeid);
#endif
@@ -378,9 +416,10 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
* Sets up the system RAM area from start_pfn to end_pfn according to the
* numa=fake command-line option.
*/
+static struct bootnode nodes[MAX_NUMNODES] __initdata;
+
static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
{
- struct bootnode nodes[MAX_NUMNODES];
u64 size, addr = start_pfn << PAGE_SHIFT;
u64 max_addr = end_pfn << PAGE_SHIFT;
int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
@@ -460,7 +499,7 @@ done:
}
}
out:
- memnode_shift = compute_hash_shift(nodes, num_nodes);
+ memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
if (memnode_shift < 0) {
memnode_shift = 0;
printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
@@ -548,8 +587,6 @@ void __cpuinit numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
- cpu_pda(cpu)->nodenumber = node;
-
if(cpu_to_node_map)
cpu_to_node_map[cpu] = node;
else if(per_cpu_offset(cpu))