diff options
Diffstat (limited to 'arch/x86/mm/numa_64.c')
-rw-r--r-- | arch/x86/mm/numa_64.c | 67 |
1 files changed, 52 insertions, 15 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 16b82ad34b96..c5066d519e5d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -31,13 +31,15 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES]; struct memnode memnode; +#ifdef CONFIG_SMP int x86_cpu_to_node_map_init[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; void *x86_cpu_to_node_map_early_ptr; +EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr); +#endif DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map); -EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr); s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE @@ -58,7 +60,7 @@ unsigned long __initdata nodemap_size; * -1 if node overlap or lost ram (shift too big) */ static int __init populate_memnodemap(const struct bootnode *nodes, - int numnodes, int shift) + int numnodes, int shift, int *nodeids) { unsigned long addr, end; int i, res = -1; @@ -74,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes, do { if (memnodemap[addr >> shift] != NUMA_NO_NODE) return -1; - memnodemap[addr >> shift] = i; + + if (!nodeids) + memnodemap[addr >> shift] = i; + else + memnodemap[addr >> shift] = nodeids[i]; + addr += (1UL << shift); } while (addr < end); res = 1; @@ -137,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes, return i; } -int __init compute_hash_shift(struct bootnode *nodes, int numnodes) +int __init compute_hash_shift(struct bootnode *nodes, int numnodes, + int *nodeids) { int shift; @@ -147,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes) printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift); - if (populate_memnodemap(nodes, numnodes, shift) != 1) { + if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { printk(KERN_INFO "Your memory is not aligned you need to " "rebuild your kernel with a bigger NODEMAPSIZE " "shift=%d\n", shift); @@ -188,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long bootmap_start, nodedata_phys; void *bootmap; const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); + int nid; start = round_up(start, ZONE_ALIGN); @@ -210,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; - /* Find a place for the bootmem map */ + /* + * Find a place for the bootmem map + * nodedata_phys could be on other nodes by alloc_bootmem, + * so need to sure bootmap_start not to be small, otherwise + * early_node_mem will get that with find_e820_area instead + * of alloc_bootmem, that could clash with reserved range + */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + nid = phys_to_nid(nodedata_phys); + if (nid == nodeid) + bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + else + bootmap_start = round_up(start, PAGE_SIZE); /* * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE @@ -237,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, free_bootmem_with_active_regions(nodeid, end); - reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, - BOOTMEM_DEFAULT); - reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, - bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); + /* + * convert early reserve to bootmem reserve earlier + * otherwise early_node_mem could use early reserved mem + * on previous node + */ + early_res_to_bootmem(start, end); + + /* + * in some case early_node_mem could use alloc_bootmem + * to get range on other node, don't reserve that again + */ + if (nid != nodeid) + printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); + else + reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, + pgdat_size, BOOTMEM_DEFAULT); + nid = phys_to_nid(bootmap_start); + if (nid != nodeid) + printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); + else + reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, + bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); + #ifdef CONFIG_ACPI_NUMA srat_reserve_add_area(nodeid); #endif @@ -378,9 +416,10 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, * Sets up the system RAM area from start_pfn to end_pfn according to the * numa=fake command-line option. */ +static struct bootnode nodes[MAX_NUMNODES] __initdata; + static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) { - struct bootnode nodes[MAX_NUMNODES]; u64 size, addr = start_pfn << PAGE_SHIFT; u64 max_addr = end_pfn << PAGE_SHIFT; int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; @@ -460,7 +499,7 @@ done: } } out: - memnode_shift = compute_hash_shift(nodes, num_nodes); + memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); if (memnode_shift < 0) { memnode_shift = 0; printk(KERN_ERR "No NUMA hash function found. NUMA emulation " @@ -548,8 +587,6 @@ void __cpuinit numa_set_node(int cpu, int node) { int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; - cpu_pda(cpu)->nodenumber = node; - if(cpu_to_node_map) cpu_to_node_map[cpu] = node; else if(per_cpu_offset(cpu)) |