diff options
author | Keir Fraser <keir.fraser@citrix.com> | 2010-01-08 11:22:41 +0000 |
---|---|---|
committer | Keir Fraser <keir.fraser@citrix.com> | 2010-01-08 11:22:41 +0000 |
commit | 445386cebbf2303c23e06557ef016ccd8fc90cae (patch) | |
tree | 46a19466a3ba587536ca3894b09eb8650fa14f18 /xen/arch/x86/numa.c | |
parent | b8a5529c455b0d4c8d6cbcc12a1a14daa8613f65 (diff) | |
download | xen-445386cebbf2303c23e06557ef016ccd8fc90cae.tar.gz xen-445386cebbf2303c23e06557ef016ccd8fc90cae.tar.bz2 xen-445386cebbf2303c23e06557ef016ccd8fc90cae.zip |
x86: fix NUMA handling (c/s 20599:e5a757ce7845)
c/s 20599 caused the hash shift to become significantly smaller on
systems with an SRAT like this
(XEN) SRAT: Node 0 PXM 0 0-a0000
(XEN) SRAT: Node 0 PXM 0 100000-80000000
(XEN) SRAT: Node 1 PXM 1 80000000-d0000000
(XEN) SRAT: Node 1 PXM 1 100000000-130000000
Combined with the static size of the memnodemap[] array, NUMA got
therefore disabled on such systems. The backport from Linux was really
incomplete, as Linux much earlier had already introduced a dynamically
allocated memnodemap[].
Further, doing to/from pdx translations on addresses just past a valid
range is not correct, as it may strip/fail to insert non-zero bits in
this case.
Finally, using 63 as the cover-it-all shift value is invalid on 32bit,
since pdx values are unsigned long.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Diffstat (limited to 'xen/arch/x86/numa.c')
-rw-r--r-- | xen/arch/x86/numa.c | 45 |
1 file changed, 37 insertions, 8 deletions
diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c index 6810b484ca..1261c4f292 100644 --- a/xen/arch/x86/numa.c +++ b/xen/arch/x86/numa.c @@ -30,7 +30,9 @@ struct node_data node_data[MAX_NUMNODES]; /* Mapping from pdx to node id */ int memnode_shift; -u8 memnodemap[NODEMAPSIZE]; +static typeof(*memnodemap) _memnodemap[2]; +unsigned long memnodemapsize; +u8 *memnodemap; unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE @@ -62,13 +64,13 @@ static int __init populate_memnodemap(const struct node *nodes, unsigned long spdx, epdx; int i, res = -1; - memset(memnodemap, NUMA_NO_NODE, sizeof(memnodemap)); + memset(memnodemap, NUMA_NO_NODE, memnodemapsize * sizeof(*memnodemap)); for (i = 0; i < numnodes; i++) { spdx = paddr_to_pdx(nodes[i].start); - epdx = paddr_to_pdx(nodes[i].end); + epdx = paddr_to_pdx(nodes[i].end - 1) + 1; if (spdx >= epdx) continue; - if ((epdx >> shift) >= NODEMAPSIZE) + if ((epdx >> shift) >= memnodemapsize) return 0; do { if (memnodemap[spdx >> shift] != NUMA_NO_NODE) @@ -86,6 +88,28 @@ static int __init populate_memnodemap(const struct node *nodes, return res; } +static int __init allocate_cachealigned_memnodemap(void) +{ + unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap)); + unsigned long mfn = alloc_boot_pages(size, 1); + + if (!mfn) { + printk(KERN_ERR + "NUMA: Unable to allocate Memory to Node hash map\n"); + memnodemapsize = 0; + return -1; + } + + memnodemap = mfn_to_virt(mfn); + mfn <<= PAGE_SHIFT; + size <<= PAGE_SHIFT; + printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", + mfn, mfn + size); + memnodemapsize = size / sizeof(*memnodemap); + + return 0; +} + /* * The LSB of all start and end addresses in the node map is the value of the * maximum possible shift. 
@@ -99,7 +123,7 @@ static int __init extract_lsb_from_nodes(const struct node *nodes, for (i = 0; i < numnodes; i++) { spdx = paddr_to_pdx(nodes[i].start); - epdx = paddr_to_pdx(nodes[i].end); + epdx = paddr_to_pdx(nodes[i].end - 1) + 1; if (spdx >= epdx) continue; bitfield |= spdx; @@ -108,9 +132,10 @@ static int __init extract_lsb_from_nodes(const struct node *nodes, memtop = epdx; } if (nodes_used <= 1) - i = 63; + i = BITS_PER_LONG - 1; else i = find_first_bit(&bitfield, sizeof(unsigned long)*8); + memnodemapsize = (memtop >> i) + 1; return i; } @@ -120,6 +145,10 @@ int __init compute_hash_shift(struct node *nodes, int numnodes, int shift; shift = extract_lsb_from_nodes(nodes, numnodes); + if (memnodemapsize <= ARRAY_SIZE(_memnodemap)) + memnodemap = _memnodemap; + else if (allocate_cachealigned_memnodemap()) + return -1; printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift); @@ -233,8 +262,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) (u64)start_pfn << PAGE_SHIFT, (u64)end_pfn << PAGE_SHIFT); /* setup dummy node covering all memory */ - memnode_shift = 63; - memnodemap[0] = 0; + memnode_shift = BITS_PER_LONG - 1; + memnodemap = _memnodemap; nodes_clear(node_online_map); node_set_online(0); for (i = 0; i < NR_CPUS; i++) |