aboutsummaryrefslogtreecommitdiffstats
path: root/xen/arch/x86/srat.c
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2009-12-09 10:42:53 +0000
committerKeir Fraser <keir.fraser@citrix.com>2009-12-09 10:42:53 +0000
commit6369c4faa303cd4e8af9ff6ad73315122d9defc5 (patch)
tree170dcce577b237ae629f6572685f65c2d3f99171 /xen/arch/x86/srat.c
parent4d817e3923416c21a61c0df32f244893974eb2e8 (diff)
downloadxen-6369c4faa303cd4e8af9ff6ad73315122d9defc5.tar.gz
xen-6369c4faa303cd4e8af9ff6ad73315122d9defc5.tar.bz2
xen-6369c4faa303cd4e8af9ff6ad73315122d9defc5.zip
SRAT memory hotplug 2/2: Support overlapped and sparse node memory arrangement.
Currently the Xen hypervisor uses nodes to keep the start/end address of each node. It assumes memory among nodes has no overlap; this is not always true, especially if we have memory hotplug support in the system. This patch backports the Linux kernel's memblks to support overlap among nodes. The memblks will be used both for checking conflicts and for calculating memnode_shift. Also, currently if there is no memory populated in a node when the system boots, the node will be unparsed later, and the corresponding CPU's NUMA information will be removed as well. This patch keeps the CPU information. One thing to note is that currently we calculate memnode_shift with all memory, including un-populated ranges. This should work if the smallest chunk is not too small. Another option could be flags in the page_info structure, etc. The memnodemap is changed from paddr to pdx, both to save space and because currently most accesses are by pfn. A flag mem_hotplug is added if there is a hotplug memory range. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
Diffstat (limited to 'xen/arch/x86/srat.c')
-rw-r--r--xen/arch/x86/srat.c78
1 files changed, 56 insertions, 22 deletions
diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
index f6c5ada414..452f6534eb 100644
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -27,6 +27,11 @@ static nodemask_t nodes_found __initdata;
static struct node nodes[MAX_NUMNODES] __initdata;
static u8 __read_mostly pxm2node[256] = { [0 ... 255] = 0xff };
+
+static int num_node_memblks;
+static struct node node_memblk_range[NR_NODE_MEMBLKS];
+static int memblk_nodeid[NR_NODE_MEMBLKS];
+
/* Too small nodes confuse the VM badly. Usually they result
from BIOS bugs. */
#define NODE_MIN_SIZE (4*1024*1024)
@@ -54,17 +59,33 @@ __devinit int setup_node(int pxm)
return pxm2node[pxm];
}
-static __init int conflicting_nodes(u64 start, u64 end)
+int valid_numa_range(unsigned long start, unsigned long end, int node)
+{
+ int i;
+
+ for (i = 0; i < num_node_memblks; i++) {
+ struct node *nd = &node_memblk_range[i];
+
+ if (nd->start <= start && nd->end > end &&
+ memblk_nodeid[i] == node )
+ return 1;
+ }
+
+ return 0;
+}
+
+static __init int conflicting_memblks(unsigned long start, unsigned long end)
{
int i;
- for_each_node_mask(i, nodes_parsed) {
- struct node *nd = &nodes[i];
+
+ for (i = 0; i < num_node_memblks; i++) {
+ struct node *nd = &node_memblk_range[i];
if (nd->start == nd->end)
continue;
if (nd->end > start && nd->start < end)
- return i;
+ return memblk_nodeid[i];
if (nd->end == end && nd->start == start)
- return i;
+ return memblk_nodeid[i];
}
return -1;
}
@@ -174,6 +195,15 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
}
if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
return;
+
+ if (num_node_memblks >= NR_NODE_MEMBLKS)
+ {
+ dprintk(XENLOG_WARNING,
+ "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
+ bad_srat();
+ return;
+ }
+
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
@@ -187,9 +217,15 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
}
/* It is fine to add this area to the nodes data it will be used later*/
if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
+ {
printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n",
start, end);
- i = conflicting_nodes(start, end);
+#ifdef CONFIG_X86_64
+ mem_hotplug = 1;
+#endif
+ }
+
+ i = conflicting_memblks(start, end);
if (i == node) {
printk(KERN_WARNING
"SRAT: Warning: PXM %d (%"PRIx64"-%"PRIx64") overlaps with itself (%"
@@ -213,7 +249,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
nd->end = end;
}
printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm,
- nd->start, nd->end);
+ start, end);
+
+ node_memblk_range[num_node_memblks].start = start;
+ node_memblk_range[num_node_memblks].end = end;
+ memblk_nodeid[num_node_memblks] = node;
+ num_node_memblks++;
}
/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -258,16 +299,6 @@ static int nodes_cover_memory(void)
return 1;
}
-static void unparse_node(int node)
-{
- int i;
- node_clear(node, nodes_parsed);
- for (i = 0; i < MAX_LOCAL_APIC; i++) {
- if (apicid_to_node[i] == node)
- apicid_to_node[i] = NUMA_NO_NODE;
- }
-}
-
void __init acpi_numa_arch_fixup(void) {}
#ifdef __x86_64__
@@ -340,11 +371,8 @@ int __init acpi_scan_nodes(u64 start, u64 end)
int i;
/* First clean up the node list */
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < MAX_NUMNODES; i++)
cutoff_node(i, start, end);
- if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
- unparse_node(i);
- }
if (acpi_numa <= 0)
return -1;
@@ -354,7 +382,9 @@ int __init acpi_scan_nodes(u64 start, u64 end)
return -1;
}
- memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
+ memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
+ memblk_nodeid);
+
if (memnode_shift < 0) {
printk(KERN_ERR
"SRAT: No NUMA node hash function found. Contact maintainer\n");
@@ -364,7 +394,11 @@ int __init acpi_scan_nodes(u64 start, u64 end)
/* Finally register nodes */
for_each_node_mask(i, nodes_parsed)
+ {
+ if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
+ continue;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ }
for (i = 0; i < NR_CPUS; i++) {
if (cpu_to_node[i] == NUMA_NO_NODE)
continue;