ia64/xen-unstable

changeset 11971:f312c2d01d8b

[XEN] Add basic NUMA/SRAT support to Xen from Linux 2.6.16.29.
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
author kfraser@localhost.localdomain
date Wed Oct 25 12:25:54 2006 +0100 (2006-10-25)
parents a1f987e9640f
children cf95c3218a70
files xen/arch/x86/Makefile xen/arch/x86/numa.c xen/arch/x86/setup.c xen/arch/x86/smpboot.c xen/arch/x86/srat.c xen/drivers/acpi/Makefile xen/drivers/acpi/numa.c xen/include/asm-x86/acpi.h xen/include/asm-x86/config.h xen/include/asm-x86/mach-generic/mach_apic.h xen/include/asm-x86/numa.h xen/include/asm-x86/numnodes.h xen/include/asm-x86/topology.h xen/include/xen/config.h xen/include/xen/nodemask.h xen/include/xen/numa.h xen/include/xen/topology.h
line diff
     1.1 --- a/xen/arch/x86/Makefile	Wed Oct 25 11:51:23 2006 +0100
     1.2 +++ b/xen/arch/x86/Makefile	Wed Oct 25 12:25:54 2006 +0100
     1.3 @@ -28,12 +28,14 @@ obj-y += microcode.o
     1.4  obj-y += mm.o
     1.5  obj-y += mpparse.o
     1.6  obj-y += nmi.o
     1.7 +obj-y += numa.o
     1.8  obj-y += physdev.o
     1.9  obj-y += rwlock.o
    1.10  obj-y += setup.o
    1.11  obj-y += shutdown.o
    1.12  obj-y += smp.o
    1.13  obj-y += smpboot.o
    1.14 +obj-y += srat.o
    1.15  obj-y += string.o
    1.16  obj-y += sysctl.o
    1.17  obj-y += time.o
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/xen/arch/x86/numa.c	Wed Oct 25 12:25:54 2006 +0100
     2.3 @@ -0,0 +1,302 @@
     2.4 +/* 
     2.5 + * Generic VM initialization for x86-64 NUMA setups.
     2.6 + * Copyright 2002,2003 Andi Kleen, SuSE Labs.
     2.7 + * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
     2.8 + */ 
     2.9 +
    2.10 +#include <xen/mm.h>
    2.11 +#include <xen/string.h>
    2.12 +#include <xen/init.h>
    2.13 +#include <xen/ctype.h>
    2.14 +#include <xen/nodemask.h>
    2.15 +#include <xen/numa.h>
    2.16 +#include <xen/keyhandler.h>
    2.17 +#include <xen/time.h>
    2.18 +
    2.19 +#include <asm/numa.h>
    2.20 +#include <asm/acpi.h>
    2.21 +
    2.22 +#ifndef Dprintk
    2.23 +#define Dprintk(x...)
    2.24 +#endif
    2.25 +
    2.26 +/* from proto.h */
    2.27 +#define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
    2.28 +
    2.29 +struct node_data node_data[MAX_NUMNODES];
    2.30 +
    2.31 +int memnode_shift;
    2.32 +u8  memnodemap[NODEMAPSIZE];
    2.33 +
    2.34 +unsigned int cpu_to_node[NR_CPUS] __read_mostly = {
    2.35 +	[0 ... NR_CPUS-1] = NUMA_NO_NODE
    2.36 +};
    2.37 +unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
    2.38 + 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
    2.39 +};
    2.40 +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
    2.41 +
    2.42 +nodemask_t node_online_map = { { [0] = 1UL } };
    2.43 +
    2.44 +int numa_off __initdata;
    2.45 +
    2.46 +int acpi_numa __initdata;
    2.47 +
    2.48 +/*
    2.49 + * Given a shift value, try to populate memnodemap[]
    2.50 + * Returns :
    2.51 + * 1 if OK
    2.52 + * 0 if memnodemap[] too small (or shift too small)
    2.53 + * -1 if node overlap or lost ram (shift too big)
    2.54 + */
    2.55 +static int __init
    2.56 +populate_memnodemap(const struct node *nodes, int numnodes, int shift)
    2.57 +{
    2.58 +	int i; 
    2.59 +	int res = -1;
    2.60 +	unsigned long addr, end;
    2.61 +
    2.62 +	if (shift >= 64)
    2.63 +		return -1;
    2.64 +	memset(memnodemap, 0xff, sizeof(memnodemap));
    2.65 +	for (i = 0; i < numnodes; i++) {
    2.66 +		addr = nodes[i].start;
    2.67 +		end = nodes[i].end;
    2.68 +		if (addr >= end)
    2.69 +			continue;
    2.70 +		if ((end >> shift) >= NODEMAPSIZE)
    2.71 +			return 0;
    2.72 +		do {
    2.73 +			if (memnodemap[addr >> shift] != 0xff)
    2.74 +				return -1;
    2.75 +			memnodemap[addr >> shift] = i;
    2.76 +                       addr += (1UL << shift);
    2.77 +		} while (addr < end);
    2.78 +		res = 1;
    2.79 +	} 
    2.80 +	return res;
    2.81 +}
    2.82 +
    2.83 +int __init compute_hash_shift(struct node *nodes, int numnodes)
    2.84 +{
    2.85 +	int shift = 20;
    2.86 +
    2.87 +	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
    2.88 +		shift++;
    2.89 +
    2.90 +	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
    2.91 +		shift);
    2.92 +
    2.93 +	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
    2.94 +		printk(KERN_INFO
    2.95 +	"Your memory is not aligned you need to rebuild your kernel "
    2.96 +	"with a bigger NODEMAPSIZE shift=%d\n",
    2.97 +			shift);
    2.98 +		return -1;
    2.99 +	}
   2.100 +	return shift;
   2.101 +}
   2.102 +
   2.103 +/* initialize NODE_DATA given nodeid and start/end */
   2.104 +void __init setup_node_bootmem(int nodeid, u64 start, u64 end)
   2.105 +{ 
   2.106 +	unsigned long start_pfn, end_pfn;
   2.107 +
   2.108 +	start_pfn = start >> PAGE_SHIFT;
   2.109 +	end_pfn = end >> PAGE_SHIFT;
   2.110 +
   2.111 +	NODE_DATA(nodeid)->node_id = nodeid;
   2.112 +	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
   2.113 +	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
   2.114 +
   2.115 +	node_set_online(nodeid);
   2.116 +} 
   2.117 +
   2.118 +void __init numa_init_array(void)
   2.119 +{
   2.120 +	int rr, i;
   2.121 +	/* There are unfortunately some poorly designed mainboards around
   2.122 +	   that only connect memory to a single CPU. This breaks the 1:1 cpu->node
   2.123 +	   mapping. To avoid this fill in the mapping for all possible
   2.124 +	   CPUs, as the number of CPUs is not known yet. 
   2.125 +	   We round robin the existing nodes. */
   2.126 +	rr = first_node(node_online_map);
   2.127 +	for (i = 0; i < NR_CPUS; i++) {
   2.128 +		if (cpu_to_node[i] != NUMA_NO_NODE)
   2.129 +			continue;
   2.130 + 		numa_set_node(i, rr);
   2.131 +		rr = next_node(rr, node_online_map);
   2.132 +		if (rr == MAX_NUMNODES)
   2.133 +			rr = first_node(node_online_map);
   2.134 +	}
   2.135 +
   2.136 +}
   2.137 +
   2.138 +#ifdef CONFIG_NUMA_EMU
   2.139 +/* default to faking a single node as fallback for non-NUMA hardware */
   2.140 +int numa_fake __initdata = 1;
   2.141 +
   2.142 +/* Numa emulation */
   2.143 +static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
   2.144 +{
   2.145 + 	int i;
   2.146 + 	struct node nodes[MAX_NUMNODES];
   2.147 + 	unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
   2.148 +
   2.149 + 	/* Kludge needed for the hash function */
   2.150 + 	if (hweight64(sz) > 1) {
   2.151 + 		unsigned long x = 1;
   2.152 + 		while ((x << 1) < sz)
   2.153 + 			x <<= 1;
   2.154 + 		if (x < sz/2)
   2.155 + 			printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
   2.156 + 		sz = x;
   2.157 + 	}
   2.158 +
   2.159 + 	memset(&nodes,0,sizeof(nodes));
   2.160 + 	for (i = 0; i < numa_fake; i++) {
   2.161 + 		nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
   2.162 + 		if (i == numa_fake-1)
   2.163 + 			sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
   2.164 + 		nodes[i].end = nodes[i].start + sz;
   2.165 + 		printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
   2.166 + 		       i,
   2.167 + 		       nodes[i].start, nodes[i].end,
   2.168 + 		       (nodes[i].end - nodes[i].start) >> 20);
   2.169 +		node_set_online(i);
   2.170 + 	}
   2.171 + 	memnode_shift = compute_hash_shift(nodes, numa_fake);
   2.172 + 	if (memnode_shift < 0) {
   2.173 + 		memnode_shift = 0;
   2.174 + 		printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
   2.175 + 		return -1;
   2.176 + 	}
   2.177 + 	for_each_online_node(i)
   2.178 + 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
   2.179 + 	numa_init_array();
   2.180 + 	return 0;
   2.181 +}
   2.182 +#endif
   2.183 +
   2.184 +void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
   2.185 +{ 
   2.186 +	int i;
   2.187 +
   2.188 +#ifdef CONFIG_ACPI_NUMA
   2.189 +	if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
   2.190 +					  end_pfn << PAGE_SHIFT))
   2.191 + 		return;
   2.192 +#endif
   2.193 +
   2.194 +#ifdef CONFIG_NUMA_EMU
   2.195 +   /* fake a numa node for non-numa hardware */
   2.196 +	if (numa_fake && !numa_emulation(start_pfn, end_pfn))
   2.197 + 		return;
   2.198 +#endif
   2.199 +
   2.200 +	printk(KERN_INFO "%s\n",
   2.201 +	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
   2.202 +
   2.203 +	printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 
   2.204 +	       start_pfn << PAGE_SHIFT,
   2.205 +	       end_pfn << PAGE_SHIFT); 
   2.206 +		/* setup dummy node covering all memory */ 
   2.207 +	memnode_shift = 63; 
   2.208 +	memnodemap[0] = 0;
   2.209 +	nodes_clear(node_online_map);
   2.210 +	node_set_online(0);
   2.211 +	for (i = 0; i < NR_CPUS; i++)
   2.212 +		numa_set_node(i, 0);
   2.213 +	node_to_cpumask[0] = cpumask_of_cpu(0);
   2.214 +	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
   2.215 +}
   2.216 +
   2.217 +__cpuinit void numa_add_cpu(int cpu)
   2.218 +{
   2.219 +	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
   2.220 +} 
   2.221 +
   2.222 +void __cpuinit numa_set_node(int cpu, int node)
   2.223 +{
   2.224 +	cpu_to_node[cpu] = node;
   2.225 +}
   2.226 +
   2.227 +/* [numa=off] */
   2.228 +__init int numa_setup(char *opt) 
   2.229 +{ 
   2.230 +	if (!strncmp(opt,"off",3))
   2.231 +		numa_off = 1;
   2.232 +#ifdef CONFIG_NUMA_EMU
   2.233 +	if(!strncmp(opt, "fake=", 5)) {
   2.234 +		numa_fake = simple_strtoul(opt+5,NULL,0); ;
   2.235 +		if (numa_fake >= MAX_NUMNODES)
   2.236 +			numa_fake = MAX_NUMNODES;
   2.237 +	}
   2.238 +#endif
   2.239 +#ifdef CONFIG_ACPI_NUMA
   2.240 + 	if (!strncmp(opt,"noacpi",6))
   2.241 + 		acpi_numa = -1;
   2.242 +#endif
   2.243 +	return 1;
   2.244 +} 
   2.245 +
   2.246 +/*
   2.247 + * Setup early cpu_to_node.
   2.248 + *
   2.249 + * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
   2.250 + * and apicid_to_node[] tables have valid entries for a CPU.
   2.251 + * This means we skip cpu_to_node[] initialisation for NUMA
   2.252 + * emulation and faking node case (when running a kernel compiled
   2.253 + * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
   2.254 + * is already initialized in a round robin manner at numa_init_array,
   2.255 + * prior to this call, and this initialization is good enough
   2.256 + * for the fake NUMA cases.
   2.257 + */
   2.258 +void __init init_cpu_to_node(void)
   2.259 +{
   2.260 +	int i;
   2.261 + 	for (i = 0; i < NR_CPUS; i++) {
   2.262 +		u8 apicid = x86_cpu_to_apicid[i];
   2.263 +		if (apicid == BAD_APICID)
   2.264 +			continue;
   2.265 +		if (apicid_to_node[apicid] == NUMA_NO_NODE)
   2.266 +			continue;
   2.267 +		numa_set_node(i,apicid_to_node[apicid]);
   2.268 +	}
   2.269 +}
   2.270 +
   2.271 +EXPORT_SYMBOL(cpu_to_node);
   2.272 +EXPORT_SYMBOL(node_to_cpumask);
   2.273 +EXPORT_SYMBOL(memnode_shift);
   2.274 +EXPORT_SYMBOL(memnodemap);
   2.275 +EXPORT_SYMBOL(node_data);
   2.276 +
   2.277 +static void dump_numa(unsigned char key)
   2.278 +{
   2.279 +	s_time_t now = NOW();
   2.280 +	int i;
   2.281 +
   2.282 +	printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
   2.283 +		  (u32)(now>>32), (u32)now);
   2.284 +
   2.285 +	for_each_online_node(i) {
   2.286 +		unsigned long pa = (NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT;
   2.287 +		printk("idx%d -> NODE%d start->%lu size->%lu\n",
   2.288 +			  i, NODE_DATA(i)->node_id,
   2.289 +			  NODE_DATA(i)->node_start_pfn,
   2.290 +			  NODE_DATA(i)->node_spanned_pages);
   2.291 +		/* sanity check phys_to_nid() */
   2.292 +		printk("phys_to_nid(%lx) -> %d should be %d\n", pa, phys_to_nid(pa),
   2.293 +			  NODE_DATA(i)->node_id);
   2.294 +	}
   2.295 +	for_each_online_cpu(i)
   2.296 +		printk("CPU%d -> NODE%d\n", i, cpu_to_node[i]);
   2.297 +}
   2.298 +
   2.299 +static __init int register_numa_trigger(void)
   2.300 +{
   2.301 +	register_keyhandler('u', dump_numa, "dump numa info");
   2.302 +	return 0;
   2.303 +}
   2.304 +__initcall(register_numa_trigger);
   2.305 +
     3.1 --- a/xen/arch/x86/setup.c	Wed Oct 25 11:51:23 2006 +0100
     3.2 +++ b/xen/arch/x86/setup.c	Wed Oct 25 12:25:54 2006 +0100
     3.3 @@ -16,6 +16,7 @@
     3.4  #include <xen/percpu.h>
     3.5  #include <xen/hypercall.h>
     3.6  #include <xen/keyhandler.h>
     3.7 +#include <xen/numa.h>
     3.8  #include <public/version.h>
     3.9  #include <asm/bitops.h>
    3.10  #include <asm/smp.h>
    3.11 @@ -25,10 +26,12 @@
    3.12  #include <asm/desc.h>
    3.13  #include <asm/shadow.h>
    3.14  #include <asm/e820.h>
    3.15 +#include <asm/numa.h>
    3.16  #include <acm/acm_hooks.h>
    3.17  
    3.18  extern void dmi_scan_machine(void);
    3.19  extern void generic_apic_probe(void);
    3.20 +extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
    3.21  
    3.22  /*
    3.23   * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
    3.24 @@ -60,6 +63,9 @@ boolean_param("watchdog", opt_watchdog);
    3.25  static void parse_acpi_param(char *s);
    3.26  custom_param("acpi", parse_acpi_param);
    3.27  
    3.28 +extern int numa_setup(char *s);
    3.29 +custom_param("numa", numa_setup);
    3.30 +
    3.31  /* **** Linux config option: propagated to domain0. */
    3.32  /* acpi_skip_timer_override: Skip IRQ0 overrides. */
    3.33  extern int acpi_skip_timer_override;
    3.34 @@ -257,6 +263,20 @@ static void __init init_idle_domain(void
    3.35      setup_idle_pagetable();
    3.36  }
    3.37  
    3.38 +static void srat_detect_node(int cpu)
    3.39 +{
    3.40 +   unsigned node;
    3.41 +   u8 apicid = x86_cpu_to_apicid[cpu];
    3.42 +
    3.43 +   node = apicid_to_node[apicid];
    3.44 +   if (node == NUMA_NO_NODE)
    3.45 +      node = 0;
    3.46 +   numa_set_node(cpu, node);
    3.47 +
    3.48 +   if (acpi_numa > 0)
    3.49 +      printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
    3.50 +}
    3.51 +
    3.52  void __init __start_xen(multiboot_info_t *mbi)
    3.53  {
    3.54      char __cmdline[] = "", *cmdline = __cmdline;
    3.55 @@ -485,6 +505,12 @@ void __init __start_xen(multiboot_info_t
    3.56  
    3.57      init_frametable();
    3.58  
    3.59 +    acpi_boot_table_init();
    3.60 +
    3.61 +    acpi_numa_init();
    3.62 +
    3.63 +    numa_initmem_init(0, max_page);
    3.64 +
    3.65      end_boot_allocator();
    3.66  
    3.67      /* Initialise the Xen heap, skipping RAM holes. */
    3.68 @@ -536,9 +562,10 @@ void __init __start_xen(multiboot_info_t
    3.69  
    3.70      generic_apic_probe();
    3.71  
    3.72 -    acpi_boot_table_init();
    3.73      acpi_boot_init();
    3.74  
    3.75 +    init_cpu_to_node();
    3.76 +
    3.77      if ( smp_found_config )
    3.78          get_smp_config();
    3.79  
    3.80 @@ -589,6 +616,11 @@ void __init __start_xen(multiboot_info_t
    3.81              break;
    3.82          if ( !cpu_online(i) )
    3.83              __cpu_up(i);
    3.84 +
    3.85 +		/* setup cpu_to_node[] */
    3.86 +        srat_detect_node(i);
    3.87 +		/* setup node_to_cpumask based on cpu_to_node[] */
    3.88 +        numa_add_cpu(i);        
    3.89      }
    3.90  
    3.91      printk("Brought up %ld CPUs\n", (long)num_online_cpus());
     4.1 --- a/xen/arch/x86/smpboot.c	Wed Oct 25 11:51:23 2006 +0100
     4.2 +++ b/xen/arch/x86/smpboot.c	Wed Oct 25 12:25:54 2006 +0100
     4.3 @@ -43,6 +43,8 @@
     4.4  #include <xen/delay.h>
     4.5  #include <xen/softirq.h>
     4.6  #include <xen/serial.h>
     4.7 +#include <xen/numa.h>
     4.8 +#include <asm/numa.h>
     4.9  #include <asm/current.h>
    4.10  #include <asm/mc146818rtc.h>
    4.11  #include <asm/desc.h>
    4.12 @@ -628,7 +630,7 @@ u8 cpu_2_logical_apicid[NR_CPUS] __read_
    4.13  static void map_cpu_to_logical_apicid(void)
    4.14  {
    4.15  	int cpu = smp_processor_id();
    4.16 -	int apicid = logical_smp_processor_id();
    4.17 +	int apicid = hard_smp_processor_id();
    4.18  
    4.19  	cpu_2_logical_apicid[cpu] = apicid;
    4.20  	map_cpu_to_node(cpu, apicid_to_node(apicid));
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/xen/arch/x86/srat.c	Wed Oct 25 12:25:54 2006 +0100
     5.3 @@ -0,0 +1,325 @@
     5.4 +/*
     5.5 + * ACPI 3.0 based NUMA setup
     5.6 + * Copyright 2004 Andi Kleen, SuSE Labs.
     5.7 + *
     5.8 + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
     5.9 + *
    5.10 + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
    5.11 + * Assumes all memory regions belonging to a single proximity domain
    5.12 + * are in one chunk. Holes between them will be included in the node.
    5.13 + * 
    5.14 + * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
    5.15 + */
    5.16 +
    5.17 +#if 0
    5.18 +#include <linux/kernel.h>
    5.19 +#include <linux/module.h>
    5.20 +#include <asm/proto.h>
    5.21 +#include <xen/bitmap.h>
    5.22 +#include <xen/numa.h>
    5.23 +#include <xen/topology.h>
    5.24 +#include <asm/e820.h>
    5.25 +#endif
    5.26 +#include <xen/init.h>
    5.27 +#include <xen/mm.h>
    5.28 +#include <xen/inttypes.h>
    5.29 +#include <xen/nodemask.h>
    5.30 +#include <xen/acpi.h>
    5.31 +
    5.32 +#include <asm/numa.h>
    5.33 +#include <asm/page.h>
    5.34 +
    5.35 +static struct acpi_table_slit *acpi_slit;
    5.36 +
    5.37 +static nodemask_t nodes_parsed __initdata;
    5.38 +static nodemask_t nodes_found __initdata;
    5.39 +static struct node nodes[MAX_NUMNODES] __initdata;
    5.40 +static u8 pxm2node[256] = { [0 ... 255] = 0xff };
    5.41 +
    5.42 +/* Too small nodes confuse the VM badly. Usually they result
    5.43 +   from BIOS bugs. */
    5.44 +#define NODE_MIN_SIZE (4*1024*1024)
    5.45 +
    5.46 +static int node_to_pxm(int n);
    5.47 +
    5.48 +int pxm_to_node(int pxm)
    5.49 +{
    5.50 +	if ((unsigned)pxm >= 256)
    5.51 +		return -1;
    5.52 +	/* Extend 0xff to (int)-1 */
    5.53 +	return (signed char)pxm2node[pxm];
    5.54 +}
    5.55 +
    5.56 +static __init int setup_node(int pxm)
    5.57 +{
    5.58 +	unsigned node = pxm2node[pxm];
    5.59 +	if (node == 0xff) {
    5.60 +		if (nodes_weight(nodes_found) >= MAX_NUMNODES)
    5.61 +			return -1;
    5.62 +		node = first_unset_node(nodes_found); 
    5.63 +		node_set(node, nodes_found);
    5.64 +		pxm2node[pxm] = node;
    5.65 +	}
    5.66 +	return pxm2node[pxm];
    5.67 +}
    5.68 +
    5.69 +static __init int conflicting_nodes(u64 start, u64 end)
    5.70 +{
    5.71 +	int i;
    5.72 +	for_each_node_mask(i, nodes_parsed) {
    5.73 +		struct node *nd = &nodes[i];
    5.74 +		if (nd->start == nd->end)
    5.75 +			continue;
    5.76 +		if (nd->end > start && nd->start < end)
    5.77 +			return i;
    5.78 +		if (nd->end == end && nd->start == start)
    5.79 +			return i;
    5.80 +	}
    5.81 +	return -1;
    5.82 +}
    5.83 +
    5.84 +static __init void cutoff_node(int i, u64 start, u64 end)
    5.85 +{
    5.86 +	struct node *nd = &nodes[i];
    5.87 +	if (nd->start < start) {
    5.88 +		nd->start = start;
    5.89 +		if (nd->end < nd->start)
    5.90 +			nd->start = nd->end;
    5.91 +	}
    5.92 +	if (nd->end > end) {
    5.93 +		nd->end = end;
    5.94 +		if (nd->start > nd->end)
    5.95 +			nd->start = nd->end;
    5.96 +	}
    5.97 +}
    5.98 +
    5.99 +static __init void bad_srat(void)
   5.100 +{
   5.101 +	int i;
   5.102 +	printk(KERN_ERR "SRAT: SRAT not used.\n");
   5.103 +	acpi_numa = -1;
   5.104 +	for (i = 0; i < MAX_LOCAL_APIC; i++)
   5.105 +		apicid_to_node[i] = NUMA_NO_NODE;
   5.106 +}
   5.107 +
   5.108 +static __init inline int srat_disabled(void)
   5.109 +{
   5.110 +	return numa_off || acpi_numa < 0;
   5.111 +}
   5.112 +
   5.113 +/*
   5.114 + * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
   5.115 + * up the NUMA heuristics which wants the local node to have a smaller
   5.116 + * distance than the others.
   5.117 + * Do some quick checks here and only use the SLIT if it passes.
   5.118 + */
   5.119 +static __init int slit_valid(struct acpi_table_slit *slit)
   5.120 +{
   5.121 +	int i, j;
   5.122 +	int d = slit->localities;
   5.123 +	for (i = 0; i < d; i++) {
   5.124 +		for (j = 0; j < d; j++)  {
   5.125 +			u8 val = slit->entry[d*i + j];
   5.126 +			if (i == j) {
   5.127 +				if (val != 10)
   5.128 +					return 0;
   5.129 +			} else if (val <= 10)
   5.130 +				return 0;
   5.131 +		}
   5.132 +	}
   5.133 +	return 1;
   5.134 +}
   5.135 +
   5.136 +/* Callback for SLIT parsing */
   5.137 +void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
   5.138 +{
   5.139 +	if (!slit_valid(slit)) {
   5.140 +		printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
   5.141 +		return;
   5.142 +	}
   5.143 +	acpi_slit = slit;
   5.144 +}
   5.145 +
   5.146 +/* Callback for Proximity Domain -> LAPIC mapping */
   5.147 +void __init
   5.148 +acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
   5.149 +{
   5.150 +	int pxm, node;
   5.151 +	if (srat_disabled())
   5.152 +		return;
    5.153 +	if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) {
		bad_srat();
   5.154 +		return;
   5.155 +	}
   5.156 +	if (pa->flags.enabled == 0)
   5.157 +		return;
   5.158 +	pxm = pa->proximity_domain;
   5.159 +	node = setup_node(pxm);
   5.160 +	if (node < 0) {
   5.161 +		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
   5.162 +		bad_srat();
   5.163 +		return;
   5.164 +	}
   5.165 +	apicid_to_node[pa->apic_id] = node;
   5.166 +	acpi_numa = 1;
   5.167 +	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
   5.168 +	       pxm, pa->apic_id, node);
   5.169 +}
   5.170 +
   5.171 +/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
   5.172 +void __init
   5.173 +acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
   5.174 +{
   5.175 +	struct node *nd;
   5.176 +	u64 start, end;
   5.177 +	int node, pxm;
   5.178 +	int i;
   5.179 +
   5.180 +	if (srat_disabled())
   5.181 +		return;
   5.182 +	if (ma->header.length != sizeof(struct acpi_table_memory_affinity)) {
   5.183 +		bad_srat();
   5.184 +		return;
   5.185 +	}
   5.186 +	if (ma->flags.enabled == 0)
   5.187 +		return;
   5.188 +	start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
   5.189 +	end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
   5.190 +	pxm = ma->proximity_domain;
   5.191 +	node = setup_node(pxm);
   5.192 +	if (node < 0) {
   5.193 +		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
   5.194 +		bad_srat();
   5.195 +		return;
   5.196 +	}
   5.197 +	/* It is fine to add this area to the nodes data it will be used later*/
   5.198 +	if (ma->flags.hot_pluggable == 1)
   5.199 +		printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n",
   5.200 +				start, end);
   5.201 +	i = conflicting_nodes(start, end);
   5.202 +	if (i == node) {
   5.203 +		printk(KERN_WARNING
   5.204 +		"SRAT: Warning: PXM %d (%"PRIx64"-%"PRIx64") overlaps with itself (%"
   5.205 +		PRIx64"-%"PRIx64")\n", pxm, start, end, nodes[i].start, nodes[i].end);
   5.206 +	} else if (i >= 0) {
   5.207 +		printk(KERN_ERR
   5.208 +		       "SRAT: PXM %d (%"PRIx64"-%"PRIx64") overlaps with PXM %d (%"
   5.209 +		       PRIx64"-%"PRIx64")\n", pxm, start, end, node_to_pxm(i),
   5.210 +			   nodes[i].start, nodes[i].end);
   5.211 +		bad_srat();
   5.212 +		return;
   5.213 +	}
   5.214 +	nd = &nodes[node];
   5.215 +	if (!node_test_and_set(node, nodes_parsed)) {
   5.216 +		nd->start = start;
   5.217 +		nd->end = end;
   5.218 +	} else {
   5.219 +		if (start < nd->start)
   5.220 +			nd->start = start;
   5.221 +		if (nd->end < end)
   5.222 +			nd->end = end;
   5.223 +	}
   5.224 +	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm,
   5.225 +	       nd->start, nd->end);
   5.226 +}
   5.227 +
   5.228 +/* Sanity check to catch more bad SRATs (they are amazingly common).
   5.229 +   Make sure the PXMs cover all memory. */
   5.230 +static int nodes_cover_memory(void)
   5.231 +{
   5.232 +	int i;
   5.233 +	u64 pxmram, e820ram;
   5.234 +
   5.235 +	pxmram = 0;
   5.236 +	for_each_node_mask(i, nodes_parsed) {
   5.237 +		u64 s = nodes[i].start >> PAGE_SHIFT;
   5.238 +		u64 e = nodes[i].end >> PAGE_SHIFT;
   5.239 +		pxmram += e - s;
   5.240 +	}
   5.241 +
   5.242 +	e820ram = max_page;
   5.243 +	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
   5.244 +	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
   5.245 +		printk(KERN_ERR "SRAT: PXMs only cover %"PRIu64"MB of your %"
   5.246 +			PRIu64"MB e820 RAM. Not used.\n",
   5.247 +			(pxmram << PAGE_SHIFT) >> 20,
   5.248 +			(e820ram << PAGE_SHIFT) >> 20);
   5.249 +		return 0;
   5.250 +	}
   5.251 +	return 1;
   5.252 +}
   5.253 +
   5.254 +static void unparse_node(int node)
   5.255 +{
   5.256 +	int i;
   5.257 +	node_clear(node, nodes_parsed);
   5.258 +	for (i = 0; i < MAX_LOCAL_APIC; i++) {
   5.259 +		if (apicid_to_node[i] == node)
   5.260 +			apicid_to_node[i] = NUMA_NO_NODE;
   5.261 +	}
   5.262 +}
   5.263 +
   5.264 +void __init acpi_numa_arch_fixup(void) {}
   5.265 +
   5.266 +/* Use the information discovered above to actually set up the nodes. */
   5.267 +int __init acpi_scan_nodes(u64 start, u64 end)
   5.268 +{
   5.269 +	int i;
   5.270 +
   5.271 +	/* First clean up the node list */
   5.272 +	for (i = 0; i < MAX_NUMNODES; i++) {
   5.273 +		cutoff_node(i, start, end);
   5.274 +		if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
   5.275 +			unparse_node(i);
   5.276 +	}
   5.277 +
   5.278 +	if (acpi_numa <= 0)
   5.279 +		return -1;
   5.280 +
   5.281 +	if (!nodes_cover_memory()) {
   5.282 +		bad_srat();
   5.283 +		return -1;
   5.284 +	}
   5.285 +
   5.286 +	memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
   5.287 +	if (memnode_shift < 0) {
   5.288 +		printk(KERN_ERR
   5.289 +		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
   5.290 +		bad_srat();
   5.291 +		return -1;
   5.292 +	}
   5.293 +
   5.294 +	/* Finally register nodes */
   5.295 +	for_each_node_mask(i, nodes_parsed)
   5.296 +		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
   5.297 +	for (i = 0; i < NR_CPUS; i++) { 
   5.298 +		if (cpu_to_node[i] == NUMA_NO_NODE)
   5.299 +			continue;
   5.300 +		if (!node_isset(cpu_to_node[i], nodes_parsed))
   5.301 +			numa_set_node(i, NUMA_NO_NODE);
   5.302 +	}
   5.303 +	numa_init_array();
   5.304 +	return 0;
   5.305 +}
   5.306 +
   5.307 +static int node_to_pxm(int n)
   5.308 +{
   5.309 +       int i;
   5.310 +       if (pxm2node[n] == n)
   5.311 +               return n;
   5.312 +       for (i = 0; i < 256; i++)
   5.313 +               if (pxm2node[i] == n)
   5.314 +                       return i;
   5.315 +       return 0;
   5.316 +}
   5.317 +
   5.318 +int __node_distance(int a, int b)
   5.319 +{
   5.320 +	int index;
   5.321 +
   5.322 +	if (!acpi_slit)
   5.323 +		return a == b ? 10 : 20;
   5.324 +	index = acpi_slit->localities * node_to_pxm(a);
   5.325 +	return acpi_slit->entry[index + node_to_pxm(b)];
   5.326 +}
   5.327 +
   5.328 +EXPORT_SYMBOL(__node_distance);
     6.1 --- a/xen/drivers/acpi/Makefile	Wed Oct 25 11:51:23 2006 +0100
     6.2 +++ b/xen/drivers/acpi/Makefile	Wed Oct 25 12:25:54 2006 +0100
     6.3 @@ -1,1 +1,2 @@
     6.4  obj-y += tables.o
     6.5 +obj-y += numa.o
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xen/drivers/acpi/numa.c	Wed Oct 25 12:25:54 2006 +0100
     7.3 @@ -0,0 +1,216 @@
     7.4 +/*
     7.5 + *  acpi_numa.c - ACPI NUMA support
     7.6 + *
     7.7 + *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
     7.8 + *
     7.9 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    7.10 + *
    7.11 + *  This program is free software; you can redistribute it and/or modify
    7.12 + *  it under the terms of the GNU General Public License as published by
    7.13 + *  the Free Software Foundation; either version 2 of the License, or
    7.14 + *  (at your option) any later version.
    7.15 + *
    7.16 + *  This program is distributed in the hope that it will be useful,
    7.17 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
    7.18 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    7.19 + *  GNU General Public License for more details.
    7.20 + *
    7.21 + *  You should have received a copy of the GNU General Public License
    7.22 + *  along with this program; if not, write to the Free Software
    7.23 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    7.24 + *
    7.25 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    7.26 + *
    7.27 + */
    7.28 +#if 0
    7.29 +#include <linux/module.h>
    7.30 +#include <linux/kernel.h>
    7.31 +#endif
    7.32 +#include <xen/config.h>
    7.33 +#include <xen/init.h>
    7.34 +#include <xen/types.h>
    7.35 +#include <xen/errno.h>
    7.36 +#include <xen/acpi.h>
    7.37 +#include <xen/numa.h>
    7.38 +#include <acpi/acpi_bus.h>
    7.39 +#include <acpi/acmacros.h>
    7.40 +#include <asm/page.h> /* __va() */
    7.41 +
    7.42 +#define ACPI_NUMA	0x80000000
    7.43 +#define _COMPONENT	ACPI_NUMA
    7.44 +ACPI_MODULE_NAME("numa")
    7.45 +
    7.46 +extern int __init acpi_table_parse_madt_family(enum acpi_table_id id,
    7.47 +					       unsigned long madt_size,
    7.48 +					       int entry_id,
    7.49 +					       acpi_madt_entry_handler handler,
    7.50 +					       unsigned int max_entries);
    7.51 +
    7.52 +void __init acpi_table_print_srat_entry(acpi_table_entry_header * header)
    7.53 +{
    7.54 +
    7.55 +	ACPI_FUNCTION_NAME("acpi_table_print_srat_entry");
    7.56 +
    7.57 +	if (!header)
    7.58 +		return;
    7.59 +
    7.60 +	switch (header->type) {
    7.61 +
    7.62 +	case ACPI_SRAT_PROCESSOR_AFFINITY:
    7.63 +#ifdef ACPI_DEBUG_OUTPUT
    7.64 +		{
    7.65 +			struct acpi_table_processor_affinity *p =
    7.66 +			    (struct acpi_table_processor_affinity *)header;
    7.67 +			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
    7.68 +					  "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
    7.69 +					  p->apic_id, p->lsapic_eid,
    7.70 +					  p->proximity_domain,
    7.71 +					  p->flags.
    7.72 +					  enabled ? "enabled" : "disabled"));
    7.73 +		}
    7.74 +#endif				/* ACPI_DEBUG_OUTPUT */
    7.75 +		break;
    7.76 +
    7.77 +	case ACPI_SRAT_MEMORY_AFFINITY:
    7.78 +#ifdef ACPI_DEBUG_OUTPUT
    7.79 +		{
    7.80 +			struct acpi_table_memory_affinity *p =
    7.81 +			    (struct acpi_table_memory_affinity *)header;
    7.82 +			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
    7.83 +					  "SRAT Memory (0x%08x%08x length 0x%08x%08x type 0x%x) in proximity domain %d %s%s\n",
    7.84 +					  p->base_addr_hi, p->base_addr_lo,
    7.85 +					  p->length_hi, p->length_lo,
    7.86 +					  p->memory_type, p->proximity_domain,
    7.87 +					  p->flags.
    7.88 +					  enabled ? "enabled" : "disabled",
    7.89 +					  p->flags.
    7.90 +					  hot_pluggable ? " hot-pluggable" :
    7.91 +					  ""));
    7.92 +		}
    7.93 +#endif				/* ACPI_DEBUG_OUTPUT */
    7.94 +		break;
    7.95 +
    7.96 +	default:
    7.97 +		printk(KERN_WARNING PREFIX
    7.98 +		       "Found unsupported SRAT entry (type = 0x%x)\n",
    7.99 +		       header->type);
   7.100 +		break;
   7.101 +	}
   7.102 +}
   7.103 +
   7.104 +static int __init acpi_parse_slit(unsigned long phys_addr, unsigned long size)
   7.105 +{
   7.106 +	struct acpi_table_slit *slit;
   7.107 +	u32 localities;
   7.108 +
   7.109 +	if (!phys_addr || !size)
   7.110 +		return -EINVAL;
   7.111 +
   7.112 +	slit = (struct acpi_table_slit *)__va(phys_addr);
   7.113 +
   7.114 +	/* downcast just for %llu vs %lu for i386/ia64  */
   7.115 +	localities = (u32) slit->localities;
   7.116 +
   7.117 +	acpi_numa_slit_init(slit);
   7.118 +
   7.119 +	return 0;
   7.120 +}
   7.121 +
   7.122 +static int __init
   7.123 +acpi_parse_processor_affinity(acpi_table_entry_header * header,
   7.124 +			      const unsigned long end)
   7.125 +{
   7.126 +	struct acpi_table_processor_affinity *processor_affinity;
   7.127 +
   7.128 +	processor_affinity = (struct acpi_table_processor_affinity *)header;
   7.129 +	if (!processor_affinity)
   7.130 +		return -EINVAL;
   7.131 +
   7.132 +	acpi_table_print_srat_entry(header);
   7.133 +
    7.134 +	/* let the architecture-dependent part handle it */
   7.135 +	acpi_numa_processor_affinity_init(processor_affinity);
   7.136 +
   7.137 +	return 0;
   7.138 +}
   7.139 +
   7.140 +static int __init
   7.141 +acpi_parse_memory_affinity(acpi_table_entry_header * header,
   7.142 +			   const unsigned long end)
   7.143 +{
   7.144 +	struct acpi_table_memory_affinity *memory_affinity;
   7.145 +
   7.146 +	memory_affinity = (struct acpi_table_memory_affinity *)header;
   7.147 +	if (!memory_affinity)
   7.148 +		return -EINVAL;
   7.149 +
   7.150 +	acpi_table_print_srat_entry(header);
   7.151 +
    7.152 +	/* let the architecture-dependent part handle it */
   7.153 +	acpi_numa_memory_affinity_init(memory_affinity);
   7.154 +
   7.155 +	return 0;
   7.156 +}
   7.157 +
   7.158 +static int __init acpi_parse_srat(unsigned long phys_addr, unsigned long size)
   7.159 +{
   7.160 +	struct acpi_table_srat *srat;
   7.161 +
   7.162 +	if (!phys_addr || !size)
   7.163 +		return -EINVAL;
   7.164 +
   7.165 +	srat = (struct acpi_table_srat *)__va(phys_addr);
   7.166 +
   7.167 +	return 0;
   7.168 +}
   7.169 +
   7.170 +int __init
   7.171 +acpi_table_parse_srat(enum acpi_srat_entry_id id,
   7.172 +		      acpi_madt_entry_handler handler, unsigned int max_entries)
   7.173 +{
   7.174 +	return acpi_table_parse_madt_family(ACPI_SRAT,
   7.175 +					    sizeof(struct acpi_table_srat), id,
   7.176 +					    handler, max_entries);
   7.177 +}
   7.178 +
   7.179 +int __init acpi_numa_init(void)
   7.180 +{
   7.181 +	int result;
   7.182 +
    7.183 +	/* SRAT: System Resource Affinity Table */
   7.184 +	result = acpi_table_parse(ACPI_SRAT, acpi_parse_srat);
   7.185 +
   7.186 +	if (result > 0) {
   7.187 +		result = acpi_table_parse_srat(ACPI_SRAT_PROCESSOR_AFFINITY,
   7.188 +					       acpi_parse_processor_affinity,
   7.189 +					       NR_CPUS);
   7.190 +		result = acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS);	// IA64 specific
   7.191 +	}
   7.192 +
   7.193 +	/* SLIT: System Locality Information Table */
   7.194 +	result = acpi_table_parse(ACPI_SLIT, acpi_parse_slit);
   7.195 +
   7.196 +	acpi_numa_arch_fixup();
   7.197 +	return 0;
   7.198 +}
   7.199 +
   7.200 +#if 0
   7.201 +int acpi_get_pxm(acpi_handle h)
   7.202 +{
   7.203 +	unsigned long pxm;
   7.204 +	acpi_status status;
   7.205 +	acpi_handle handle;
   7.206 +	acpi_handle phandle = h;
   7.207 +
   7.208 +	do {
   7.209 +		handle = phandle;
   7.210 +		status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
   7.211 +		if (ACPI_SUCCESS(status))
   7.212 +			return (int)pxm;
   7.213 +		status = acpi_get_parent(handle, &phandle);
   7.214 +	} while (ACPI_SUCCESS(status));
   7.215 +	return -1;
   7.216 +}
   7.217 +
   7.218 +EXPORT_SYMBOL(acpi_get_pxm);
   7.219 +#endif
     8.1 --- a/xen/include/asm-x86/acpi.h	Wed Oct 25 11:51:23 2006 +0100
     8.2 +++ b/xen/include/asm-x86/acpi.h	Wed Oct 25 12:25:54 2006 +0100
     8.3 @@ -157,6 +157,8 @@ static inline void check_acpi_pci(void) 
     8.4  
     8.5  static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
     8.6  static inline int acpi_irq_balance_set(char *str) { return 0; }
     8.7 +extern int acpi_scan_nodes(u64 start, u64 end);
     8.8 +extern int acpi_numa;
     8.9  
    8.10  #ifdef CONFIG_ACPI_SLEEP
    8.11  
    8.12 @@ -173,5 +175,6 @@ extern void acpi_reserve_bootmem(void);
    8.13  #endif /*CONFIG_ACPI_SLEEP*/
    8.14  
    8.15  extern u8 x86_acpiid_to_apicid[];
    8.16 +#define MAX_LOCAL_APIC 256
    8.17  
    8.18  #endif /*_ASM_ACPI_H*/
     9.1 --- a/xen/include/asm-x86/config.h	Wed Oct 25 11:51:23 2006 +0100
     9.2 +++ b/xen/include/asm-x86/config.h	Wed Oct 25 12:25:54 2006 +0100
     9.3 @@ -24,6 +24,11 @@
     9.4  #define CONFIG_X86_IO_APIC 1
     9.5  #define CONFIG_HPET_TIMER 1
     9.6  #define CONFIG_X86_MCE_P4THERMAL 1
     9.7 +#define CONFIG_ACPI_NUMA 1
     9.8 +#define CONFIG_NUMA 1
     9.9 +#define CONFIG_ACPI_SRAT 1
    9.10 +#define CONFIG_DISCONTIGMEM 1
    9.11 +#define CONFIG_NUMA_EMU 1
    9.12  
    9.13  /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
    9.14  #define CONFIG_X86_L1_CACHE_SHIFT 7
    10.1 --- a/xen/include/asm-x86/mach-generic/mach_apic.h	Wed Oct 25 11:51:23 2006 +0100
    10.2 +++ b/xen/include/asm-x86/mach-generic/mach_apic.h	Wed Oct 25 12:25:54 2006 +0100
    10.3 @@ -22,11 +22,7 @@ static inline void enable_apic_mode(void
    10.4  	return;
    10.5  }
    10.6  
    10.7 -/* No sane NUMA support right now. We should parse ACPI SRAT. */
    10.8 -static inline int apicid_to_node(int logical_apicid)
    10.9 -{
   10.10 -	return 0;
   10.11 -}
   10.12 +#define apicid_to_node(apicid) ((int)apicid_to_node[(u8)apicid])
   10.13  
   10.14  extern u8 bios_cpu_apicid[];
   10.15  static inline int cpu_present_to_apicid(int mps_cpu)
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xen/include/asm-x86/numa.h	Wed Oct 25 12:25:54 2006 +0100
    11.3 @@ -0,0 +1,65 @@
    11.4 +#ifndef _ASM_X8664_NUMA_H 
    11.5 +#define _ASM_X8664_NUMA_H 1
    11.6 +
    11.7 +#include <xen/nodemask.h>
    11.8 +#include <xen/topology.h>
    11.9 +#include <asm/numnodes.h>
   11.10 +#include <asm/smp.h>
   11.11 +
   11.12 +struct node { 
   11.13 +	u64 start,end; 
   11.14 +};
   11.15 +
   11.16 +extern int compute_hash_shift(struct node *nodes, int numnodes);
   11.17 +extern int pxm_to_node(int nid);
   11.18 +
   11.19 +#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
   11.20 +#define VIRTUAL_BUG_ON(x) 
   11.21 +#define NODEMAPSIZE 0xfff
   11.22 +
   11.23 +extern void numa_add_cpu(int cpu);
   11.24 +extern void numa_init_array(void);
   11.25 +extern int numa_off;
   11.26 +
   11.27 +extern void numa_set_node(int cpu, int node);
   11.28 +
   11.29 +extern void setup_node_bootmem(int nodeid, u64 start, u64 end);
   11.30 +extern unsigned char apicid_to_node[256];
   11.31 +#ifdef CONFIG_NUMA
   11.32 +extern void __init init_cpu_to_node(void);
   11.33 +
   11.34 +static inline void clear_node_cpumask(int cpu)
   11.35 +{
   11.36 +	clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
   11.37 +}
   11.38 +
   11.39 +/* Simple perfect hash to map physical addresses to node numbers */
   11.40 +extern int memnode_shift; 
   11.41 +extern u8  memnodemap[NODEMAPSIZE]; 
   11.42 +
   11.43 +extern struct node_data node_data[];
   11.44 +
   11.45 +static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) 
   11.46 +{ 
   11.47 +	unsigned nid; 
   11.48 +	VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);
   11.49 +	nid = memnodemap[addr >> memnode_shift]; 
   11.50 +	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 
   11.51 +	return nid; 
   11.52 +} 
   11.53 +
   11.54 +#define NODE_DATA(nid)		(&(node_data[nid]))
   11.55 +
   11.56 +#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
   11.57 +#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
   11.58 +				 NODE_DATA(nid)->node_spanned_pages)
   11.59 +
   11.60 +
   11.61 +#else
   11.62 +#define init_cpu_to_node() do {} while (0)
   11.63 +#define clear_node_cpumask(cpu) do {} while (0)
   11.64 +#endif
   11.65 +
   11.66 +#define NUMA_NO_NODE 0xff
   11.67 +
   11.68 +#endif
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/xen/include/asm-x86/numnodes.h	Wed Oct 25 12:25:54 2006 +0100
    12.3 @@ -0,0 +1,26 @@
    12.4 +#ifndef _ASM_MAX_NUMNODES_H
    12.5 +#define _ASM_MAX_NUMNODES_H
    12.6 +
    12.7 +#include <xen/config.h>
    12.8 +
    12.9 +#if defined(__i386__)
   12.10 +#ifdef CONFIG_X86_NUMAQ
   12.11 +
   12.12 +/* Max 16 Nodes */
   12.13 +#define NODES_SHIFT	4
   12.14 +
   12.15 +#elif defined(CONFIG_ACPI_SRAT)
   12.16 +
   12.17 +/* Max 8 Nodes */
   12.18 +#define NODES_SHIFT	3
   12.19 +
   12.20 +#endif /* CONFIG_X86_NUMAQ */
   12.21 +
   12.22 +
   12.23 +#endif /* __i386__ */
   12.24 +
   12.25 +#if defined(CONFIG_NUMA) && defined(__x86_64__)
   12.26 +#define NODES_SHIFT  6
   12.27 +#endif /* __x86_64__ */
   12.28 +
   12.29 +#endif /* _ASM_MAX_NUMNODES_H */
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xen/include/asm-x86/topology.h	Wed Oct 25 12:25:54 2006 +0100
    13.3 @@ -0,0 +1,40 @@
    13.4 +/*
    13.5 + * Copyright (C) 2006, IBM Corp.
    13.6 + *
    13.7 + * All rights reserved.          
    13.8 + *
    13.9 + * This program is free software; you can redistribute it and/or modify
   13.10 + * it under the terms of the GNU General Public License as published by
   13.11 + * the Free Software Foundation; either version 2 of the License, or
   13.12 + * (at your option) any later version.
   13.13 + *
   13.14 + * This program is distributed in the hope that it will be useful, but
   13.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of
   13.16 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
   13.17 + * NON INFRINGEMENT.  See the GNU General Public License for more
   13.18 + * details.
   13.19 + *
   13.20 + * You should have received a copy of the GNU General Public License
   13.21 + * along with this program; if not, write to the Free Software
   13.22 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   13.23 + *
   13.24 + * Ryan Harper <ryanh@us.ibm.com>
   13.25 + */
   13.26 +
   13.27 +#ifndef _ASM_X86_TOPOLOGY_H
   13.28 +#define _ASM_X86_TOPOLOGY_H
   13.29 +
   13.30 +#include <xen/config.h>
   13.31 +#include <xen/bitops.h>
   13.32 +
   13.33 +extern cpumask_t cpu_online_map;
   13.34 +
   13.35 +extern unsigned int cpu_to_node[];
   13.36 +extern cpumask_t     node_to_cpumask[];
   13.37 +
   13.38 +#define cpu_to_node(cpu)		(cpu_to_node[cpu])
   13.39 +#define parent_node(node)		(node)
   13.40 +#define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
   13.41 +#define node_to_cpumask(node)    (node_to_cpumask[node])
   13.42 +
   13.43 +#endif  /* _ASM_X86_TOPOLOGY_H */
    14.1 --- a/xen/include/xen/config.h	Wed Oct 25 11:51:23 2006 +0100
    14.2 +++ b/xen/include/xen/config.h	Wed Oct 25 12:25:54 2006 +0100
    14.3 @@ -50,5 +50,7 @@
    14.4  #endif /* !__ASSEMBLY__ */
    14.5  
    14.6  #define fastcall
    14.7 +#define __cpuinitdata
    14.8 +#define __cpuinit
    14.9  
   14.10  #endif /* __XEN_CONFIG_H__ */
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/xen/include/xen/nodemask.h	Wed Oct 25 12:25:54 2006 +0100
    15.3 @@ -0,0 +1,342 @@
    15.4 +#ifndef __LINUX_NODEMASK_H
    15.5 +#define __LINUX_NODEMASK_H
    15.6 +
    15.7 +/*
    15.8 + * Nodemasks provide a bitmap suitable for representing the
     15.9 + * set of Nodes in a system, one bit position per Node number.
   15.10 + *
   15.11 + * See detailed comments in the file linux/bitmap.h describing the
   15.12 + * data type on which these nodemasks are based.
   15.13 + *
   15.14 + * For details of nodemask_scnprintf() and nodemask_parse(),
   15.15 + * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
   15.16 + *
   15.17 + * The available nodemask operations are:
   15.18 + *
   15.19 + * void node_set(node, mask)		turn on bit 'node' in mask
   15.20 + * void node_clear(node, mask)		turn off bit 'node' in mask
   15.21 + * void nodes_setall(mask)		set all bits
   15.22 + * void nodes_clear(mask)		clear all bits
   15.23 + * int node_isset(node, mask)		true iff bit 'node' set in mask
   15.24 + * int node_test_and_set(node, mask)	test and set bit 'node' in mask
   15.25 + *
   15.26 + * void nodes_and(dst, src1, src2)	dst = src1 & src2  [intersection]
   15.27 + * void nodes_or(dst, src1, src2)	dst = src1 | src2  [union]
   15.28 + * void nodes_xor(dst, src1, src2)	dst = src1 ^ src2
   15.29 + * void nodes_andnot(dst, src1, src2)	dst = src1 & ~src2
   15.30 + * void nodes_complement(dst, src)	dst = ~src
   15.31 + *
   15.32 + * int nodes_equal(mask1, mask2)	Does mask1 == mask2?
   15.33 + * int nodes_intersects(mask1, mask2)	Do mask1 and mask2 intersect?
   15.34 + * int nodes_subset(mask1, mask2)	Is mask1 a subset of mask2?
    15.35 + * int nodes_empty(mask)		Is mask empty (no bits set)?
    15.36 + * int nodes_full(mask)			Is mask full (all bits set)?
   15.37 + * int nodes_weight(mask)		Hamming weight - number of set bits
   15.38 + *
   15.39 + * void nodes_shift_right(dst, src, n)	Shift right
   15.40 + * void nodes_shift_left(dst, src, n)	Shift left
   15.41 + *
   15.42 + * int first_node(mask)			Number lowest set bit, or MAX_NUMNODES
   15.43 + * int next_node(node, mask)		Next node past 'node', or MAX_NUMNODES
   15.44 + * int first_unset_node(mask)		First node not set in mask, or 
   15.45 + *					MAX_NUMNODES.
   15.46 + *
   15.47 + * nodemask_t nodemask_of_node(node)	Return nodemask with bit 'node' set
   15.48 + * NODE_MASK_ALL			Initializer - all bits set
   15.49 + * NODE_MASK_NONE			Initializer - no bits set
   15.50 + * unsigned long *nodes_addr(mask)	Array of unsigned long's in mask
   15.51 + *
   15.52 + * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing
   15.53 + * int nodemask_parse(ubuf, ulen, mask)	Parse ascii string as nodemask
   15.54 + *
   15.55 + * for_each_node_mask(node, mask)	for-loop node over mask
   15.56 + *
   15.57 + * int num_online_nodes()		Number of online Nodes
   15.58 + * int num_possible_nodes()		Number of all possible Nodes
   15.59 + *
   15.60 + * int node_online(node)		Is some node online?
   15.61 + * int node_possible(node)		Is some node possible?
   15.62 + *
   15.63 + * int any_online_node(mask)		First online node in mask
   15.64 + *
   15.65 + * node_set_online(node)		set bit 'node' in node_online_map
   15.66 + * node_set_offline(node)		clear bit 'node' in node_online_map
   15.67 + *
   15.68 + * for_each_node(node)			for-loop node over node_possible_map
   15.69 + * for_each_online_node(node)		for-loop node over node_online_map
   15.70 + *
   15.71 + * Subtlety:
   15.72 + * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway)
   15.73 + *    to generate slightly worse code.  So use a simple one-line #define
   15.74 + *    for node_isset(), instead of wrapping an inline inside a macro, the
   15.75 + *    way we do the other calls.
   15.76 + */
   15.77 +
   15.78 +#if 0
   15.79 +#include <linux/threads.h>
   15.80 +#include <asm/bug.h>
   15.81 +#endif
   15.82 +#include <xen/kernel.h>
   15.83 +#include <xen/bitmap.h>
   15.84 +#include <xen/numa.h>
   15.85 +
   15.86 +typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
   15.87 +extern nodemask_t _unused_nodemask_arg_;
   15.88 +
   15.89 +#define node_set(node, dst) __node_set((node), &(dst))
   15.90 +static inline void __node_set(int node, volatile nodemask_t *dstp)
   15.91 +{
   15.92 +	set_bit(node, dstp->bits);
   15.93 +}
   15.94 +
   15.95 +#define node_clear(node, dst) __node_clear((node), &(dst))
   15.96 +static inline void __node_clear(int node, volatile nodemask_t *dstp)
   15.97 +{
   15.98 +	clear_bit(node, dstp->bits);
   15.99 +}
  15.100 +
  15.101 +#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
  15.102 +static inline void __nodes_setall(nodemask_t *dstp, int nbits)
  15.103 +{
  15.104 +	bitmap_fill(dstp->bits, nbits);
  15.105 +}
  15.106 +
  15.107 +#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
  15.108 +static inline void __nodes_clear(nodemask_t *dstp, int nbits)
  15.109 +{
  15.110 +	bitmap_zero(dstp->bits, nbits);
  15.111 +}
  15.112 +
  15.113 +/* No static inline type checking - see Subtlety (1) above. */
  15.114 +#define node_isset(node, nodemask) test_bit((node), (nodemask).bits)
  15.115 +
  15.116 +#define node_test_and_set(node, nodemask) \
  15.117 +			__node_test_and_set((node), &(nodemask))
  15.118 +static inline int __node_test_and_set(int node, nodemask_t *addr)
  15.119 +{
  15.120 +	return test_and_set_bit(node, addr->bits);
  15.121 +}
  15.122 +
  15.123 +#define nodes_and(dst, src1, src2) \
  15.124 +			__nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES)
  15.125 +static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
  15.126 +					const nodemask_t *src2p, int nbits)
  15.127 +{
  15.128 +	bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
  15.129 +}
  15.130 +
  15.131 +#define nodes_or(dst, src1, src2) \
  15.132 +			__nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES)
  15.133 +static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
  15.134 +					const nodemask_t *src2p, int nbits)
  15.135 +{
  15.136 +	bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
  15.137 +}
  15.138 +
  15.139 +#define nodes_xor(dst, src1, src2) \
  15.140 +			__nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES)
  15.141 +static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
  15.142 +					const nodemask_t *src2p, int nbits)
  15.143 +{
  15.144 +	bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
  15.145 +}
  15.146 +
  15.147 +#define nodes_andnot(dst, src1, src2) \
  15.148 +			__nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES)
  15.149 +static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
  15.150 +					const nodemask_t *src2p, int nbits)
  15.151 +{
  15.152 +	bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
  15.153 +}
  15.154 +
  15.155 +#define nodes_complement(dst, src) \
  15.156 +			__nodes_complement(&(dst), &(src), MAX_NUMNODES)
  15.157 +static inline void __nodes_complement(nodemask_t *dstp,
  15.158 +					const nodemask_t *srcp, int nbits)
  15.159 +{
  15.160 +	bitmap_complement(dstp->bits, srcp->bits, nbits);
  15.161 +}
  15.162 +
  15.163 +#define nodes_equal(src1, src2) \
  15.164 +			__nodes_equal(&(src1), &(src2), MAX_NUMNODES)
  15.165 +static inline int __nodes_equal(const nodemask_t *src1p,
  15.166 +					const nodemask_t *src2p, int nbits)
  15.167 +{
  15.168 +	return bitmap_equal(src1p->bits, src2p->bits, nbits);
  15.169 +}
  15.170 +
  15.171 +#define nodes_intersects(src1, src2) \
  15.172 +			__nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
  15.173 +static inline int __nodes_intersects(const nodemask_t *src1p,
  15.174 +					const nodemask_t *src2p, int nbits)
  15.175 +{
  15.176 +	return bitmap_intersects(src1p->bits, src2p->bits, nbits);
  15.177 +}
  15.178 +
  15.179 +#define nodes_subset(src1, src2) \
  15.180 +			__nodes_subset(&(src1), &(src2), MAX_NUMNODES)
  15.181 +static inline int __nodes_subset(const nodemask_t *src1p,
  15.182 +					const nodemask_t *src2p, int nbits)
  15.183 +{
  15.184 +	return bitmap_subset(src1p->bits, src2p->bits, nbits);
  15.185 +}
  15.186 +
  15.187 +#define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
  15.188 +static inline int __nodes_empty(const nodemask_t *srcp, int nbits)
  15.189 +{
  15.190 +	return bitmap_empty(srcp->bits, nbits);
  15.191 +}
  15.192 +
  15.193 +#define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
  15.194 +static inline int __nodes_full(const nodemask_t *srcp, int nbits)
  15.195 +{
  15.196 +	return bitmap_full(srcp->bits, nbits);
  15.197 +}
  15.198 +
  15.199 +#define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES)
  15.200 +static inline int __nodes_weight(const nodemask_t *srcp, int nbits)
  15.201 +{
  15.202 +	return bitmap_weight(srcp->bits, nbits);
  15.203 +}
  15.204 +
  15.205 +#define nodes_shift_right(dst, src, n) \
  15.206 +			__nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES)
  15.207 +static inline void __nodes_shift_right(nodemask_t *dstp,
  15.208 +					const nodemask_t *srcp, int n, int nbits)
  15.209 +{
  15.210 +	bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
  15.211 +}
  15.212 +
  15.213 +#define nodes_shift_left(dst, src, n) \
  15.214 +			__nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES)
  15.215 +static inline void __nodes_shift_left(nodemask_t *dstp,
  15.216 +					const nodemask_t *srcp, int n, int nbits)
  15.217 +{
  15.218 +	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
  15.219 +}
  15.220 +
  15.221 +/* FIXME: better would be to fix all architectures to never return
  15.222 +          > MAX_NUMNODES, then the silly min_ts could be dropped. */
  15.223 +
  15.224 +#define first_node(src) __first_node(&(src))
  15.225 +static inline int __first_node(const nodemask_t *srcp)
  15.226 +{
  15.227 +	return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES));
  15.228 +}
  15.229 +
  15.230 +#define next_node(n, src) __next_node((n), &(src))
  15.231 +static inline int __next_node(int n, const nodemask_t *srcp)
  15.232 +{
  15.233 +	return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
  15.234 +}
  15.235 +
  15.236 +#define nodemask_of_node(node)						\
  15.237 +({									\
  15.238 +	typeof(_unused_nodemask_arg_) m;				\
  15.239 +	if (sizeof(m) == sizeof(unsigned long)) {			\
  15.240 +		m.bits[0] = 1UL<<(node);				\
  15.241 +	} else {							\
  15.242 +		nodes_clear(m);						\
  15.243 +		node_set((node), m);					\
  15.244 +	}								\
  15.245 +	m;								\
  15.246 +})
  15.247 +
  15.248 +#define first_unset_node(mask) __first_unset_node(&(mask))
  15.249 +static inline int __first_unset_node(const nodemask_t *maskp)
  15.250 +{
  15.251 +	return min_t(int,MAX_NUMNODES,
  15.252 +			find_first_zero_bit(maskp->bits, MAX_NUMNODES));
  15.253 +}
  15.254 +
  15.255 +#define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
  15.256 +
  15.257 +#if MAX_NUMNODES <= BITS_PER_LONG
  15.258 +
  15.259 +#define NODE_MASK_ALL							\
  15.260 +((nodemask_t) { {							\
  15.261 +	[BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD		\
  15.262 +} })
  15.263 +
  15.264 +#else
  15.265 +
  15.266 +#define NODE_MASK_ALL							\
  15.267 +((nodemask_t) { {							\
  15.268 +	[0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL,			\
  15.269 +	[BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD		\
  15.270 +} })
  15.271 +
  15.272 +#endif
  15.273 +
  15.274 +#define NODE_MASK_NONE							\
  15.275 +((nodemask_t) { {							\
  15.276 +	[0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] =  0UL			\
  15.277 +} })
  15.278 +
  15.279 +#define nodes_addr(src) ((src).bits)
  15.280 +
  15.281 +#if 0
  15.282 +#define nodemask_scnprintf(buf, len, src) \
  15.283 +			__nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES)
  15.284 +static inline int __nodemask_scnprintf(char *buf, int len,
  15.285 +					const nodemask_t *srcp, int nbits)
  15.286 +{
  15.287 +	return bitmap_scnprintf(buf, len, srcp->bits, nbits);
  15.288 +}
  15.289 +
  15.290 +#define nodemask_parse(ubuf, ulen, dst) \
  15.291 +			__nodemask_parse((ubuf), (ulen), &(dst), MAX_NUMNODES)
  15.292 +static inline int __nodemask_parse(const char __user *buf, int len,
  15.293 +					nodemask_t *dstp, int nbits)
  15.294 +{
  15.295 +	return bitmap_parse(buf, len, dstp->bits, nbits);
  15.296 +}
  15.297 +#endif
  15.298 +
  15.299 +#if MAX_NUMNODES > 1
  15.300 +#define for_each_node_mask(node, mask)			\
  15.301 +	for ((node) = first_node(mask);			\
  15.302 +		(node) < MAX_NUMNODES;			\
  15.303 +		(node) = next_node((node), (mask)))
  15.304 +#else /* MAX_NUMNODES == 1 */
  15.305 +#define for_each_node_mask(node, mask)			\
  15.306 +	if (!nodes_empty(mask))				\
  15.307 +		for ((node) = 0; (node) < 1; (node)++)
  15.308 +#endif /* MAX_NUMNODES */
  15.309 +
  15.310 +/*
  15.311 + * The following particular system nodemasks and operations
  15.312 + * on them manage all possible and online nodes.
  15.313 + */
  15.314 +
  15.315 +extern nodemask_t node_online_map;
  15.316 +extern nodemask_t node_possible_map;
  15.317 +
  15.318 +#if MAX_NUMNODES > 1
  15.319 +#define num_online_nodes()	nodes_weight(node_online_map)
  15.320 +#define num_possible_nodes()	nodes_weight(node_possible_map)
  15.321 +#define node_online(node)	node_isset((node), node_online_map)
  15.322 +#define node_possible(node)	node_isset((node), node_possible_map)
  15.323 +#else
  15.324 +#define num_online_nodes()	1
  15.325 +#define num_possible_nodes()	1
  15.326 +#define node_online(node)	((node) == 0)
  15.327 +#define node_possible(node)	((node) == 0)
  15.328 +#endif
  15.329 +
  15.330 +#define any_online_node(mask)			\
  15.331 +({						\
  15.332 +	int node;				\
  15.333 +	for_each_node_mask(node, (mask))	\
  15.334 +		if (node_online(node))		\
  15.335 +			break;			\
  15.336 +	node;					\
  15.337 +})
  15.338 +
  15.339 +#define node_set_online(node)	   set_bit((node), node_online_map.bits)
  15.340 +#define node_set_offline(node)	   clear_bit((node), node_online_map.bits)
  15.341 +
  15.342 +#define for_each_node(node)	   for_each_node_mask((node), node_possible_map)
  15.343 +#define for_each_online_node(node) for_each_node_mask((node), node_online_map)
  15.344 +
  15.345 +#endif /* __LINUX_NODEMASK_H */
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xen/include/xen/numa.h	Wed Oct 25 12:25:54 2006 +0100
    16.3 @@ -0,0 +1,35 @@
    16.4 +#ifndef _XEN_NUMA_H
    16.5 +#define _XEN_NUMA_H
    16.6 +
    16.7 +#include <xen/config.h>
    16.8 +
    16.9 +#ifdef CONFIG_DISCONTIGMEM
   16.10 +#include <asm/numnodes.h>
   16.11 +#endif
   16.12 +
   16.13 +#ifndef NODES_SHIFT
   16.14 +#define NODES_SHIFT     0
   16.15 +#endif
   16.16 +
   16.17 +#define MAX_NUMNODES    (1 << NODES_SHIFT)
   16.18 +#define NUMA_NO_NODE    0xff
   16.19 +
   16.20 +#define MAX_PXM_DOMAINS    256   /* 1 byte and no promises about values */
   16.21 +#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
   16.22 +#define MAX_CHUNKS_PER_NODE   4
   16.23 +#define MAXCHUNKS    (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
   16.24 +
   16.25 +/* needed for drivers/acpi/numa.c */
   16.26 +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
   16.27 +
   16.28 +extern unsigned int cpu_to_node[];
   16.29 +#include <xen/cpumask.h>
   16.30 +extern cpumask_t node_to_cpumask[];
   16.31 +
   16.32 +typedef struct node_data {
   16.33 +    unsigned long node_start_pfn;
   16.34 +    unsigned long node_spanned_pages;
   16.35 +    unsigned int  node_id;
   16.36 +} node_data_t;
   16.37 +
   16.38 +#endif /* _XEN_NUMA_H */
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/xen/include/xen/topology.h	Wed Oct 25 12:25:54 2006 +0100
    17.3 @@ -0,0 +1,27 @@
    17.4 +/*
    17.5 + * Copyright (C) 2006, IBM Corp.
    17.6 + *
    17.7 + * All rights reserved.          
    17.8 + *
    17.9 + * This program is free software; you can redistribute it and/or modify
   17.10 + * it under the terms of the GNU General Public License as published by
   17.11 + * the Free Software Foundation; either version 2 of the License, or
   17.12 + * (at your option) any later version.
   17.13 + *
   17.14 + * This program is distributed in the hope that it will be useful, but
   17.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of
   17.16 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
   17.17 + * NON INFRINGEMENT.  See the GNU General Public License for more
   17.18 + * details.
   17.19 + *
   17.20 + * You should have received a copy of the GNU General Public License
   17.21 + * along with this program; if not, write to the Free Software
   17.22 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   17.23 + *
   17.24 + */
   17.25 +#ifndef _XEN_TOPOLOGY_H
   17.26 +#define _XEN_TOPOLOGY_H
   17.27 +
   17.28 +#include <asm/topology.h>
   17.29 +
   17.30 +#endif /* _XEN_TOPOLOGY_H */