x86/xen: introduce vNUMA support for PV guest
author Wei Liu <wei.liu2@citrix.com>
Tue, 4 Nov 2014 12:02:44 +0000 (12:02 +0000)
committer Wei Liu <wei.liu2@citrix.com>
Fri, 7 Nov 2014 11:13:33 +0000 (11:13 +0000)
Fetch the virtual NUMA topology provided by the hypervisor via the
XENMEM_get_vnuma_info memory op and feed it into the common x86 NUMA
code: register a memory block for every virtual memory range, map each
vcpu to its virtual node and import the node distance table.

The new xen_pv_numa_init() is only wired up for PV guests; HVM guests
still get their topology from ACPI. numa_off is now only forced for
dom0, so PV domUs can make use of vNUMA.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
arch/x86/include/asm/xen/vnuma.h [new file with mode: 0644]
arch/x86/mm/numa.c
arch/x86/xen/Makefile
arch/x86/xen/setup.c
arch/x86/xen/vnuma.c [new file with mode: 0644]

diff --git a/arch/x86/include/asm/xen/vnuma.h b/arch/x86/include/asm/xen/vnuma.h
new file mode 100644
index 0000000..14beb86
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef _ASM_X86_VNUMA_H
+#define _ASM_X86_VNUMA_H
+
+#ifdef CONFIG_XEN
+int xen_pv_numa_init(void);
+#endif
+
+#endif /* _ASM_X86_VNUMA_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1a883705a12a8a12410914be93b2ee65807cc423..51aa72f8ca7cb7cd319718b64a19d9ad26145025 100644
@@ -18,6 +18,9 @@
 #include <asm/acpi.h>
 #include <asm/amd_nb.h>
 
+#include <xen/xen.h>
+#include <asm/xen/vnuma.h>
+
 #include "numa_internal.h"
 
 int __initdata numa_off;
@@ -682,6 +685,13 @@ static int __init dummy_numa_init(void)
 void __init x86_numa_init(void)
 {
        if (!numa_off) {
+#ifdef CONFIG_XEN
+               /* This path is for Xen PV guests; HVM guests still
+                * go through ACPI.
+                */
+               if (xen_pv_domain() && !numa_init(xen_pv_numa_init))
+                       return;
+#endif
 #ifdef CONFIG_ACPI_NUMA
                if (!numa_init(x86_acpi_numa_init))
                        return;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 7322755f337af760db6086450591c584c4dcda77..c904329a8d414e51e06839df2f5f939d41f82e88 100644
@@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS)    += debugfs.o
 obj-$(CONFIG_XEN_DOM0)         += apic.o vga.o
 obj-$(CONFIG_SWIOTLB_XEN)      += pci-swiotlb-xen.o
 obj-$(CONFIG_XEN_EFI)          += efi.o
+obj-$(CONFIG_NUMA)             += vnuma.o
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 29834b3fd87f8eba807d5b969f09085f857ceeb9..6612c6a9f9eaf75e027c3e34fad689744ba4f42d 100644
@@ -20,6 +20,7 @@
 #include <asm/numa.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
+#include <asm/xen/vnuma.h>
 
 #include <xen/xen.h>
 #include <xen/page.h>
@@ -857,6 +858,8 @@ void __init xen_arch_setup(void)
        WARN_ON(xen_set_default_idle());
        fiddle_vdso();
 #ifdef CONFIG_NUMA
-       numa_off = 1;
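+       /* Only dom0 keeps NUMA disabled; PV guests may use vNUMA. */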
+       if (xen_initial_domain())
+               numa_off = 1;
 #endif
 }
diff --git a/arch/x86/xen/vnuma.c b/arch/x86/xen/vnuma.c
new file mode 100644
index 0000000..54de286
--- /dev/null
@@ -0,0 +1,114 @@
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+
+#include <xen/xen.h>
+#include <xen/interface/memory.h>
+
+#include <asm/numa.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/vnuma.h>
+
+int __init xen_pv_numa_init(void)
+{
+       int i, j, rc;
+       unsigned int nr_vcpu, nr_dist, nr_memr;
+       phys_addr_t phys_dist = 0, phys_vcpu = 0, phys_memr = 0;
+       unsigned int *virt_dist, *virt_vcpu;
+       struct vmemrange *virt_memr;
+
+       struct vnuma_topology_info numa_topo = {
+               .domid = DOMID_SELF,
+       };
+
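+       /*
+        * One page of scratch space each for the node distance matrix,
+        * the vcpu-to-vnode map and the memory ranges filled in by Xen.
+        */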
+       phys_dist = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+       phys_vcpu = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+       phys_memr = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+
+       if (!phys_dist || !phys_vcpu || !phys_memr) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       virt_dist = __va(phys_dist);
+       virt_vcpu = __va(phys_vcpu);
+       virt_memr = __va(phys_memr);
+
+       nr_dist   = PAGE_SIZE / sizeof(unsigned int);
+       nr_vcpu   = PAGE_SIZE / sizeof(unsigned int);
+       nr_memr   = PAGE_SIZE / sizeof(struct vmemrange);
+
+       /* Cap nr_vcpu at the number of vcpus this guest can bring up. */
+       nr_vcpu = min(nr_vcpu, setup_max_cpus);
+       nr_vcpu = min(nr_vcpu, num_possible_cpus());
+
+       /*
+        * The distance table holds nr_vnodes * nr_vnodes entries, so one
+        * page has room for at most sqrt(nr_dist) vnodes.
+        *
+        * On x86 that is sqrt(PAGE_SIZE / sizeof(unsigned int)) =
+        * sqrt(4096 / 4) = sqrt(1024) = 32.
+        */
+       numa_topo.nr_vnodes = 32;
+       numa_topo.nr_vcpus = nr_vcpu;
+       numa_topo.nr_vmemranges = nr_memr;
+
+       set_xen_guest_handle(numa_topo.vdistance.h, virt_dist);
+       set_xen_guest_handle(numa_topo.vcpu_to_vnode.h, virt_vcpu);
+       set_xen_guest_handle(numa_topo.vmemrange.h, virt_memr);
+
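+       /* Ask the hypervisor for this domain's vNUMA topology. */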
+       rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo);
+       if (rc < 0)
+               goto out;
+
+       for (i = 0; i < numa_topo.nr_vmemranges; i++) {
+               struct vmemrange *v = &virt_memr[i];
+
+               rc = numa_add_memblk(v->nid, v->start, v->end);
+               if (rc)
+                       goto out;
+               node_set(v->nid, numa_nodes_parsed);
+       }
+
+       setup_nr_node_ids();
+
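+       /* Xen reports back the actual number of vcpus, which must match. */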
+       BUG_ON(numa_topo.nr_vcpus != nr_vcpu);
+
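+       /* Wire each vcpu to its vnode: apicid, NUMA node and node cpumask. */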
+       for (i = 0; i < numa_topo.nr_vcpus; i++) {
+               unsigned int node = virt_vcpu[i];
+
+               set_apicid_to_node(i, node);
+               numa_set_node(i, node);
+               cpumask_set_cpu(i, node_to_cpumask_map[node]);
+       }
+
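+       /* Import the nr_vnodes x nr_vnodes node distance matrix. */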
+       for (i = 0; i < numa_topo.nr_vnodes; i++) {
+               for (j = 0; j < numa_topo.nr_vnodes; j++) {
+                       unsigned int idx = i * numa_topo.nr_vnodes + j;
+
+                       numa_set_distance(i, j, virt_dist[idx]);
+               }
+       }
+
+       rc = 0;
+
+out:
+       /* Free the scratch pages. */
+       if (phys_dist)
+               memblock_free(phys_dist, PAGE_SIZE);
+       if (phys_vcpu)
+               memblock_free(phys_vcpu, PAGE_SIZE);
+       if (phys_memr)
+               memblock_free(phys_memr, PAGE_SIZE);
+
+       return rc;
+}