From: Kevin O'Connor Date: Fri, 9 Oct 2009 02:09:02 +0000 (-0400) Subject: Add SRAT ACPI table support. X-Git-Tag: rel-0.5.0~71 X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=590e554a51d44daa2aa3850a0991db2c79675776;p=seabios.git Add SRAT ACPI table support. Take NUMA topology info from the QEMU firmware configuration interface (number of nodes, node for each (V)CPU and amount of memory) and build a SRAT table describing this topology for the guest OS. Handles more than 4 GB of RAM by including a hole for 32bit PCI memory mapping. Qemu pcbios commit 444f1226c11082d374b7e1361c6f5696e479642a Signed-off-by: Gleb Natapov --- diff --git a/src/acpi.c b/src/acpi.c index 9081cdc..37b5340 100644 --- a/src/acpi.c +++ b/src/acpi.c @@ -151,7 +151,7 @@ struct multiple_apic_table } PACKED; -/* Values for Type in APIC_HEADER_DEF */ +/* Values for Type in APIC sub-headers */ #define APIC_PROCESSOR 0 #define APIC_IO 1 @@ -167,7 +167,7 @@ struct multiple_apic_table /* * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) */ -#define APIC_HEADER_DEF /* Common APIC sub-structure header */\ +#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\ u8 type; \ u8 length; @@ -175,7 +175,7 @@ struct multiple_apic_table struct madt_processor_apic { - APIC_HEADER_DEF + ACPI_SUB_HEADER_DEF u8 processor_id; /* ACPI processor id */ u8 local_apic_id; /* Processor's local APIC id */ #if 0 @@ -188,7 +188,7 @@ struct madt_processor_apic struct madt_io_apic { - APIC_HEADER_DEF + ACPI_SUB_HEADER_DEF u8 io_apic_id; /* I/O APIC ID */ u8 reserved; /* Reserved - must be zero */ u32 address; /* APIC physical address */ @@ -200,7 +200,7 @@ struct madt_io_apic #define PCI_ISA_IRQ_MASK 0x0e20 struct madt_intsrcovr { - APIC_HEADER_DEF + ACPI_SUB_HEADER_DEF u8 bus; u8 source; u32 gsi; @@ -231,6 +231,43 @@ struct acpi_20_hpet { } PACKED; #define ACPI_HPET_ADDRESS 0xFED00000UL +/* + * SRAT (NUMA topology description) table + */ + +#define SRAT_PROCESSOR 0 +#define 
SRAT_MEMORY 1 + +struct system_resource_affinity_table +{ + ACPI_TABLE_HEADER_DEF + u32 reserved1; + u32 reserved2[2]; +} PACKED; + +struct srat_processor_affinity +{ + ACPI_SUB_HEADER_DEF + u8 proximity_lo; + u8 local_apic_id; + u32 flags; + u8 local_sapic_eid; + u8 proximity_hi[3]; + u32 reserved; +} PACKED; + +struct srat_memory_affinity +{ + ACPI_SUB_HEADER_DEF + u8 proximity[4]; + u16 reserved1; + u32 base_addr_low,base_addr_high; + u32 length_low,length_high; + u32 reserved2; + u32 flags; + u32 reserved3[2]; +} PACKED; + #include "acpi-dsdt.hex" static inline u16 cpu_to_le16(u16 x) @@ -448,6 +485,115 @@ build_hpet(void) return hpet; } +static void +acpi_build_srat_memory(struct srat_memory_affinity *numamem, + u64 base, u64 len, int node, int enabled) +{ + numamem->type = SRAT_MEMORY; + numamem->length = sizeof(*numamem); + memset (numamem->proximity, 0 ,4); + numamem->proximity[0] = node; + numamem->flags = cpu_to_le32(!!enabled); + numamem->base_addr_low = base & 0xFFFFFFFF; + numamem->base_addr_high = base >> 32; + numamem->length_low = len & 0xFFFFFFFF; + numamem->length_high = len >> 32; +} + +#define SRAT_SIGNATURE 0x54415253 //SRAT +static void * +build_srat(void) +{ + int nb_numa_nodes = qemu_cfg_get_numa_nodes(); + + if (nb_numa_nodes == 0) + return NULL; + + u64 *numadata = malloc_tmphigh(sizeof(u64) * (CountCPUs + nb_numa_nodes)); + if (!numadata) { + dprintf(1, "Not enough memory for read numa data from VM!\n"); + return NULL; + } + + qemu_cfg_get_numa_data(numadata, CountCPUs + nb_numa_nodes); + + struct system_resource_affinity_table *srat; + int srat_size = sizeof(*srat) + + sizeof(struct srat_processor_affinity) * CountCPUs + + sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2); + + srat = malloc_high(srat_size); + if (!srat) { + dprintf(1, "Not enough memory for srat table!\n"); + return NULL; + } + + memset(srat, 0, srat_size); + srat->reserved1=1; + struct srat_processor_affinity *core = (void*)(srat + 1); + int i; + u64 curnode; + + 
for (i = 0; i < CountCPUs; ++i) { + core->type = SRAT_PROCESSOR; + core->length = sizeof(*core); + core->local_apic_id = i; + curnode = *numadata++; + core->proximity_lo = curnode; + memset(core->proximity_hi, 0, 3); + core->local_sapic_eid = 0; + if (i < CountCPUs) + core->flags = cpu_to_le32(1); + else + core->flags = 0; + core++; + } + + + /* the memory map is a bit tricky, it contains at least one hole + * from 640k-1M and possibly another one from 3.5G-4G. + */ + struct srat_memory_affinity *numamem = (void*)core; + int slots = 0; + u64 mem_len, mem_base, next_base = 0; + + acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); + next_base = 1024 * 1024; + numamem++; + slots++; + for (i = 1; i < nb_numa_nodes + 1; ++i) { + mem_base = next_base; + mem_len = *numadata++; + if (i == 1) + mem_len -= 1024 * 1024; + next_base = mem_base + mem_len; + + /* Cut out the PCI hole */ + if (mem_base <= RamSize && next_base > RamSize) { + mem_len -= next_base - RamSize; + if (mem_len > 0) { + acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); + numamem++; + slots++; + } + mem_base = 1ULL << 32; + mem_len = next_base - RamSize; + next_base += (1ULL << 32) - RamSize; + } + acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); + numamem++; + slots++; + } + for (; slots < nb_numa_nodes + 2; slots++) { + acpi_build_srat_memory(numamem, 0, 0, 0, 0); + numamem++; + } + + build_header((void*)srat, SRAT_SIGNATURE, srat_size, 1); + + return srat; +} + struct rsdp_descriptor *RsdpAddr; #define MAX_ACPI_TABLES 20 @@ -487,6 +633,7 @@ acpi_bios_init(void) ACPI_INIT_TABLE(build_ssdt()); ACPI_INIT_TABLE(build_madt()); ACPI_INIT_TABLE(build_hpet()); + ACPI_INIT_TABLE(build_srat()); u16 i, external_tables = qemu_cfg_acpi_additional_tables(); diff --git a/src/paravirt.c b/src/paravirt.c index 8c08ce7..8fbeb9c 100644 --- a/src/paravirt.c +++ b/src/paravirt.c @@ -265,3 +265,19 @@ int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs, return 0; } +int 
qemu_cfg_get_numa_nodes(void) +{ + u64 cnt; + + qemu_cfg_read_entry(&cnt, QEMU_CFG_NUMA, sizeof(cnt)); + + return (int)cnt; +} + +void qemu_cfg_get_numa_data(u64 *data, int n) +{ + int i; + + for (i = 0; i < n; i++) + qemu_cfg_read((u8*)(data + i), sizeof(u64)); +} diff --git a/src/paravirt.h b/src/paravirt.h index 2b2f314..04a6907 100644 --- a/src/paravirt.h +++ b/src/paravirt.h @@ -49,5 +49,7 @@ u16 qemu_cfg_smbios_entries(void); size_t qemu_cfg_smbios_load_field(int type, size_t offset, void *addr); int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs, unsigned *max_struct_size, char *end); +int qemu_cfg_get_numa_nodes(void); +void qemu_cfg_get_numa_data(u64 *data, int n); #endif