#define ARCH_HAS_POWER_INIT 1
extern s8 acpi_numa;
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
extern struct acpi_sleep_info acpi_sinfo;
#define acpi_video_flags bootsym(video_flags)
ASSERT(((_p)->count_info & PGC_count_mask) != 0); \
ASSERT(page_get_owner(_p) == (_d))
-extern paddr_t mem_hotplug;
-
/******************************************************************************
* With shadow pagetables, the different kinds of address start
 * to get confusing.
extern int srat_rev;
extern nodeid_t pxm_to_node(unsigned int pxm);
+extern unsigned int numa_node_to_arch_nid(nodeid_t n);
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
#define arch_want_default_dmazone() (num_online_nodes() > 1)
-extern int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
-
void srat_parse_regions(paddr_t addr);
extern u8 __node_distance(nodeid_t a, nodeid_t b);
unsigned int arch_get_dma_bitsize(void);
l1_pgentry_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
l1_fixmap_x[L1_PAGETABLE_ENTRIES];
-paddr_t __read_mostly mem_hotplug;
-
/* Frame table size in pages. */
unsigned long max_page;
unsigned long total_pages;
return acpi_numa < 0;
}
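+
+/*
+ * Firmware NUMA information is unavailable if parsing was disabled
+ * (acpi_numa < 0) or no affinity data was found (acpi_numa == 0).
+ */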
+bool __init arch_numa_unavailable(void)
+{
+ return acpi_numa <= 0;
+}
+
/*
* Setup early cpu_to_node.
*
static struct acpi_table_slit *__read_mostly acpi_slit;
-static nodemask_t memory_nodes_parsed __initdata;
-static nodemask_t processor_nodes_parsed __initdata;
-static struct node nodes[MAX_NUMNODES] __initdata;
-
struct pxm2node {
unsigned pxm;
nodeid_t node;
static struct pxm2node __read_mostly pxm2node[MAX_NUMNODES] =
{ [0 ... MAX_NUMNODES - 1] = {.node = NUMA_NO_NODE} };
-static unsigned node_to_pxm(nodeid_t n);
-
-static int num_node_memblks;
-static struct node node_memblk_range[NR_NODE_MEMBLKS];
-static nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
-static __initdata DECLARE_BITMAP(memblk_hotplug, NR_NODE_MEMBLKS);
-
-enum conflicts {
- NO_CONFLICT,
- OVERLAP,
- INTERLEAVE,
-};
-
static inline bool node_found(unsigned idx, unsigned pxm)
{
return ((pxm2node[idx].pxm == pxm) &&
return node;
}
-int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node)
-{
- int i;
-
- for (i = 0; i < num_node_memblks; i++) {
- struct node *nd = &node_memblk_range[i];
-
- if (nd->start <= start && nd->end >= end &&
- memblk_nodeid[i] == node)
- return 1;
- }
-
- return 0;
-}
-
-static
-enum conflicts __init conflicting_memblks(nodeid_t nid, paddr_t start,
- paddr_t end, paddr_t nd_start,
- paddr_t nd_end, unsigned int *mblkid)
-{
- unsigned int i;
-
- /*
- * Scan all recorded nodes' memory blocks to check conflicts:
- * Overlap or interleave.
- */
- for (i = 0; i < num_node_memblks; i++) {
- struct node *nd = &node_memblk_range[i];
-
- *mblkid = i;
-
- /* Skip 0 bytes node memory block. */
- if (nd->start == nd->end)
- continue;
- /*
- * Use memblk range to check memblk overlaps, include the
- * self-overlap case. As nd's range is non-empty, the special
- * case "nd->end == end && nd->start == start" also can be covered.
- */
- if (nd->end > start && nd->start < end)
- return OVERLAP;
-
- /*
- * Use node memory range to check whether new range contains
- * memory from other nodes - interleave check. We just need
- * to check full contains situation. Because overlaps have
- * been checked above.
- */
- if (nid != memblk_nodeid[i] &&
- nd->start >= nd_start && nd->end <= nd_end)
- return INTERLEAVE;
- }
-
- return NO_CONFLICT;
-}
-
-static __init void cutoff_node(int i, paddr_t start, paddr_t end)
-{
- struct node *nd = &nodes[i];
- if (nd->start < start) {
- nd->start = start;
- if (nd->end < nd->start)
- nd->start = nd->end;
- }
- if (nd->end > end) {
- nd->end = end;
- if (nd->start > nd->end)
- nd->start = nd->end;
- }
-}
-
-static __init void bad_srat(void)
+void __init numa_fw_bad(void)
{
int i;
printk(KERN_ERR "SRAT: SRAT not used.\n");
if (numa_disabled())
return;
if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
- bad_srat();
+ numa_fw_bad();
return;
}
if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
pxm = pa->proximity_domain;
node = setup_node(pxm);
if (node == NUMA_NO_NODE) {
- bad_srat();
+ numa_fw_bad();
return;
}
apicid_to_node[pa->apic_id] = node;
- node_set(node, processor_nodes_parsed);
+ numa_set_processor_nodes_parsed(node);
acpi_numa = 1;
if (opt_acpi_verbose)
if (numa_disabled())
return;
if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
- bad_srat();
+ numa_fw_bad();
return;
}
if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
}
node = setup_node(pxm);
if (node == NUMA_NO_NODE) {
- bad_srat();
+ numa_fw_bad();
return;
}
apicid_to_node[pa->apic_id] = node;
- node_set(node, processor_nodes_parsed);
+ numa_set_processor_nodes_parsed(node);
acpi_numa = 1;
if (opt_acpi_verbose)
void __init
acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
{
- struct node *nd;
- paddr_t nd_start, nd_end;
- paddr_t start, end;
unsigned pxm;
nodeid_t node;
- unsigned int i;
- bool next = false;
if (numa_disabled())
return;
if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
- bad_srat();
+ numa_fw_bad();
return;
}
if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
return;
- start = ma->base_address;
- end = start + ma->length;
/* Supplement the heuristics in l1tf_calculations(). */
- l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(end, PAGE_SIZE));
+ l1tf_safe_maddr = max(l1tf_safe_maddr,
+ ROUNDUP(ma->base_address + ma->length,
+ PAGE_SIZE));
- if (num_node_memblks >= NR_NODE_MEMBLKS)
- {
+ if (!numa_memblks_available()) {
dprintk(XENLOG_WARNING,
- "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
- bad_srat();
+ "Too many numa entries, try bigger NR_NODE_MEMBLKS!\n");
+ numa_fw_bad();
return;
}
pxm &= 0xff;
node = setup_node(pxm);
if (node == NUMA_NO_NODE) {
- bad_srat();
+ numa_fw_bad();
return;
}
/*
- * For the node that already has some memory blocks, we will
- * expand the node memory range temporarily to check memory
- * interleaves with other nodes. We will not use this node
- * temp memory range to check overlaps, because it will mask
- * the overlaps in same node.
- *
- * Node with 0 bytes memory doesn't need this expandsion.
+ * In an extremely unlikely case, srat_parse_regions might not
+ * be called. So set the variable here just in case.
*/
- nd_start = start;
- nd_end = end;
- nd = &nodes[node];
- if (nd->start != nd->end) {
- if (nd_start > nd->start)
- nd_start = nd->start;
-
- if (nd_end < nd->end)
- nd_end = nd->end;
- }
-
- /* It is fine to add this area to the nodes data it will be used later*/
- switch (conflicting_memblks(node, start, end, nd_start, nd_end, &i)) {
- case OVERLAP:
- if (memblk_nodeid[i] == node) {
- bool mismatch = !(ma->flags &
- ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
- !test_bit(i, memblk_hotplug);
-
- printk("%sSRAT: PXM %u [%"PRIpaddr", %"PRIpaddr"] overlaps with itself [%"PRIpaddr", %"PRIpaddr"]\n",
- mismatch ? KERN_ERR : KERN_WARNING, pxm, start,
- end - 1, node_memblk_range[i].start,
- node_memblk_range[i].end - 1);
- if (mismatch) {
- bad_srat();
- return;
- }
- break;
- }
-
- printk(KERN_ERR
- "SRAT: PXM %u [%"PRIpaddr", %"PRIpaddr"] overlaps with PXM %u [%"PRIpaddr", %"PRIpaddr"]\n",
- pxm, start, end - 1, node_to_pxm(memblk_nodeid[i]),
- node_memblk_range[i].start,
- node_memblk_range[i].end - 1);
- bad_srat();
- return;
-
- case INTERLEAVE:
- printk(KERN_ERR
-       "SRAT: PXM %u: [%"PRIpaddr", %"PRIpaddr"] interleaves with PXM %u memblk [%"PRIpaddr", %"PRIpaddr"]\n",
- pxm, nd_start, nd_end - 1, node_to_pxm(memblk_nodeid[i]),
- node_memblk_range[i].start, node_memblk_range[i].end - 1);
- bad_srat();
- return;
-
- case NO_CONFLICT:
- break;
- }
-
- if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
- node_set(node, memory_nodes_parsed);
- nd->start = nd_start;
- nd->end = nd_end;
- }
-
- printk(KERN_INFO "SRAT: Node %u PXM %u [%"PRIpaddr", %"PRIpaddr"]%s\n",
- node, pxm, start, end - 1,
- ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
-
- /* Keep node_memblk_range[] sorted by address. */
- for (i = 0; i < num_node_memblks; ++i)
- if (node_memblk_range[i].start > start ||
- (node_memblk_range[i].start == start &&
- node_memblk_range[i].end > end))
- break;
-
- memmove(&node_memblk_range[i + 1], &node_memblk_range[i],
- (num_node_memblks - i) * sizeof(*node_memblk_range));
- node_memblk_range[i].start = start;
- node_memblk_range[i].end = end;
-
- memmove(&memblk_nodeid[i + 1], &memblk_nodeid[i],
- (num_node_memblks - i) * sizeof(*memblk_nodeid));
- memblk_nodeid[i] = node;
-
- if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
- next = true;
- if (end > mem_hotplug)
- mem_hotplug = end;
- }
- for (; i <= num_node_memblks; ++i) {
- bool prev = next;
-
- next = test_bit(i, memblk_hotplug);
- if (prev)
- __set_bit(i, memblk_hotplug);
- else
- __clear_bit(i, memblk_hotplug);
- }
-
- num_node_memblks++;
-}
-
-/* Sanity check to catch more bad SRATs (they are amazingly common).
- Make sure the PXMs cover all memory. */
-static int __init nodes_cover_memory(void)
-{
- unsigned int i;
-
- for (i = 0; ; i++) {
- int err;
- unsigned int j;
- bool found;
- paddr_t start, end;
-
- /* Try to loop memory map from index 0 to end to get RAM ranges. */
- err = arch_get_ram_range(i, &start, &end);
-
- /* Reached the end of the memory map? */
- if (err == -ENOENT)
- break;
-
- /* Skip non-RAM entries. */
- if (err)
- continue;
-
- do {
- found = false;
- for_each_node_mask(j, memory_nodes_parsed)
- if (start < nodes[j].end
- && end > nodes[j].start) {
- if (start >= nodes[j].start) {
- start = nodes[j].end;
- found = true;
- }
- if (end <= nodes[j].end) {
- end = nodes[j].start;
- found = true;
- }
- }
- } while (found && start < end);
-
- if (start < end) {
- printk(KERN_ERR "NUMA: No NODE for RAM range: "
- "[%"PRIpaddr", %"PRIpaddr"]\n", start, end - 1);
- return 0;
- }
- }
- return 1;
+ numa_fw_nid_name = "PXM";
+ if (!numa_update_node_memblks(node, pxm, ma->base_address, ma->length,
+ ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
+ numa_fw_bad();
}
void __init acpi_numa_arch_fixup(void) {}
acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat))
return;
+ /* Set "PXM" as early as feasible. */
+ numa_fw_nid_name = "PXM";
srat_region_mask = pdx_init_mask(addr);
acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
srat_parse_region, 0);
pfn_pdx_hole_setup(mask >> PAGE_SHIFT);
}
-/* Use discovered information to actually set up the nodes. */
-int __init numa_process_nodes(paddr_t start, paddr_t end)
-{
- int i;
- nodemask_t all_nodes_parsed;
-
- /* First clean up the node list */
- for (i = 0; i < MAX_NUMNODES; i++)
- cutoff_node(i, start, end);
-
- if (acpi_numa <= 0)
- return -1;
-
- if (!nodes_cover_memory()) {
- bad_srat();
- return -1;
- }
-
- memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
- memblk_nodeid);
-
- if (memnode_shift < 0) {
- printk(KERN_ERR
- "SRAT: No NUMA node hash function found. Contact maintainer\n");
- bad_srat();
- return -1;
- }
-
- nodes_or(all_nodes_parsed, memory_nodes_parsed, processor_nodes_parsed);
-
- /* Finally register nodes */
- for_each_node_mask(i, all_nodes_parsed)
- {
- uint64_t size = nodes[i].end - nodes[i].start;
-
- if ( size == 0 )
- printk(KERN_INFO "SRAT: node %u has no memory\n", i);
-
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
- for (i = 0; i < nr_cpu_ids; i++) {
- if (cpu_to_node[i] == NUMA_NO_NODE)
- continue;
- if (!nodemask_test(cpu_to_node[i], &processor_nodes_parsed))
- numa_set_node(i, NUMA_NO_NODE);
- }
- numa_init_array();
- return 0;
-}
-
-static unsigned node_to_pxm(nodeid_t n)
+unsigned int numa_node_to_arch_nid(nodeid_t n)
{
- unsigned i;
+ unsigned int i;
if ((n < ARRAY_SIZE(pxm2node)) && (pxm2node[n].node == n))
return pxm2node[n].pxm;
if (!acpi_slit)
return a == b ? 10 : 20;
- index = acpi_slit->locality_count * node_to_pxm(a);
- slit_val = acpi_slit->entry[index + node_to_pxm(b)];
+ index = acpi_slit->locality_count * numa_node_to_arch_nid(a);
+ slit_val = acpi_slit->entry[index + numa_node_to_arch_nid(b)];
/* ACPI defines 0xff as an unreachable node and 0-9 are undefined */
if ((slit_val == 0xff) || (slit_val <= 9))
#include <xen/sched.h>
#include <xen/softirq.h>
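+
+/*
+ * Node bookkeeping populated while parsing the firmware NUMA tables
+ * (e.g. the ACPI SRAT on x86).
+ */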
+static nodemask_t __initdata processor_nodes_parsed;
+static nodemask_t __initdata memory_nodes_parsed;
+static struct node __initdata nodes[MAX_NUMNODES];
+
+static unsigned int __ro_after_init num_node_memblks;
+static struct node __ro_after_init node_memblk_range[NR_NODE_MEMBLKS];
+static nodeid_t __ro_after_init memblk_nodeid[NR_NODE_MEMBLKS];
+static __initdata DECLARE_BITMAP(memblk_hotplug, NR_NODE_MEMBLKS);
+
+enum conflicts {
+ NO_CONFLICT,
+ OVERLAP,
+ INTERLEAVE,
+};
+
struct node_data __ro_after_init node_data[MAX_NUMNODES];
/* Mapping from pdx to node id */
bool __ro_after_init numa_off;
+const char *__ro_after_init numa_fw_nid_name = "???";
+
bool numa_disabled(void)
{
return numa_off || arch_numa_disabled();
}
+void __init numa_set_processor_nodes_parsed(nodeid_t node)
+{
+ node_set(node, processor_nodes_parsed);
+}
+
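+/*
+ * Check that the given range lies entirely within one of the node's
+ * recorded memory blocks.
+ */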
+bool valid_numa_range(paddr_t start, paddr_t end, nodeid_t node)
+{
+ unsigned int i;
+
+ for ( i = 0; i < num_node_memblks; i++ )
+ {
+ const struct node *nd = &node_memblk_range[i];
+
+ if ( nd->start <= start && nd->end >= end &&
+ memblk_nodeid[i] == node )
+ return true;
+ }
+
+ return false;
+}
+
+static enum conflicts __init conflicting_memblks(
+ nodeid_t nid, paddr_t start, paddr_t end, paddr_t nd_start,
+ paddr_t nd_end, unsigned int *mblkid)
+{
+ unsigned int i;
+
+    /*
+     * Scan all recorded nodes' memory blocks to check for conflicts:
+     * overlap or interleave.
+     */
+ for ( i = 0; i < num_node_memblks; i++ )
+ {
+ const struct node *nd = &node_memblk_range[i];
+
+ *mblkid = i;
+
+        /* Skip zero-length node memory blocks. */
+ if ( nd->start == nd->end )
+ continue;
+        /*
+         * Use the memblk range to check for overlaps, including the
+         * self-overlap case. As nd's range is non-empty, the special
+         * case "nd->end == end && nd->start == start" is also covered.
+         */
+ if ( nd->end > start && nd->start < end )
+ return OVERLAP;
+
+        /*
+         * Use the node's memory range to check whether the new range
+         * contains memory from other nodes - the interleave check. Only
+         * full containment needs checking here, since overlaps have
+         * already been handled above.
+         */
+ if ( nid != memblk_nodeid[i] &&
+ nd->start >= nd_start && nd->end <= nd_end )
+ return INTERLEAVE;
+ }
+
+ return NO_CONFLICT;
+}
+
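+/* Clamp the node's range to [start, end); a disjoint range collapses to empty. */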
+static void __init cutoff_node(nodeid_t i, paddr_t start, paddr_t end)
+{
+ struct node *nd = &nodes[i];
+
+ if ( nd->start < start )
+ {
+ nd->start = start;
+ if ( nd->end < nd->start )
+ nd->start = nd->end;
+ }
+
+ if ( nd->end > end )
+ {
+ nd->end = end;
+ if ( nd->start > nd->end )
+ nd->start = nd->end;
+ }
+}
+
+bool __init numa_memblks_available(void)
+{
+ return num_node_memblks < NR_NODE_MEMBLKS;
+}
+
+/*
+ * This function is called during NUMA memory affinity initialization to
+ * update a NUMA node's memory range. We assume all memory regions
+ * belonging to a single node form one chunk; holes (or MMIO ranges)
+ * between them are included in the node.
+ *
+ * So in numa_update_node_memblks, if a node has multiple banks, start and
+ * end are stretched to cover the holes between them. This works as long
+ * as memory banks of different NUMA nodes don't interleave.
+ */
+bool __init numa_update_node_memblks(nodeid_t node, unsigned int arch_nid,
+ paddr_t start, paddr_t size, bool hotplug)
+{
+ unsigned int i;
+ bool next = false;
+ paddr_t end = start + size;
+ paddr_t nd_start = start;
+ paddr_t nd_end = end;
+ struct node *nd = &nodes[node];
+
+    /*
+     * For a node that already has some memory blocks, temporarily
+     * expand the node's memory range to check for interleaving with
+     * other nodes. This temporary range is not used for the overlap
+     * check, because that would mask overlaps within the same node.
+     *
+     * A node with 0 bytes of memory doesn't need this expansion.
+     */
+ if ( nd->start != nd->end )
+ {
+ if ( nd_start > nd->start )
+ nd_start = nd->start;
+
+ if ( nd_end < nd->end )
+ nd_end = nd->end;
+ }
+
+    /* It is fine to add this area to the node's data; it is used later. */
+ switch ( conflicting_memblks(node, start, end, nd_start, nd_end, &i) )
+ {
+ case OVERLAP:
+ if ( memblk_nodeid[i] == node )
+ {
+ bool mismatch = !hotplug != !test_bit(i, memblk_hotplug);
+
+ printk("%sNUMA: %s %u [%"PRIpaddr", %"PRIpaddr"] overlaps with itself [%"PRIpaddr", %"PRIpaddr"]\n",
+ mismatch ? KERN_ERR : KERN_WARNING, numa_fw_nid_name,
+ arch_nid, start, end - 1,
+ node_memblk_range[i].start, node_memblk_range[i].end - 1);
+ if ( mismatch )
+ return false;
+ break;
+ }
+
+ printk(KERN_ERR
+ "NUMA: %s %u [%"PRIpaddr", %"PRIpaddr"] overlaps with %s %u [%"PRIpaddr", %"PRIpaddr"]\n",
+ numa_fw_nid_name, arch_nid, start, end - 1, numa_fw_nid_name,
+ numa_node_to_arch_nid(memblk_nodeid[i]),
+ node_memblk_range[i].start, node_memblk_range[i].end - 1);
+ return false;
+
+ case INTERLEAVE:
+ printk(KERN_ERR
+               "NUMA: %s %u: [%"PRIpaddr", %"PRIpaddr"] interleaves with %s %u memblk [%"PRIpaddr", %"PRIpaddr"]\n",
+ numa_fw_nid_name, arch_nid, nd_start, nd_end - 1,
+ numa_fw_nid_name, numa_node_to_arch_nid(memblk_nodeid[i]),
+ node_memblk_range[i].start, node_memblk_range[i].end - 1);
+ return false;
+
+ case NO_CONFLICT:
+ break;
+ }
+
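+    /*
+     * Only non-hotpluggable ranges extend the node's span here; hotpluggable
+     * ranges are recorded in the memblk arrays and in mem_hotplug below.
+     */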
+ if ( !hotplug )
+ {
+ node_set(node, memory_nodes_parsed);
+ nd->start = nd_start;
+ nd->end = nd_end;
+ }
+
+ printk(KERN_INFO "NUMA: Node %u %s %u [%"PRIpaddr", %"PRIpaddr"]%s\n",
+ node, numa_fw_nid_name, arch_nid, start, end - 1,
+ hotplug ? " (hotplug)" : "");
+
+ /* Keep node_memblk_range[] sorted by address. */
+ for ( i = 0; i < num_node_memblks; ++i )
+ if ( node_memblk_range[i].start > start ||
+ (node_memblk_range[i].start == start &&
+ node_memblk_range[i].end > end) )
+ break;
+
+ memmove(&node_memblk_range[i + 1], &node_memblk_range[i],
+ (num_node_memblks - i) * sizeof(*node_memblk_range));
+ node_memblk_range[i].start = start;
+ node_memblk_range[i].end = end;
+
+ memmove(&memblk_nodeid[i + 1], &memblk_nodeid[i],
+ (num_node_memblks - i) * sizeof(*memblk_nodeid));
+ memblk_nodeid[i] = node;
+
+ if ( hotplug )
+ {
+ next = true;
+ if ( end > mem_hotplug )
+ mem_hotplug = end;
+ }
+
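+    /*
+     * Mirror the memmove above in the hotplug bitmap: shift the flags of
+     * blocks at or above slot i up by one, inserting the new block's flag
+     * (carried in 'next') at slot i.
+     */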
+ for ( ; i <= num_node_memblks; ++i )
+ {
+ bool prev = next;
+
+ next = test_bit(i, memblk_hotplug);
+ if ( prev )
+ __set_bit(i, memblk_hotplug);
+ else
+ __clear_bit(i, memblk_hotplug);
+ }
+
+ num_node_memblks++;
+
+ return true;
+}
+
+/*
+ * Sanity check to catch more bad firmware NUMA tables (they are amazingly
+ * common). Make sure the parsed nodes cover all memory.
+ */
+static bool __init nodes_cover_memory(void)
+{
+ unsigned int i;
+
+ for ( i = 0; ; i++ )
+ {
+ int err;
+ unsigned int j;
+ bool found;
+ paddr_t start, end;
+
+ /* Try to loop memory map from index 0 to end to get RAM ranges. */
+ err = arch_get_ram_range(i, &start, &end);
+
+ /* Reached the end of the memory map? */
+ if ( err == -ENOENT )
+ break;
+
+ /* Skip non-RAM entries. */
+ if ( err )
+ continue;
+
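+        /*
+         * Repeatedly trim the RAM range by every parsed node that covers
+         * its start or end; whatever remains is RAM not covered by any
+         * node.
+         */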
+ do {
+ found = false;
+ for_each_node_mask ( j, memory_nodes_parsed )
+ if ( start < nodes[j].end && end > nodes[j].start )
+ {
+ if ( start >= nodes[j].start )
+ {
+ start = nodes[j].end;
+ found = true;
+ }
+
+ if ( end <= nodes[j].end )
+ {
+ end = nodes[j].start;
+ found = true;
+ }
+ }
+ } while ( found && start < end );
+
+ if ( start < end )
+ {
+ printk(KERN_ERR "NUMA: No node for RAM range: "
+ "[%"PRIpaddr", %"PRIpaddr"]\n", start, end - 1);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* Use discovered information to actually set up the nodes. */
+static bool __init numa_process_nodes(paddr_t start, paddr_t end)
+{
+ int ret;
+ unsigned int i;
+ nodemask_t all_nodes_parsed;
+
+ /* First clean up the node list */
+ for ( i = 0; i < MAX_NUMNODES; i++ )
+ cutoff_node(i, start, end);
+
+    /* Only process nodes when NUMA is enabled and firmware data was found. */
+ if ( arch_numa_unavailable() )
+ return false;
+
+ if ( !nodes_cover_memory() )
+ {
+ numa_fw_bad();
+ return false;
+ }
+
+ ret = compute_hash_shift(node_memblk_range, num_node_memblks,
+ memblk_nodeid);
+ if ( ret < 0 )
+ {
+ printk(KERN_ERR
+ "NUMA: No NUMA node hash function found. Contact maintainer\n");
+ numa_fw_bad();
+ return false;
+ }
+ memnode_shift = ret;
+
+ nodes_or(all_nodes_parsed, memory_nodes_parsed, processor_nodes_parsed);
+
+ /* Finally register nodes */
+ for_each_node_mask ( i, all_nodes_parsed )
+ {
+ if ( nodes[i].end == nodes[i].start )
+ printk(KERN_INFO "NUMA: node %u has no memory\n", i);
+
+ setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ }
+
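+    /*
+     * Clear CPU-to-node mappings that refer to nodes for which no
+     * processor affinity was parsed.
+     */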
+ for ( i = 0; i < nr_cpu_ids; i++ )
+ {
+ if ( cpu_to_node[i] == NUMA_NO_NODE )
+ continue;
+ if ( !nodemask_test(cpu_to_node[i], &processor_nodes_parsed) )
+ numa_set_node(i, NUMA_NO_NODE);
+ }
+
+ numa_init_array();
+
+ return true;
+}
+
/*
* Given a shift value, try to populate memnodemap[]
* Returns :
#endif
#ifdef CONFIG_NUMA
- if ( !numa_off && !numa_process_nodes(start, end) )
+ if ( !numa_off && numa_process_nodes(start, end) )
return;
#endif
#define PGT_TYPE_INFO_INITIALIZER 0
#endif
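+
+/*
+ * Exclusive end address of the highest hot-pluggable memory range reported
+ * by the firmware NUMA tables.
+ */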
+paddr_t __ro_after_init mem_hotplug;
+
/*
* Comma-separated list of hexadecimal page numbers containing bad bytes.
* e.g. 'badpage=0x3f45,0x8a321'.
/* Dump info to serial console */
void arch_dump_shared_mem_info(void);
+extern paddr_t mem_hotplug;
+
/*
* Extra fault info types which are used to further describe
* the source of an access violation.
#define NUMA_NO_DISTANCE 0xFF
#define MAX_NUMNODES (1 << NODES_SHIFT)
+#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
#define vcpu_to_node(v) (cpu_to_node((v)->processor))
unsigned int numnodes, const nodeid_t *nodeids);
extern bool numa_off;
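+/* Name used in log messages for firmware node IDs, e.g. "PXM" with ACPI. */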
+extern const char *numa_fw_nid_name;
extern void numa_add_cpu(unsigned int cpu);
extern void numa_init_array(void);
extern void numa_set_node(unsigned int cpu, nodeid_t node);
extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern int numa_process_nodes(paddr_t start, paddr_t end);
+extern void numa_fw_bad(void);
extern int arch_numa_setup(const char *opt);
+extern bool arch_numa_unavailable(void);
extern bool arch_numa_disabled(void);
extern void setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end);
*/
extern int arch_get_ram_range(unsigned int idx,
paddr_t *start, paddr_t *end);
+extern bool valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
+extern bool numa_memblks_available(void);
+extern bool numa_update_node_memblks(nodeid_t node, unsigned int arch_nid,
+ paddr_t start, paddr_t size, bool hotplug);
+extern void numa_set_processor_nodes_parsed(nodeid_t node);
#endif