ia64/linux-2.6.18-xen.hg

view drivers/acpi/numa.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * acpi_numa.c - ACPI NUMA support
3 *
4 * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
5 *
6 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23 *
24 */
25 #include <linux/module.h>
26 #include <linux/init.h>
27 #include <linux/kernel.h>
28 #include <linux/types.h>
29 #include <linux/errno.h>
30 #include <linux/acpi.h>
31 #include <acpi/acpi_bus.h>
32 #include <acpi/acmacros.h>
34 #define ACPI_NUMA 0x80000000
35 #define _COMPONENT ACPI_NUMA
36 ACPI_MODULE_NAME("numa")
38 static nodemask_t nodes_found_map = NODE_MASK_NONE;
39 #define PXM_INVAL -1
40 #define NID_INVAL -1
42 /* maps to convert between proximity domain and logical node ID */
43 int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS]
44 = { [0 ... MAX_PXM_DOMAINS - 1] = NID_INVAL };
45 int __cpuinitdata node_to_pxm_map[MAX_NUMNODES]
46 = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
48 extern int __init acpi_table_parse_madt_family(enum acpi_table_id id,
49 unsigned long madt_size,
50 int entry_id,
51 acpi_madt_entry_handler handler,
52 unsigned int max_entries);
54 int __cpuinit pxm_to_node(int pxm)
55 {
56 if (pxm < 0)
57 return NID_INVAL;
58 return pxm_to_node_map[pxm];
59 }
61 int __cpuinit node_to_pxm(int node)
62 {
63 if (node < 0)
64 return PXM_INVAL;
65 return node_to_pxm_map[node];
66 }
68 int __cpuinit acpi_map_pxm_to_node(int pxm)
69 {
70 int node = pxm_to_node_map[pxm];
72 if (node < 0){
73 if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
74 return NID_INVAL;
75 node = first_unset_node(nodes_found_map);
76 pxm_to_node_map[pxm] = node;
77 node_to_pxm_map[node] = pxm;
78 node_set(node, nodes_found_map);
79 }
81 return node;
82 }
84 void __cpuinit acpi_unmap_pxm_to_node(int node)
85 {
86 int pxm = node_to_pxm_map[node];
87 pxm_to_node_map[pxm] = NID_INVAL;
88 node_to_pxm_map[node] = PXM_INVAL;
89 node_clear(node, nodes_found_map);
90 }
92 void __init acpi_table_print_srat_entry(acpi_table_entry_header * header)
93 {
95 ACPI_FUNCTION_NAME("acpi_table_print_srat_entry");
97 if (!header)
98 return;
100 switch (header->type) {
102 case ACPI_SRAT_PROCESSOR_AFFINITY:
103 #ifdef ACPI_DEBUG_OUTPUT
104 {
105 struct acpi_table_processor_affinity *p =
106 (struct acpi_table_processor_affinity *)header;
107 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
108 "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
109 p->apic_id, p->lsapic_eid,
110 p->proximity_domain,
111 p->flags.
112 enabled ? "enabled" : "disabled"));
113 }
114 #endif /* ACPI_DEBUG_OUTPUT */
115 break;
117 case ACPI_SRAT_MEMORY_AFFINITY:
118 #ifdef ACPI_DEBUG_OUTPUT
119 {
120 struct acpi_table_memory_affinity *p =
121 (struct acpi_table_memory_affinity *)header;
122 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
123 "SRAT Memory (0x%08x%08x length 0x%08x%08x type 0x%x) in proximity domain %d %s%s\n",
124 p->base_addr_hi, p->base_addr_lo,
125 p->length_hi, p->length_lo,
126 p->memory_type, p->proximity_domain,
127 p->flags.
128 enabled ? "enabled" : "disabled",
129 p->flags.
130 hot_pluggable ? " hot-pluggable" :
131 ""));
132 }
133 #endif /* ACPI_DEBUG_OUTPUT */
134 break;
136 default:
137 printk(KERN_WARNING PREFIX
138 "Found unsupported SRAT entry (type = 0x%x)\n",
139 header->type);
140 break;
141 }
142 }
144 static int __init acpi_parse_slit(unsigned long phys_addr, unsigned long size)
145 {
146 struct acpi_table_slit *slit;
147 u32 localities;
149 if (!phys_addr || !size)
150 return -EINVAL;
152 slit = (struct acpi_table_slit *)__va(phys_addr);
154 /* downcast just for %llu vs %lu for i386/ia64 */
155 localities = (u32) slit->localities;
157 acpi_numa_slit_init(slit);
159 return 0;
160 }
162 static int __init
163 acpi_parse_processor_affinity(acpi_table_entry_header * header,
164 const unsigned long end)
165 {
166 struct acpi_table_processor_affinity *processor_affinity;
168 processor_affinity = (struct acpi_table_processor_affinity *)header;
169 if (!processor_affinity)
170 return -EINVAL;
172 acpi_table_print_srat_entry(header);
174 /* let architecture-dependent part to do it */
175 acpi_numa_processor_affinity_init(processor_affinity);
177 return 0;
178 }
180 static int __init
181 acpi_parse_memory_affinity(acpi_table_entry_header * header,
182 const unsigned long end)
183 {
184 struct acpi_table_memory_affinity *memory_affinity;
186 memory_affinity = (struct acpi_table_memory_affinity *)header;
187 if (!memory_affinity)
188 return -EINVAL;
190 acpi_table_print_srat_entry(header);
192 /* let architecture-dependent part to do it */
193 acpi_numa_memory_affinity_init(memory_affinity);
195 return 0;
196 }
198 static int __init acpi_parse_srat(unsigned long phys_addr, unsigned long size)
199 {
200 struct acpi_table_srat *srat;
202 if (!phys_addr || !size)
203 return -EINVAL;
205 srat = (struct acpi_table_srat *)__va(phys_addr);
207 return 0;
208 }
210 int __init
211 acpi_table_parse_srat(enum acpi_srat_entry_id id,
212 acpi_madt_entry_handler handler, unsigned int max_entries)
213 {
214 return acpi_table_parse_madt_family(ACPI_SRAT,
215 sizeof(struct acpi_table_srat), id,
216 handler, max_entries);
217 }
219 int __init acpi_numa_init(void)
220 {
221 int result;
223 /* SRAT: Static Resource Affinity Table */
224 result = acpi_table_parse(ACPI_SRAT, acpi_parse_srat);
226 if (result > 0) {
227 result = acpi_table_parse_srat(ACPI_SRAT_PROCESSOR_AFFINITY,
228 acpi_parse_processor_affinity,
229 NR_CPUS);
230 result = acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS); // IA64 specific
231 }
233 /* SLIT: System Locality Information Table */
234 result = acpi_table_parse(ACPI_SLIT, acpi_parse_slit);
236 acpi_numa_arch_fixup();
237 return 0;
238 }
240 int acpi_get_pxm(acpi_handle h)
241 {
242 unsigned long pxm;
243 acpi_status status;
244 acpi_handle handle;
245 acpi_handle phandle = h;
247 do {
248 handle = phandle;
249 status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
250 if (ACPI_SUCCESS(status))
251 return (int)pxm;
252 status = acpi_get_parent(handle, &phandle);
253 } while (ACPI_SUCCESS(status));
254 return -1;
255 }
256 EXPORT_SYMBOL(acpi_get_pxm);
258 int acpi_get_node(acpi_handle *handle)
259 {
260 int pxm, node = -1;
262 pxm = acpi_get_pxm(handle);
263 if (pxm >= 0)
264 node = acpi_map_pxm_to_node(pxm);
266 return node;
267 }
268 EXPORT_SYMBOL(acpi_get_node);