ia64/linux-2.6.18-xen.hg

view drivers/acpi/acpi_memhotplug.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * Copyright (C) 2004 Intel Corporation <naveen.b.s@intel.com>
3 *
4 * All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or (at
9 * your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 *
22 * ACPI based HotPlug driver that supports Memory Hotplug
23 * This driver fields notifications from firmware for memory add
24 * and remove operations and alerts the VM of the affected memory
25 * ranges.
26 */
28 #include <linux/kernel.h>
29 #include <linux/module.h>
30 #include <linux/init.h>
31 #include <linux/types.h>
32 #include <linux/memory_hotplug.h>
33 #include <acpi/acpi_drivers.h>
/* Identification strings and component id for this driver. */
#define ACPI_MEMORY_DEVICE_COMPONENT 0x08000000UL
#define ACPI_MEMORY_DEVICE_CLASS "memory"
/* Hardware ID compared against in is_memory_device() and used as driver .ids */
#define ACPI_MEMORY_DEVICE_HID "PNP0C80"
#define ACPI_MEMORY_DEVICE_DRIVER_NAME "Hotplug Mem Driver"
#define ACPI_MEMORY_DEVICE_NAME "Hotplug Mem Device"

/* NOTE(review): _COMPONENT is presumably consumed by the ACPI debug macros - confirm */
#define _COMPONENT ACPI_MEMORY_DEVICE_COMPONENT

ACPI_MODULE_NAME("acpi_memory")
MODULE_AUTHOR("Naveen B S <naveen.b.s@intel.com>");
MODULE_DESCRIPTION(ACPI_MEMORY_DEVICE_DRIVER_NAME);
MODULE_LICENSE("GPL");

/*
 * ACPI _STA method values.
 * acpi_memory_check_device() requires all three bits to be set;
 * acpi_memory_powerdown_device() requires ENABLED to be clear after _EJ0.
 */
#define ACPI_MEMORY_STA_PRESENT (0x00000001UL)
#define ACPI_MEMORY_STA_ENABLED (0x00000002UL)
#define ACPI_MEMORY_STA_FUNCTIONAL (0x00000008UL)

/* Memory Device States (values stored in acpi_memory_device.state) */
#define MEMORY_INVALID_STATE 0
#define MEMORY_POWER_ON_STATE 1
#define MEMORY_POWER_OFF_STATE 2
/* Driver callbacks; defined further down in this file. */
static int acpi_memory_device_add(struct acpi_device *device);
static int acpi_memory_device_remove(struct acpi_device *device, int type);
static int acpi_memory_device_start(struct acpi_device *device);

/*
 * ACPI driver descriptor for the memory hotplug device. It is registered
 * in acpi_memory_device_init() and unregistered in
 * acpi_memory_device_exit().
 */
static struct acpi_driver acpi_memory_device_driver = {
	.name = ACPI_MEMORY_DEVICE_DRIVER_NAME,
	.class = ACPI_MEMORY_DEVICE_CLASS,
	.ids = ACPI_MEMORY_DEVICE_HID,
	.ops = {
		.add = acpi_memory_device_add,
		.remove = acpi_memory_device_remove,
		.start = acpi_memory_device_start,
		},
};
/*
 * One memory range reported by the device's _CRS. Entries are allocated in
 * acpi_memory_get_resource() and linked on acpi_memory_device.res_list.
 */
struct acpi_memory_info {
	struct list_head list;
	u64 start_addr; /* Memory Range start physical addr */
	u64 length; /* Memory Range length */
	unsigned short caching; /* memory cache attribute */
	unsigned short write_protect; /* memory read/write attribute */
	/* set to 1 once add_memory() has succeeded for this range */
	unsigned int enabled:1;
};
/*
 * Per-device driver data; allocated in acpi_memory_device_add() and stored
 * as the acpi_device's driver data.
 */
struct acpi_memory_device {
	struct acpi_device * device; /* owning ACPI device */
	unsigned int state; /* State of the memory device */
	struct list_head res_list; /* list of struct acpi_memory_info */
};
88 static acpi_status
89 acpi_memory_get_resource(struct acpi_resource *resource, void *context)
90 {
91 struct acpi_memory_device *mem_device = context;
92 struct acpi_resource_address64 address64;
93 struct acpi_memory_info *info, *new;
94 acpi_status status;
96 status = acpi_resource_to_address64(resource, &address64);
97 if (ACPI_FAILURE(status) ||
98 (address64.resource_type != ACPI_MEMORY_RANGE))
99 return AE_OK;
101 list_for_each_entry(info, &mem_device->res_list, list) {
102 /* Can we combine the resource range information? */
103 if ((info->caching == address64.info.mem.caching) &&
104 (info->write_protect == address64.info.mem.write_protect) &&
105 (info->start_addr + info->length == address64.minimum)) {
106 info->length += address64.address_length;
107 return AE_OK;
108 }
109 }
111 new = kzalloc(sizeof(struct acpi_memory_info), GFP_KERNEL);
112 if (!new)
113 return AE_ERROR;
115 INIT_LIST_HEAD(&new->list);
116 new->caching = address64.info.mem.caching;
117 new->write_protect = address64.info.mem.write_protect;
118 new->start_addr = address64.minimum;
119 new->length = address64.address_length;
120 list_add_tail(&new->list, &mem_device->res_list);
122 return AE_OK;
123 }
125 static int
126 acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
127 {
128 acpi_status status;
129 struct acpi_memory_info *info, *n;
132 if (!list_empty(&mem_device->res_list))
133 return 0;
135 status = acpi_walk_resources(mem_device->device->handle, METHOD_NAME__CRS,
136 acpi_memory_get_resource, mem_device);
137 if (ACPI_FAILURE(status)) {
138 list_for_each_entry_safe(info, n, &mem_device->res_list, list)
139 kfree(info);
140 INIT_LIST_HEAD(&mem_device->res_list);
141 return -EINVAL;
142 }
144 return 0;
145 }
147 static int
148 acpi_memory_get_device(acpi_handle handle,
149 struct acpi_memory_device **mem_device)
150 {
151 acpi_status status;
152 acpi_handle phandle;
153 struct acpi_device *device = NULL;
154 struct acpi_device *pdevice = NULL;
157 if (!acpi_bus_get_device(handle, &device) && device)
158 goto end;
160 status = acpi_get_parent(handle, &phandle);
161 if (ACPI_FAILURE(status)) {
162 ACPI_EXCEPTION((AE_INFO, status, "Cannot find acpi parent"));
163 return -EINVAL;
164 }
166 /* Get the parent device */
167 status = acpi_bus_get_device(phandle, &pdevice);
168 if (ACPI_FAILURE(status)) {
169 ACPI_EXCEPTION((AE_INFO, status, "Cannot get acpi bus device"));
170 return -EINVAL;
171 }
173 /*
174 * Now add the notified device. This creates the acpi_device
175 * and invokes .add function
176 */
177 status = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE);
178 if (ACPI_FAILURE(status)) {
179 ACPI_EXCEPTION((AE_INFO, status, "Cannot add acpi bus"));
180 return -EINVAL;
181 }
183 end:
184 *mem_device = acpi_driver_data(device);
185 if (!(*mem_device)) {
186 printk(KERN_ERR "\n driver data not found");
187 return -ENODEV;
188 }
190 return 0;
191 }
193 static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
194 {
195 unsigned long current_status;
198 /* Get device present/absent information from the _STA */
199 if (ACPI_FAILURE(acpi_evaluate_integer(mem_device->device->handle, "_STA",
200 NULL, &current_status)))
201 return -ENODEV;
202 /*
203 * Check for device status. Device should be
204 * present/enabled/functioning.
205 */
206 if (!((current_status & ACPI_MEMORY_STA_PRESENT)
207 && (current_status & ACPI_MEMORY_STA_ENABLED)
208 && (current_status & ACPI_MEMORY_STA_FUNCTIONAL)))
209 return -ENODEV;
211 return 0;
212 }
214 static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
215 {
216 int result, num_enabled = 0;
217 struct acpi_memory_info *info;
218 int node;
221 /* Get the range from the _CRS */
222 result = acpi_memory_get_device_resources(mem_device);
223 if (result) {
224 printk(KERN_ERR PREFIX "get_device_resources failed\n");
225 mem_device->state = MEMORY_INVALID_STATE;
226 return result;
227 }
229 node = acpi_get_node(mem_device->device->handle);
230 /*
231 * Tell the VM there is more memory here...
232 * Note: Assume that this function returns zero on success
233 * We don't have memory-hot-add rollback function,now.
234 * (i.e. memory-hot-remove function)
235 */
236 list_for_each_entry(info, &mem_device->res_list, list) {
237 if (info->enabled) { /* just sanity check...*/
238 num_enabled++;
239 continue;
240 }
241 result = add_memory(node, info->start_addr, info->length);
242 if (result)
243 continue;
244 info->enabled = 1;
245 num_enabled++;
246 }
247 if (!num_enabled) {
248 printk(KERN_ERR PREFIX "add_memory failed\n");
249 mem_device->state = MEMORY_INVALID_STATE;
250 return -EINVAL;
251 }
253 return result;
254 }
256 static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device)
257 {
258 acpi_status status;
259 struct acpi_object_list arg_list;
260 union acpi_object arg;
261 unsigned long current_status;
264 /* Issue the _EJ0 command */
265 arg_list.count = 1;
266 arg_list.pointer = &arg;
267 arg.type = ACPI_TYPE_INTEGER;
268 arg.integer.value = 1;
269 status = acpi_evaluate_object(mem_device->device->handle,
270 "_EJ0", &arg_list, NULL);
271 /* Return on _EJ0 failure */
272 if (ACPI_FAILURE(status)) {
273 ACPI_EXCEPTION((AE_INFO, status, "_EJ0 failed"));
274 return -ENODEV;
275 }
277 /* Evalute _STA to check if the device is disabled */
278 status = acpi_evaluate_integer(mem_device->device->handle, "_STA",
279 NULL, &current_status);
280 if (ACPI_FAILURE(status))
281 return -ENODEV;
283 /* Check for device status. Device should be disabled */
284 if (current_status & ACPI_MEMORY_STA_ENABLED)
285 return -EINVAL;
287 return 0;
288 }
290 static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
291 {
292 int result;
293 struct acpi_memory_info *info, *n;
296 /*
297 * Ask the VM to offline this memory range.
298 * Note: Assume that this function returns zero on success
299 */
300 list_for_each_entry_safe(info, n, &mem_device->res_list, list) {
301 if (info->enabled) {
302 result = remove_memory(info->start_addr, info->length);
303 if (result)
304 return result;
305 }
306 kfree(info);
307 }
309 /* Power-off and eject the device */
310 result = acpi_memory_powerdown_device(mem_device);
311 if (result) {
312 /* Set the status of the device to invalid */
313 mem_device->state = MEMORY_INVALID_STATE;
314 return result;
315 }
317 mem_device->state = MEMORY_POWER_OFF_STATE;
318 return result;
319 }
321 static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
322 {
323 struct acpi_memory_device *mem_device;
324 struct acpi_device *device;
327 switch (event) {
328 case ACPI_NOTIFY_BUS_CHECK:
329 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
330 "\nReceived BUS CHECK notification for device\n"));
331 /* Fall Through */
332 case ACPI_NOTIFY_DEVICE_CHECK:
333 if (event == ACPI_NOTIFY_DEVICE_CHECK)
334 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
335 "\nReceived DEVICE CHECK notification for device\n"));
336 if (acpi_memory_get_device(handle, &mem_device)) {
337 printk(KERN_ERR PREFIX "Cannot find driver data\n");
338 return;
339 }
341 if (!acpi_memory_check_device(mem_device)) {
342 if (acpi_memory_enable_device(mem_device))
343 printk(KERN_ERR PREFIX
344 "Cannot enable memory device\n");
345 }
346 break;
347 case ACPI_NOTIFY_EJECT_REQUEST:
348 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
349 "\nReceived EJECT REQUEST notification for device\n"));
351 if (acpi_bus_get_device(handle, &device)) {
352 printk(KERN_ERR PREFIX "Device doesn't exist\n");
353 break;
354 }
355 mem_device = acpi_driver_data(device);
356 if (!mem_device) {
357 printk(KERN_ERR PREFIX "Driver Data is NULL\n");
358 break;
359 }
361 /*
362 * Currently disabling memory device from kernel mode
363 * TBD: Can also be disabled from user mode scripts
364 * TBD: Can also be disabled by Callback registration
365 * with generic sysfs driver
366 */
367 if (acpi_memory_disable_device(mem_device))
368 printk(KERN_ERR PREFIX
369 "Disable memory device\n");
370 /*
371 * TBD: Invoke acpi_bus_remove to cleanup data structures
372 */
373 break;
374 default:
375 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
376 "Unsupported event [0x%x]\n", event));
377 break;
378 }
380 return;
381 }
383 static int acpi_memory_device_add(struct acpi_device *device)
384 {
385 int result;
386 struct acpi_memory_device *mem_device = NULL;
389 if (!device)
390 return -EINVAL;
392 mem_device = kmalloc(sizeof(struct acpi_memory_device), GFP_KERNEL);
393 if (!mem_device)
394 return -ENOMEM;
395 memset(mem_device, 0, sizeof(struct acpi_memory_device));
397 INIT_LIST_HEAD(&mem_device->res_list);
398 mem_device->device = device;
399 sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
400 sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
401 acpi_driver_data(device) = mem_device;
403 /* Get the range from the _CRS */
404 result = acpi_memory_get_device_resources(mem_device);
405 if (result) {
406 kfree(mem_device);
407 return result;
408 }
410 /* Set the device state */
411 mem_device->state = MEMORY_POWER_ON_STATE;
413 printk(KERN_INFO "%s \n", acpi_device_name(device));
415 return result;
416 }
418 static int acpi_memory_device_remove(struct acpi_device *device, int type)
419 {
420 struct acpi_memory_device *mem_device = NULL;
423 if (!device || !acpi_driver_data(device))
424 return -EINVAL;
426 mem_device = (struct acpi_memory_device *)acpi_driver_data(device);
427 kfree(mem_device);
429 return 0;
430 }
432 static int acpi_memory_device_start (struct acpi_device *device)
433 {
434 struct acpi_memory_device *mem_device;
435 int result = 0;
437 mem_device = acpi_driver_data(device);
439 if (!acpi_memory_check_device(mem_device)) {
440 /* call add_memory func */
441 result = acpi_memory_enable_device(mem_device);
442 if (result)
443 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
444 "Error in acpi_memory_enable_device\n"));
445 }
446 return result;
447 }
449 /*
450 * Helper function to check for memory device
451 */
452 static acpi_status is_memory_device(acpi_handle handle)
453 {
454 char *hardware_id;
455 acpi_status status;
456 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
457 struct acpi_device_info *info;
460 status = acpi_get_object_info(handle, &buffer);
461 if (ACPI_FAILURE(status))
462 return status;
464 info = buffer.pointer;
465 if (!(info->valid & ACPI_VALID_HID)) {
466 kfree(buffer.pointer);
467 return AE_ERROR;
468 }
470 hardware_id = info->hardware_id.value;
471 if ((hardware_id == NULL) ||
472 (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID)))
473 status = AE_ERROR;
475 kfree(buffer.pointer);
476 return status;
477 }
479 static acpi_status
480 acpi_memory_register_notify_handler(acpi_handle handle,
481 u32 level, void *ctxt, void **retv)
482 {
483 acpi_status status;
486 status = is_memory_device(handle);
487 if (ACPI_FAILURE(status))
488 return AE_OK; /* continue */
490 status = acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
491 acpi_memory_device_notify, NULL);
492 /* continue */
493 return AE_OK;
494 }
496 static acpi_status
497 acpi_memory_deregister_notify_handler(acpi_handle handle,
498 u32 level, void *ctxt, void **retv)
499 {
500 acpi_status status;
503 status = is_memory_device(handle);
504 if (ACPI_FAILURE(status))
505 return AE_OK; /* continue */
507 status = acpi_remove_notify_handler(handle,
508 ACPI_SYSTEM_NOTIFY,
509 acpi_memory_device_notify);
511 return AE_OK; /* continue */
512 }
514 static int __init acpi_memory_device_init(void)
515 {
516 int result;
517 acpi_status status;
520 result = acpi_bus_register_driver(&acpi_memory_device_driver);
522 if (result < 0)
523 return -ENODEV;
525 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
526 ACPI_UINT32_MAX,
527 acpi_memory_register_notify_handler,
528 NULL, NULL);
530 if (ACPI_FAILURE(status)) {
531 ACPI_EXCEPTION((AE_INFO, status, "walk_namespace failed"));
532 acpi_bus_unregister_driver(&acpi_memory_device_driver);
533 return -ENODEV;
534 }
536 return 0;
537 }
539 static void __exit acpi_memory_device_exit(void)
540 {
541 acpi_status status;
544 /*
545 * Adding this to un-install notification handlers for all the device
546 * handles.
547 */
548 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
549 ACPI_UINT32_MAX,
550 acpi_memory_deregister_notify_handler,
551 NULL, NULL);
553 if (ACPI_FAILURE(status))
554 ACPI_EXCEPTION((AE_INFO, status, "walk_namespace failed"));
556 acpi_bus_unregister_driver(&acpi_memory_device_driver);
558 return;
559 }
/* Hook the init/exit routines above into module load and unload. */
module_init(acpi_memory_device_init);
module_exit(acpi_memory_device_exit);