ia64/linux-2.6.18-xen.hg

view drivers/pci/pci-acpi.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * File: pci-acpi.c
3 * Purpose: Provide PCI support in ACPI
4 *
5 * Copyright (C) 2005 David Shaohua Li <shaohua.li@intel.com>
6 * Copyright (C) 2004 Tom Long Nguyen <tom.l.nguyen@intel.com>
7 * Copyright (C) 2004 Intel Corp.
8 */
10 #include <linux/delay.h>
11 #include <linux/init.h>
12 #include <linux/pci.h>
13 #include <linux/module.h>
14 #include <acpi/acpi.h>
15 #include <acpi/acnamesp.h>
16 #include <acpi/acresrc.h>
17 #include <acpi/acpi_bus.h>
19 #include <linux/pci-acpi.h>
20 #include "pci.h"
22 static u32 ctrlset_buf[3] = {0, 0, 0};
23 static u32 global_ctrlsets = 0;
24 static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
26 static acpi_status
27 acpi_query_osc (
28 acpi_handle handle,
29 u32 level,
30 void *context,
31 void **retval )
32 {
33 acpi_status status;
34 struct acpi_object_list input;
35 union acpi_object in_params[4];
36 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
37 union acpi_object *out_obj;
38 u32 osc_dw0;
41 /* Setting up input parameters */
42 input.count = 4;
43 input.pointer = in_params;
44 in_params[0].type = ACPI_TYPE_BUFFER;
45 in_params[0].buffer.length = 16;
46 in_params[0].buffer.pointer = OSC_UUID;
47 in_params[1].type = ACPI_TYPE_INTEGER;
48 in_params[1].integer.value = 1;
49 in_params[2].type = ACPI_TYPE_INTEGER;
50 in_params[2].integer.value = 3;
51 in_params[3].type = ACPI_TYPE_BUFFER;
52 in_params[3].buffer.length = 12;
53 in_params[3].buffer.pointer = (u8 *)context;
55 status = acpi_evaluate_object(handle, "_OSC", &input, &output);
56 if (ACPI_FAILURE (status)) {
57 printk(KERN_DEBUG
58 "Evaluate _OSC Set fails. Status = 0x%04x\n", status);
59 return status;
60 }
61 out_obj = output.pointer;
63 if (out_obj->type != ACPI_TYPE_BUFFER) {
64 printk(KERN_DEBUG
65 "Evaluate _OSC returns wrong type\n");
66 status = AE_TYPE;
67 goto query_osc_out;
68 }
69 osc_dw0 = *((u32 *) out_obj->buffer.pointer);
70 if (osc_dw0) {
71 if (osc_dw0 & OSC_REQUEST_ERROR)
72 printk(KERN_DEBUG "_OSC request fails\n");
73 if (osc_dw0 & OSC_INVALID_UUID_ERROR)
74 printk(KERN_DEBUG "_OSC invalid UUID\n");
75 if (osc_dw0 & OSC_INVALID_REVISION_ERROR)
76 printk(KERN_DEBUG "_OSC invalid revision\n");
77 if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) {
78 /* Update Global Control Set */
79 global_ctrlsets = *((u32 *)(out_obj->buffer.pointer+8));
80 status = AE_OK;
81 goto query_osc_out;
82 }
83 status = AE_ERROR;
84 goto query_osc_out;
85 }
87 /* Update Global Control Set */
88 global_ctrlsets = *((u32 *)(out_obj->buffer.pointer + 8));
89 status = AE_OK;
91 query_osc_out:
92 kfree(output.pointer);
93 return status;
94 }
97 static acpi_status
98 acpi_run_osc (
99 acpi_handle handle,
100 void *context)
101 {
102 acpi_status status;
103 struct acpi_object_list input;
104 union acpi_object in_params[4];
105 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
106 union acpi_object *out_obj;
107 u32 osc_dw0;
109 /* Setting up input parameters */
110 input.count = 4;
111 input.pointer = in_params;
112 in_params[0].type = ACPI_TYPE_BUFFER;
113 in_params[0].buffer.length = 16;
114 in_params[0].buffer.pointer = OSC_UUID;
115 in_params[1].type = ACPI_TYPE_INTEGER;
116 in_params[1].integer.value = 1;
117 in_params[2].type = ACPI_TYPE_INTEGER;
118 in_params[2].integer.value = 3;
119 in_params[3].type = ACPI_TYPE_BUFFER;
120 in_params[3].buffer.length = 12;
121 in_params[3].buffer.pointer = (u8 *)context;
123 status = acpi_evaluate_object(handle, "_OSC", &input, &output);
124 if (ACPI_FAILURE (status)) {
125 printk(KERN_DEBUG
126 "Evaluate _OSC Set fails. Status = 0x%04x\n", status);
127 return status;
128 }
129 out_obj = output.pointer;
130 if (out_obj->type != ACPI_TYPE_BUFFER) {
131 printk(KERN_DEBUG
132 "Evaluate _OSC returns wrong type\n");
133 status = AE_TYPE;
134 goto run_osc_out;
135 }
136 osc_dw0 = *((u32 *) out_obj->buffer.pointer);
137 if (osc_dw0) {
138 if (osc_dw0 & OSC_REQUEST_ERROR)
139 printk(KERN_DEBUG "_OSC request fails\n");
140 if (osc_dw0 & OSC_INVALID_UUID_ERROR)
141 printk(KERN_DEBUG "_OSC invalid UUID\n");
142 if (osc_dw0 & OSC_INVALID_REVISION_ERROR)
143 printk(KERN_DEBUG "_OSC invalid revision\n");
144 if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) {
145 printk(KERN_DEBUG "_OSC FW not grant req. control\n");
146 status = AE_SUPPORT;
147 goto run_osc_out;
148 }
149 status = AE_ERROR;
150 goto run_osc_out;
151 }
152 status = AE_OK;
154 run_osc_out:
155 kfree(output.pointer);
156 return status;
157 }
159 /**
160 * pci_osc_support_set - register OS support to Firmware
161 * @flags: OS support bits
162 *
163 * Update OS support fields and doing a _OSC Query to obtain an update
164 * from Firmware on supported control bits.
165 **/
166 acpi_status pci_osc_support_set(u32 flags)
167 {
168 u32 temp;
170 if (!(flags & OSC_SUPPORT_MASKS)) {
171 return AE_TYPE;
172 }
173 ctrlset_buf[OSC_SUPPORT_TYPE] |= (flags & OSC_SUPPORT_MASKS);
175 /* do _OSC query for all possible controls */
176 temp = ctrlset_buf[OSC_CONTROL_TYPE];
177 ctrlset_buf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
178 ctrlset_buf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
179 acpi_get_devices ( PCI_ROOT_HID_STRING,
180 acpi_query_osc,
181 ctrlset_buf,
182 NULL );
183 ctrlset_buf[OSC_QUERY_TYPE] = !OSC_QUERY_ENABLE;
184 ctrlset_buf[OSC_CONTROL_TYPE] = temp;
185 return AE_OK;
186 }
187 EXPORT_SYMBOL(pci_osc_support_set);
189 /**
190 * pci_osc_control_set - commit requested control to Firmware
191 * @handle: acpi_handle for the target ACPI object
192 * @flags: driver's requested control bits
193 *
194 * Attempt to take control from Firmware on requested control bits.
195 **/
196 acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
197 {
198 acpi_status status;
199 u32 ctrlset;
201 ctrlset = (flags & OSC_CONTROL_MASKS);
202 if (!ctrlset) {
203 return AE_TYPE;
204 }
205 if (ctrlset_buf[OSC_SUPPORT_TYPE] &&
206 ((global_ctrlsets & ctrlset) != ctrlset)) {
207 return AE_SUPPORT;
208 }
209 ctrlset_buf[OSC_CONTROL_TYPE] |= ctrlset;
210 status = acpi_run_osc(handle, ctrlset_buf);
211 if (ACPI_FAILURE (status)) {
212 ctrlset_buf[OSC_CONTROL_TYPE] &= ~ctrlset;
213 }
215 return status;
216 }
217 EXPORT_SYMBOL(pci_osc_control_set);
219 /*
220 * _SxD returns the D-state with the highest power
221 * (lowest D-state number) supported in the S-state "x".
222 *
223 * If the devices does not have a _PRW
224 * (Power Resources for Wake) supporting system wakeup from "x"
225 * then the OS is free to choose a lower power (higher number
226 * D-state) than the return value from _SxD.
227 *
228 * But if _PRW is enabled at S-state "x", the OS
229 * must not choose a power lower than _SxD --
230 * unless the device has an _SxW method specifying
231 * the lowest power (highest D-state number) the device
232 * may enter while still able to wake the system.
233 *
234 * ie. depending on global OS policy:
235 *
236 * if (_PRW at S-state x)
237 * choose from highest power _SxD to lowest power _SxW
238 * else // no _PRW at S-state x
239 * choose highest power _SxD or any lower power
240 *
241 * currently we simply return _SxD, if present.
242 */
244 static int acpi_pci_choose_state(struct pci_dev *pdev, pm_message_t state)
245 {
246 /* TBD */
248 return -ENODEV;
249 }
251 static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
252 {
253 acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
254 static int state_conv[] = {
255 [0] = 0,
256 [1] = 1,
257 [2] = 2,
258 [3] = 3,
259 [4] = 3
260 };
261 int acpi_state = state_conv[(int __force) state];
263 if (!handle)
264 return -ENODEV;
265 return acpi_bus_set_power(handle, acpi_state);
266 }
269 /* ACPI bus type */
270 static int acpi_pci_find_device(struct device *dev, acpi_handle *handle)
271 {
272 struct pci_dev * pci_dev;
273 acpi_integer addr;
275 pci_dev = to_pci_dev(dev);
276 /* Please ref to ACPI spec for the syntax of _ADR */
277 addr = (PCI_SLOT(pci_dev->devfn) << 16) | PCI_FUNC(pci_dev->devfn);
278 *handle = acpi_get_child(DEVICE_ACPI_HANDLE(dev->parent), addr);
279 if (!*handle)
280 return -ENODEV;
281 return 0;
282 }
284 static int acpi_pci_find_root_bridge(struct device *dev, acpi_handle *handle)
285 {
286 int num;
287 unsigned int seg, bus;
289 /*
290 * The string should be the same as root bridge's name
291 * Please look at 'pci_scan_bus_parented'
292 */
293 num = sscanf(dev->bus_id, "pci%04x:%02x", &seg, &bus);
294 if (num != 2)
295 return -ENODEV;
296 *handle = acpi_get_pci_rootbridge_handle(seg, bus);
297 if (!*handle)
298 return -ENODEV;
299 return 0;
300 }
302 static struct acpi_bus_type acpi_pci_bus = {
303 .bus = &pci_bus_type,
304 .find_device = acpi_pci_find_device,
305 .find_bridge = acpi_pci_find_root_bridge,
306 };
308 static int __init acpi_pci_init(void)
309 {
310 int ret;
312 ret = register_acpi_bus_type(&acpi_pci_bus);
313 if (ret)
314 return 0;
315 platform_pci_choose_state = acpi_pci_choose_state;
316 platform_pci_set_power_state = acpi_pci_set_power_state;
317 return 0;
318 }
319 arch_initcall(acpi_pci_init);