ia64/linux-2.6.18-xen.hg

view drivers/acpi/processor_thermal.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * processor_thermal.c - Passive cooling submodule of the ACPI processor driver
3 *
4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6 * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de>
7 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
8 * - Added processor hotplug support
9 *
10 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License along
23 * with this program; if not, write to the Free Software Foundation, Inc.,
24 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
25 *
26 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27 */
29 #include <linux/kernel.h>
30 #include <linux/module.h>
31 #include <linux/init.h>
32 #include <linux/cpufreq.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
36 #include <asm/uaccess.h>
38 #include <acpi/acpi_bus.h>
39 #include <acpi/processor.h>
40 #include <acpi/acpi_drivers.h>
42 #define ACPI_PROCESSOR_COMPONENT 0x01000000
43 #define ACPI_PROCESSOR_CLASS "processor"
44 #define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver"
45 #define _COMPONENT ACPI_PROCESSOR_COMPONENT
46 ACPI_MODULE_NAME("acpi_processor")
48 /* --------------------------------------------------------------------------
49 Limit Interface
50 -------------------------------------------------------------------------- */
51 static int acpi_processor_apply_limit(struct acpi_processor *pr)
52 {
53 int result = 0;
54 u16 px = 0;
55 u16 tx = 0;
58 if (!pr)
59 return -EINVAL;
61 if (!pr->flags.limit)
62 return -ENODEV;
64 if (pr->flags.throttling) {
65 if (pr->limit.user.tx > tx)
66 tx = pr->limit.user.tx;
67 if (pr->limit.thermal.tx > tx)
68 tx = pr->limit.thermal.tx;
70 result = acpi_processor_set_throttling(pr, tx);
71 if (result)
72 goto end;
73 }
75 pr->limit.state.px = px;
76 pr->limit.state.tx = tx;
78 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
79 "Processor [%d] limit set to (P%d:T%d)\n", pr->id,
80 pr->limit.state.px, pr->limit.state.tx));
82 end:
83 if (result)
84 printk(KERN_ERR PREFIX "Unable to set limit\n");
86 return result;
87 }
89 #ifdef CONFIG_CPU_FREQ
91 /* If a passive cooling situation is detected, primarily CPUfreq is used, as it
92 * offers (in most cases) voltage scaling in addition to frequency scaling, and
93 * thus a cubic (instead of linear) reduction of energy. Also, we allow for
94 * _any_ cpufreq driver and not only the acpi-cpufreq driver.
95 */
97 static unsigned int cpufreq_thermal_reduction_pctg[NR_CPUS];
98 static unsigned int acpi_thermal_cpufreq_is_init = 0;
100 static int cpu_has_cpufreq(unsigned int cpu)
101 {
102 struct cpufreq_policy policy;
103 if (!acpi_thermal_cpufreq_is_init || cpufreq_get_policy(&policy, cpu))
104 return 0;
105 return 1;
106 }
108 static int acpi_thermal_cpufreq_increase(unsigned int cpu)
109 {
110 if (!cpu_has_cpufreq(cpu))
111 return -ENODEV;
113 if (cpufreq_thermal_reduction_pctg[cpu] < 60) {
114 cpufreq_thermal_reduction_pctg[cpu] += 20;
115 cpufreq_update_policy(cpu);
116 return 0;
117 }
119 return -ERANGE;
120 }
122 static int acpi_thermal_cpufreq_decrease(unsigned int cpu)
123 {
124 if (!cpu_has_cpufreq(cpu))
125 return -ENODEV;
127 if (cpufreq_thermal_reduction_pctg[cpu] > 20)
128 cpufreq_thermal_reduction_pctg[cpu] -= 20;
129 else
130 cpufreq_thermal_reduction_pctg[cpu] = 0;
131 cpufreq_update_policy(cpu);
132 /* We reached max freq again and can leave passive mode */
133 return !cpufreq_thermal_reduction_pctg[cpu];
134 }
136 static int acpi_thermal_cpufreq_notifier(struct notifier_block *nb,
137 unsigned long event, void *data)
138 {
139 struct cpufreq_policy *policy = data;
140 unsigned long max_freq = 0;
142 if (event != CPUFREQ_ADJUST)
143 goto out;
145 max_freq =
146 (policy->cpuinfo.max_freq *
147 (100 - cpufreq_thermal_reduction_pctg[policy->cpu])) / 100;
149 cpufreq_verify_within_limits(policy, 0, max_freq);
151 out:
152 return 0;
153 }
155 static struct notifier_block acpi_thermal_cpufreq_notifier_block = {
156 .notifier_call = acpi_thermal_cpufreq_notifier,
157 };
159 void acpi_thermal_cpufreq_init(void)
160 {
161 int i;
163 for (i = 0; i < NR_CPUS; i++)
164 cpufreq_thermal_reduction_pctg[i] = 0;
166 i = cpufreq_register_notifier(&acpi_thermal_cpufreq_notifier_block,
167 CPUFREQ_POLICY_NOTIFIER);
168 if (!i)
169 acpi_thermal_cpufreq_is_init = 1;
170 }
172 void acpi_thermal_cpufreq_exit(void)
173 {
174 if (acpi_thermal_cpufreq_is_init)
175 cpufreq_unregister_notifier
176 (&acpi_thermal_cpufreq_notifier_block,
177 CPUFREQ_POLICY_NOTIFIER);
179 acpi_thermal_cpufreq_is_init = 0;
180 }
182 #else /* ! CONFIG_CPU_FREQ */
/*
 * Stub for builds without CONFIG_CPU_FREQ: there is no cpufreq to lean on,
 * so every request reports -ENODEV.
 */
static int acpi_thermal_cpufreq_increase(unsigned int cpu)
{
	return -ENODEV;
}
/*
 * Stub for builds without CONFIG_CPU_FREQ: there is no cpufreq to lean on,
 * so every request reports -ENODEV.
 */
static int acpi_thermal_cpufreq_decrease(unsigned int cpu)
{
	return -ENODEV;
}
193 #endif
195 int acpi_processor_set_thermal_limit(acpi_handle handle, int type)
196 {
197 int result = 0;
198 struct acpi_processor *pr = NULL;
199 struct acpi_device *device = NULL;
200 int tx = 0, max_tx_px = 0;
203 if ((type < ACPI_PROCESSOR_LIMIT_NONE)
204 || (type > ACPI_PROCESSOR_LIMIT_DECREMENT))
205 return -EINVAL;
207 result = acpi_bus_get_device(handle, &device);
208 if (result)
209 return result;
211 pr = (struct acpi_processor *)acpi_driver_data(device);
212 if (!pr)
213 return -ENODEV;
215 /* Thermal limits are always relative to the current Px/Tx state. */
216 if (pr->flags.throttling)
217 pr->limit.thermal.tx = pr->throttling.state;
219 /*
220 * Our default policy is to only use throttling at the lowest
221 * performance state.
222 */
224 tx = pr->limit.thermal.tx;
226 switch (type) {
228 case ACPI_PROCESSOR_LIMIT_NONE:
229 do {
230 result = acpi_thermal_cpufreq_decrease(pr->id);
231 } while (!result);
232 tx = 0;
233 break;
235 case ACPI_PROCESSOR_LIMIT_INCREMENT:
236 /* if going up: P-states first, T-states later */
238 result = acpi_thermal_cpufreq_increase(pr->id);
239 if (!result)
240 goto end;
241 else if (result == -ERANGE)
242 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
243 "At maximum performance state\n"));
245 if (pr->flags.throttling) {
246 if (tx == (pr->throttling.state_count - 1))
247 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
248 "At maximum throttling state\n"));
249 else
250 tx++;
251 }
252 break;
254 case ACPI_PROCESSOR_LIMIT_DECREMENT:
255 /* if going down: T-states first, P-states later */
257 if (pr->flags.throttling) {
258 if (tx == 0) {
259 max_tx_px = 1;
260 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
261 "At minimum throttling state\n"));
262 } else {
263 tx--;
264 goto end;
265 }
266 }
268 result = acpi_thermal_cpufreq_decrease(pr->id);
269 if (result) {
270 /*
271 * We only could get -ERANGE, 1 or 0.
272 * In the first two cases we reached max freq again.
273 */
274 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
275 "At minimum performance state\n"));
276 max_tx_px = 1;
277 } else
278 max_tx_px = 0;
280 break;
281 }
283 end:
284 if (pr->flags.throttling) {
285 pr->limit.thermal.px = 0;
286 pr->limit.thermal.tx = tx;
288 result = acpi_processor_apply_limit(pr);
289 if (result)
290 printk(KERN_ERR PREFIX "Unable to set thermal limit\n");
292 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Thermal limit now (P%d:T%d)\n",
293 pr->limit.thermal.px, pr->limit.thermal.tx));
294 } else
295 result = 0;
296 if (max_tx_px)
297 return 1;
298 else
299 return result;
300 }
302 int acpi_processor_get_limit_info(struct acpi_processor *pr)
303 {
305 if (!pr)
306 return -EINVAL;
308 if (pr->flags.throttling)
309 pr->flags.limit = 1;
311 return 0;
312 }
314 /* /proc interface */
316 static int acpi_processor_limit_seq_show(struct seq_file *seq, void *offset)
317 {
318 struct acpi_processor *pr = (struct acpi_processor *)seq->private;
321 if (!pr)
322 goto end;
324 if (!pr->flags.limit) {
325 seq_puts(seq, "<not supported>\n");
326 goto end;
327 }
329 seq_printf(seq, "active limit: P%d:T%d\n"
330 "user limit: P%d:T%d\n"
331 "thermal limit: P%d:T%d\n",
332 pr->limit.state.px, pr->limit.state.tx,
333 pr->limit.user.px, pr->limit.user.tx,
334 pr->limit.thermal.px, pr->limit.thermal.tx);
336 end:
337 return 0;
338 }
340 static int acpi_processor_limit_open_fs(struct inode *inode, struct file *file)
341 {
342 return single_open(file, acpi_processor_limit_seq_show,
343 PDE(inode)->data);
344 }
346 static ssize_t acpi_processor_write_limit(struct file * file,
347 const char __user * buffer,
348 size_t count, loff_t * data)
349 {
350 int result = 0;
351 struct seq_file *m = (struct seq_file *)file->private_data;
352 struct acpi_processor *pr = (struct acpi_processor *)m->private;
353 char limit_string[25] = { '\0' };
354 int px = 0;
355 int tx = 0;
358 if (!pr || (count > sizeof(limit_string) - 1)) {
359 return -EINVAL;
360 }
362 if (copy_from_user(limit_string, buffer, count)) {
363 return -EFAULT;
364 }
366 limit_string[count] = '\0';
368 if (sscanf(limit_string, "%d:%d", &px, &tx) != 2) {
369 printk(KERN_ERR PREFIX "Invalid data format\n");
370 return -EINVAL;
371 }
373 if (pr->flags.throttling) {
374 if ((tx < 0) || (tx > (pr->throttling.state_count - 1))) {
375 printk(KERN_ERR PREFIX "Invalid tx\n");
376 return -EINVAL;
377 }
378 pr->limit.user.tx = tx;
379 }
381 result = acpi_processor_apply_limit(pr);
383 return count;
384 }
386 struct file_operations acpi_processor_limit_fops = {
387 .open = acpi_processor_limit_open_fs,
388 .read = seq_read,
389 .write = acpi_processor_write_limit,
390 .llseek = seq_lseek,
391 .release = single_release,
392 };