ia64/linux-2.6.18-xen.hg

annotate drivers/acpi/processor_thermal.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
rev   line source
ian@0 1 /*
ian@0 2 * processor_thermal.c - Passive cooling submodule of the ACPI processor driver
ian@0 3 *
ian@0 4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
ian@0 5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
ian@0 6 * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de>
ian@0 7 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
ian@0 8 * - Added processor hotplug support
ian@0 9 *
ian@0 10 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ian@0 11 *
ian@0 12 * This program is free software; you can redistribute it and/or modify
ian@0 13 * it under the terms of the GNU General Public License as published by
ian@0 14 * the Free Software Foundation; either version 2 of the License, or (at
ian@0 15 * your option) any later version.
ian@0 16 *
ian@0 17 * This program is distributed in the hope that it will be useful, but
ian@0 18 * WITHOUT ANY WARRANTY; without even the implied warranty of
ian@0 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
ian@0 20 * General Public License for more details.
ian@0 21 *
ian@0 22 * You should have received a copy of the GNU General Public License along
ian@0 23 * with this program; if not, write to the Free Software Foundation, Inc.,
ian@0 24 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
ian@0 25 *
ian@0 26 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ian@0 27 */
ian@0 28
ian@0 29 #include <linux/kernel.h>
ian@0 30 #include <linux/module.h>
ian@0 31 #include <linux/init.h>
ian@0 32 #include <linux/cpufreq.h>
ian@0 33 #include <linux/proc_fs.h>
ian@0 34 #include <linux/seq_file.h>
ian@0 35
ian@0 36 #include <asm/uaccess.h>
ian@0 37
ian@0 38 #include <acpi/acpi_bus.h>
ian@0 39 #include <acpi/processor.h>
ian@0 40 #include <acpi/acpi_drivers.h>
ian@0 41
ian@0 42 #define ACPI_PROCESSOR_COMPONENT 0x01000000
ian@0 43 #define ACPI_PROCESSOR_CLASS "processor"
ian@0 44 #define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver"
ian@0 45 #define _COMPONENT ACPI_PROCESSOR_COMPONENT
ian@0 46 ACPI_MODULE_NAME("acpi_processor")
ian@0 47
ian@0 48 /* --------------------------------------------------------------------------
ian@0 49 Limit Interface
ian@0 50 -------------------------------------------------------------------------- */
ian@0 51 static int acpi_processor_apply_limit(struct acpi_processor *pr)
ian@0 52 {
ian@0 53 int result = 0;
ian@0 54 u16 px = 0;
ian@0 55 u16 tx = 0;
ian@0 56
ian@0 57
ian@0 58 if (!pr)
ian@0 59 return -EINVAL;
ian@0 60
ian@0 61 if (!pr->flags.limit)
ian@0 62 return -ENODEV;
ian@0 63
ian@0 64 if (pr->flags.throttling) {
ian@0 65 if (pr->limit.user.tx > tx)
ian@0 66 tx = pr->limit.user.tx;
ian@0 67 if (pr->limit.thermal.tx > tx)
ian@0 68 tx = pr->limit.thermal.tx;
ian@0 69
ian@0 70 result = acpi_processor_set_throttling(pr, tx);
ian@0 71 if (result)
ian@0 72 goto end;
ian@0 73 }
ian@0 74
ian@0 75 pr->limit.state.px = px;
ian@0 76 pr->limit.state.tx = tx;
ian@0 77
ian@0 78 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
ian@0 79 "Processor [%d] limit set to (P%d:T%d)\n", pr->id,
ian@0 80 pr->limit.state.px, pr->limit.state.tx));
ian@0 81
ian@0 82 end:
ian@0 83 if (result)
ian@0 84 printk(KERN_ERR PREFIX "Unable to set limit\n");
ian@0 85
ian@0 86 return result;
ian@0 87 }
ian@0 88
ian@0 89 #ifdef CONFIG_CPU_FREQ
ian@0 90
ian@0 91 /* If a passive cooling situation is detected, primarily CPUfreq is used, as it
ian@0 92 * offers (in most cases) voltage scaling in addition to frequency scaling, and
ian@0 93 * thus a cubic (instead of linear) reduction of energy. Also, we allow for
ian@0 94 * _any_ cpufreq driver and not only the acpi-cpufreq driver.
ian@0 95 */
ian@0 96
ian@0 97 static unsigned int cpufreq_thermal_reduction_pctg[NR_CPUS];
ian@0 98 static unsigned int acpi_thermal_cpufreq_is_init = 0;
ian@0 99
ian@0 100 static int cpu_has_cpufreq(unsigned int cpu)
ian@0 101 {
ian@0 102 struct cpufreq_policy policy;
ian@0 103 if (!acpi_thermal_cpufreq_is_init || cpufreq_get_policy(&policy, cpu))
ian@0 104 return 0;
ian@0 105 return 1;
ian@0 106 }
ian@0 107
ian@0 108 static int acpi_thermal_cpufreq_increase(unsigned int cpu)
ian@0 109 {
ian@0 110 if (!cpu_has_cpufreq(cpu))
ian@0 111 return -ENODEV;
ian@0 112
ian@0 113 if (cpufreq_thermal_reduction_pctg[cpu] < 60) {
ian@0 114 cpufreq_thermal_reduction_pctg[cpu] += 20;
ian@0 115 cpufreq_update_policy(cpu);
ian@0 116 return 0;
ian@0 117 }
ian@0 118
ian@0 119 return -ERANGE;
ian@0 120 }
ian@0 121
ian@0 122 static int acpi_thermal_cpufreq_decrease(unsigned int cpu)
ian@0 123 {
ian@0 124 if (!cpu_has_cpufreq(cpu))
ian@0 125 return -ENODEV;
ian@0 126
ian@0 127 if (cpufreq_thermal_reduction_pctg[cpu] > 20)
ian@0 128 cpufreq_thermal_reduction_pctg[cpu] -= 20;
ian@0 129 else
ian@0 130 cpufreq_thermal_reduction_pctg[cpu] = 0;
ian@0 131 cpufreq_update_policy(cpu);
ian@0 132 /* We reached max freq again and can leave passive mode */
ian@0 133 return !cpufreq_thermal_reduction_pctg[cpu];
ian@0 134 }
ian@0 135
ian@0 136 static int acpi_thermal_cpufreq_notifier(struct notifier_block *nb,
ian@0 137 unsigned long event, void *data)
ian@0 138 {
ian@0 139 struct cpufreq_policy *policy = data;
ian@0 140 unsigned long max_freq = 0;
ian@0 141
ian@0 142 if (event != CPUFREQ_ADJUST)
ian@0 143 goto out;
ian@0 144
ian@0 145 max_freq =
ian@0 146 (policy->cpuinfo.max_freq *
ian@0 147 (100 - cpufreq_thermal_reduction_pctg[policy->cpu])) / 100;
ian@0 148
ian@0 149 cpufreq_verify_within_limits(policy, 0, max_freq);
ian@0 150
ian@0 151 out:
ian@0 152 return 0;
ian@0 153 }
ian@0 154
ian@0 155 static struct notifier_block acpi_thermal_cpufreq_notifier_block = {
ian@0 156 .notifier_call = acpi_thermal_cpufreq_notifier,
ian@0 157 };
ian@0 158
ian@0 159 void acpi_thermal_cpufreq_init(void)
ian@0 160 {
ian@0 161 int i;
ian@0 162
ian@0 163 for (i = 0; i < NR_CPUS; i++)
ian@0 164 cpufreq_thermal_reduction_pctg[i] = 0;
ian@0 165
ian@0 166 i = cpufreq_register_notifier(&acpi_thermal_cpufreq_notifier_block,
ian@0 167 CPUFREQ_POLICY_NOTIFIER);
ian@0 168 if (!i)
ian@0 169 acpi_thermal_cpufreq_is_init = 1;
ian@0 170 }
ian@0 171
ian@0 172 void acpi_thermal_cpufreq_exit(void)
ian@0 173 {
ian@0 174 if (acpi_thermal_cpufreq_is_init)
ian@0 175 cpufreq_unregister_notifier
ian@0 176 (&acpi_thermal_cpufreq_notifier_block,
ian@0 177 CPUFREQ_POLICY_NOTIFIER);
ian@0 178
ian@0 179 acpi_thermal_cpufreq_is_init = 0;
ian@0 180 }
ian@0 181
ian@0 182 #else /* ! CONFIG_CPU_FREQ */
ian@0 183
/* Stub used when CONFIG_CPU_FREQ is not set: always reports -ENODEV. */
static int acpi_thermal_cpufreq_increase(unsigned int cpu)
{
	(void)cpu;	/* unused without cpufreq support */

	return -ENODEV;
}
/* Stub used when CONFIG_CPU_FREQ is not set: always reports -ENODEV. */
static int acpi_thermal_cpufreq_decrease(unsigned int cpu)
{
	(void)cpu;	/* unused without cpufreq support */

	return -ENODEV;
}
ian@0 192
ian@0 193 #endif
ian@0 194
ian@0 195 int acpi_processor_set_thermal_limit(acpi_handle handle, int type)
ian@0 196 {
ian@0 197 int result = 0;
ian@0 198 struct acpi_processor *pr = NULL;
ian@0 199 struct acpi_device *device = NULL;
ian@0 200 int tx = 0, max_tx_px = 0;
ian@0 201
ian@0 202
ian@0 203 if ((type < ACPI_PROCESSOR_LIMIT_NONE)
ian@0 204 || (type > ACPI_PROCESSOR_LIMIT_DECREMENT))
ian@0 205 return -EINVAL;
ian@0 206
ian@0 207 result = acpi_bus_get_device(handle, &device);
ian@0 208 if (result)
ian@0 209 return result;
ian@0 210
ian@0 211 pr = (struct acpi_processor *)acpi_driver_data(device);
ian@0 212 if (!pr)
ian@0 213 return -ENODEV;
ian@0 214
ian@0 215 /* Thermal limits are always relative to the current Px/Tx state. */
ian@0 216 if (pr->flags.throttling)
ian@0 217 pr->limit.thermal.tx = pr->throttling.state;
ian@0 218
ian@0 219 /*
ian@0 220 * Our default policy is to only use throttling at the lowest
ian@0 221 * performance state.
ian@0 222 */
ian@0 223
ian@0 224 tx = pr->limit.thermal.tx;
ian@0 225
ian@0 226 switch (type) {
ian@0 227
ian@0 228 case ACPI_PROCESSOR_LIMIT_NONE:
ian@0 229 do {
ian@0 230 result = acpi_thermal_cpufreq_decrease(pr->id);
ian@0 231 } while (!result);
ian@0 232 tx = 0;
ian@0 233 break;
ian@0 234
ian@0 235 case ACPI_PROCESSOR_LIMIT_INCREMENT:
ian@0 236 /* if going up: P-states first, T-states later */
ian@0 237
ian@0 238 result = acpi_thermal_cpufreq_increase(pr->id);
ian@0 239 if (!result)
ian@0 240 goto end;
ian@0 241 else if (result == -ERANGE)
ian@0 242 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
ian@0 243 "At maximum performance state\n"));
ian@0 244
ian@0 245 if (pr->flags.throttling) {
ian@0 246 if (tx == (pr->throttling.state_count - 1))
ian@0 247 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
ian@0 248 "At maximum throttling state\n"));
ian@0 249 else
ian@0 250 tx++;
ian@0 251 }
ian@0 252 break;
ian@0 253
ian@0 254 case ACPI_PROCESSOR_LIMIT_DECREMENT:
ian@0 255 /* if going down: T-states first, P-states later */
ian@0 256
ian@0 257 if (pr->flags.throttling) {
ian@0 258 if (tx == 0) {
ian@0 259 max_tx_px = 1;
ian@0 260 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
ian@0 261 "At minimum throttling state\n"));
ian@0 262 } else {
ian@0 263 tx--;
ian@0 264 goto end;
ian@0 265 }
ian@0 266 }
ian@0 267
ian@0 268 result = acpi_thermal_cpufreq_decrease(pr->id);
ian@0 269 if (result) {
ian@0 270 /*
ian@0 271 * We only could get -ERANGE, 1 or 0.
ian@0 272 * In the first two cases we reached max freq again.
ian@0 273 */
ian@0 274 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
ian@0 275 "At minimum performance state\n"));
ian@0 276 max_tx_px = 1;
ian@0 277 } else
ian@0 278 max_tx_px = 0;
ian@0 279
ian@0 280 break;
ian@0 281 }
ian@0 282
ian@0 283 end:
ian@0 284 if (pr->flags.throttling) {
ian@0 285 pr->limit.thermal.px = 0;
ian@0 286 pr->limit.thermal.tx = tx;
ian@0 287
ian@0 288 result = acpi_processor_apply_limit(pr);
ian@0 289 if (result)
ian@0 290 printk(KERN_ERR PREFIX "Unable to set thermal limit\n");
ian@0 291
ian@0 292 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Thermal limit now (P%d:T%d)\n",
ian@0 293 pr->limit.thermal.px, pr->limit.thermal.tx));
ian@0 294 } else
ian@0 295 result = 0;
ian@0 296 if (max_tx_px)
ian@0 297 return 1;
ian@0 298 else
ian@0 299 return result;
ian@0 300 }
ian@0 301
ian@0 302 int acpi_processor_get_limit_info(struct acpi_processor *pr)
ian@0 303 {
ian@0 304
ian@0 305 if (!pr)
ian@0 306 return -EINVAL;
ian@0 307
ian@0 308 if (pr->flags.throttling)
ian@0 309 pr->flags.limit = 1;
ian@0 310
ian@0 311 return 0;
ian@0 312 }
ian@0 313
ian@0 314 /* /proc interface */
ian@0 315
ian@0 316 static int acpi_processor_limit_seq_show(struct seq_file *seq, void *offset)
ian@0 317 {
ian@0 318 struct acpi_processor *pr = (struct acpi_processor *)seq->private;
ian@0 319
ian@0 320
ian@0 321 if (!pr)
ian@0 322 goto end;
ian@0 323
ian@0 324 if (!pr->flags.limit) {
ian@0 325 seq_puts(seq, "<not supported>\n");
ian@0 326 goto end;
ian@0 327 }
ian@0 328
ian@0 329 seq_printf(seq, "active limit: P%d:T%d\n"
ian@0 330 "user limit: P%d:T%d\n"
ian@0 331 "thermal limit: P%d:T%d\n",
ian@0 332 pr->limit.state.px, pr->limit.state.tx,
ian@0 333 pr->limit.user.px, pr->limit.user.tx,
ian@0 334 pr->limit.thermal.px, pr->limit.thermal.tx);
ian@0 335
ian@0 336 end:
ian@0 337 return 0;
ian@0 338 }
ian@0 339
ian@0 340 static int acpi_processor_limit_open_fs(struct inode *inode, struct file *file)
ian@0 341 {
ian@0 342 return single_open(file, acpi_processor_limit_seq_show,
ian@0 343 PDE(inode)->data);
ian@0 344 }
ian@0 345
ian@0 346 static ssize_t acpi_processor_write_limit(struct file * file,
ian@0 347 const char __user * buffer,
ian@0 348 size_t count, loff_t * data)
ian@0 349 {
ian@0 350 int result = 0;
ian@0 351 struct seq_file *m = (struct seq_file *)file->private_data;
ian@0 352 struct acpi_processor *pr = (struct acpi_processor *)m->private;
ian@0 353 char limit_string[25] = { '\0' };
ian@0 354 int px = 0;
ian@0 355 int tx = 0;
ian@0 356
ian@0 357
ian@0 358 if (!pr || (count > sizeof(limit_string) - 1)) {
ian@0 359 return -EINVAL;
ian@0 360 }
ian@0 361
ian@0 362 if (copy_from_user(limit_string, buffer, count)) {
ian@0 363 return -EFAULT;
ian@0 364 }
ian@0 365
ian@0 366 limit_string[count] = '\0';
ian@0 367
ian@0 368 if (sscanf(limit_string, "%d:%d", &px, &tx) != 2) {
ian@0 369 printk(KERN_ERR PREFIX "Invalid data format\n");
ian@0 370 return -EINVAL;
ian@0 371 }
ian@0 372
ian@0 373 if (pr->flags.throttling) {
ian@0 374 if ((tx < 0) || (tx > (pr->throttling.state_count - 1))) {
ian@0 375 printk(KERN_ERR PREFIX "Invalid tx\n");
ian@0 376 return -EINVAL;
ian@0 377 }
ian@0 378 pr->limit.user.tx = tx;
ian@0 379 }
ian@0 380
ian@0 381 result = acpi_processor_apply_limit(pr);
ian@0 382
ian@0 383 return count;
ian@0 384 }
ian@0 385
ian@0 386 struct file_operations acpi_processor_limit_fops = {
ian@0 387 .open = acpi_processor_limit_open_fs,
ian@0 388 .read = seq_read,
ian@0 389 .write = acpi_processor_write_limit,
ian@0 390 .llseek = seq_lseek,
ian@0 391 .release = single_release,
ian@0 392 };