ia64/linux-2.6.18-xen.hg

view drivers/acpi/processor_extcntl.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents e86f9e05144a
children
line source
1 /*
2 * processor_extcntl.c - channel to external control logic
3 *
4 * Copyright (C) 2008, Intel corporation
5 *
6 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21 *
22 */
24 #include <linux/kernel.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/acpi.h>
28 #include <linux/pm.h>
29 #include <linux/cpu.h>
31 #include <acpi/processor.h>
33 #define ACPI_PROCESSOR_COMPONENT 0x01000000
34 #define ACPI_PROCESSOR_CLASS "processor"
35 #define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver"
36 #define _COMPONENT ACPI_PROCESSOR_COMPONENT
37 ACPI_MODULE_NAME("acpi_processor")
39 static int processor_extcntl_parse_csd(struct acpi_processor *pr);
40 static int processor_extcntl_get_performance(struct acpi_processor *pr);
41 /*
42 * External processor control logic may register its own set of
43 * ops to receive ACPI-related notifications. A VMM is one example.
44 */
45 const struct processor_extcntl_ops *processor_extcntl_ops;
46 EXPORT_SYMBOL(processor_extcntl_ops);
48 static int processor_notify_smm(void)
49 {
50 acpi_status status;
51 static int is_done = 0;
53 /* only need successfully notify BIOS once */
54 /* avoid double notification which may lead to unexpected result */
55 if (is_done)
56 return 0;
58 /* Can't write pstate_cnt to smi_cmd if either value is zero */
59 if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) {
60 ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n"));
61 return 0;
62 }
64 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
65 "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
66 acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd));
68 /* FADT v1 doesn't support pstate_cnt, many BIOS vendors use
69 * it anyway, so we need to support it... */
70 if (acpi_fadt_is_v1) {
71 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
72 "Using v1.0 FADT reserved value for pstate_cnt\n"));
73 }
75 status = acpi_os_write_port(acpi_fadt.smi_cmd,
76 (u32) acpi_fadt.pstate_cnt, 8);
77 if (ACPI_FAILURE(status))
78 return status;
80 is_done = 1;
82 return 0;
83 }
85 int processor_notify_external(struct acpi_processor *pr, int event, int type)
86 {
87 int ret = -EINVAL;
89 if (!processor_cntl_external())
90 return -EINVAL;
92 switch (event) {
93 case PROCESSOR_PM_INIT:
94 case PROCESSOR_PM_CHANGE:
95 if ((type >= PM_TYPE_MAX) ||
96 !processor_extcntl_ops->pm_ops[type])
97 break;
99 ret = processor_extcntl_ops->pm_ops[type](pr, event);
100 break;
101 case PROCESSOR_HOTPLUG:
102 if (processor_extcntl_ops->hotplug)
103 ret = processor_extcntl_ops->hotplug(pr, type);
104 break;
105 default:
106 printk(KERN_ERR "Unsupport processor events %d.\n", event);
107 break;
108 }
110 return ret;
111 }
113 /*
114 * External control logic can decide to grab full or part of physical
115 * processor control bits. Take a VMM for example, physical processors
116 * are owned by VMM and thus existence information like hotplug is
117 * always required to be notified to VMM. Similar is processor idle
118 * state which is also necessarily controlled by VMM. But for other
119 * control bits like performance/throttle states, VMM may choose to
120 * control or not upon its own policy.
121 */
122 void processor_extcntl_init(void)
123 {
124 if (!processor_extcntl_ops)
125 arch_acpi_processor_init_extcntl(&processor_extcntl_ops);
126 }
/*
 * Called from ACPI processor init to do the housekeeping that the external
 * control model needs. This is the place for dependency-parsing stubs for
 * idle and performance states, since that information may otherwise be
 * unavailable once control is split away from dom0 logic such as the
 * cpufreq driver.
 *
 * The helpers' return values are not checked here; this function always
 * returns 0.
 */
int processor_extcntl_prepare(struct acpi_processor *pr)
{
	/* C-state dependency information, when idle states are external */
	if (processor_pm_external())
		processor_extcntl_parse_csd(pr);

	/* Performance states, when they are controlled externally */
	if (processor_pmperf_external())
		processor_extcntl_get_performance(pr);

	return 0;
}
148 /*
149 * Currently no _CSD is implemented which is why existing ACPI code
150 * doesn't parse _CSD at all. But to keep interface complete with
151 * external control logic, we put a placeholder here for future
152 * compatibility.
153 */
154 static int processor_extcntl_parse_csd(struct acpi_processor *pr)
155 {
156 int i;
158 for (i = 0; i < pr->power.count; i++) {
159 if (!pr->power.states[i].valid)
160 continue;
162 /* No dependency by default */
163 pr->power.states[i].domain_info = NULL;
164 pr->power.states[i].csd_count = 0;
165 }
167 return 0;
168 }
170 /*
171 * Existing ACPI module does parse performance states at some point,
172 * when acpi-cpufreq driver is loaded which however is something
173 * we'd like to disable to avoid confliction with external control
174 * logic. So we have to collect raw performance information here
175 * when ACPI processor object is found and started.
176 */
177 static int processor_extcntl_get_performance(struct acpi_processor *pr)
178 {
179 int ret;
180 struct acpi_processor_performance *perf;
181 struct acpi_psd_package *pdomain;
183 if (pr->performance)
184 return -EBUSY;
186 perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL);
187 if (!perf)
188 return -ENOMEM;
190 pr->performance = perf;
191 /* Get basic performance state information */
192 ret = acpi_processor_get_performance_info(pr);
193 if (ret < 0)
194 goto err_out;
196 /*
197 * Well, here we need retrieve performance dependency information
198 * from _PSD object. The reason why existing interface is not used
199 * is due to the reason that existing interface sticks to Linux cpu
200 * id to construct some bitmap, however we want to split ACPI
201 * processor objects from Linux cpu id logic. For example, even
202 * when Linux is configured as UP, we still want to parse all ACPI
203 * processor objects to external logic. In this case, it's preferred
204 * to use ACPI ID instead.
205 */
206 pdomain = &pr->performance->domain_info;
207 pdomain->num_processors = 0;
208 ret = acpi_processor_get_psd(pr);
209 if (ret < 0) {
210 /*
211 * _PSD is optional - assume no coordination if absent (or
212 * broken), matching native kernels' behavior.
213 */
214 pdomain->num_entries = ACPI_PSD_REV0_ENTRIES;
215 pdomain->revision = ACPI_PSD_REV0_REVISION;
216 pdomain->domain = pr->acpi_id;
217 pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL;
218 pdomain->num_processors = 1;
219 }
221 /* Some sanity check */
222 if ((pdomain->revision != ACPI_PSD_REV0_REVISION) ||
223 (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) ||
224 ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) &&
225 (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) &&
226 (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) {
227 ret = -EINVAL;
228 goto err_out;
229 }
231 /* Last step is to notify BIOS that external logic exists */
232 processor_notify_smm();
234 processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF);
236 return 0;
237 err_out:
238 pr->performance = NULL;
239 kfree(perf);
240 return ret;
241 }