ia64/linux-2.6.18-xen.hg

view arch/sparc64/kernel/us2e_cpufreq.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /* us2e_cpufreq.c: UltraSPARC-IIe cpu frequency support
2 *
3 * Copyright (C) 2003 David S. Miller (davem@redhat.com)
4 *
5 * Many thanks to Dominik Brodowski for fixing up the cpufreq
6 * infrastructure in order to make this driver easier to implement.
7 */
9 #include <linux/kernel.h>
10 #include <linux/module.h>
11 #include <linux/sched.h>
12 #include <linux/smp.h>
13 #include <linux/cpufreq.h>
14 #include <linux/threads.h>
15 #include <linux/slab.h>
16 #include <linux/delay.h>
17 #include <linux/init.h>
19 #include <asm/asi.h>
20 #include <asm/timer.h>
22 static struct cpufreq_driver *cpufreq_us2e_driver;
24 struct us2e_freq_percpu_info {
25 struct cpufreq_frequency_table table[6];
26 };
28 /* Indexed by cpu number. */
29 static struct us2e_freq_percpu_info *us2e_freq_table;
/* Addresses of the HBIRD memory-control-0 and ESTAR-mode registers, in
 * the form expected by read_hbreg() and write_hbreg().
 */
#define HBIRD_MEM_CNTL0_ADDR	0x1fe0000f010UL
#define HBIRD_ESTAR_MODE_ADDR	0x1fe0000f080UL

/* UltraSPARC-IIe has five dividers: 1, 2, 4, 6, and 8.  They are
 * selected through the low three bits of the ESTAR mode control
 * register.  Note that the encodings do not follow divider order:
 * divide-by-4 is 3 while divide-by-6 is 2.
 */
#define ESTAR_MODE_DIV_1	0x0UL
#define ESTAR_MODE_DIV_2	0x1UL
#define ESTAR_MODE_DIV_4	0x3UL
#define ESTAR_MODE_DIV_6	0x2UL
#define ESTAR_MODE_DIV_8	0x4UL
#define ESTAR_MODE_DIV_MASK	0x7UL

/* Memory control register 0: self-refresh enable bit and the refresh
 * count field (mask and shift), followed by the constants used by
 * frob_mem_refresh() to derive a refresh count from the clock tick.
 * NOTE(review): MCTRL0_REFR_INTERVAL is presumably in nanoseconds, given
 * the division by 1000000000 where it is used -- confirm against the
 * IIe manual.
 */
#define MCTRL0_SREFRESH_ENAB	0x10000UL
#define MCTRL0_REFR_COUNT_MASK	0x7f00UL
#define MCTRL0_REFR_COUNT_SHIFT	8
#define MCTRL0_REFR_INTERVAL	7800
#define MCTRL0_REFR_CLKS_P_CNT	64
50 static unsigned long read_hbreg(unsigned long addr)
51 {
52 unsigned long ret;
54 __asm__ __volatile__("ldxa [%1] %2, %0"
55 : "=&r" (ret)
56 : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
57 return ret;
58 }
60 static void write_hbreg(unsigned long addr, unsigned long val)
61 {
62 __asm__ __volatile__("stxa %0, [%1] %2\n\t"
63 "membar #Sync"
64 : /* no outputs */
65 : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E)
66 : "memory");
67 if (addr == HBIRD_ESTAR_MODE_ADDR) {
68 /* Need to wait 16 clock cycles for the PLL to lock. */
69 udelay(1);
70 }
71 }
73 static void self_refresh_ctl(int enable)
74 {
75 unsigned long mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR);
77 if (enable)
78 mctrl |= MCTRL0_SREFRESH_ENAB;
79 else
80 mctrl &= ~MCTRL0_SREFRESH_ENAB;
81 write_hbreg(HBIRD_MEM_CNTL0_ADDR, mctrl);
82 (void) read_hbreg(HBIRD_MEM_CNTL0_ADDR);
83 }
85 static void frob_mem_refresh(int cpu_slowing_down,
86 unsigned long clock_tick,
87 unsigned long old_divisor, unsigned long divisor)
88 {
89 unsigned long old_refr_count, refr_count, mctrl;
91 refr_count = (clock_tick * MCTRL0_REFR_INTERVAL);
92 refr_count /= (MCTRL0_REFR_CLKS_P_CNT * divisor * 1000000000UL);
94 mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR);
95 old_refr_count = (mctrl & MCTRL0_REFR_COUNT_MASK)
96 >> MCTRL0_REFR_COUNT_SHIFT;
98 mctrl &= ~MCTRL0_REFR_COUNT_MASK;
99 mctrl |= refr_count << MCTRL0_REFR_COUNT_SHIFT;
100 write_hbreg(HBIRD_MEM_CNTL0_ADDR, mctrl);
101 mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR);
103 if (cpu_slowing_down && !(mctrl & MCTRL0_SREFRESH_ENAB)) {
104 unsigned long usecs;
106 /* We have to wait for both refresh counts (old
107 * and new) to go to zero.
108 */
109 usecs = (MCTRL0_REFR_CLKS_P_CNT *
110 (refr_count + old_refr_count) *
111 1000000UL *
112 old_divisor) / clock_tick;
113 udelay(usecs + 1UL);
114 }
115 }
117 static void us2e_transition(unsigned long estar, unsigned long new_bits,
118 unsigned long clock_tick,
119 unsigned long old_divisor, unsigned long divisor)
120 {
121 unsigned long flags;
123 local_irq_save(flags);
125 estar &= ~ESTAR_MODE_DIV_MASK;
127 /* This is based upon the state transition diagram in the IIe manual. */
128 if (old_divisor == 2 && divisor == 1) {
129 self_refresh_ctl(0);
130 write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits);
131 frob_mem_refresh(0, clock_tick, old_divisor, divisor);
132 } else if (old_divisor == 1 && divisor == 2) {
133 frob_mem_refresh(1, clock_tick, old_divisor, divisor);
134 write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits);
135 self_refresh_ctl(1);
136 } else if (old_divisor == 1 && divisor > 2) {
137 us2e_transition(estar, ESTAR_MODE_DIV_2, clock_tick,
138 1, 2);
139 us2e_transition(estar, new_bits, clock_tick,
140 2, divisor);
141 } else if (old_divisor > 2 && divisor == 1) {
142 us2e_transition(estar, ESTAR_MODE_DIV_2, clock_tick,
143 old_divisor, 2);
144 us2e_transition(estar, new_bits, clock_tick,
145 2, divisor);
146 } else if (old_divisor < divisor) {
147 frob_mem_refresh(0, clock_tick, old_divisor, divisor);
148 write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits);
149 } else if (old_divisor > divisor) {
150 write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits);
151 frob_mem_refresh(1, clock_tick, old_divisor, divisor);
152 } else {
153 BUG();
154 }
156 local_irq_restore(flags);
157 }
159 static unsigned long index_to_estar_mode(unsigned int index)
160 {
161 switch (index) {
162 case 0:
163 return ESTAR_MODE_DIV_1;
165 case 1:
166 return ESTAR_MODE_DIV_2;
168 case 2:
169 return ESTAR_MODE_DIV_4;
171 case 3:
172 return ESTAR_MODE_DIV_6;
174 case 4:
175 return ESTAR_MODE_DIV_8;
177 default:
178 BUG();
179 };
180 }
/* Translate a frequency table index (0..4) into the clock divisor it
 * stands for: 1, 2, 4, 6 or 8.  Any other index is a BUG().
 */
static unsigned long index_to_divisor(unsigned int index)
{
	static const unsigned long divisors[] = { 1, 2, 4, 6, 8 };

	if (index >= sizeof(divisors) / sizeof(divisors[0]))
		BUG();

	return divisors[index];
}
205 static unsigned long estar_to_divisor(unsigned long estar)
206 {
207 unsigned long ret;
209 switch (estar & ESTAR_MODE_DIV_MASK) {
210 case ESTAR_MODE_DIV_1:
211 ret = 1;
212 break;
213 case ESTAR_MODE_DIV_2:
214 ret = 2;
215 break;
216 case ESTAR_MODE_DIV_4:
217 ret = 4;
218 break;
219 case ESTAR_MODE_DIV_6:
220 ret = 6;
221 break;
222 case ESTAR_MODE_DIV_8:
223 ret = 8;
224 break;
225 default:
226 BUG();
227 };
229 return ret;
230 }
232 static unsigned int us2e_freq_get(unsigned int cpu)
233 {
234 cpumask_t cpus_allowed;
235 unsigned long clock_tick, estar;
237 if (!cpu_online(cpu))
238 return 0;
240 cpus_allowed = current->cpus_allowed;
241 set_cpus_allowed(current, cpumask_of_cpu(cpu));
243 clock_tick = sparc64_get_clock_tick(cpu) / 1000;
244 estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR);
246 set_cpus_allowed(current, cpus_allowed);
248 return clock_tick / estar_to_divisor(estar);
249 }
251 static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
252 {
253 unsigned long new_bits, new_freq;
254 unsigned long clock_tick, divisor, old_divisor, estar;
255 cpumask_t cpus_allowed;
256 struct cpufreq_freqs freqs;
258 if (!cpu_online(cpu))
259 return;
261 cpus_allowed = current->cpus_allowed;
262 set_cpus_allowed(current, cpumask_of_cpu(cpu));
264 new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000;
265 new_bits = index_to_estar_mode(index);
266 divisor = index_to_divisor(index);
267 new_freq /= divisor;
269 estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR);
271 old_divisor = estar_to_divisor(estar);
273 freqs.old = clock_tick / old_divisor;
274 freqs.new = new_freq;
275 freqs.cpu = cpu;
276 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
278 if (old_divisor != divisor)
279 us2e_transition(estar, new_bits, clock_tick * 1000,
280 old_divisor, divisor);
282 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
284 set_cpus_allowed(current, cpus_allowed);
285 }
287 static int us2e_freq_target(struct cpufreq_policy *policy,
288 unsigned int target_freq,
289 unsigned int relation)
290 {
291 unsigned int new_index = 0;
293 if (cpufreq_frequency_table_target(policy,
294 &us2e_freq_table[policy->cpu].table[0],
295 target_freq, relation, &new_index))
296 return -EINVAL;
298 us2e_set_cpu_divider_index(policy->cpu, new_index);
300 return 0;
301 }
303 static int us2e_freq_verify(struct cpufreq_policy *policy)
304 {
305 return cpufreq_frequency_table_verify(policy,
306 &us2e_freq_table[policy->cpu].table[0]);
307 }
309 static int __init us2e_freq_cpu_init(struct cpufreq_policy *policy)
310 {
311 unsigned int cpu = policy->cpu;
312 unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000;
313 struct cpufreq_frequency_table *table =
314 &us2e_freq_table[cpu].table[0];
316 table[0].index = 0;
317 table[0].frequency = clock_tick / 1;
318 table[1].index = 1;
319 table[1].frequency = clock_tick / 2;
320 table[2].index = 2;
321 table[2].frequency = clock_tick / 4;
322 table[2].index = 3;
323 table[2].frequency = clock_tick / 6;
324 table[2].index = 4;
325 table[2].frequency = clock_tick / 8;
326 table[2].index = 5;
327 table[3].frequency = CPUFREQ_TABLE_END;
329 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
330 policy->cpuinfo.transition_latency = 0;
331 policy->cur = clock_tick;
333 return cpufreq_frequency_table_cpuinfo(policy, table);
334 }
336 static int us2e_freq_cpu_exit(struct cpufreq_policy *policy)
337 {
338 if (cpufreq_us2e_driver)
339 us2e_set_cpu_divider_index(policy->cpu, 0);
341 return 0;
342 }
344 static int __init us2e_freq_init(void)
345 {
346 unsigned long manuf, impl, ver;
347 int ret;
349 if (tlb_type != spitfire)
350 return -ENODEV;
352 __asm__("rdpr %%ver, %0" : "=r" (ver));
353 manuf = ((ver >> 48) & 0xffff);
354 impl = ((ver >> 32) & 0xffff);
356 if (manuf == 0x17 && impl == 0x13) {
357 struct cpufreq_driver *driver;
359 ret = -ENOMEM;
360 driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL);
361 if (!driver)
362 goto err_out;
364 us2e_freq_table = kzalloc(
365 (NR_CPUS * sizeof(struct us2e_freq_percpu_info)),
366 GFP_KERNEL);
367 if (!us2e_freq_table)
368 goto err_out;
370 driver->init = us2e_freq_cpu_init;
371 driver->verify = us2e_freq_verify;
372 driver->target = us2e_freq_target;
373 driver->get = us2e_freq_get;
374 driver->exit = us2e_freq_cpu_exit;
375 driver->owner = THIS_MODULE,
376 strcpy(driver->name, "UltraSPARC-IIe");
378 cpufreq_us2e_driver = driver;
379 ret = cpufreq_register_driver(driver);
380 if (ret)
381 goto err_out;
383 return 0;
385 err_out:
386 if (driver) {
387 kfree(driver);
388 cpufreq_us2e_driver = NULL;
389 }
390 kfree(us2e_freq_table);
391 us2e_freq_table = NULL;
392 return ret;
393 }
395 return -ENODEV;
396 }
398 static void __exit us2e_freq_exit(void)
399 {
400 if (cpufreq_us2e_driver) {
401 cpufreq_unregister_driver(cpufreq_us2e_driver);
402 kfree(cpufreq_us2e_driver);
403 cpufreq_us2e_driver = NULL;
404 kfree(us2e_freq_table);
405 us2e_freq_table = NULL;
406 }
407 }
409 MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
410 MODULE_DESCRIPTION("cpufreq driver for UltraSPARC-IIe");
411 MODULE_LICENSE("GPL");
413 module_init(us2e_freq_init);
414 module_exit(us2e_freq_exit);