ia64/linux-2.6.18-xen.hg

view lib/kernel_lock.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation, and records the
limit it reached as the "hard limit". The driver will not try again
until the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
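
To make the retry behaviour concrete, here is a minimal sketch of the approach
described in the changeset message, written against the 2.6.18-era workqueue
API. The names balloon_state, bs, balloon_worker, balloon_process,
try_increase_reservation, try_decrease_reservation and BALLOON_RETRY_DELAY are
illustrative placeholders, not the driver's actual identifiers; only the
control flow is the point: a partial or failed increase keeps whatever pages
were obtained and simply reschedules the worker, instead of recording a hard
limit.

/*
 * Hedged sketch only: placeholder names, not the real balloon driver code.
 */
#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define BALLOON_RETRY_DELAY     HZ      /* retry roughly once per second */

struct balloon_state {
        unsigned long current_pages;    /* pages currently held by the guest */
        unsigned long target_pages;     /* pages the toolstack asked for */
};

static struct balloon_state bs;

static void balloon_process(void *unused);
static DECLARE_WORK(balloon_worker, balloon_process, NULL);

/* Placeholders: each returns the number of pages actually transferred. */
extern unsigned long try_increase_reservation(unsigned long nr_pages);
extern unsigned long try_decrease_reservation(unsigned long nr_pages);

static void balloon_process(void *unused)
{
        long credit = (long)bs.target_pages - (long)bs.current_pages;

        if (credit > 0) {
                /* Keep whatever Xen handed back, even if it is short. */
                unsigned long got = try_increase_reservation(credit);

                bs.current_pages += got;
                if (got < (unsigned long)credit)
                        /* Possibly transient host pressure: retry later. */
                        schedule_delayed_work(&balloon_worker,
                                              BALLOON_RETRY_DELAY);
        } else if (credit < 0) {
                unsigned long released = try_decrease_reservation(-credit);

                bs.current_pages -= released;
                if (released < (unsigned long)-credit)
                        schedule_delayed_work(&balloon_worker,
                                              BALLOON_RETRY_DELAY);
        }
}

The decrease direction already retried in this way; the change described above
is that the increase direction now retries in the same manner rather than
giving up at a recorded hard limit.
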
line source
/*
 * lib/kernel_lock.c
 *
 * This is the traditional BKL - big kernel lock. Largely
 * relegated to obsolescence, but used by various less
 * important (or lazy) subsystems.
 */
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/kallsyms.h>

#ifdef CONFIG_PREEMPT_BKL
/*
 * The 'big kernel semaphore'
 *
 * This mutex is taken and released recursively by lock_kernel()
 * and unlock_kernel(). It is transparently dropped and reacquired
 * over schedule(). It is used to protect legacy code that hasn't
 * been migrated to a proper locking design yet.
 *
 * Note: code locked by this semaphore will only be serialized against
 * other code using the same locking facility. The code guarantees that
 * the task remains on the same CPU.
 *
 * Don't use in new code.
 */
static DECLARE_MUTEX(kernel_sem);

/*
 * Re-acquire the kernel semaphore.
 *
 * This function is called with preemption off.
 *
 * We are executing in schedule() so the code must be extremely careful
 * about recursion, both due to the down() and due to the enabling of
 * preemption. schedule() will re-check the preemption flag after
 * reacquiring the semaphore.
 */
int __lockfunc __reacquire_kernel_lock(void)
{
        struct task_struct *task = current;
        int saved_lock_depth = task->lock_depth;

        BUG_ON(saved_lock_depth < 0);

        task->lock_depth = -1;
        preempt_enable_no_resched();

        down(&kernel_sem);

        preempt_disable();
        task->lock_depth = saved_lock_depth;

        return 0;
}

void __lockfunc __release_kernel_lock(void)
{
        up(&kernel_sem);
}

/*
 * Getting the big kernel semaphore.
 */
void __lockfunc lock_kernel(void)
{
        struct task_struct *task = current;
        int depth = task->lock_depth + 1;

        if (likely(!depth))
                /*
                 * No recursion worries - we set up lock_depth _after_
                 */
                down(&kernel_sem);

        task->lock_depth = depth;
}

void __lockfunc unlock_kernel(void)
{
        struct task_struct *task = current;

        BUG_ON(task->lock_depth < 0);

        if (likely(--task->lock_depth < 0))
                up(&kernel_sem);
}

#else

/*
 * The 'big kernel lock'
 *
 * This spinlock is taken and released recursively by lock_kernel()
 * and unlock_kernel(). It is transparently dropped and reacquired
 * over schedule(). It is used to protect legacy code that hasn't
 * been migrated to a proper locking design yet.
 *
 * Don't use in new code.
 */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);

/*
 * Acquire/release the underlying lock from the scheduler.
 *
 * This is called with preemption disabled, and should
 * return an error value if it cannot get the lock and
 * TIF_NEED_RESCHED gets set.
 *
 * If it successfully gets the lock, it should increment
 * the preemption count like any spinlock does.
 *
 * (This works on UP too - _raw_spin_trylock will never
 * return false in that case)
 */
int __lockfunc __reacquire_kernel_lock(void)
{
        while (!_raw_spin_trylock(&kernel_flag)) {
                if (test_thread_flag(TIF_NEED_RESCHED))
                        return -EAGAIN;
                cpu_relax();
        }
        preempt_disable();
        return 0;
}

void __lockfunc __release_kernel_lock(void)
{
        _raw_spin_unlock(&kernel_flag);
        preempt_enable_no_resched();
}

/*
 * These are the BKL spinlocks - we try to be polite about preemption.
 * If SMP is not on (ie UP preemption), this all goes away because the
 * _raw_spin_trylock() will always succeed.
 */
#ifdef CONFIG_PREEMPT
static inline void __lock_kernel(void)
{
        preempt_disable();
        if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
                /*
                 * If preemption was disabled even before this
                 * was called, there's nothing we can be polite
                 * about - just spin.
                 */
                if (preempt_count() > 1) {
                        _raw_spin_lock(&kernel_flag);
                        return;
                }

                /*
                 * Otherwise, let's wait for the kernel lock
                 * with preemption enabled..
                 */
                do {
                        preempt_enable();
                        while (spin_is_locked(&kernel_flag))
                                cpu_relax();
                        preempt_disable();
                } while (!_raw_spin_trylock(&kernel_flag));
        }
}

#else

/*
 * Non-preemption case - just get the spinlock
 */
static inline void __lock_kernel(void)
{
        _raw_spin_lock(&kernel_flag);
}
#endif

static inline void __unlock_kernel(void)
{
        /*
         * the BKL is not covered by lockdep, so we open-code the
         * unlocking sequence (and thus avoid the dep-chain ops):
         */
        _raw_spin_unlock(&kernel_flag);
        preempt_enable();
}

/*
 * Getting the big kernel lock.
 *
 * This cannot happen asynchronously, so we only need to
 * worry about other CPUs.
 */
void __lockfunc lock_kernel(void)
{
        int depth = current->lock_depth + 1;
        if (likely(!depth))
                __lock_kernel();
        current->lock_depth = depth;
}

void __lockfunc unlock_kernel(void)
{
        BUG_ON(current->lock_depth < 0);
        if (likely(--current->lock_depth < 0))
                __unlock_kernel();
}

#endif

EXPORT_SYMBOL(lock_kernel);
EXPORT_SYMBOL(unlock_kernel);
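
For reference, a minimal hypothetical example of how a legacy code path takes
and releases the lock exported above; foo_legacy_ioctl() and foo_do_ioctl()
are made-up names, not functions from this tree:

#include <linux/smp_lock.h>

/* Placeholder for whatever work the legacy path actually does. */
extern int foo_do_ioctl(unsigned int cmd, unsigned long arg);

/* Hypothetical legacy entry point - not part of lib/kernel_lock.c. */
static int foo_legacy_ioctl(unsigned int cmd, unsigned long arg)
{
        int ret;

        lock_kernel();          /* recursive; dropped across schedule() */
        ret = foo_do_ioctl(cmd, arg);
        unlock_kernel();

        return ret;
}

Because lock_kernel() tracks recursion through current->lock_depth, such a
path may be entered again from code that already holds the BKL, and the lock
is transparently dropped and reacquired if the task sleeps in schedule().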