ia64/linux-2.6.18-xen.hg

view lib/rwsem-spinlock.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation, and records the
limit it reached as the "hard limit". The driver will not try again
until the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, in which case it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to
ask a domain to balloon to more than its allocation, nor would you
expect it to deliberately over-commit memory by setting balloon
targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than returning them to Xen.
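
A rough sketch of the retry behaviour described above (the hunks
themselves are not shown on this page; increase_reservation,
ask_xen_for_pages, add_granted_pages and balloon_timer below are
illustrative names, not the actual driver code):

        static void increase_reservation(unsigned long nr_pages)
        {
                long rc;

                /* ask Xen for nr_pages; rc is how many were actually granted
                 * (illustrative wrapper, not a real hypercall interface) */
                rc = ask_xen_for_pages(nr_pages);
                if (rc < 0)
                        rc = 0;

                /* keep whatever was granted, even on partial success ... */
                add_granted_pages(rc);

                /* ... and retry on a timer instead of recording a hard limit */
                if ((unsigned long)rc < nr_pages)
                        mod_timer(&balloon_timer, jiffies + HZ);
        }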

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067

/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
 * generic spinlock implementation
 *
 * Copyright (c) 2001 David Howells (dhowells@redhat.com).
 * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
 * - Derived also from comments by Linus
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/module.h>
struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        unsigned int flags;
#define RWSEM_WAITING_FOR_READ  0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};
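
/*
 * Note on state encoding: sem->activity > 0 is the number of active
 * readers, sem->activity == -1 marks an active writer, and 0 means
 * the semaphore is currently unowned.
 */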

/*
 * initialise the semaphore
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
                  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /*
         * Make sure we are not reinitializing a held semaphore:
         */
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map(&sem->dep_map, name, key);
#endif
        sem->activity = 0;
        spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
}

/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here, then:
 *   - the 'active count' _reached_ zero
 *   - the 'waiting count' is non-zero
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if wakewrite is non-zero
 */
static inline struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        int woken;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);

        if (!wakewrite) {
                if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
                        goto out;
                goto dont_wake_writers;
        }

        /* if we are allowed to wake writers try to grant a single write lock
         * if there's a writer at the front of the queue
         * - we leave the 'waiting count' incremented to signify potential
         *   contention
         */
        if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
                sem->activity = -1;
                list_del(&waiter->list);
                tsk = waiter->task;
                /* Don't touch waiter after ->task has been NULLed */
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
                goto out;
        }

        /* grant an infinite number of read locks to the front of the queue */
 dont_wake_writers:
        woken = 0;
        while (waiter->flags & RWSEM_WAITING_FOR_READ) {
                struct list_head *next = waiter->list.next;

                list_del(&waiter->list);
                tsk = waiter->task;
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
                woken++;
                if (list_empty(&sem->wait_list))
                        break;
                waiter = list_entry(next, struct rwsem_waiter, list);
        }

        sem->activity += woken;

 out:
        return sem;
}

/*
 * wake a single writer
 */
static inline struct rw_semaphore *
__rwsem_wake_one_writer(struct rw_semaphore *sem)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;

        sem->activity = -1;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        list_del(&waiter->list);

        tsk = waiter->task;
        smp_mb();
        waiter->task = NULL;
        wake_up_process(tsk);
        put_task_struct(tsk);
        return sem;
}

/*
 * get a read lock on the semaphore
 */
void fastcall __sched __down_read(struct rw_semaphore *sem)
{
        struct rwsem_waiter waiter;
        struct task_struct *tsk;

        spin_lock_irq(&sem->wait_lock);

        if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
                /* granted */
                sem->activity++;
                spin_unlock_irq(&sem->wait_lock);
                goto out;
        }

        tsk = current;
        set_task_state(tsk, TASK_UNINTERRUPTIBLE);

        /* set up my own style of waitqueue */
        waiter.task = tsk;
        waiter.flags = RWSEM_WAITING_FOR_READ;
        get_task_struct(tsk);

        list_add_tail(&waiter.list, &sem->wait_list);

        /* we don't need to touch the semaphore struct anymore */
        spin_unlock_irq(&sem->wait_lock);

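        /* The waker hands the lock over by clearing waiter.task; the
         * smp_mb() in the wake paths orders the list_del() before the
         * clearing, so once we observe NULL nobody else still
         * references our on-stack waiter.
         */
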
        /* wait to be given the lock */
        for (;;) {
                if (!waiter.task)
                        break;
                schedule();
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
        }

        tsk->state = TASK_RUNNING;
 out:
        ;
}

/*
 * trylock for reading -- returns 1 if successful, 0 if contention
 */
int fastcall __down_read_trylock(struct rw_semaphore *sem)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&sem->wait_lock, flags);

        if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
                /* granted */
                sem->activity++;
                ret = 1;
        }

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        return ret;
}

/*
 * get a write lock on the semaphore
 * - we increment the waiting count anyway to indicate an exclusive lock
 */
void fastcall __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
{
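        /* note: 'subclass' is not referenced by this spinlock-based
         * implementation; lockdep nesting is handled by the callers */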
        struct rwsem_waiter waiter;
        struct task_struct *tsk;

        spin_lock_irq(&sem->wait_lock);

        if (sem->activity == 0 && list_empty(&sem->wait_list)) {
                /* granted */
                sem->activity = -1;
                spin_unlock_irq(&sem->wait_lock);
                goto out;
        }

        tsk = current;
        set_task_state(tsk, TASK_UNINTERRUPTIBLE);

        /* set up my own style of waitqueue */
        waiter.task = tsk;
        waiter.flags = RWSEM_WAITING_FOR_WRITE;
        get_task_struct(tsk);

        list_add_tail(&waiter.list, &sem->wait_list);

        /* we don't need to touch the semaphore struct anymore */
        spin_unlock_irq(&sem->wait_lock);

        /* wait to be given the lock */
        for (;;) {
                if (!waiter.task)
                        break;
                schedule();
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
        }

        tsk->state = TASK_RUNNING;
 out:
        ;
}

void fastcall __sched __down_write(struct rw_semaphore *sem)
{
        __down_write_nested(sem, 0);
}

/*
 * trylock for writing -- returns 1 if successful, 0 if contention
 */
int fastcall __down_write_trylock(struct rw_semaphore *sem)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&sem->wait_lock, flags);

        if (sem->activity == 0 && list_empty(&sem->wait_list)) {
                /* granted */
                sem->activity = -1;
                ret = 1;
        }

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        return ret;
}

/*
 * release a read lock on the semaphore
 */
void fastcall __up_read(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

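        /* the last reader to leave wakes one waiting writer, if any */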
        if (--sem->activity == 0 && !list_empty(&sem->wait_list))
                sem = __rwsem_wake_one_writer(sem);

        spin_unlock_irqrestore(&sem->wait_lock, flags);
}

/*
 * release a write lock on the semaphore
 */
void fastcall __up_write(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

        sem->activity = 0;
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, 1);

        spin_unlock_irqrestore(&sem->wait_lock, flags);
}

/*
 * downgrade a write lock into a read lock
 * - just wake up any readers at the front of the queue
 */
void fastcall __downgrade_write(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

        sem->activity = 1;
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, 0);

        spin_unlock_irqrestore(&sem->wait_lock, flags);
}

EXPORT_SYMBOL(__init_rwsem);
EXPORT_SYMBOL(__down_read);
EXPORT_SYMBOL(__down_read_trylock);
EXPORT_SYMBOL(__down_write_nested);
EXPORT_SYMBOL(__down_write);
EXPORT_SYMBOL(__down_write_trylock);
EXPORT_SYMBOL(__up_read);
EXPORT_SYMBOL(__up_write);
EXPORT_SYMBOL(__downgrade_write);
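
For reference, the double-underscored functions above are the slow-path
backend; normal kernel code goes through the wrappers declared in
<linux/rwsem.h>. A minimal usage sketch, assuming a hypothetical module
(example_sem, example_value, example_read and example_write are
illustrative names, not part of this file):

#include <linux/rwsem.h>

/* hypothetical shared state protected by an rwsem */
static DECLARE_RWSEM(example_sem);
static int example_value;

/* any number of readers may hold the lock at once (sem->activity > 0) */
int example_read(void)
{
        int v;

        down_read(&example_sem);        /* backed by __down_read() above */
        v = example_value;
        up_read(&example_sem);          /* backed by __up_read() */
        return v;
}

/* a writer gets exclusive access (sem->activity == -1) */
void example_write(int v)
{
        down_write(&example_sem);       /* backed by __down_write() */
        example_value = v;
        up_write(&example_sem);         /* backed by __up_write() */
}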