ia64/linux-2.6.18-xen.hg

view lib/rwsem.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /* rwsem.c: R/W semaphores: contention handling functions
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from arch/i386/kernel/semaphore.c
5 */
6 #include <linux/rwsem.h>
7 #include <linux/sched.h>
8 #include <linux/init.h>
9 #include <linux/module.h>
11 /*
12 * Initialize an rwsem:
13 */
14 void __init_rwsem(struct rw_semaphore *sem, const char *name,
15 struct lock_class_key *key)
16 {
17 #ifdef CONFIG_DEBUG_LOCK_ALLOC
18 /*
19 * Make sure we are not reinitializing a held semaphore:
20 */
21 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
22 lockdep_init_map(&sem->dep_map, name, key);
23 #endif
24 sem->count = RWSEM_UNLOCKED_VALUE;
25 spin_lock_init(&sem->wait_lock);
26 INIT_LIST_HEAD(&sem->wait_list);
27 }
29 EXPORT_SYMBOL(__init_rwsem);
31 struct rwsem_waiter {
32 struct list_head list;
33 struct task_struct *task;
34 unsigned int flags;
35 #define RWSEM_WAITING_FOR_READ 0x00000001
36 #define RWSEM_WAITING_FOR_WRITE 0x00000002
37 };
39 /*
40 * handle the lock release when processes blocked on it that can now run
41 * - if we come here from up_xxxx(), then:
42 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
43 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
44 * - there must be someone on the queue
45 * - the spinlock must be held by the caller
46 * - woken process blocks are discarded from the list after having task zeroed
47 * - writers are only woken if downgrading is false
48 */
49 static inline struct rw_semaphore *
50 __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
51 {
52 struct rwsem_waiter *waiter;
53 struct task_struct *tsk;
54 struct list_head *next;
55 signed long oldcount, woken, loop;
57 if (downgrading)
58 goto dont_wake_writers;
60 /* if we came through an up_xxxx() call, we only only wake someone up
61 * if we can transition the active part of the count from 0 -> 1
62 */
63 try_again:
64 oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
65 - RWSEM_ACTIVE_BIAS;
66 if (oldcount & RWSEM_ACTIVE_MASK)
67 goto undo;
69 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
71 /* try to grant a single write lock if there's a writer at the front
72 * of the queue - note we leave the 'active part' of the count
73 * incremented by 1 and the waiting part incremented by 0x00010000
74 */
75 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
76 goto readers_only;
78 /* We must be careful not to touch 'waiter' after we set ->task = NULL.
79 * It is an allocated on the waiter's stack and may become invalid at
80 * any time after that point (due to a wakeup from another source).
81 */
82 list_del(&waiter->list);
83 tsk = waiter->task;
84 smp_mb();
85 waiter->task = NULL;
86 wake_up_process(tsk);
87 put_task_struct(tsk);
88 goto out;
90 /* don't want to wake any writers */
91 dont_wake_writers:
92 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
93 if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
94 goto out;
96 /* grant an infinite number of read locks to the readers at the front
97 * of the queue
98 * - note we increment the 'active part' of the count by the number of
99 * readers before waking any processes up
100 */
101 readers_only:
102 woken = 0;
103 do {
104 woken++;
106 if (waiter->list.next == &sem->wait_list)
107 break;
109 waiter = list_entry(waiter->list.next,
110 struct rwsem_waiter, list);
112 } while (waiter->flags & RWSEM_WAITING_FOR_READ);
114 loop = woken;
115 woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
116 if (!downgrading)
117 /* we'd already done one increment earlier */
118 woken -= RWSEM_ACTIVE_BIAS;
120 rwsem_atomic_add(woken, sem);
122 next = sem->wait_list.next;
123 for (; loop > 0; loop--) {
124 waiter = list_entry(next, struct rwsem_waiter, list);
125 next = waiter->list.next;
126 tsk = waiter->task;
127 smp_mb();
128 waiter->task = NULL;
129 wake_up_process(tsk);
130 put_task_struct(tsk);
131 }
133 sem->wait_list.next = next;
134 next->prev = &sem->wait_list;
136 out:
137 return sem;
139 /* undo the change to count, but check for a transition 1->0 */
140 undo:
141 if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0)
142 goto out;
143 goto try_again;
144 }
146 /*
147 * wait for a lock to be granted
148 */
149 static inline struct rw_semaphore *
150 rwsem_down_failed_common(struct rw_semaphore *sem,
151 struct rwsem_waiter *waiter, signed long adjustment)
152 {
153 struct task_struct *tsk = current;
154 signed long count;
156 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
158 /* set up my own style of waitqueue */
159 spin_lock_irq(&sem->wait_lock);
160 waiter->task = tsk;
161 get_task_struct(tsk);
163 list_add_tail(&waiter->list, &sem->wait_list);
165 /* we're now waiting on the lock, but no longer actively read-locking */
166 count = rwsem_atomic_update(adjustment, sem);
168 /* if there are no active locks, wake the front queued process(es) up */
169 if (!(count & RWSEM_ACTIVE_MASK))
170 sem = __rwsem_do_wake(sem, 0);
172 spin_unlock_irq(&sem->wait_lock);
174 /* wait to be given the lock */
175 for (;;) {
176 if (!waiter->task)
177 break;
178 schedule();
179 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
180 }
182 tsk->state = TASK_RUNNING;
184 return sem;
185 }
187 /*
188 * wait for the read lock to be granted
189 */
190 struct rw_semaphore fastcall __sched *
191 rwsem_down_read_failed(struct rw_semaphore *sem)
192 {
193 struct rwsem_waiter waiter;
195 waiter.flags = RWSEM_WAITING_FOR_READ;
196 rwsem_down_failed_common(sem, &waiter,
197 RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
198 return sem;
199 }
201 /*
202 * wait for the write lock to be granted
203 */
204 struct rw_semaphore fastcall __sched *
205 rwsem_down_write_failed(struct rw_semaphore *sem)
206 {
207 struct rwsem_waiter waiter;
209 waiter.flags = RWSEM_WAITING_FOR_WRITE;
210 rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
212 return sem;
213 }
215 /*
216 * handle waking up a waiter on the semaphore
217 * - up_read/up_write has decremented the active part of count if we come here
218 */
219 struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
220 {
221 unsigned long flags;
223 spin_lock_irqsave(&sem->wait_lock, flags);
225 /* do nothing if list empty */
226 if (!list_empty(&sem->wait_list))
227 sem = __rwsem_do_wake(sem, 0);
229 spin_unlock_irqrestore(&sem->wait_lock, flags);
231 return sem;
232 }
234 /*
235 * downgrade a write lock into a read lock
236 * - caller incremented waiting part of count and discovered it still negative
237 * - just wake up any readers at the front of the queue
238 */
239 struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
240 {
241 unsigned long flags;
243 spin_lock_irqsave(&sem->wait_lock, flags);
245 /* do nothing if list empty */
246 if (!list_empty(&sem->wait_list))
247 sem = __rwsem_do_wake(sem, 1);
249 spin_unlock_irqrestore(&sem->wait_lock, flags);
251 return sem;
252 }
/* Contention-handling entry points defined above, exported for use outside this file. */
EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);