ia64/linux-2.6.18-xen.hg

view lib/prio_tree.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive less pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * lib/prio_tree.c - priority search tree
3 *
4 * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
5 *
6 * This file is released under the GPL v2.
7 *
8 * Based on the radix priority search tree proposed by Edward M. McCreight
9 * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
10 *
11 * 02Feb2004 Initial version
12 */
14 #include <linux/init.h>
15 #include <linux/mm.h>
16 #include <linux/prio_tree.h>
18 /*
19 * A clever mix of heap and radix trees forms a radix priority search tree (PST)
20 * which is useful for storing intervals, e.g, we can consider a vma as a closed
21 * interval of file pages [offset_begin, offset_end], and store all vmas that
22 * map a file in a PST. Then, using the PST, we can answer a stabbing query,
23 * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
24 * given input interval X (a set of consecutive file pages), in "O(log n + m)"
25 * time where 'log n' is the height of the PST, and 'm' is the number of stored
26 * intervals (vmas) that overlap (map) with the input interval X (the set of
27 * consecutive file pages).
28 *
29 * In our implementation, we store closed intervals of the form [radix_index,
30 * heap_index]. We assume that always radix_index <= heap_index. McCreight's PST
31 * is designed for storing intervals with unique radix indices, i.e., each
32 * interval have different radix_index. However, this limitation can be easily
33 * overcome by using the size, i.e., heap_index - radix_index, as part of the
34 * index, so we index the tree using [(radix_index,size), heap_index].
35 *
36 * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
37 * machine, the maximum height of a PST can be 64. We can use a balanced version
38 * of the priority search tree to optimize the tree height, but the balanced
39 * tree proposed by McCreight is too complex and memory-hungry for our purpose.
40 */
42 /*
43 * The following macros are used for implementing prio_tree for i_mmap
44 */
46 #define RADIX_INDEX(vma) ((vma)->vm_pgoff)
47 #define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
48 /* avoid overflow */
49 #define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
52 static void get_index(const struct prio_tree_root *root,
53 const struct prio_tree_node *node,
54 unsigned long *radix, unsigned long *heap)
55 {
56 if (root->raw) {
57 struct vm_area_struct *vma = prio_tree_entry(
58 node, struct vm_area_struct, shared.prio_tree_node);
60 *radix = RADIX_INDEX(vma);
61 *heap = HEAP_INDEX(vma);
62 }
63 else {
64 *radix = node->start;
65 *heap = node->last;
66 }
67 }
69 static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
71 void __init prio_tree_init(void)
72 {
73 unsigned int i;
75 for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
76 index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
77 index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
78 }
80 /*
81 * Maximum heap_index that can be stored in a PST with index_bits bits
82 */
83 static inline unsigned long prio_tree_maxindex(unsigned int bits)
84 {
85 return index_bits_to_maxindex[bits - 1];
86 }
88 /*
89 * Extend a priority search tree so that it can store a node with heap_index
90 * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
91 * However, this function is used rarely and the common case performance is
92 * not bad.
93 */
94 static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
95 struct prio_tree_node *node, unsigned long max_heap_index)
96 {
97 struct prio_tree_node *first = NULL, *prev, *last = NULL;
99 if (max_heap_index > prio_tree_maxindex(root->index_bits))
100 root->index_bits++;
102 while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
103 root->index_bits++;
105 if (prio_tree_empty(root))
106 continue;
108 if (first == NULL) {
109 first = root->prio_tree_node;
110 prio_tree_remove(root, root->prio_tree_node);
111 INIT_PRIO_TREE_NODE(first);
112 last = first;
113 } else {
114 prev = last;
115 last = root->prio_tree_node;
116 prio_tree_remove(root, root->prio_tree_node);
117 INIT_PRIO_TREE_NODE(last);
118 prev->left = last;
119 last->parent = prev;
120 }
121 }
123 INIT_PRIO_TREE_NODE(node);
125 if (first) {
126 node->left = first;
127 first->parent = node;
128 } else
129 last = node;
131 if (!prio_tree_empty(root)) {
132 last->left = root->prio_tree_node;
133 last->left->parent = last;
134 }
136 root->prio_tree_node = node;
137 return node;
138 }
140 /*
141 * Replace a prio_tree_node with a new node and return the old node
142 */
143 struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
144 struct prio_tree_node *old, struct prio_tree_node *node)
145 {
146 INIT_PRIO_TREE_NODE(node);
148 if (prio_tree_root(old)) {
149 BUG_ON(root->prio_tree_node != old);
150 /*
151 * We can reduce root->index_bits here. However, it is complex
152 * and does not help much to improve performance (IMO).
153 */
154 node->parent = node;
155 root->prio_tree_node = node;
156 } else {
157 node->parent = old->parent;
158 if (old->parent->left == old)
159 old->parent->left = node;
160 else
161 old->parent->right = node;
162 }
164 if (!prio_tree_left_empty(old)) {
165 node->left = old->left;
166 old->left->parent = node;
167 }
169 if (!prio_tree_right_empty(old)) {
170 node->right = old->right;
171 old->right->parent = node;
172 }
174 return old;
175 }
177 /*
178 * Insert a prio_tree_node @node into a radix priority search tree @root. The
179 * algorithm typically takes O(log n) time where 'log n' is the number of bits
180 * required to represent the maximum heap_index. In the worst case, the algo
181 * can take O((log n)^2) - check prio_tree_expand.
182 *
183 * If a prior node with same radix_index and heap_index is already found in
184 * the tree, then returns the address of the prior node. Otherwise, inserts
185 * @node into the tree and returns @node.
186 */
187 struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
188 struct prio_tree_node *node)
189 {
190 struct prio_tree_node *cur, *res = node;
191 unsigned long radix_index, heap_index;
192 unsigned long r_index, h_index, index, mask;
193 int size_flag = 0;
195 get_index(root, node, &radix_index, &heap_index);
197 if (prio_tree_empty(root) ||
198 heap_index > prio_tree_maxindex(root->index_bits))
199 return prio_tree_expand(root, node, heap_index);
201 cur = root->prio_tree_node;
202 mask = 1UL << (root->index_bits - 1);
204 while (mask) {
205 get_index(root, cur, &r_index, &h_index);
207 if (r_index == radix_index && h_index == heap_index)
208 return cur;
210 if (h_index < heap_index ||
211 (h_index == heap_index && r_index > radix_index)) {
212 struct prio_tree_node *tmp = node;
213 node = prio_tree_replace(root, cur, node);
214 cur = tmp;
215 /* swap indices */
216 index = r_index;
217 r_index = radix_index;
218 radix_index = index;
219 index = h_index;
220 h_index = heap_index;
221 heap_index = index;
222 }
224 if (size_flag)
225 index = heap_index - radix_index;
226 else
227 index = radix_index;
229 if (index & mask) {
230 if (prio_tree_right_empty(cur)) {
231 INIT_PRIO_TREE_NODE(node);
232 cur->right = node;
233 node->parent = cur;
234 return res;
235 } else
236 cur = cur->right;
237 } else {
238 if (prio_tree_left_empty(cur)) {
239 INIT_PRIO_TREE_NODE(node);
240 cur->left = node;
241 node->parent = cur;
242 return res;
243 } else
244 cur = cur->left;
245 }
247 mask >>= 1;
249 if (!mask) {
250 mask = 1UL << (BITS_PER_LONG - 1);
251 size_flag = 1;
252 }
253 }
254 /* Should not reach here */
255 BUG();
256 return NULL;
257 }
259 /*
260 * Remove a prio_tree_node @node from a radix priority search tree @root. The
261 * algorithm takes O(log n) time where 'log n' is the number of bits required
262 * to represent the maximum heap_index.
263 */
264 void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
265 {
266 struct prio_tree_node *cur;
267 unsigned long r_index, h_index_right, h_index_left;
269 cur = node;
271 while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) {
272 if (!prio_tree_left_empty(cur))
273 get_index(root, cur->left, &r_index, &h_index_left);
274 else {
275 cur = cur->right;
276 continue;
277 }
279 if (!prio_tree_right_empty(cur))
280 get_index(root, cur->right, &r_index, &h_index_right);
281 else {
282 cur = cur->left;
283 continue;
284 }
286 /* both h_index_left and h_index_right cannot be 0 */
287 if (h_index_left >= h_index_right)
288 cur = cur->left;
289 else
290 cur = cur->right;
291 }
293 if (prio_tree_root(cur)) {
294 BUG_ON(root->prio_tree_node != cur);
295 __INIT_PRIO_TREE_ROOT(root, root->raw);
296 return;
297 }
299 if (cur->parent->right == cur)
300 cur->parent->right = cur->parent;
301 else
302 cur->parent->left = cur->parent;
304 while (cur != node)
305 cur = prio_tree_replace(root, cur->parent, cur);
306 }
308 /*
309 * Following functions help to enumerate all prio_tree_nodes in the tree that
310 * overlap with the input interval X [radix_index, heap_index]. The enumeration
311 * takes O(log n + m) time where 'log n' is the height of the tree (which is
312 * proportional to # of bits required to represent the maximum heap_index) and
313 * 'm' is the number of prio_tree_nodes that overlap the interval X.
314 */
316 static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter,
317 unsigned long *r_index, unsigned long *h_index)
318 {
319 if (prio_tree_left_empty(iter->cur))
320 return NULL;
322 get_index(iter->root, iter->cur->left, r_index, h_index);
324 if (iter->r_index <= *h_index) {
325 iter->cur = iter->cur->left;
326 iter->mask >>= 1;
327 if (iter->mask) {
328 if (iter->size_level)
329 iter->size_level++;
330 } else {
331 if (iter->size_level) {
332 BUG_ON(!prio_tree_left_empty(iter->cur));
333 BUG_ON(!prio_tree_right_empty(iter->cur));
334 iter->size_level++;
335 iter->mask = ULONG_MAX;
336 } else {
337 iter->size_level = 1;
338 iter->mask = 1UL << (BITS_PER_LONG - 1);
339 }
340 }
341 return iter->cur;
342 }
344 return NULL;
345 }
347 static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
348 unsigned long *r_index, unsigned long *h_index)
349 {
350 unsigned long value;
352 if (prio_tree_right_empty(iter->cur))
353 return NULL;
355 if (iter->size_level)
356 value = iter->value;
357 else
358 value = iter->value | iter->mask;
360 if (iter->h_index < value)
361 return NULL;
363 get_index(iter->root, iter->cur->right, r_index, h_index);
365 if (iter->r_index <= *h_index) {
366 iter->cur = iter->cur->right;
367 iter->mask >>= 1;
368 iter->value = value;
369 if (iter->mask) {
370 if (iter->size_level)
371 iter->size_level++;
372 } else {
373 if (iter->size_level) {
374 BUG_ON(!prio_tree_left_empty(iter->cur));
375 BUG_ON(!prio_tree_right_empty(iter->cur));
376 iter->size_level++;
377 iter->mask = ULONG_MAX;
378 } else {
379 iter->size_level = 1;
380 iter->mask = 1UL << (BITS_PER_LONG - 1);
381 }
382 }
383 return iter->cur;
384 }
386 return NULL;
387 }
389 static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter)
390 {
391 iter->cur = iter->cur->parent;
392 if (iter->mask == ULONG_MAX)
393 iter->mask = 1UL;
394 else if (iter->size_level == 1)
395 iter->mask = 1UL;
396 else
397 iter->mask <<= 1;
398 if (iter->size_level)
399 iter->size_level--;
400 if (!iter->size_level && (iter->value & iter->mask))
401 iter->value ^= iter->mask;
402 return iter->cur;
403 }
405 static inline int overlap(struct prio_tree_iter *iter,
406 unsigned long r_index, unsigned long h_index)
407 {
408 return iter->h_index >= r_index && iter->r_index <= h_index;
409 }
411 /*
412 * prio_tree_first:
413 *
414 * Get the first prio_tree_node that overlaps with the interval [radix_index,
415 * heap_index]. Note that always radix_index <= heap_index. We do a pre-order
416 * traversal of the tree.
417 */
418 static struct prio_tree_node *prio_tree_first(struct prio_tree_iter *iter)
419 {
420 struct prio_tree_root *root;
421 unsigned long r_index, h_index;
423 INIT_PRIO_TREE_ITER(iter);
425 root = iter->root;
426 if (prio_tree_empty(root))
427 return NULL;
429 get_index(root, root->prio_tree_node, &r_index, &h_index);
431 if (iter->r_index > h_index)
432 return NULL;
434 iter->mask = 1UL << (root->index_bits - 1);
435 iter->cur = root->prio_tree_node;
437 while (1) {
438 if (overlap(iter, r_index, h_index))
439 return iter->cur;
441 if (prio_tree_left(iter, &r_index, &h_index))
442 continue;
444 if (prio_tree_right(iter, &r_index, &h_index))
445 continue;
447 break;
448 }
449 return NULL;
450 }
452 /*
453 * prio_tree_next:
454 *
455 * Get the next prio_tree_node that overlaps with the input interval in iter
456 */
457 struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter)
458 {
459 unsigned long r_index, h_index;
461 if (iter->cur == NULL)
462 return prio_tree_first(iter);
464 repeat:
465 while (prio_tree_left(iter, &r_index, &h_index))
466 if (overlap(iter, r_index, h_index))
467 return iter->cur;
469 while (!prio_tree_right(iter, &r_index, &h_index)) {
470 while (!prio_tree_root(iter->cur) &&
471 iter->cur->parent->right == iter->cur)
472 prio_tree_parent(iter);
474 if (prio_tree_root(iter->cur))
475 return NULL;
477 prio_tree_parent(iter);
478 }
480 if (overlap(iter, r_index, h_index))
481 return iter->cur;
483 goto repeat;
484 }