ia64/linux-2.6.18-xen.hg

view arch/sparc/lib/umul.S @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive less pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /* $Id: umul.S,v 1.4 1996/09/30 02:22:39 davem Exp $
2 * umul.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
7 /*
8 * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
9 * upper 32 bits of the 64-bit product).
10 *
11 * This code optimizes short (less than 13-bit) multiplies. Short
12 * multiplies require 25 instruction cycles, and long ones require
13 * 45 instruction cycles.
14 *
15 * On return, overflow has occurred (%o1 is not zero) if and only if
16 * the Z condition code is clear, allowing, e.g., the following:
17 *
18 * call .umul
19 * nop
20 * bnz overflow (or tnz)
21 */
23 .globl .umul
24 .globl _Umul
25 .umul:
26 _Umul: /* needed for export */
27 or %o0, %o1, %o4
28 mov %o0, %y ! multiplier -> Y
30 andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
31 be Lmul_shortway ! if zero, can do it the short way
32 andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
34 /*
35 * Long multiply. 32 steps, followed by a final shift step.
36 */
37 mulscc %o4, %o1, %o4 ! 1
38 mulscc %o4, %o1, %o4 ! 2
39 mulscc %o4, %o1, %o4 ! 3
40 mulscc %o4, %o1, %o4 ! 4
41 mulscc %o4, %o1, %o4 ! 5
42 mulscc %o4, %o1, %o4 ! 6
43 mulscc %o4, %o1, %o4 ! 7
44 mulscc %o4, %o1, %o4 ! 8
45 mulscc %o4, %o1, %o4 ! 9
46 mulscc %o4, %o1, %o4 ! 10
47 mulscc %o4, %o1, %o4 ! 11
48 mulscc %o4, %o1, %o4 ! 12
49 mulscc %o4, %o1, %o4 ! 13
50 mulscc %o4, %o1, %o4 ! 14
51 mulscc %o4, %o1, %o4 ! 15
52 mulscc %o4, %o1, %o4 ! 16
53 mulscc %o4, %o1, %o4 ! 17
54 mulscc %o4, %o1, %o4 ! 18
55 mulscc %o4, %o1, %o4 ! 19
56 mulscc %o4, %o1, %o4 ! 20
57 mulscc %o4, %o1, %o4 ! 21
58 mulscc %o4, %o1, %o4 ! 22
59 mulscc %o4, %o1, %o4 ! 23
60 mulscc %o4, %o1, %o4 ! 24
61 mulscc %o4, %o1, %o4 ! 25
62 mulscc %o4, %o1, %o4 ! 26
63 mulscc %o4, %o1, %o4 ! 27
64 mulscc %o4, %o1, %o4 ! 28
65 mulscc %o4, %o1, %o4 ! 29
66 mulscc %o4, %o1, %o4 ! 30
67 mulscc %o4, %o1, %o4 ! 31
68 mulscc %o4, %o1, %o4 ! 32
69 mulscc %o4, %g0, %o4 ! final shift
72 /*
73 * Normally, with the shift-and-add approach, if both numbers are
74 * positive you get the correct result. With 32-bit two's-complement
75 * numbers, -x is represented as
76 *
77 * x 32
78 * ( 2 - ------ ) mod 2 * 2
79 * 32
80 * 2
81 *
82 * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
83 * we can treat this as if the radix point were just to the left
84 * of the sign bit (multiply by 2^32), and get
85 *
86 * -x = (2 - x) mod 2
87 *
88 * Then, ignoring the `mod 2's for convenience:
89 *
90 * x * y = xy
91 * -x * y = 2y - xy
92 * x * -y = 2x - xy
93 * -x * -y = 4 - 2x - 2y + xy
94 *
95 * For signed multiplies, we subtract (x << 32) from the partial
96 * product to fix this problem for negative multipliers (see mul.s).
97 * Because of the way the shift into the partial product is calculated
98 * (N xor V), this term is automatically removed for the multiplicand,
99 * so we don't have to adjust.
100 *
101 * But for unsigned multiplies, the high order bit wasn't a sign bit,
102 * and the correction is wrong. So for unsigned multiplies where the
103 * high order bit is one, we end up with xy - (y << 32). To fix it
104 * we add y << 32.
105 */
106 #if 0
107 tst %o1
108 bl,a 1f ! if %o1 < 0 (high order bit = 1),
109 add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
111 1:
112 rd %y, %o0 ! get lower half of product
113 retl
114 addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
115 #else
116 /* Faster code from tege@sics.se. */
117 sra %o1, 31, %o2 ! make mask from sign bit
118 and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
119 rd %y, %o0 ! get lower half of product
120 retl
121 addcc %o4, %o2, %o1 ! add compensation and put upper half in place
122 #endif
124 Lmul_shortway:
125 /*
126 * Short multiply. 12 steps, followed by a final shift step.
127 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
128 * but there is no problem with %o0 being negative (unlike above),
129 * and overflow is impossible (the answer is at most 24 bits long).
130 */
131 mulscc %o4, %o1, %o4 ! 1
132 mulscc %o4, %o1, %o4 ! 2
133 mulscc %o4, %o1, %o4 ! 3
134 mulscc %o4, %o1, %o4 ! 4
135 mulscc %o4, %o1, %o4 ! 5
136 mulscc %o4, %o1, %o4 ! 6
137 mulscc %o4, %o1, %o4 ! 7
138 mulscc %o4, %o1, %o4 ! 8
139 mulscc %o4, %o1, %o4 ! 9
140 mulscc %o4, %o1, %o4 ! 10
141 mulscc %o4, %o1, %o4 ! 11
142 mulscc %o4, %o1, %o4 ! 12
143 mulscc %o4, %g0, %o4 ! final shift
145 /*
146 * %o4 has 20 of the bits that should be in the result; %y has
147 * the bottom 12 (as %y's top 12). That is:
148 *
149 * %o4 %y
150 * +----------------+----------------+
151 * | -12- | -20- | -12- | -20- |
152 * +------(---------+------)---------+
153 * -----result-----
154 *
155 * The 12 bits of %o4 left of the `result' area are all zero;
156 * in fact, all top 20 bits of %o4 are zero.
157 */
159 rd %y, %o5
160 sll %o4, 12, %o0 ! shift middle bits left 12
161 srl %o5, 20, %o5 ! shift low bits right 20
162 or %o5, %o0, %o0
163 retl
164 addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
166 .globl .umul_patch
167 .umul_patch:
168 umul %o0, %o1, %o0
169 retl
170 rd %y, %o1
171 nop