ia64/linux-2.6.18-xen.hg

view arch/mips/pci/ops-au1000.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * BRIEF MODULE DESCRIPTION
3 * Alchemy/AMD Au1x00 pci support.
4 *
5 * Copyright 2001,2002,2003 MontaVista Software Inc.
6 * Author: MontaVista Software, Inc.
7 * ppopov@mvista.com or source@mvista.com
8 *
9 * Support for all devices (greater than 16) added by David Gathright.
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
15 *
16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
22 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 * You should have received a copy of the GNU General Public License along
28 * with this program; if not, write to the Free Software Foundation, Inc.,
29 * 675 Mass Ave, Cambridge, MA 02139, USA.
30 */
31 #include <linux/types.h>
32 #include <linux/pci.h>
33 #include <linux/kernel.h>
34 #include <linux/init.h>
35 #include <linux/vmalloc.h>
37 #include <asm/mach-au1x00/au1000.h>
/*
 * Debug tracing.  DBG() forwards its arguments to printk() when DEBUG is
 * defined and expands to nothing otherwise; the #undef keeps tracing
 * compiled out unless this line is edited.
 */
#undef DEBUG
#if defined(DEBUG)
#define DBG(x...) printk(x)
#else
#define DBG(x...)
#endif

/* Values accepted by the access_type argument of config_access(). */
#define PCI_ACCESS_READ  0
#define PCI_ACCESS_WRITE 1

/*
 * Optional board-specific IDSEL hook.  When non-NULL, config_access()
 * calls it with assert=1 before touching the device and with assert=0
 * once the access is finished; a zero return from the assert=1 call
 * makes config_access() fail the access.
 */
int (*board_pci_idsel)(unsigned int devsel, int assert);
/*
 * Overwrite the TLB entry at index 'entry' with the supplied EntryLo0,
 * EntryLo1, EntryHi and PageMask values.
 *
 * The low 8 bits of EntryHi (presumably the current ASID -- confirm
 * against the CPU manual) and the PageMask register are sampled before
 * the update and written back afterwards, so the caller's CP0 context
 * is left as it was found.
 */
void mod_wired_entry(int entry, unsigned long entrylo0,
		     unsigned long entrylo1, unsigned long entryhi,
		     unsigned long pagemask)
{
	/* Snapshot the CP0 state we are about to clobber. */
	const unsigned long saved_ctx = read_c0_entryhi() & 0xff;
	const unsigned long saved_mask = read_c0_pagemask();

	/* Load the new entry into the CP0 staging registers ... */
	write_c0_index(entry);
	write_c0_pagemask(pagemask);
	write_c0_entryhi(entryhi);
	write_c0_entrylo0(entrylo0);
	write_c0_entrylo1(entrylo1);

	/* ... and commit it to the TLB slot selected by Index. */
	tlb_write_indexed();

	/* Put the previous context back. */
	write_c0_entryhi(saved_ctx);
	write_c0_pagemask(saved_mask);
}
/* Virtual address range through which all config cycles are issued. */
struct vm_struct *pci_cfg_vm;

/* Index of the wired TLB entry that maps pci_cfg_vm. */
static int pci_cfg_wired_entry;

/* Non-zero until config_access() has performed its one-time setup. */
static int first_cfg = 1;

/*
 * EntryLo0/EntryLo1 values most recently loaded into the wired entry;
 * config_access() compares against them to skip redundant TLB rewrites.
 */
unsigned long last_entryLo0;
unsigned long last_entryLo1;
77 static int config_access(unsigned char access_type, struct pci_bus *bus,
78 unsigned int dev_fn, unsigned char where,
79 u32 * data)
80 {
81 #if defined( CONFIG_SOC_AU1500 ) || defined( CONFIG_SOC_AU1550 )
82 unsigned int device = PCI_SLOT(dev_fn);
83 unsigned int function = PCI_FUNC(dev_fn);
84 unsigned long offset, status;
85 unsigned long cfg_base;
86 unsigned long flags;
87 int error = PCIBIOS_SUCCESSFUL;
88 unsigned long entryLo0, entryLo1;
90 if (device > 19) {
91 *data = 0xffffffff;
92 return -1;
93 }
95 local_irq_save(flags);
96 au_writel(((0x2000 << 16) | (au_readl(Au1500_PCI_STATCMD) & 0xffff)),
97 Au1500_PCI_STATCMD);
98 au_sync_udelay(1);
100 /*
101 * We can't ioremap the entire pci config space because it's
102 * too large. Nor can we call ioremap dynamically because some
103 * device drivers use the pci config routines from within
104 * interrupt handlers and that becomes a problem in get_vm_area().
105 * We use one wired tlb to handle all config accesses for all
106 * busses. To improve performance, if the current device
107 * is the same as the last device accessed, we don't touch the
108 * tlb.
109 */
110 if (first_cfg) {
111 /* reserve a wired entry for pci config accesses */
112 first_cfg = 0;
113 pci_cfg_vm = get_vm_area(0x2000, 0);
114 if (!pci_cfg_vm)
115 panic (KERN_ERR "PCI unable to get vm area\n");
116 pci_cfg_wired_entry = read_c0_wired();
117 add_wired_entry(0, 0, (unsigned long)pci_cfg_vm->addr, PM_4K);
118 last_entryLo0 = last_entryLo1 = 0xffffffff;
119 }
121 /* Allow board vendors to implement their own off-chip idsel.
122 * If it doesn't succeed, may as well bail out at this point.
123 */
124 if (board_pci_idsel) {
125 if (board_pci_idsel(device, 1) == 0) {
126 *data = 0xffffffff;
127 local_irq_restore(flags);
128 return -1;
129 }
130 }
132 /* setup the config window */
133 if (bus->number == 0) {
134 cfg_base = ((1<<device)<<11);
135 } else {
136 cfg_base = 0x80000000 | (bus->number<<16) | (device<<11);
137 }
139 /* setup the lower bits of the 36 bit address */
140 offset = (function << 8) | (where & ~0x3);
141 /* pick up any address that falls below the page mask */
142 offset |= cfg_base & ~PAGE_MASK;
144 /* page boundary */
145 cfg_base = cfg_base & PAGE_MASK;
147 entryLo0 = (6 << 26) | (cfg_base >> 6) | (2 << 3) | 7;
148 entryLo1 = (6 << 26) | (cfg_base >> 6) | (0x1000 >> 6) | (2 << 3) | 7;
150 if ((entryLo0 != last_entryLo0) || (entryLo1 != last_entryLo1)) {
151 mod_wired_entry(pci_cfg_wired_entry, entryLo0, entryLo1,
152 (unsigned long)pci_cfg_vm->addr, PM_4K);
153 last_entryLo0 = entryLo0;
154 last_entryLo1 = entryLo1;
155 }
157 if (access_type == PCI_ACCESS_WRITE) {
158 au_writel(*data, (int)(pci_cfg_vm->addr + offset));
159 } else {
160 *data = au_readl((int)(pci_cfg_vm->addr + offset));
161 }
162 au_sync_udelay(2);
164 DBG("cfg_access %d bus->number %d dev %d at %x *data %x conf %x\n",
165 access_type, bus->number, device, where, *data, offset);
167 /* check master abort */
168 status = au_readl(Au1500_PCI_STATCMD);
170 if (status & (1<<29)) {
171 *data = 0xffffffff;
172 error = -1;
173 DBG("Au1x Master Abort\n");
174 } else if ((status >> 28) & 0xf) {
175 DBG("PCI ERR detected: status %x\n", status);
176 *data = 0xffffffff;
177 error = -1;
178 }
180 /* Take away the idsel.
181 */
182 if (board_pci_idsel) {
183 (void)board_pci_idsel(device, 0);
184 }
186 local_irq_restore(flags);
187 return error;
188 #endif
189 }
191 static int read_config_byte(struct pci_bus *bus, unsigned int devfn,
192 int where, u8 * val)
193 {
194 u32 data;
195 int ret;
197 ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data);
198 if (where & 1)
199 data >>= 8;
200 if (where & 2)
201 data >>= 16;
202 *val = data & 0xff;
203 return ret;
204 }
207 static int read_config_word(struct pci_bus *bus, unsigned int devfn,
208 int where, u16 * val)
209 {
210 u32 data;
211 int ret;
213 ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data);
214 if (where & 2)
215 data >>= 16;
216 *val = data & 0xffff;
217 return ret;
218 }
220 static int read_config_dword(struct pci_bus *bus, unsigned int devfn,
221 int where, u32 * val)
222 {
223 int ret;
225 ret = config_access(PCI_ACCESS_READ, bus, devfn, where, val);
226 return ret;
227 }
229 static int
230 write_config_byte(struct pci_bus *bus, unsigned int devfn, int where,
231 u8 val)
232 {
233 u32 data = 0;
235 if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
236 return -1;
238 data = (data & ~(0xff << ((where & 3) << 3))) |
239 (val << ((where & 3) << 3));
241 if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
242 return -1;
244 return PCIBIOS_SUCCESSFUL;
245 }
247 static int
248 write_config_word(struct pci_bus *bus, unsigned int devfn, int where,
249 u16 val)
250 {
251 u32 data = 0;
253 if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
254 return -1;
256 data = (data & ~(0xffff << ((where & 3) << 3))) |
257 (val << ((where & 3) << 3));
259 if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
260 return -1;
263 return PCIBIOS_SUCCESSFUL;
264 }
266 static int
267 write_config_dword(struct pci_bus *bus, unsigned int devfn, int where,
268 u32 val)
269 {
270 if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &val))
271 return -1;
273 return PCIBIOS_SUCCESSFUL;
274 }
276 static int config_read(struct pci_bus *bus, unsigned int devfn,
277 int where, int size, u32 * val)
278 {
279 switch (size) {
280 case 1: {
281 u8 _val;
282 int rc = read_config_byte(bus, devfn, where, &_val);
283 *val = _val;
284 return rc;
285 }
286 case 2: {
287 u16 _val;
288 int rc = read_config_word(bus, devfn, where, &_val);
289 *val = _val;
290 return rc;
291 }
292 default:
293 return read_config_dword(bus, devfn, where, val);
294 }
295 }
297 static int config_write(struct pci_bus *bus, unsigned int devfn,
298 int where, int size, u32 val)
299 {
300 switch (size) {
301 case 1:
302 return write_config_byte(bus, devfn, where, (u8) val);
303 case 2:
304 return write_config_word(bus, devfn, where, (u16) val);
305 default:
306 return write_config_dword(bus, devfn, where, val);
307 }
308 }
311 struct pci_ops au1x_pci_ops = {
312 config_read,
313 config_write
314 };