ia64/linux-2.6.18-xen.hg

view drivers/pci/proc.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents f77ac4979fae
children
line source
1 /*
2 * $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $
3 *
4 * Procfs interface for the PCI bus.
5 *
6 * Copyright (c) 1997--1999 Martin Mares <mj@ucw.cz>
7 */
9 #include <linux/init.h>
10 #include <linux/pci.h>
11 #include <linux/module.h>
12 #include <linux/proc_fs.h>
13 #include <linux/seq_file.h>
14 #include <linux/smp_lock.h>
16 #include <asm/uaccess.h>
17 #include <asm/byteorder.h>
18 #include "pci.h"
/*
 * Starts out as 0 (static storage); pci_proc_init() sets it to 1.
 * pci_proc_attach_device() returns -EACCES while it is still 0.
 */
static int proc_initialized;
22 static loff_t
23 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
24 {
25 loff_t new = -1;
26 struct inode *inode = file->f_dentry->d_inode;
28 mutex_lock(&inode->i_mutex);
29 switch (whence) {
30 case 0:
31 new = off;
32 break;
33 case 1:
34 new = file->f_pos + off;
35 break;
36 case 2:
37 new = inode->i_size + off;
38 break;
39 }
40 if (new < 0 || new > inode->i_size)
41 new = -EINVAL;
42 else
43 file->f_pos = new;
44 mutex_unlock(&inode->i_mutex);
45 return new;
46 }
48 static ssize_t
49 proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
50 {
51 const struct inode *ino = file->f_dentry->d_inode;
52 const struct proc_dir_entry *dp = PDE(ino);
53 struct pci_dev *dev = dp->data;
54 unsigned int pos = *ppos;
55 unsigned int cnt, size;
57 /*
58 * Normal users can read only the standardized portion of the
59 * configuration space as several chips lock up when trying to read
60 * undefined locations (think of Intel PIIX4 as a typical example).
61 */
63 if (capable(CAP_SYS_ADMIN))
64 size = dev->cfg_size;
65 else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
66 size = 128;
67 else
68 size = 64;
70 if (pos >= size)
71 return 0;
72 if (nbytes >= size)
73 nbytes = size;
74 if (pos + nbytes > size)
75 nbytes = size - pos;
76 cnt = nbytes;
78 if (!access_ok(VERIFY_WRITE, buf, cnt))
79 return -EINVAL;
81 if ((pos & 1) && cnt) {
82 unsigned char val;
83 pci_user_read_config_byte(dev, pos, &val);
84 __put_user(val, buf);
85 buf++;
86 pos++;
87 cnt--;
88 }
90 if ((pos & 3) && cnt > 2) {
91 unsigned short val;
92 pci_user_read_config_word(dev, pos, &val);
93 __put_user(cpu_to_le16(val), (unsigned short __user *) buf);
94 buf += 2;
95 pos += 2;
96 cnt -= 2;
97 }
99 while (cnt >= 4) {
100 unsigned int val;
101 pci_user_read_config_dword(dev, pos, &val);
102 __put_user(cpu_to_le32(val), (unsigned int __user *) buf);
103 buf += 4;
104 pos += 4;
105 cnt -= 4;
106 }
108 if (cnt >= 2) {
109 unsigned short val;
110 pci_user_read_config_word(dev, pos, &val);
111 __put_user(cpu_to_le16(val), (unsigned short __user *) buf);
112 buf += 2;
113 pos += 2;
114 cnt -= 2;
115 }
117 if (cnt) {
118 unsigned char val;
119 pci_user_read_config_byte(dev, pos, &val);
120 __put_user(val, buf);
121 buf++;
122 pos++;
123 cnt--;
124 }
126 *ppos = pos;
127 return nbytes;
128 }
130 static ssize_t
131 proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos)
132 {
133 const struct inode *ino = file->f_dentry->d_inode;
134 const struct proc_dir_entry *dp = PDE(ino);
135 struct pci_dev *dev = dp->data;
136 int pos = *ppos;
137 int size = dev->cfg_size;
138 int cnt;
140 if (pos >= size)
141 return 0;
142 if (nbytes >= size)
143 nbytes = size;
144 if (pos + nbytes > size)
145 nbytes = size - pos;
146 cnt = nbytes;
148 if (!access_ok(VERIFY_READ, buf, cnt))
149 return -EINVAL;
151 if ((pos & 1) && cnt) {
152 unsigned char val;
153 __get_user(val, buf);
154 pci_user_write_config_byte(dev, pos, val);
155 buf++;
156 pos++;
157 cnt--;
158 }
160 if ((pos & 3) && cnt > 2) {
161 unsigned short val;
162 __get_user(val, (unsigned short __user *) buf);
163 pci_user_write_config_word(dev, pos, le16_to_cpu(val));
164 buf += 2;
165 pos += 2;
166 cnt -= 2;
167 }
169 while (cnt >= 4) {
170 unsigned int val;
171 __get_user(val, (unsigned int __user *) buf);
172 pci_user_write_config_dword(dev, pos, le32_to_cpu(val));
173 buf += 4;
174 pos += 4;
175 cnt -= 4;
176 }
178 if (cnt >= 2) {
179 unsigned short val;
180 __get_user(val, (unsigned short __user *) buf);
181 pci_user_write_config_word(dev, pos, le16_to_cpu(val));
182 buf += 2;
183 pos += 2;
184 cnt -= 2;
185 }
187 if (cnt) {
188 unsigned char val;
189 __get_user(val, buf);
190 pci_user_write_config_byte(dev, pos, val);
191 buf++;
192 pos++;
193 cnt--;
194 }
196 *ppos = pos;
197 return nbytes;
198 }
200 struct pci_filp_private {
201 enum pci_mmap_state mmap_state;
202 int write_combine;
203 };
205 static int proc_bus_pci_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
206 {
207 const struct proc_dir_entry *dp = PDE(inode);
208 struct pci_dev *dev = dp->data;
209 #ifdef HAVE_PCI_MMAP
210 struct pci_filp_private *fpriv = file->private_data;
211 #endif /* HAVE_PCI_MMAP */
212 int ret = 0;
214 switch (cmd) {
215 case PCIIOC_CONTROLLER:
216 ret = pci_domain_nr(dev->bus);
217 break;
219 #ifdef HAVE_PCI_MMAP
220 case PCIIOC_MMAP_IS_IO:
221 fpriv->mmap_state = pci_mmap_io;
222 break;
224 case PCIIOC_MMAP_IS_MEM:
225 fpriv->mmap_state = pci_mmap_mem;
226 break;
228 case PCIIOC_WRITE_COMBINE:
229 if (arg)
230 fpriv->write_combine = 1;
231 else
232 fpriv->write_combine = 0;
233 break;
235 #endif /* HAVE_PCI_MMAP */
237 default:
238 ret = -EINVAL;
239 break;
240 };
242 return ret;
243 }
245 #ifdef HAVE_PCI_MMAP
246 static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
247 {
248 struct inode *inode = file->f_dentry->d_inode;
249 const struct proc_dir_entry *dp = PDE(inode);
250 struct pci_dev *dev = dp->data;
251 struct pci_filp_private *fpriv = file->private_data;
252 int ret;
254 if (!capable(CAP_SYS_RAWIO))
255 return -EPERM;
257 ret = pci_mmap_page_range(dev, vma,
258 fpriv->mmap_state,
259 fpriv->write_combine);
260 if (ret < 0)
261 return ret;
263 return 0;
264 }
266 static int proc_bus_pci_open(struct inode *inode, struct file *file)
267 {
268 struct pci_filp_private *fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL);
270 if (!fpriv)
271 return -ENOMEM;
273 fpriv->mmap_state = pci_mmap_io;
274 fpriv->write_combine = 0;
276 file->private_data = fpriv;
278 return 0;
279 }
281 static int proc_bus_pci_release(struct inode *inode, struct file *file)
282 {
283 kfree(file->private_data);
284 file->private_data = NULL;
286 return 0;
287 }
288 #endif /* HAVE_PCI_MMAP */
290 static struct file_operations proc_bus_pci_operations = {
291 .llseek = proc_bus_pci_lseek,
292 .read = proc_bus_pci_read,
293 .write = proc_bus_pci_write,
294 .ioctl = proc_bus_pci_ioctl,
295 #ifdef HAVE_PCI_MMAP
296 .open = proc_bus_pci_open,
297 .release = proc_bus_pci_release,
298 .mmap = proc_bus_pci_mmap,
299 #ifdef HAVE_ARCH_PCI_GET_UNMAPPED_AREA
300 .get_unmapped_area = get_pci_unmapped_area,
301 #endif /* HAVE_ARCH_PCI_GET_UNMAPPED_AREA */
302 #endif /* HAVE_PCI_MMAP */
303 };
305 /* iterator */
306 static void *pci_seq_start(struct seq_file *m, loff_t *pos)
307 {
308 struct pci_dev *dev = NULL;
309 loff_t n = *pos;
311 for_each_pci_dev(dev) {
312 if (!n--)
313 break;
314 }
315 return dev;
316 }
318 static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos)
319 {
320 struct pci_dev *dev = v;
322 (*pos)++;
323 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
324 return dev;
325 }
327 static void pci_seq_stop(struct seq_file *m, void *v)
328 {
329 if (v) {
330 struct pci_dev *dev = v;
331 pci_dev_put(dev);
332 }
333 }
335 static int show_device(struct seq_file *m, void *v)
336 {
337 const struct pci_dev *dev = v;
338 const struct pci_driver *drv;
339 int i;
341 if (dev == NULL)
342 return 0;
344 drv = pci_dev_driver(dev);
345 seq_printf(m, "%02x%02x\t%04x%04x\t%x",
346 dev->bus->number,
347 dev->devfn,
348 dev->vendor,
349 dev->device,
350 dev->irq);
352 /* only print standard and ROM resources to preserve compatibility */
353 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
354 resource_size_t start, end;
355 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
356 seq_printf(m, "\t%16llx",
357 (unsigned long long)(start |
358 (dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
359 }
360 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
361 resource_size_t start, end;
362 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
363 seq_printf(m, "\t%16llx",
364 dev->resource[i].start < dev->resource[i].end ?
365 (unsigned long long)(end - start) + 1 : 0);
366 }
367 seq_putc(m, '\t');
368 if (drv)
369 seq_printf(m, "%s", drv->name);
370 seq_putc(m, '\n');
371 return 0;
372 }
374 static struct seq_operations proc_bus_pci_devices_op = {
375 .start = pci_seq_start,
376 .next = pci_seq_next,
377 .stop = pci_seq_stop,
378 .show = show_device
379 };
/*
 * The "pci" proc directory created by pci_proc_init(); parent of the
 * per-bus directories and of the "devices" file.
 */
static struct proc_dir_entry *proc_bus_pci_dir;
383 int pci_proc_attach_device(struct pci_dev *dev)
384 {
385 struct pci_bus *bus = dev->bus;
386 struct proc_dir_entry *e;
387 char name[16];
389 if (!proc_initialized)
390 return -EACCES;
392 if (!bus->procdir) {
393 if (pci_proc_domain(bus)) {
394 sprintf(name, "%04x:%02x", pci_domain_nr(bus),
395 bus->number);
396 } else {
397 sprintf(name, "%02x", bus->number);
398 }
399 bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
400 if (!bus->procdir)
401 return -ENOMEM;
402 }
404 sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
405 e = create_proc_entry(name, S_IFREG | S_IRUGO | S_IWUSR, bus->procdir);
406 if (!e)
407 return -ENOMEM;
408 e->proc_fops = &proc_bus_pci_operations;
409 e->data = dev;
410 e->size = dev->cfg_size;
411 dev->procent = e;
413 return 0;
414 }
416 int pci_proc_detach_device(struct pci_dev *dev)
417 {
418 struct proc_dir_entry *e;
420 if ((e = dev->procent)) {
421 if (atomic_read(&e->count))
422 return -EBUSY;
423 remove_proc_entry(e->name, dev->bus->procdir);
424 dev->procent = NULL;
425 }
426 return 0;
427 }
429 #if 0
430 int pci_proc_attach_bus(struct pci_bus* bus)
431 {
432 struct proc_dir_entry *de = bus->procdir;
434 if (!proc_initialized)
435 return -EACCES;
437 if (!de) {
438 char name[16];
439 sprintf(name, "%02x", bus->number);
440 de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
441 if (!de)
442 return -ENOMEM;
443 }
444 return 0;
445 }
446 #endif /* 0 */
448 int pci_proc_detach_bus(struct pci_bus* bus)
449 {
450 struct proc_dir_entry *de = bus->procdir;
451 if (de)
452 remove_proc_entry(de->name, proc_bus_pci_dir);
453 return 0;
454 }
456 static int proc_bus_pci_dev_open(struct inode *inode, struct file *file)
457 {
458 return seq_open(file, &proc_bus_pci_devices_op);
459 }
460 static struct file_operations proc_bus_pci_dev_operations = {
461 .open = proc_bus_pci_dev_open,
462 .read = seq_read,
463 .llseek = seq_lseek,
464 .release = seq_release,
465 };
467 static int __init pci_proc_init(void)
468 {
469 struct proc_dir_entry *entry;
470 struct pci_dev *dev = NULL;
471 proc_bus_pci_dir = proc_mkdir("pci", proc_bus);
472 entry = create_proc_entry("devices", 0, proc_bus_pci_dir);
473 if (entry)
474 entry->proc_fops = &proc_bus_pci_dev_operations;
475 proc_initialized = 1;
476 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
477 pci_proc_attach_device(dev);
478 }
479 return 0;
480 }
482 __initcall(pci_proc_init);
484 #ifdef CONFIG_HOTPLUG
485 EXPORT_SYMBOL(pci_proc_attach_device);
486 EXPORT_SYMBOL(pci_proc_detach_bus);
487 #endif