ia64/linux-2.6.18-xen.hg

view drivers/pci/pci-sysfs.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents f77ac4979fae
children
line source
1 /*
2 * drivers/pci/pci-sysfs.c
3 *
4 * (C) Copyright 2002-2004 Greg Kroah-Hartman <greg@kroah.com>
5 * (C) Copyright 2002-2004 IBM Corp.
6 * (C) Copyright 2003 Matthew Wilcox
7 * (C) Copyright 2003 Hewlett-Packard
8 * (C) Copyright 2004 Jon Smirl <jonsmirl@yahoo.com>
9 * (C) Copyright 2004 Silicon Graphics, Inc. Jesse Barnes <jbarnes@sgi.com>
10 *
11 * File attributes for PCI devices
12 *
13 * Modeled after usb's driverfs.c
14 *
15 */
18 #include <linux/kernel.h>
19 #include <linux/pci.h>
20 #include <linux/stat.h>
21 #include <linux/topology.h>
22 #include <linux/mm.h>
24 #include "pci.h"
/*
 * Zero until pci_sysfs_init() has run; the create/remove helpers in this
 * file return early while it is still zero.
 */
static int sysfs_initialized;
28 /* show configuration fields */
29 #define pci_config_attr(field, format_string) \
30 static ssize_t \
31 field##_show(struct device *dev, struct device_attribute *attr, char *buf) \
32 { \
33 struct pci_dev *pdev; \
34 \
35 pdev = to_pci_dev (dev); \
36 return sprintf (buf, format_string, pdev->field); \
37 }
39 pci_config_attr(vendor, "0x%04x\n");
40 pci_config_attr(device, "0x%04x\n");
41 pci_config_attr(subsystem_vendor, "0x%04x\n");
42 pci_config_attr(subsystem_device, "0x%04x\n");
43 pci_config_attr(class, "0x%06x\n");
44 pci_config_attr(irq, "%u\n");
45 pci_config_attr(is_enabled, "%u\n");
47 static ssize_t broken_parity_status_show(struct device *dev,
48 struct device_attribute *attr,
49 char *buf)
50 {
51 struct pci_dev *pdev = to_pci_dev(dev);
52 return sprintf (buf, "%u\n", pdev->broken_parity_status);
53 }
55 static ssize_t broken_parity_status_store(struct device *dev,
56 struct device_attribute *attr,
57 const char *buf, size_t count)
58 {
59 struct pci_dev *pdev = to_pci_dev(dev);
60 ssize_t consumed = -EINVAL;
62 if ((count > 0) && (*buf == '0' || *buf == '1')) {
63 pdev->broken_parity_status = *buf == '1' ? 1 : 0;
64 consumed = count;
65 }
66 return consumed;
67 }
69 static ssize_t local_cpus_show(struct device *dev,
70 struct device_attribute *attr, char *buf)
71 {
72 cpumask_t mask;
73 int len;
75 mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
76 len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
77 strcat(buf,"\n");
78 return 1+len;
79 }
81 /* show resources */
82 static ssize_t
83 resource_show(struct device * dev, struct device_attribute *attr, char * buf)
84 {
85 struct pci_dev * pci_dev = to_pci_dev(dev);
86 char * str = buf;
87 int i;
88 int max;
89 resource_size_t start, end;
91 if (pci_dev->subordinate)
92 max = DEVICE_COUNT_RESOURCE;
93 else
94 max = PCI_BRIDGE_RESOURCES;
96 for (i = 0; i < max; i++) {
97 struct resource *res = &pci_dev->resource[i];
98 pci_resource_to_user(pci_dev, i, res, &start, &end);
99 str += sprintf(str,"0x%016llx 0x%016llx 0x%016llx\n",
100 (unsigned long long)start,
101 (unsigned long long)end,
102 (unsigned long long)res->flags);
103 }
104 return (str - buf);
105 }
107 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf)
108 {
109 struct pci_dev *pci_dev = to_pci_dev(dev);
111 return sprintf(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x\n",
112 pci_dev->vendor, pci_dev->device,
113 pci_dev->subsystem_vendor, pci_dev->subsystem_device,
114 (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8),
115 (u8)(pci_dev->class));
116 }
117 static ssize_t
118 is_enabled_store(struct device *dev, struct device_attribute *attr,
119 const char *buf, size_t count)
120 {
121 struct pci_dev *pdev = to_pci_dev(dev);
123 /* this can crash the machine when done on the "wrong" device */
124 if (!capable(CAP_SYS_ADMIN))
125 return count;
127 if (*buf == '0')
128 pci_disable_device(pdev);
130 if (*buf == '1')
131 pci_enable_device(pdev);
133 return count;
134 }
137 struct device_attribute pci_dev_attrs[] = {
138 __ATTR_RO(resource),
139 __ATTR_RO(vendor),
140 __ATTR_RO(device),
141 __ATTR_RO(subsystem_vendor),
142 __ATTR_RO(subsystem_device),
143 __ATTR_RO(class),
144 __ATTR_RO(irq),
145 __ATTR_RO(local_cpus),
146 __ATTR_RO(modalias),
147 __ATTR(enable, 0600, is_enabled_show, is_enabled_store),
148 __ATTR(broken_parity_status,(S_IRUGO|S_IWUSR),
149 broken_parity_status_show,broken_parity_status_store),
150 __ATTR_NULL,
151 };
153 static ssize_t
154 pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
155 {
156 struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
157 unsigned int size = 64;
158 loff_t init_off = off;
159 u8 *data = (u8*) buf;
161 /* Several chips lock up trying to read undefined config space */
162 if (capable(CAP_SYS_ADMIN)) {
163 size = dev->cfg_size;
164 } else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
165 size = 128;
166 }
168 if (off > size)
169 return 0;
170 if (off + count > size) {
171 size -= off;
172 count = size;
173 } else {
174 size = count;
175 }
177 if ((off & 1) && size) {
178 u8 val;
179 pci_user_read_config_byte(dev, off, &val);
180 data[off - init_off] = val;
181 off++;
182 size--;
183 }
185 if ((off & 3) && size > 2) {
186 u16 val;
187 pci_user_read_config_word(dev, off, &val);
188 data[off - init_off] = val & 0xff;
189 data[off - init_off + 1] = (val >> 8) & 0xff;
190 off += 2;
191 size -= 2;
192 }
194 while (size > 3) {
195 u32 val;
196 pci_user_read_config_dword(dev, off, &val);
197 data[off - init_off] = val & 0xff;
198 data[off - init_off + 1] = (val >> 8) & 0xff;
199 data[off - init_off + 2] = (val >> 16) & 0xff;
200 data[off - init_off + 3] = (val >> 24) & 0xff;
201 off += 4;
202 size -= 4;
203 }
205 if (size >= 2) {
206 u16 val;
207 pci_user_read_config_word(dev, off, &val);
208 data[off - init_off] = val & 0xff;
209 data[off - init_off + 1] = (val >> 8) & 0xff;
210 off += 2;
211 size -= 2;
212 }
214 if (size > 0) {
215 u8 val;
216 pci_user_read_config_byte(dev, off, &val);
217 data[off - init_off] = val;
218 off++;
219 --size;
220 }
222 return count;
223 }
225 static ssize_t
226 pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
227 {
228 struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
229 unsigned int size = count;
230 loff_t init_off = off;
231 u8 *data = (u8*) buf;
233 if (off > dev->cfg_size)
234 return 0;
235 if (off + count > dev->cfg_size) {
236 size = dev->cfg_size - off;
237 count = size;
238 }
240 if ((off & 1) && size) {
241 pci_user_write_config_byte(dev, off, data[off - init_off]);
242 off++;
243 size--;
244 }
246 if ((off & 3) && size > 2) {
247 u16 val = data[off - init_off];
248 val |= (u16) data[off - init_off + 1] << 8;
249 pci_user_write_config_word(dev, off, val);
250 off += 2;
251 size -= 2;
252 }
254 while (size > 3) {
255 u32 val = data[off - init_off];
256 val |= (u32) data[off - init_off + 1] << 8;
257 val |= (u32) data[off - init_off + 2] << 16;
258 val |= (u32) data[off - init_off + 3] << 24;
259 pci_user_write_config_dword(dev, off, val);
260 off += 4;
261 size -= 4;
262 }
264 if (size >= 2) {
265 u16 val = data[off - init_off];
266 val |= (u16) data[off - init_off + 1] << 8;
267 pci_user_write_config_word(dev, off, val);
268 off += 2;
269 size -= 2;
270 }
272 if (size) {
273 pci_user_write_config_byte(dev, off, data[off - init_off]);
274 off++;
275 --size;
276 }
278 return count;
279 }
281 #ifdef HAVE_PCI_LEGACY
282 /**
283 * pci_read_legacy_io - read byte(s) from legacy I/O port space
284 * @kobj: kobject corresponding to file to read from
285 * @buf: buffer to store results
286 * @off: offset into legacy I/O port space
287 * @count: number of bytes to read
288 *
289 * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific
290 * callback routine (pci_legacy_read).
291 */
292 ssize_t
293 pci_read_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count)
294 {
295 struct pci_bus *bus = to_pci_bus(container_of(kobj,
296 struct class_device,
297 kobj));
299 /* Only support 1, 2 or 4 byte accesses */
300 if (count != 1 && count != 2 && count != 4)
301 return -EINVAL;
303 return pci_legacy_read(bus, off, (u32 *)buf, count);
304 }
306 /**
307 * pci_write_legacy_io - write byte(s) to legacy I/O port space
308 * @kobj: kobject corresponding to file to read from
309 * @buf: buffer containing value to be written
310 * @off: offset into legacy I/O port space
311 * @count: number of bytes to write
312 *
313 * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific
314 * callback routine (pci_legacy_write).
315 */
316 ssize_t
317 pci_write_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count)
318 {
319 struct pci_bus *bus = to_pci_bus(container_of(kobj,
320 struct class_device,
321 kobj));
322 /* Only support 1, 2 or 4 byte accesses */
323 if (count != 1 && count != 2 && count != 4)
324 return -EINVAL;
326 return pci_legacy_write(bus, off, *(u32 *)buf, count);
327 }
329 /**
330 * pci_mmap_legacy_mem - map legacy PCI memory into user memory space
331 * @kobj: kobject corresponding to device to be mapped
332 * @attr: struct bin_attribute for this file
333 * @vma: struct vm_area_struct passed to mmap
334 *
335 * Uses an arch specific callback, pci_mmap_legacy_page_range, to mmap
336 * legacy memory space (first meg of bus space) into application virtual
337 * memory space.
338 */
339 int
340 pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
341 struct vm_area_struct *vma)
342 {
343 struct pci_bus *bus = to_pci_bus(container_of(kobj,
344 struct class_device,
345 kobj));
347 return pci_mmap_legacy_page_range(bus, vma);
348 }
349 #endif /* HAVE_PCI_LEGACY */
351 #ifdef HAVE_PCI_MMAP
352 /**
353 * pci_mmap_resource - map a PCI resource into user memory space
354 * @kobj: kobject for mapping
355 * @attr: struct bin_attribute for the file being mapped
356 * @vma: struct vm_area_struct passed into the mmap
357 *
358 * Use the regular PCI mapping routines to map a PCI resource into userspace.
359 * FIXME: write combining? maybe automatic for prefetchable regions?
360 */
361 static int
362 pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
363 struct vm_area_struct *vma)
364 {
365 struct pci_dev *pdev = to_pci_dev(container_of(kobj,
366 struct device, kobj));
367 struct resource *res = (struct resource *)attr->private;
368 enum pci_mmap_state mmap_type;
369 resource_size_t start, end;
370 int i;
372 for (i = 0; i < PCI_ROM_RESOURCE; i++)
373 if (res == &pdev->resource[i])
374 break;
375 if (i >= PCI_ROM_RESOURCE)
376 return -ENODEV;
378 /* pci_mmap_page_range() expects the same kind of entry as coming
379 * from /proc/bus/pci/ which is a "user visible" value. If this is
380 * different from the resource itself, arch will do necessary fixup.
381 */
382 pci_resource_to_user(pdev, i, res, &start, &end);
383 vma->vm_pgoff += start >> PAGE_SHIFT;
384 mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
386 return pci_mmap_page_range(pdev, vma, mmap_type, 0);
387 }
389 /**
390 * pci_create_resource_files - create resource files in sysfs for @dev
391 * @dev: dev in question
392 *
393 * Walk the resources in @dev creating files for each resource available.
394 */
395 static void
396 pci_create_resource_files(struct pci_dev *pdev)
397 {
398 int i;
400 /* Expose the PCI resources from this device as files */
401 for (i = 0; i < PCI_ROM_RESOURCE; i++) {
402 struct bin_attribute *res_attr;
404 /* skip empty resources */
405 if (!pci_resource_len(pdev, i))
406 continue;
408 /* allocate attribute structure, piggyback attribute name */
409 res_attr = kzalloc(sizeof(*res_attr) + 10, GFP_ATOMIC);
410 if (res_attr) {
411 char *res_attr_name = (char *)(res_attr + 1);
413 pdev->res_attr[i] = res_attr;
414 sprintf(res_attr_name, "resource%d", i);
415 res_attr->attr.name = res_attr_name;
416 res_attr->attr.mode = S_IRUSR | S_IWUSR;
417 res_attr->attr.owner = THIS_MODULE;
418 res_attr->size = pci_resource_len(pdev, i);
419 res_attr->mmap = pci_mmap_resource;
420 res_attr->private = &pdev->resource[i];
421 sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
422 }
423 }
424 }
426 /**
427 * pci_remove_resource_files - cleanup resource files
428 * @dev: dev to cleanup
429 *
430 * If we created resource files for @dev, remove them from sysfs and
431 * free their resources.
432 */
433 static void
434 pci_remove_resource_files(struct pci_dev *pdev)
435 {
436 int i;
438 for (i = 0; i < PCI_ROM_RESOURCE; i++) {
439 struct bin_attribute *res_attr;
441 res_attr = pdev->res_attr[i];
442 if (res_attr) {
443 sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
444 kfree(res_attr);
445 }
446 }
447 }
448 #else /* !HAVE_PCI_MMAP */
/* Without HAVE_PCI_MMAP there are no resource files to create or remove. */
static inline void pci_create_resource_files(struct pci_dev *dev)
{
}

static inline void pci_remove_resource_files(struct pci_dev *dev)
{
}
451 #endif /* HAVE_PCI_MMAP */
453 /**
454 * pci_write_rom - used to enable access to the PCI ROM display
455 * @kobj: kernel object handle
456 * @buf: user input
457 * @off: file offset
458 * @count: number of byte in input
459 *
460 * writing anything except 0 enables it
461 */
462 static ssize_t
463 pci_write_rom(struct kobject *kobj, char *buf, loff_t off, size_t count)
464 {
465 struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
467 if ((off == 0) && (*buf == '0') && (count == 2))
468 pdev->rom_attr_enabled = 0;
469 else
470 pdev->rom_attr_enabled = 1;
472 return count;
473 }
475 /**
476 * pci_read_rom - read a PCI ROM
477 * @kobj: kernel object handle
478 * @buf: where to put the data we read from the ROM
479 * @off: file offset
480 * @count: number of bytes to read
481 *
482 * Put @count bytes starting at @off into @buf from the ROM in the PCI
483 * device corresponding to @kobj.
484 */
485 static ssize_t
486 pci_read_rom(struct kobject *kobj, char *buf, loff_t off, size_t count)
487 {
488 struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
489 void __iomem *rom;
490 size_t size;
492 if (!pdev->rom_attr_enabled)
493 return -EINVAL;
495 rom = pci_map_rom(pdev, &size); /* size starts out as PCI window size */
496 if (!rom)
497 return 0;
499 if (off >= size)
500 count = 0;
501 else {
502 if (off + count > size)
503 count = size - off;
505 memcpy_fromio(buf, rom + off, count);
506 }
507 pci_unmap_rom(pdev, rom);
509 return count;
510 }
512 static struct bin_attribute pci_config_attr = {
513 .attr = {
514 .name = "config",
515 .mode = S_IRUGO | S_IWUSR,
516 .owner = THIS_MODULE,
517 },
518 .size = 256,
519 .read = pci_read_config,
520 .write = pci_write_config,
521 };
523 static struct bin_attribute pcie_config_attr = {
524 .attr = {
525 .name = "config",
526 .mode = S_IRUGO | S_IWUSR,
527 .owner = THIS_MODULE,
528 },
529 .size = 4096,
530 .read = pci_read_config,
531 .write = pci_write_config,
532 };
534 int pci_create_sysfs_dev_files (struct pci_dev *pdev)
535 {
536 if (!sysfs_initialized)
537 return -EACCES;
539 if (pdev->cfg_size < 4096)
540 sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
541 else
542 sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
544 pci_create_resource_files(pdev);
546 /* If the device has a ROM, try to expose it in sysfs. */
547 if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) {
548 struct bin_attribute *rom_attr;
550 rom_attr = kzalloc(sizeof(*rom_attr), GFP_ATOMIC);
551 if (rom_attr) {
552 pdev->rom_attr = rom_attr;
553 rom_attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
554 rom_attr->attr.name = "rom";
555 rom_attr->attr.mode = S_IRUSR;
556 rom_attr->attr.owner = THIS_MODULE;
557 rom_attr->read = pci_read_rom;
558 rom_attr->write = pci_write_rom;
559 sysfs_create_bin_file(&pdev->dev.kobj, rom_attr);
560 }
561 }
562 /* add platform-specific attributes */
563 pcibios_add_platform_entries(pdev);
565 return 0;
566 }
568 /**
569 * pci_remove_sysfs_dev_files - cleanup PCI specific sysfs files
570 * @pdev: device whose entries we should free
571 *
572 * Cleanup when @pdev is removed from sysfs.
573 */
574 void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
575 {
576 if (!sysfs_initialized)
577 return;
579 if (pdev->cfg_size < 4096)
580 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
581 else
582 sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
584 pci_remove_resource_files(pdev);
586 if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) {
587 if (pdev->rom_attr) {
588 sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
589 kfree(pdev->rom_attr);
590 }
591 }
592 }
594 static int __init pci_sysfs_init(void)
595 {
596 struct pci_dev *pdev = NULL;
598 sysfs_initialized = 1;
599 for_each_pci_dev(pdev)
600 pci_create_sysfs_dev_files(pdev);
602 return 0;
603 }
605 __initcall(pci_sysfs_init);