ia64/linux-2.6.18-xen.hg

view drivers/pci/iomulti.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive less pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 20be7f6d414a
children
line source
1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15 *
16 * Copyright (c) 2009 Isaku Yamahata
17 * VA Linux Systems Japan K.K.
18 *
19 */
21 #include <linux/kernel.h>
22 #include <linux/list.h>
23 #include <linux/miscdevice.h>
24 #include <linux/pci.h>
25 #include <linux/sort.h>
27 #include <asm/setup.h>
28 #include <asm/uaccess.h>
30 #include "pci.h"
31 #include "iomulti.h"
33 #define PCI_NUM_BARS 6
34 #define PCI_BUS_MAX 255
35 #define PCI_DEV_MAX 31
36 #define PCI_FUNC_MAX 7
37 #define PCI_NUM_FUNC 8
39 /* see pci_resource_len */
40 static inline resource_size_t pci_iomul_len(const struct resource* r)
41 {
42 if (r->start == 0 && r->start == r->end)
43 return 0;
44 return r->end - r->start + 1;
45 }
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
/*
 * Total I/O space this device needs, page-rounded.
 * Stolen from pbus_size_io(): small (<1KiB) regions are kept separate
 * because they may be re-aligned for ISA aliasing, then both sums are
 * rounded up to a 4KiB bridge I/O window granularity.
 */
static unsigned long pdev_size_io(struct pci_dev *pdev)
{
	unsigned long size = 0, size1 = 0;
	int i;

	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
		struct resource *r = &pdev->resource[i];
		unsigned long r_size;

		if (!(r->flags & IORESOURCE_IO))
			continue;

		r_size = r->end - r->start + 1;

		if (r_size < 0x400)
			/* Might be re-aligned for ISA */
			size += r_size;
		else
			size1 += r_size;
	}

	/* To be fixed in 2.5: we should have sort of HAVE_ISA
	   flag in the struct pci_bus. */
#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
	size = (size & 0xff) + ((size & ~0xffUL) << 2);
#endif
	size = ROUND_UP(size + size1, 4096);
	return size;
}
/*
 * Primary bus number of the PCI-PCI bridge in the switch on which
 * this slot sits, i.e. the primary bus number of the PCI-PCI bridge
 * of the downstream port or root port in the switch; equivalently the
 * secondary bus number of the PCI-PCI bridge of the upstream port.
 * Non-PCIe devices fall back to their own bus number.
 */
static inline unsigned char pci_dev_switch_busnr(struct pci_dev *pdev)
{
	if (pci_find_capability(pdev, PCI_CAP_ID_EXP))
		return pdev->bus->primary;
	return pdev->bus->number;
}
/* Per-function record of the I/O BARs being multiplexed. */
struct pci_iomul_func {
	int segment;		/* PCI domain */
	uint8_t bus;
	uint8_t devfn;

	unsigned long io_size;	/* from pdev_size_io() */
	uint8_t io_bar;		/* bitmask of BAR indexes that are I/O BARs */
	/* only start and end are used */
	struct resource resource[PCI_NUM_BARS];
	struct resource dummy_parent;	/* see pci_iomul_reenable_resource() */
};
/* One instance per switch whose endpoints share a single I/O window. */
struct pci_iomul_switch {
	struct list_head list;	/* chained on switch_list;
				 * switch_list_lock protects */

	/*
	 * This lock protects the following entries and the
	 * pci_iomul_slot/pci_iomul_func hanging off them.
	 */
	struct mutex lock;
	struct kref kref;

	struct resource io_resource;
	struct resource *io_region;	/* non-NULL while count > 0 */
	unsigned int count;		/* number of open users */
	struct pci_dev *current_pdev;	/* device currently owning the window */

	int segment;
	uint8_t bus;

	uint32_t io_base;	/* 0 means no window allocated yet */
	uint32_t io_limit;

	/* func which has the largest io size */
	struct pci_iomul_func *func;

	struct list_head slots;
};
/* One instance per multiplexed physical slot (device). */
struct pci_iomul_slot {
	struct list_head sibling;	/* on pci_iomul_switch::slots */
	struct kref kref;
	/*
	 * busnr
	 * when pcie, the primary busnr of the PCI-PCI bridge on which
	 * this device sits.
	 */
	uint8_t switch_busnr;
	/* fake parents for the bridge's I/O resources, one per bridge BAR */
	struct resource dummy_parent[PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES];

	/* device */
	int segment;
	uint8_t bus;
	uint8_t dev;

	struct pci_iomul_func *func[PCI_NUM_FUNC];	/* indexed by function */
};
/* All known switches; switch_list_lock protects the list. */
static LIST_HEAD(switch_list);
static DEFINE_MUTEX(switch_list_lock);
155 /*****************************************************************************/
156 static int inline pci_iomul_switch_io_allocated(
157 const struct pci_iomul_switch *sw)
158 {
159 return !(sw->io_base == 0 || sw->io_base > sw->io_limit);
160 }
/* Find the switch for (segment, bus); caller holds switch_list_lock. */
static struct pci_iomul_switch *pci_iomul_find_switch_locked(int segment,
							     uint8_t bus)
{
	struct pci_iomul_switch *sw;

	BUG_ON(!mutex_is_locked(&switch_list_lock));
	list_for_each_entry(sw, &switch_list, list) {
		if (sw->segment == segment && sw->bus == bus)
			return sw;
	}
	return NULL;
}
/* Find the slot for (busnr, dev) on a switch; caller holds sw->lock. */
static struct pci_iomul_slot *pci_iomul_find_slot_locked(
	struct pci_iomul_switch *sw, uint8_t busnr, uint8_t dev)
{
	struct pci_iomul_slot *slot;

	BUG_ON(!mutex_is_locked(&sw->lock));
	list_for_each_entry(slot, &sw->slots, sibling) {
		if (slot->bus == busnr && slot->dev == dev)
			return slot;
	}
	return NULL;
}
static void pci_iomul_switch_get(struct pci_iomul_switch *sw);
/* On successful exit (*swp != NULL and *slot != NULL), (*swp)->lock is
 * held for using *slot and the reference count of *swp has been
 * incremented.  On failure both outputs are NULL and no lock is held.
 */
static void pci_iomul_get_lock_switch(struct pci_dev *pdev,
				      struct pci_iomul_switch **swp,
				      struct pci_iomul_slot **slot)
{
	mutex_lock(&switch_list_lock);

	*swp = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus),
					    pci_dev_switch_busnr(pdev));
	if (*swp == NULL) {
		*slot = NULL;
		goto out;
	}

	mutex_lock(&(*swp)->lock);
	*slot = pci_iomul_find_slot_locked(*swp, pdev->bus->number,
					   PCI_SLOT(pdev->devfn));
	if (*slot == NULL) {
		mutex_unlock(&(*swp)->lock);
		*swp = NULL;
	} else {
		pci_iomul_switch_get(*swp);
	}
out:
	mutex_unlock(&switch_list_lock);
}
218 static struct pci_iomul_switch *pci_iomul_switch_alloc(int segment,
219 uint8_t bus)
220 {
221 struct pci_iomul_switch *sw;
223 BUG_ON(!mutex_is_locked(&switch_list_lock));
225 sw = kmalloc(sizeof(*sw), GFP_KERNEL);
227 mutex_init(&sw->lock);
228 kref_init(&sw->kref);
229 sw->io_region = NULL;
230 sw->count = 0;
231 sw->current_pdev = NULL;
232 sw->segment = segment;
233 sw->bus = bus;
234 sw->io_base = 0;
235 sw->io_limit = 0;
236 sw->func = NULL;
237 INIT_LIST_HEAD(&sw->slots);
239 return sw;
240 }
/* Link a new switch onto switch_list; caller holds switch_list_lock. */
static void pci_iomul_switch_add_locked(struct pci_iomul_switch *sw)
{
	BUG_ON(!mutex_is_locked(&switch_list_lock));
	list_add(&sw->list, &switch_list);
}
#ifdef CONFIG_HOTPLUG_PCI
/* Unlink a switch from switch_list; caller holds switch_list_lock. */
static void pci_iomul_switch_del_locked(struct pci_iomul_switch *sw)
{
	BUG_ON(!mutex_is_locked(&switch_list_lock));
	list_del(&sw->list);
}
#endif
/* Take a reference on a switch. */
static void pci_iomul_switch_get(struct pci_iomul_switch *sw)
{
	kref_get(&sw->kref);
}
/* kref release callback: frees the switch once the last ref is dropped. */
static void pci_iomul_switch_release(struct kref *kref)
{
	struct pci_iomul_switch *sw = container_of(kref,
						   struct pci_iomul_switch,
						   kref);
	kfree(sw);
}
/* Drop a reference on a switch; frees it when the count hits zero. */
static void pci_iomul_switch_put(struct pci_iomul_switch *sw)
{
	kref_put(&sw->kref, &pci_iomul_switch_release);
}
/*
 * Initialise a slot record from pdev.  Only PCIe endpoints and legacy
 * endpoints are supported: plain PCI devices and root-complex
 * integrated endpoints have no switch whose window could be shared.
 * Returns 0 on success, -ENOSYS/-EINVAL for unsupported devices.
 */
static int __devinit pci_iomul_slot_init(struct pci_dev *pdev,
					 struct pci_iomul_slot *slot)
{
	u16 rpcap;
	u16 cap;

	rpcap = pci_find_capability(pdev, PCI_CAP_ID_EXP);
	if (!rpcap) {
		/* pci device isn't supported */
		printk(KERN_INFO
		       "PCI: sharing io port of non PCIe device %s "
		       "isn't supported. ignoring.\n",
		       pci_name(pdev));
		return -ENOSYS;
	}

	pci_read_config_word(pdev, rpcap + PCI_CAP_FLAGS, &cap);
	switch ((cap & PCI_EXP_FLAGS_TYPE) >> 4) {
	case PCI_EXP_TYPE_RC_END:
		printk(KERN_INFO
		       "PCI: io port sharing of root complex integrated "
		       "endpoint %s isn't supported. ignoring.\n",
		       pci_name(pdev));
		return -ENOSYS;
	case PCI_EXP_TYPE_ENDPOINT:
	case PCI_EXP_TYPE_LEG_END:
		break;
	default:
		printk(KERN_INFO
		       "PCI: io port sharing of non endpoint %s "
		       "doesn't make sense. ignoring.\n",
		       pci_name(pdev));
		return -EINVAL;
	}

	kref_init(&slot->kref);
	slot->switch_busnr = pci_dev_switch_busnr(pdev);
	slot->segment = pci_domain_nr(pdev->bus);
	slot->bus = pdev->bus->number;
	slot->dev = PCI_SLOT(pdev->devfn);

	return 0;
}
318 static struct pci_iomul_slot *pci_iomul_slot_alloc(struct pci_dev *pdev)
319 {
320 struct pci_iomul_slot *slot;
322 slot = kzalloc(sizeof(*slot), GFP_KERNEL);
323 if (slot == NULL)
324 return NULL;
326 if (pci_iomul_slot_init(pdev, slot) != 0) {
327 kfree(slot);
328 return NULL;
329 }
330 return slot;
331 }
/* Link a slot onto its switch's list; caller holds sw->lock. */
static void pci_iomul_slot_add_locked(struct pci_iomul_switch *sw,
				      struct pci_iomul_slot *slot)
{
	BUG_ON(!mutex_is_locked(&sw->lock));
	list_add(&slot->sibling, &sw->slots);
}
#ifdef CONFIG_HOTPLUG_PCI
/* Unlink a slot from its switch's list; caller holds sw->lock. */
static void pci_iomul_slot_del_locked(struct pci_iomul_switch *sw,
				      struct pci_iomul_slot *slot)
{
	BUG_ON(!mutex_is_locked(&sw->lock));
	list_del(&slot->sibling);
}
#endif
/* Take a reference on a slot. */
static void pci_iomul_slot_get(struct pci_iomul_slot *slot)
{
	kref_get(&slot->kref);
}
/* kref release callback: frees the slot once the last ref is dropped. */
static void pci_iomul_slot_release(struct kref *kref)
{
	struct pci_iomul_slot *slot = container_of(kref, struct pci_iomul_slot,
						   kref);
	kfree(slot);
}
/* Drop a reference on a slot; frees it when the count hits zero. */
static void pci_iomul_slot_put(struct pci_iomul_slot *slot)
{
	kref_put(&slot->kref, &pci_iomul_slot_release);
}
366 /*****************************************************************************/
367 static int pci_get_sbd(const char *str,
368 int *segment__, uint8_t *bus__, uint8_t *dev__)
369 {
370 int segment;
371 int bus;
372 int dev;
374 if (sscanf(str, "%x:%x:%x", &segment, &bus, &dev) != 3) {
375 if (sscanf(str, "%x:%x", &bus, &dev) == 2)
376 segment = 0;
377 else
378 return -EINVAL;
379 }
381 if (segment < 0 || INT_MAX <= segment)
382 return -EINVAL;
383 if (bus < 0 || PCI_BUS_MAX < bus)
384 return -EINVAL;
385 if (dev < 0 || PCI_DEV_MAX < dev)
386 return -EINVAL;
388 *segment__ = segment;
389 *bus__ = bus;
390 *dev__ = dev;
391 return 0;
392 }
394 static char iomul_param[COMMAND_LINE_SIZE];
395 #define TOKEN_MAX 10 /* SSSS:BB:DD length is 10 */
396 static int pci_is_iomul_dev_param(struct pci_dev *pdev)
397 {
398 int len;
399 char *p;
400 char *next_str;
402 for (p = &iomul_param[0]; *p != '\0'; p = next_str + 1) {
403 next_str = strchr(p, ',');
404 if (next_str != NULL)
405 len = next_str - p;
406 else
407 len = strlen(p);
409 if (len > 0 && len <= TOKEN_MAX) {
410 char tmp[TOKEN_MAX+1];
411 int seg;
412 uint8_t bus;
413 uint8_t dev;
415 strlcpy(tmp, p, len);
416 if (pci_get_sbd(tmp, &seg, &bus, &dev) == 0 &&
417 pci_domain_nr(pdev->bus) == seg &&
418 pdev->bus->number == bus &&
419 PCI_SLOT(pdev->devfn) == dev)
420 return 1;
421 }
422 if (next_str == NULL)
423 break;
424 }
426 /* check guestcev=<device>+iomul option */
427 return pci_is_iomuldev(pdev);
428 }
/*
 * Format: [<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>[,...]]
 * Records the raw string; it is parsed lazily by
 * pci_is_iomul_dev_param() after the PCI bus has been scanned.
 */
static int __init pci_iomul_param_setup(char *str)
{
	if (strlen(str) >= COMMAND_LINE_SIZE)
		return 0;

	/* parse it after pci bus scanning */
	strlcpy(iomul_param, str, sizeof(iomul_param));
	return 1;
}
__setup("guestiomuldev=", pci_iomul_param_setup);
/*****************************************************************************/
/*
 * Program a PCI-PCI bridge's I/O window registers to [io_base, io_limit].
 * Addresses are 4KiB-granular: bits [15:12] go in the 8-bit base/limit
 * registers, bits [31:16] in the 16-bit upper registers.
 */
static void __devinit pci_iomul_set_bridge_io_window(struct pci_dev *bridge,
						     uint32_t io_base,
						     uint32_t io_limit)
{
	uint16_t l;
	uint32_t upper16;

	io_base >>= 12;
	io_base <<= 4;
	io_limit >>= 12;
	io_limit <<= 4;
	l = (io_base & 0xff) | ((io_limit & 0xff) << 8);
	upper16 = ((io_base & 0xffff00) >> 8) |
		(((io_limit & 0xffff00) >> 8) << 16);

	/* Temporarily disable the I/O range before updating PCI_IO_BASE
	 * (base upper = 0xffff > limit upper = 0 means window closed). */
	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff);
	/* Update lower 16 bits of I/O base/limit. */
	pci_write_config_word(bridge, PCI_IO_BASE, l);
	/* Update upper 16 bits of I/O base/limit. */
	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, upper16);
}
/* Close a bridge's I/O window by programming base > limit. */
static void __devinit pci_disable_bridge_io_window(struct pci_dev *bridge)
{
	/* set base = 0xffffff limit = 0x0 */
	pci_iomul_set_bridge_io_window(bridge, 0xffffff, 0);
}
/*
 * Record the I/O BARs of (slot, func) into a new pci_iomul_func and
 * install it in slot->func[func].  A function with no I/O BARs is not
 * recorded.  Returns 0 on success, -ENOMEM on allocation failure.
 */
static int __devinit pci_iomul_func_scan(struct pci_dev *pdev,
					 struct pci_iomul_slot *slot,
					 uint8_t func)
{
	struct pci_iomul_func *f;
	unsigned int i;

	f = kzalloc(sizeof(*f), GFP_KERNEL);
	if (f == NULL)
		return -ENOMEM;

	f->segment = slot->segment;
	f->bus = slot->bus;
	f->devfn = PCI_DEVFN(slot->dev, func);
	f->io_size = pdev_size_io(pdev);

	for (i = 0; i < PCI_NUM_BARS; i++) {
		if (!(pci_resource_flags(pdev, i) & IORESOURCE_IO))
			continue;
		if (pci_resource_len(pdev, i) == 0)
			continue;

		f->io_bar |= 1 << i;
		f->resource[i] = pdev->resource[i];
	}

	if (f->io_bar)
		slot->func[func] = f;
	else
		kfree(f);	/* no I/O BARs: nothing to multiplex */
	return 0;
}
/*
 * This is the tricky part.
 * Fake out the PCI resource assignment routines by setting flags to 0.
 * PCI resource allocation routines decide whether a resource should
 * be allocated by checking flags; 0 means this resource isn't used.
 * See pbus_size_io() and pdev_sort_resources().
 *
 * After resources are allocated, flags (IORESOURCE_IO) is exported
 * to other parts including user processes.
 * So we have to set flags to IORESOURCE_IO, but at the same time
 * we must prevent those resources from being reassigned on PCI hotplug.
 * To achieve that, set r->parent to a dummy resource.
 */
static void __devinit pci_iomul_disable_resource(struct resource *r)
{
	/* don't allocate this resource */
	r->flags = 0;
}
/*
 * Re-export r under a dummy parent sized to exactly cover it, so the
 * resource looks allocated and hotplug code will not reassign it.
 * request_resource() cannot fail here since the parent spans r exactly.
 */
static void __devinit pci_iomul_reenable_resource(
	struct resource *dummy_parent, struct resource *r)
{
	int ret;

	dummy_parent->start = r->start;
	dummy_parent->end = r->end;
	dummy_parent->flags = r->flags;
	dummy_parent->name = "PCI IOMUL dummy resource";

	ret = request_resource(dummy_parent, r);
	BUG_ON(ret);
}
/*
 * Strip a function's I/O resources.
 * If @reassign, rebase each I/O BAR to 0 (and later its parent bridge's
 * I/O resources too, after closing the bridge window).
 * If @dealloc, zero the flags so the allocator ignores the BARs.
 */
static void __devinit pci_iomul_fixup_ioresource(struct pci_dev *pdev,
						 struct pci_iomul_func *func,
						 int reassign, int dealloc)
{
	uint8_t i;
	struct resource *r;

	printk(KERN_INFO "PCI: deallocating io resource[%s]. io size 0x%lx\n",
	       pci_name(pdev), func->io_size);
	for (i = 0; i < PCI_NUM_BARS; i++) {
		r = &pdev->resource[i];
		if (!(func->io_bar & (1 << i)))
			continue;

		if (reassign) {
			r->end -= r->start;
			r->start = 0;
			pci_update_resource(pdev, i);
			func->resource[i] = *r;
		}

		if (dealloc)
			/* don't allocate this resource */
			pci_iomul_disable_resource(r);
	}

	/* parent PCI-PCI bridge */
	if (!reassign)
		return;
	pdev = pdev->bus->self;
	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
		return;	/* host bridges have no programmable I/O window */
	pci_disable_bridge_io_window(pdev);
	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
		r = &pdev->resource[i];
		if (!(r->flags & IORESOURCE_IO))
			continue;

		r->end -= r->start;
		r->start = 0;
		/* only real BARs have config-space registers to update */
		if (i < PCI_BRIDGE_RESOURCES)
			pci_update_resource(pdev, i);
	}
}
/*
 * Scan pdev's function and decide which function keeps the real I/O
 * window.  At boot (sw->io_base == 0) the function with the largest
 * io_size becomes sw->func: the previous holder, if any, is deallocated
 * and the new one is only rebased.  All others are rebased and
 * deallocated.
 */
static void __devinit __quirk_iomul_dealloc_ioresource(
	struct pci_iomul_switch *sw,
	struct pci_dev *pdev, struct pci_iomul_slot *slot)
{
	struct pci_iomul_func *f;
	struct pci_iomul_func *__f;

	if (pci_iomul_func_scan(pdev, slot, PCI_FUNC(pdev->devfn)) != 0)
		return;

	f = slot->func[PCI_FUNC(pdev->devfn)];
	if (f == NULL)
		return;	/* this function has no I/O BARs */

	__f = sw->func;
	/* sw->io_base == 0 means that we are called at boot time.
	 * != 0 means that we are called by php after boot. */
	if (sw->io_base == 0 &&
	    (__f == NULL || __f->io_size < f->io_size)) {
		if (__f != NULL) {
			/* demote the previous largest function */
			struct pci_bus *__pbus;
			struct pci_dev *__pdev;

			__pbus = pci_find_bus(__f->segment, __f->bus);
			BUG_ON(__pbus == NULL);
			__pdev = pci_get_slot(__pbus, __f->devfn);
			BUG_ON(__pdev == NULL);
			pci_iomul_fixup_ioresource(__pdev, __f, 0, 1);
			pci_dev_put(__pdev);
		}

		pci_iomul_fixup_ioresource(pdev, f, 1, 0);
		sw->func = f;
	} else {
		pci_iomul_fixup_ioresource(pdev, f, 1, 1);
	}
}
/*
 * Header fixup run for every PCI device: for devices named by the
 * guestiomuldev= parameter, create (or find) the switch and slot
 * records, then disable the device and release its I/O resources.
 */
static void __devinit quirk_iomul_dealloc_ioresource(struct pci_dev *pdev)
{
	struct pci_iomul_switch *sw;
	struct pci_iomul_slot *slot;

	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return;
	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
		return;	/* PCI Host Bridge isn't a target device */
	if (!pci_is_iomul_dev_param(pdev))
		return;

	mutex_lock(&switch_list_lock);
	sw = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus),
					  pci_dev_switch_busnr(pdev));
	if (sw == NULL) {
		sw = pci_iomul_switch_alloc(pci_domain_nr(pdev->bus),
					    pci_dev_switch_busnr(pdev));
		if (sw == NULL) {
			mutex_unlock(&switch_list_lock);
			printk(KERN_WARNING
			       "PCI: can't allocate memory "
			       "for sw of IO mulplexing %s", pci_name(pdev));
			return;
		}
		pci_iomul_switch_add_locked(sw);
	}
	pci_iomul_switch_get(sw);
	mutex_unlock(&switch_list_lock);

	mutex_lock(&sw->lock);
	slot = pci_iomul_find_slot_locked(sw, pdev->bus->number,
					  PCI_SLOT(pdev->devfn));
	if (slot == NULL) {
		slot = pci_iomul_slot_alloc(pdev);
		if (slot == NULL) {
			mutex_unlock(&sw->lock);
			pci_iomul_switch_put(sw);
			printk(KERN_WARNING "PCI: can't allocate memory "
			       "for IO mulplexing %s", pci_name(pdev));
			return;
		}
		pci_iomul_slot_add_locked(sw, slot);
	}

	printk(KERN_INFO "PCI: disable device and release io resource[%s].\n",
	       pci_name(pdev));
	pci_disable_device(pdev);

	__quirk_iomul_dealloc_ioresource(sw, pdev, slot);

	mutex_unlock(&sw->lock);
	pci_iomul_switch_put(sw);
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID,
			 quirk_iomul_dealloc_ioresource);
/*
 * Read back the I/O window that was assigned to the bridge above
 * sw->func and record it in sw->io_base/io_limit.  The registers hold
 * bits [15:12] (low byte) and [31:16] (upper16); the limit's bottom
 * 12 bits are an implicit 0xfff.
 */
static void __devinit pci_iomul_read_bridge_io(struct pci_iomul_switch *sw)
{
	struct pci_iomul_func *f = sw->func;

	struct pci_bus *pbus;
	struct pci_dev *pdev;
	struct pci_dev *bridge;

	uint16_t l;
	uint16_t base_upper16;
	uint16_t limit_upper16;
	uint32_t io_base;
	uint32_t io_limit;

	pbus = pci_find_bus(f->segment, f->bus);
	BUG_ON(pbus == NULL);

	pdev = pci_get_slot(pbus, f->devfn);
	BUG_ON(pdev == NULL);

	bridge = pdev->bus->self;
	pci_read_config_word(bridge, PCI_IO_BASE, &l);
	pci_read_config_word(bridge, PCI_IO_BASE_UPPER16, &base_upper16);
	pci_read_config_word(bridge, PCI_IO_LIMIT_UPPER16, &limit_upper16);

	io_base = (l & 0xf0) | ((uint32_t)base_upper16 << 8);
	io_base <<= 8;
	io_limit = (l >> 8) | ((uint32_t)limit_upper16 << 8);
	io_limit <<= 8;
	io_limit |= 0xfff;

	sw->io_base = io_base;
	sw->io_limit = io_limit;

	pci_dev_put(pdev);
	printk(KERN_INFO "PCI: bridge %s base 0x%x limit 0x%x\n",
	       pci_name(bridge), sw->io_base, sw->io_limit);
}
/*
 * Program a bridge's I/O window and force-enable I/O decoding.
 * (Name keeps the historical "brige" spelling; callers use it.)
 */
static void __devinit pci_iomul_setup_brige(struct pci_dev *bridge,
					    uint32_t io_base,
					    uint32_t io_limit)
{
	uint16_t cmd;

	if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_HOST)
		return;	/* host bridges have no programmable window */

	pci_iomul_set_bridge_io_window(bridge, io_base, io_limit);

	/* and forcibly enable IO */
	pci_read_config_word(bridge, PCI_COMMAND, &cmd);
	if (!(cmd & PCI_COMMAND_IO)) {
		cmd |= PCI_COMMAND_IO;
		printk(KERN_INFO "PCI: Forcibly Enabling IO %s\n",
		       pci_name(bridge));
		pci_write_config_word(bridge, PCI_COMMAND, cmd);
	}
}
740 struct __bar {
741 unsigned long size;
742 uint8_t bar;
743 };
745 /* decending order */
746 static int __devinit pci_iomul_bar_cmp(const void *lhs__, const void *rhs__)
747 {
748 const struct __bar *lhs = (struct __bar*)lhs__;
749 const struct __bar *rhs = (struct __bar*)rhs__;
750 return - (lhs->size - rhs->size);
751 }
/*
 * Pack the function's I/O BARs contiguously starting at io_base,
 * largest first, write them to config space, and pin each resource
 * under its dummy parent so hotplug will not reassign it.
 */
static void __devinit pci_iomul_setup_dev(struct pci_dev *pdev,
					  struct pci_iomul_func *f,
					  uint32_t io_base)
{
	struct __bar bars[PCI_NUM_BARS];
	int i;
	uint8_t num_bars = 0;
	struct resource *r;

	printk(KERN_INFO "PCI: Forcibly assign IO %s from 0x%x\n",
	       pci_name(pdev), io_base);

	for (i = 0; i < PCI_NUM_BARS; i++) {
		if (!(f->io_bar & (1 << i)))
			continue;

		r = &f->resource[i];
		bars[num_bars].size = pci_iomul_len(r);
		bars[num_bars].bar = i;

		num_bars++;
	}

	/* largest first, so alignment requirements are met naturally */
	sort(bars, num_bars, sizeof(bars[0]), &pci_iomul_bar_cmp, NULL);

	for (i = 0; i < num_bars; i++) {
		struct resource *fr = &f->resource[bars[i].bar];
		r = &pdev->resource[bars[i].bar];

		/* BARs were rebased to 0 by pci_iomul_fixup_ioresource() */
		BUG_ON(r->start != 0);
		r->start += io_base;
		r->end += io_base;

		fr->start = r->start;
		fr->end = r->end;

		/* pci_update_resource() checks flags. */
		r->flags = fr->flags;
		pci_update_resource(pdev, bars[i].bar);
		pci_iomul_reenable_resource(&f->dummy_parent, r);

		io_base += bars[i].size;
	}
}
/*
 * For the window-owning function: release its I/O resources (and its
 * bridge's) from the resource tree, re-pin them under dummy parents,
 * and claim the whole switch window as sw->io_resource instead.
 */
static void __devinit pci_iomul_release_io_resource(
	struct pci_dev *pdev, struct pci_iomul_switch *sw,
	struct pci_iomul_slot *slot, struct pci_iomul_func *f)
{
	int i;
	struct resource *r;

	for (i = 0; i < PCI_NUM_BARS; i++) {
		if (pci_resource_flags(pdev, i) & IORESOURCE_IO &&
		    pdev->resource[i].parent != NULL) {
			r = &pdev->resource[i];
			f->resource[i] = *r;
			release_resource(r);
			pci_iomul_reenable_resource(&f->dummy_parent, r);
		}
	}

	/* parent PCI-PCI bridge */
	pdev = pdev->bus->self;
	if ((pdev->class >> 8) != PCI_CLASS_BRIDGE_HOST) {
		for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
			struct resource *parent = pdev->resource[i].parent;

			if (pci_resource_flags(pdev, i) & IORESOURCE_IO &&
			    parent != NULL) {
				r = &pdev->resource[i];

				sw->io_resource.flags = r->flags;
				sw->io_resource.start = sw->io_base;
				sw->io_resource.end = sw->io_limit;
				sw->io_resource.name = "PCI IO Multiplexer";

				release_resource(r);
				pci_iomul_reenable_resource(
					&slot->dummy_parent[i - PCI_BRIDGE_RESOURCES], r);

				/* take the bridge's old place in the tree */
				if (request_resource(parent,
						     &sw->io_resource))
					printk(KERN_ERR
					       "PCI IOMul: can't allocate "
					       "resource. [0x%x, 0x%x]",
					       sw->io_base, sw->io_limit);
			}
		}
	}
}
/*
 * Final fixup run for every PCI device: once the window owner's bridge
 * has been assigned a window, read it back, program sibling bridges to
 * the same window, and either hand the window to the owning function
 * (pci_iomul_release_io_resource) or pack this function's BARs into it
 * (pci_iomul_setup_dev).
 */
static void __devinit quirk_iomul_reassign_ioresource(struct pci_dev *pdev)
{
	struct pci_iomul_switch *sw;
	struct pci_iomul_slot *slot;
	struct pci_iomul_func *sf;
	struct pci_iomul_func *f;

	pci_iomul_get_lock_switch(pdev, &sw, &slot);
	if (sw == NULL || slot == NULL)
		return;

	if (sw->io_base == 0)
		pci_iomul_read_bridge_io(sw);
	if (!pci_iomul_switch_io_allocated(sw))
		goto out;

	sf = sw->func;
	f = slot->func[PCI_FUNC(pdev->devfn)];
	if (f == NULL)
		/* (sf == NULL || f == NULL) case
		 * can happen when all the specified devices
		 * don't have io space
		 */
		goto out;

	/* program this device's bridge once, on function 0, unless it is
	 * the window owner's bridge (already set up by the allocator) */
	if (sf != NULL &&
	    (pci_domain_nr(pdev->bus) != sf->segment ||
	     pdev->bus->number != sf->bus ||
	     PCI_SLOT(pdev->devfn) != PCI_SLOT(sf->devfn)) &&
	    PCI_FUNC(pdev->devfn) == 0) {
		pci_iomul_setup_brige(pdev->bus->self,
				      sw->io_base, sw->io_limit);
	}

	BUG_ON(f->io_size > sw->io_limit - sw->io_base + 1);
	if (/* f == sf */
	    sf != NULL &&
	    pci_domain_nr(pdev->bus) == sf->segment &&
	    pdev->bus->number == sf->bus &&
	    pdev->devfn == sf->devfn)
		pci_iomul_release_io_resource(pdev, sw, slot, f);
	else
		pci_iomul_setup_dev(pdev, f, sw->io_base);

out:
	mutex_unlock(&sw->lock);
	pci_iomul_switch_put(sw);
}

DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID,
			quirk_iomul_reassign_ioresource);
/*****************************************************************************/
#ifdef CONFIG_HOTPLUG_PCI
/*
 * Hot-remove of an endpoint: drop its function record and, if that was
 * the last function of the slot, drop the slot too.
 */
static int __devinit __pci_iomul_notifier_del_device(struct pci_dev *pdev)
{
	struct pci_iomul_switch *sw;
	struct pci_iomul_slot *slot;
	int i;

	pci_iomul_get_lock_switch(pdev, &sw, &slot);
	if (sw == NULL || slot == NULL)
		return 0;

	if (sw->func == slot->func[PCI_FUNC(pdev->devfn)])
		sw->func = NULL;
	kfree(slot->func[PCI_FUNC(pdev->devfn)]);
	slot->func[PCI_FUNC(pdev->devfn)] = NULL;
	for (i = 0; i < PCI_NUM_FUNC; i++) {
		if (slot->func[i] != NULL)
			goto out;	/* slot still has live functions */
	}

	pci_iomul_slot_del_locked(sw, slot);
	pci_iomul_slot_put(slot);

out:
	mutex_unlock(&sw->lock);
	pci_iomul_switch_put(sw);
	return 0;
}
927 static int __devinit __pci_iomul_notifier_del_switch(struct pci_dev *pdev)
928 {
929 struct pci_iomul_switch *sw;
931 mutex_lock(&switch_list_lock);
932 sw = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus),
933 pdev->bus->number);
934 if (sw == NULL)
935 goto out;
937 pci_iomul_switch_del_locked(sw);
939 mutex_lock(&sw->lock);
940 if (sw->io_resource.parent)
941 release_resource(&sw->io_resource);
942 sw->io_base = 0; /* to tell this switch is removed */
943 sw->io_limit = 0;
944 BUG_ON(!list_empty(&sw->slots));
945 mutex_unlock(&sw->lock);
947 out:
948 mutex_unlock(&switch_list_lock);
949 pci_iomul_switch_put(sw);
950 return 0;
951 }
/* Dispatch a device removal to the endpoint or bridge handler. */
static int __devinit pci_iomul_notifier_del_device(struct pci_dev *pdev)
{
	int ret;
	switch (pdev->hdr_type) {
	case PCI_HEADER_TYPE_NORMAL:
		ret = __pci_iomul_notifier_del_device(pdev);
		break;
	case PCI_HEADER_TYPE_BRIDGE:
		ret = __pci_iomul_notifier_del_switch(pdev);
		break;
	default:
		printk(KERN_WARNING "PCI IOMUL: "
		       "device %s has unknown header type %02x, ignoring.\n",
		       pci_name(pdev), pdev->hdr_type);
		ret = -EIO;
		break;
	}
	return ret;
}
/* PCI bus notifier: reapply the reassign fixup on add, tear down on remove. */
static int __devinit pci_iomul_notifier(struct notifier_block *nb,
					unsigned long action, void *data)
{
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);

	switch (action) {
	case BUS_NOTIFY_ADD_DEVICE:
		quirk_iomul_reassign_ioresource(pdev);
		break;
	case BUS_NOTIFY_DEL_DEVICE:
		return pci_iomul_notifier_del_device(pdev);
	default:
		/* nothing */
		break;
	}

	return 0;
}
/* Notifier block registered on pci_bus_type by pci_iomul_hotplug_init(). */
static struct notifier_block pci_iomul_nb = {
	.notifier_call = pci_iomul_notifier,
};
/* Register for PCI bus add/remove notifications once at late init. */
static int __init pci_iomul_hotplug_init(void)
{
	bus_register_notifier(&pci_bus_type, &pci_iomul_nb);
	return 0;
}
late_initcall(pci_iomul_hotplug_init);
#endif
/*****************************************************************************/
/* Per-open-file state of the /dev multiplexer interface. */
struct pci_iomul_data {
	struct mutex lock;

	struct pci_dev *pdev;		/* device bound by PCI_IOMUL_SETUP */
	struct pci_iomul_switch *sw;
	struct pci_iomul_slot *slot;	/* slot::kref */
	struct pci_iomul_func **func;	/* when dereferencing,
					   sw->lock is necessary */
};
/*
 * Translate (bar, offset) of a function into an absolute I/O port.
 * Returns 0 and sets *port on success, -EINVAL if bar is not one of
 * the function's I/O BARs or the offset falls outside it.
 */
static int pci_iomul_func_ioport(struct pci_iomul_func *func,
				 uint8_t bar, uint64_t offset, int *port)
{
	if (!(func->io_bar & (1 << bar)))
		return -EINVAL;

	*port = func->resource[bar].start + offset;
	if (*port < func->resource[bar].start ||
	    *port > func->resource[bar].end)
		return -EINVAL;

	return 0;
}
/* Is this handle still usable?  Both iomul->lock and sw->lock must be held. */
static inline int pci_iomul_valid(struct pci_iomul_data *iomul)
{
	BUG_ON(!mutex_is_locked(&iomul->lock));
	BUG_ON(!mutex_is_locked(&iomul->sw->lock));
	return pci_iomul_switch_io_allocated(iomul->sw) &&
		*iomul->func != NULL;
}
/* Grab a reference on pdev and turn its I/O decoding on. */
static void __pci_iomul_enable_io(struct pci_dev *pdev)
{
	uint16_t cmd;

	pci_dev_get(pdev);
	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
	cmd |= PCI_COMMAND_IO;
	pci_write_config_word(pdev, PCI_COMMAND, cmd);
}
/* Turn pdev's I/O decoding off and drop the ref __pci_iomul_enable_io()
 * took.  NOTE(review): when pci_iomul_valid() fails we return before
 * pci_dev_put() — looks like a possible refcount leak; confirm. */
static void __pci_iomul_disable_io(struct pci_iomul_data *iomul,
				   struct pci_dev *pdev)
{
	uint16_t cmd;

	if (!pci_iomul_valid(iomul))
		return;

	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
	cmd &= ~PCI_COMMAND_IO;
	pci_write_config_word(pdev, PCI_COMMAND, cmd);
	pci_dev_put(pdev);
}
/* file_operations open: allocate empty per-open state. */
static int pci_iomul_open(struct inode *inode, struct file *filp)
{
	struct pci_iomul_data *iomul;

	iomul = kmalloc(sizeof(*iomul), GFP_KERNEL);
	if (iomul == NULL)
		return -ENOMEM;

	mutex_init(&iomul->lock);
	iomul->pdev = NULL;
	iomul->sw = NULL;
	iomul->slot = NULL;
	iomul->func = NULL;
	filp->private_data = (void*)iomul;

	return 0;
}
/*
 * file_operations release: undo a successful PCI_IOMUL_SETUP — disable
 * I/O on the device if it currently owns the window, drop the user
 * count (releasing the I/O region on last close), and drop all refs.
 * (Brace structure reconstructed; blank/brace lines were lost in the
 * source listing.)
 */
static int pci_iomul_release(struct inode *inode, struct file *filp)
{
	struct pci_iomul_data *iomul =
		(struct pci_iomul_data*)filp->private_data;
	struct pci_iomul_switch *sw;
	struct pci_iomul_slot *slot = NULL;

	mutex_lock(&iomul->lock);
	sw = iomul->sw;
	slot = iomul->slot;
	if (iomul->pdev != NULL) {
		if (sw != NULL) {
			mutex_lock(&sw->lock);
			if (sw->current_pdev == iomul->pdev) {
				__pci_iomul_disable_io(iomul,
						       sw->current_pdev);
				sw->current_pdev = NULL;
			}
			sw->count--;
			if (sw->count == 0) {
				release_region(sw->io_region->start, sw->io_region->end - sw->io_region->start + 1);
				sw->io_region = NULL;
			}
			mutex_unlock(&sw->lock);
		}
		pci_dev_put(iomul->pdev);
	}
	mutex_unlock(&iomul->lock);

	if (slot != NULL)
		pci_iomul_slot_put(slot);
	if (sw != NULL)
		pci_iomul_switch_put(sw);
	kfree(iomul);
	return 0;
}
1117 static long pci_iomul_setup(struct pci_iomul_data *iomul,
1118 struct pci_iomul_setup __user *arg)
1120 long error = 0;
1121 struct pci_iomul_setup setup;
1122 struct pci_iomul_switch *sw = NULL;
1123 struct pci_iomul_slot *slot;
1124 struct pci_bus *pbus;
1125 struct pci_dev *pdev;
1127 if (copy_from_user(&setup, arg, sizeof(setup)))
1128 return -EFAULT;
1130 pbus = pci_find_bus(setup.segment, setup.bus);
1131 if (pbus == NULL)
1132 return -ENODEV;
1133 pdev = pci_get_slot(pbus, setup.dev);
1134 if (pdev == NULL)
1135 return -ENODEV;
1137 mutex_lock(&iomul->lock);
1138 if (iomul->sw != NULL) {
1139 error = -EBUSY;
1140 goto out0;
1143 pci_iomul_get_lock_switch(pdev, &sw, &slot);
1144 if (sw == NULL || slot == NULL) {
1145 error = -ENODEV;
1146 goto out0;
1148 if (!pci_iomul_switch_io_allocated(sw)) {
1149 error = -ENODEV;
1150 goto out;
1153 if (slot->func[setup.func] == NULL) {
1154 error = -ENODEV;
1155 goto out;
1158 if (sw->count == 0) {
1159 BUG_ON(sw->io_region != NULL);
1160 sw->io_region =
1161 request_region(sw->io_base,
1162 sw->io_limit - sw->io_base + 1,
1163 "PCI IO Multiplexer driver");
1164 if (sw->io_region == NULL) {
1165 mutex_unlock(&sw->lock);
1166 error = -EBUSY;
1167 goto out;
1170 sw->count++;
1171 pci_iomul_slot_get(slot);
1173 iomul->pdev = pdev;
1174 iomul->sw = sw;
1175 iomul->slot = slot;
1176 iomul->func = &slot->func[setup.func];
1178 out:
1179 mutex_unlock(&sw->lock);
1180 out0:
1181 mutex_unlock(&iomul->lock);
1182 if (error != 0) {
1183 if (sw != NULL)
1184 pci_iomul_switch_put(sw);
1185 pci_dev_put(pdev);
1187 return error;
1190 static int pci_iomul_lock(struct pci_iomul_data *iomul,
1191 struct pci_iomul_switch **sw,
1192 struct pci_iomul_func **func)
1194 mutex_lock(&iomul->lock);
1195 *sw = iomul->sw;
1196 if (*sw == NULL) {
1197 mutex_unlock(&iomul->lock);
1198 return -ENODEV;
1200 mutex_lock(&(*sw)->lock);
1201 if (!pci_iomul_valid(iomul)) {
1202 mutex_unlock(&(*sw)->lock);
1203 mutex_unlock(&iomul->lock);
1204 return -ENODEV;
1206 *func = *iomul->func;
1208 return 0;
1211 static long pci_iomul_disable_io(struct pci_iomul_data *iomul)
1213 long error = 0;
1214 struct pci_iomul_switch *sw;
1215 struct pci_iomul_func *dummy_func;
1216 struct pci_dev *pdev;
1218 if (pci_iomul_lock(iomul, &sw, &dummy_func) < 0)
1219 return -ENODEV;
1221 pdev = iomul->pdev;
1222 if (pdev == NULL)
1223 error = -ENODEV;
1225 if (pdev != NULL && sw->current_pdev == pdev) {
1226 __pci_iomul_disable_io(iomul, pdev);
1227 sw->current_pdev = NULL;
1230 mutex_unlock(&sw->lock);
1231 mutex_unlock(&iomul->lock);
1232 return error;
1235 static void pci_iomul_switch_to(
1236 struct pci_iomul_data *iomul, struct pci_iomul_switch *sw,
1237 struct pci_dev *next_pdev)
1239 if (sw->current_pdev == next_pdev)
1240 /* nothing to do */
1241 return;
1243 if (sw->current_pdev != NULL)
1244 __pci_iomul_disable_io(iomul, sw->current_pdev);
1246 __pci_iomul_enable_io(next_pdev);
1247 sw->current_pdev = next_pdev;
1250 static long pci_iomul_in(struct pci_iomul_data *iomul,
1251 struct pci_iomul_in __user *arg)
1253 struct pci_iomul_in in;
1254 struct pci_iomul_switch *sw;
1255 struct pci_iomul_func *func;
1257 long error = 0;
1258 int port;
1259 uint32_t value = 0;
1261 if (copy_from_user(&in, arg, sizeof(in)))
1262 return -EFAULT;
1264 if (pci_iomul_lock(iomul, &sw, &func) < 0)
1265 return -ENODEV;
1267 error = pci_iomul_func_ioport(func, in.bar, in.offset, &port);
1268 if (error)
1269 goto out;
1271 pci_iomul_switch_to(iomul, sw, iomul->pdev);
1272 switch (in.size) {
1273 case 4:
1274 value = inl(port);
1275 break;
1276 case 2:
1277 value = inw(port);
1278 break;
1279 case 1:
1280 value = inb(port);
1281 break;
1282 default:
1283 error = -EINVAL;
1284 break;
1287 out:
1288 mutex_unlock(&sw->lock);
1289 mutex_unlock(&iomul->lock);
1291 if (error == 0 && put_user(value, &arg->value))
1292 return -EFAULT;
1293 return error;
1296 static long pci_iomul_out(struct pci_iomul_data *iomul,
1297 struct pci_iomul_out __user *arg)
1299 struct pci_iomul_in out;
1300 struct pci_iomul_switch *sw;
1301 struct pci_iomul_func *func;
1303 long error = 0;
1304 int port;
1306 if (copy_from_user(&out, arg, sizeof(out)))
1307 return -EFAULT;
1309 if (pci_iomul_lock(iomul, &sw, &func) < 0)
1310 return -ENODEV;
1312 error = pci_iomul_func_ioport(func, out.bar, out.offset, &port);
1313 if (error)
1314 goto out;
1316 pci_iomul_switch_to(iomul, sw, iomul->pdev);
1317 switch (out.size) {
1318 case 4:
1319 outl(out.value, port);
1320 break;
1321 case 2:
1322 outw(out.value, port);
1323 break;
1324 case 1:
1325 outb(out.value, port);
1326 break;
1327 default:
1328 error = -EINVAL;
1329 break;
1332 out:
1333 mutex_unlock(&sw->lock);
1334 mutex_unlock(&iomul->lock);
1335 return error;
1338 static long pci_iomul_ioctl(struct file *filp,
1339 unsigned int cmd, unsigned long arg)
1341 long error;
1342 struct pci_iomul_data *iomul =
1343 (struct pci_iomul_data*)filp->private_data;
1345 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
1346 return -EPERM;
1348 switch (cmd) {
1349 case PCI_IOMUL_SETUP:
1350 error = pci_iomul_setup(iomul,
1351 (struct pci_iomul_setup __user *)arg);
1352 break;
1353 case PCI_IOMUL_DISABLE_IO:
1354 error = pci_iomul_disable_io(iomul);
1355 break;
1356 case PCI_IOMUL_IN:
1357 error = pci_iomul_in(iomul, (struct pci_iomul_in __user *)arg);
1358 break;
1359 case PCI_IOMUL_OUT:
1360 error = pci_iomul_out(iomul,
1361 (struct pci_iomul_out __user *)arg);
1362 break;
1363 default:
1364 error = -ENOSYS;
1365 break;
1368 return error;
/* Character-device entry points for the multiplexer; per-open state lives
 * in filp->private_data (struct pci_iomul_data). */
static const struct file_operations pci_iomul_fops = {
	.owner = THIS_MODULE,

	.open = pci_iomul_open, /* nonseekable_open */
	.release = pci_iomul_release,

	.unlocked_ioctl = pci_iomul_ioctl,
};
/* Misc device with a dynamically assigned minor; appears as
 * /dev/pci_iomul (or /dev/misc/pci_iomul depending on userspace). */
static struct miscdevice pci_iomul_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "pci_iomul",
	.fops = &pci_iomul_fops,
};
1386 static int pci_iomul_init(void)
1388 int error;
1389 error = misc_register(&pci_iomul_miscdev);
1390 if (error != 0) {
1391 printk(KERN_ALERT "Couldn't register /dev/misc/pci_iomul");
1392 return error;
1394 printk("PCI IO multiplexer device installed.\n");
1395 return 0;
#if 0
/* Unused teardown path: the driver is initialised via late_initcall() and
 * currently has no module-exit hook, so deregistration never happens. */
static void pci_iomul_cleanup(void)
{
	misc_deregister(&pci_iomul_miscdev);
}
#endif
/*
 * This must be called after pci fixup final which is called by
 * device_initcall(pci_init) -- hence late_initcall rather than
 * module_init/device_initcall.
 */
late_initcall(pci_iomul_init);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Isaku Yamahata <yamahata@valinux.co.jp>");
MODULE_DESCRIPTION("PCI IO space multiplexing driver");