ia64/linux-2.6.18-xen.hg

view drivers/pci/setup-res.c @ 882:8dec4aa9b8b9

PCI pass through: PCIe IO space multiplexing

This is required for more than 16 HVM domains to boot from
PCIe pass-through devices.

Linux as dom0 exclusively assigns IO space to downstream PCI bridges,
and the assignment unit of PCI bridge IO space is 4K. So only up to
16 PCIe devices can be accessed via IO space within the 64K IO ports.
PCI expansion ROM BIOSes often use IO port access to boot from the
device, so in a virtualized environment this means that only up to 16
guest domains can boot from a pass-through device.

This patch allows PCIe IO space sharing among pass-through devices.
- Reassign the IO space of the PCIe devices specified by
"guestiomuldev=[<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>][,...]"
so that it is shared.
This is implemented as a Linux PCI quirk fixup.

The sharing unit is the PCIe switch, i.e. the IO space of the endpoint
devices under the same switch will be shared. If there is more than
one switch, two areas of IO space will be used.

- Add the driver which arbitrates accesses to the multiplexed PCIe
IO space. qemu-dm will use this later.

Limitation:
The IO ports of IO-shared devices can't be accessed from dom0 Linux
device drivers. But this shouldn't be a big issue, because the PCIe
specification discourages the use of IO space and recommends that IO
space be used only for bootable devices with ROM code. OS device
drivers should work without IO space access.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Keir Fraser <keir.fraser@citrix.com>
date Thu May 28 09:57:49 2009 +0100 (2009-05-28)
parents dfd2adc58740
children
line source
1 /*
2 * drivers/pci/setup-res.c
3 *
4 * Extruded from code written by
5 * Dave Rusling (david.rusling@reo.mts.dec.com)
6 * David Mosberger (davidm@cs.arizona.edu)
7 * David Miller (davem@redhat.com)
8 *
9 * Support routines for initializing a PCI subsystem.
10 */
12 /* fixed for multiple pci buses, 1999 Andrea Arcangeli <andrea@suse.de> */
14 /*
15 * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
16 * Resource sorting
17 */
19 #include <linux/init.h>
20 #include <linux/kernel.h>
21 #include <linux/pci.h>
22 #include <linux/errno.h>
23 #include <linux/ioport.h>
24 #include <linux/cache.h>
25 #include <linux/slab.h>
26 #include "pci.h"
29 void
30 pci_update_resource(struct pci_dev *dev, int resno)
31 {
32 struct pci_bus_region region;
33 u32 new, check, mask;
34 int reg;
35 enum pci_bar_type type;
36 struct resource *res = dev->resource + resno;
38 /* Ignore resources for unimplemented BARs and unused resource slots
39 for 64 bit BARs. */
40 if (!res->flags)
41 return;
43 pcibios_resource_to_bus(dev, &region, res);
45 pr_debug(" got res [%llx:%llx] bus [%lx:%lx] flags %lx for "
46 "BAR %d of %s\n", (unsigned long long)res->start,
47 (unsigned long long)res->end,
48 region.start, region.end, res->flags, resno, pci_name(dev));
50 new = region.start | (res->flags & PCI_REGION_FLAG_MASK);
51 if (res->flags & IORESOURCE_IO)
52 mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
53 else
54 mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
56 reg = pci_resource_bar(dev, resno, &type);
57 if (!reg)
58 return;
59 if (type != pci_bar_unknown) {
60 if (!(res->flags & IORESOURCE_ROM_ENABLE))
61 return;
62 new |= PCI_ROM_ADDRESS_ENABLE;
63 }
65 pci_write_config_dword(dev, reg, new);
66 pci_read_config_dword(dev, reg, &check);
68 if ((new ^ check) & mask) {
69 printk(KERN_ERR "PCI: Error while updating region "
70 "%s/%d (%08x != %08x)\n", pci_name(dev), resno,
71 new, check);
72 }
74 if ((new & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
75 (PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64)) {
76 new = region.start >> 16 >> 16;
77 pci_write_config_dword(dev, reg + 4, new);
78 pci_read_config_dword(dev, reg + 4, &check);
79 if (check != new) {
80 printk(KERN_ERR "PCI: Error updating region "
81 "%s/%d (high %08x != %08x)\n",
82 pci_name(dev), resno, new, check);
83 }
84 }
85 res->flags &= ~IORESOURCE_UNSET;
86 pr_debug("PCI: moved device %s resource %d (%lx) to %x\n",
87 pci_name(dev), resno, res->flags,
88 new & ~PCI_REGION_FLAG_MASK);
89 }
91 int __devinit
92 pci_claim_resource(struct pci_dev *dev, int resource)
93 {
94 struct resource *res = &dev->resource[resource];
95 struct resource *root = NULL;
96 char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge";
97 int err;
99 root = pcibios_select_root(dev, res);
101 err = -EINVAL;
102 if (root != NULL)
103 err = insert_resource(root, res);
105 if (err) {
106 printk(KERN_ERR "PCI: %s region %d of %s %s [%llx:%llx]\n",
107 root ? "Address space collision on" :
108 "No parent found for",
109 resource, dtype, pci_name(dev),
110 (unsigned long long)res->start,
111 (unsigned long long)res->end);
112 }
114 return err;
115 }
116 EXPORT_SYMBOL_GPL(pci_claim_resource);
#ifdef CONFIG_PCI_REASSIGN
/*
 * pci_disable_bridge_window - overwrite a bridge's memory window registers
 * @dev: the bridge device
 *
 * Logs the action and then writes fixed values to the MMIO and
 * prefetchable MMIO base/limit registers, in the order listed below.
 * NOTE(review): the values presumably place each base above its limit so
 * that the bridge forwards nothing -- confirm against the bridge spec.
 */
void pci_disable_bridge_window(struct pci_dev *dev)
{
	static const struct {
		int reg;
		u32 val;
	} window_regs[] = {
		/* MMIO Base/Limit */
		{ PCI_MEMORY_BASE,        0x0000fff0 },
		/* Prefetchable MMIO Base/Limit */
		{ PCI_PREF_LIMIT_UPPER32, 0 },
		{ PCI_PREF_MEMORY_BASE,   0x0000fff0 },
		{ PCI_PREF_BASE_UPPER32,  0xffffffff },
	};
	unsigned int n;

	printk(KERN_DEBUG "PCI: Disable bridge window on %s\n", pci_name(dev));

	/* The table order is the write order; do not sort it. */
	for (n = 0; n < ARRAY_SIZE(window_regs); n++)
		pci_write_config_dword(dev, window_regs[n].reg,
				       window_regs[n].val);
}
#endif
133 int pci_assign_resource(struct pci_dev *dev, int resno)
134 {
135 struct pci_bus *bus = dev->bus;
136 struct resource *res = dev->resource + resno;
137 resource_size_t size, min, align;
138 int ret;
139 int reassigndev = pci_is_reassigndev(dev);
141 size = res->end - res->start + 1;
142 min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
143 /* The bridge resources are special, as their
144 size != alignment. Sizing routines return
145 required alignment in the "start" field. */
146 if (resno < PCI_BRIDGE_RESOURCES) {
147 align = size;
148 if ((reassigndev) &&
149 (res->flags & IORESOURCE_MEM)) {
150 align = ALIGN(align, PAGE_SIZE);
151 }
152 } else {
153 align = res->start;
154 }
156 /* First, try exact prefetching match.. */
157 ret = pci_bus_alloc_resource(bus, res, size, align, min,
158 IORESOURCE_PREFETCH,
159 pcibios_align_resource, dev);
161 if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
162 /*
163 * That failed.
164 *
165 * But a prefetching area can handle a non-prefetching
166 * window (it will just not perform as well).
167 */
168 ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
169 pcibios_align_resource, dev);
170 }
172 if (ret) {
173 printk(KERN_ERR "PCI: Failed to allocate %s resource "
174 "#%d:%llx@%llx for %s\n",
175 res->flags & IORESOURCE_IO ? "I/O" : "mem",
176 resno, (unsigned long long)size,
177 (unsigned long long)res->start, pci_name(dev));
178 } else if (resno < PCI_BRIDGE_RESOURCES) {
179 if (reassigndev)
180 printk(KERN_DEBUG "PCI: Assign resource(%d) on %s "
181 "%016llx - %016llx\n", resno, pci_name(dev),
182 (unsigned long long)res->start,
183 (unsigned long long)res->end);
184 pci_update_resource(dev, resno);
185 }
187 return ret;
188 }
#ifdef CONFIG_EMBEDDED
/*
 * pci_assign_resource_fixed - claim a resource at its current address
 * @dev:   PCI device owning the resource
 * @resno: index of the resource within dev->resource[]
 *
 * Walks the bus resources and calls request_resource() on each one whose
 * IO/MEM/PREFETCH type bits equal those of the device resource, stopping
 * at the first success.  On success a device resource (index below
 * PCI_BRIDGE_RESOURCES) is written to its BAR via pci_update_resource().
 *
 * Returns 0 on success.  Otherwise returns the error from the last
 * request_resource() attempt, or -EBUSY if no bus resource was tried;
 * the failure is logged.
 */
int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
{
	const unsigned int type_mask =
		IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
	struct resource *res = &dev->resource[resno];
	struct pci_bus *bus = dev->bus;
	int ret = -EBUSY;
	int slot;

	/* The loop ends as soon as one request_resource() succeeds. */
	for (slot = 0; slot < PCI_BUS_NUM_RESOURCES && ret != 0; slot++) {
		struct resource *parent = bus->resource[slot];

		/* type_mask must match */
		if (parent && !((res->flags ^ parent->flags) & type_mask))
			ret = request_resource(parent, res);
	}

	if (ret) {
		printk(KERN_ERR "PCI: Failed to allocate %s resource "
		       "#%d:%llx@%llx for %s\n",
		       res->flags & IORESOURCE_IO ? "I/O" : "mem",
		       resno, (unsigned long long)(res->end - res->start + 1),
		       (unsigned long long)res->start, pci_name(dev));
		return ret;
	}

	if (resno < PCI_BRIDGE_RESOURCES)
		pci_update_resource(dev, resno);

	return ret;
}
EXPORT_SYMBOL_GPL(pci_assign_resource_fixed);
#endif
230 /* Sort resources by alignment */
231 void __devinit
232 pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
233 {
234 int i;
235 int reassigndev = pci_is_reassigndev(dev);
237 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
238 struct resource *r;
239 struct resource_list *list, *tmp;
240 resource_size_t r_align;
242 r = &dev->resource[i];
243 r_align = r->end - r->start;
245 if (!(r->flags) || r->parent)
246 continue;
248 if (!r_align) {
249 printk(KERN_WARNING "PCI: Ignore bogus resource %d "
250 "[%llx:%llx] of %s\n",
251 i, (unsigned long long)r->start,
252 (unsigned long long)r->end, pci_name(dev));
253 continue;
254 }
255 r_align = (i < PCI_BRIDGE_RESOURCES) ? r_align + 1 : r->start;
257 if (i < PCI_BRIDGE_RESOURCES && (r->flags & IORESOURCE_MEM) &&
258 reassigndev)
259 r_align = ALIGN(r_align, PAGE_SIZE);
261 for (list = head; ; list = list->next) {
262 resource_size_t align = 0;
263 struct resource_list *ln = list->next;
264 int idx;
266 if (ln) {
267 idx = ln->res - &ln->dev->resource[0];
268 align = (idx < PCI_BRIDGE_RESOURCES) ?
269 ln->res->end - ln->res->start + 1 :
270 ln->res->start;
271 if ((idx < PCI_BRIDGE_RESOURCES) &&
272 (ln->res->flags & IORESOURCE_MEM) &&
273 pci_is_reassigndev(ln->dev))
274 align = ALIGN(align, PAGE_SIZE);
275 }
276 if (r_align > align) {
277 tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
278 if (!tmp)
279 panic("pdev_sort_resources(): "
280 "kmalloc() failed!\n");
281 tmp->next = ln;
282 tmp->res = r;
283 tmp->dev = dev;
284 list->next = tmp;
285 break;
286 }
287 }
288 }
289 }