ia64/linux-2.6.18-xen.hg

view drivers/pci/proc.c @ 882:8dec4aa9b8b9

PCI pass through: PCIe IO space multiplexing

This is required for more than 16 HVM domains to boot from
PCIe pass-through devices.

Linux as dom0 exclusively assigns IO space to downstream PCI bridges,
and the assignment unit of PCI bridge IO space is 4K. So only up to
16 PCIe devices can be accessed via IO space within the 64K IO ports.
PCI expansion ROM BIOSes often use IO port access to boot from the
device, so in a virtualized environment this means only up to 16 guest
domains can boot from a pass-through device.

This patch allows PCIe IO space sharing of pass-through devices.
- Reassign the IO space of the PCIe devices specified by
"guestiomuldev=[<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>][,...]"
so that it is shared.
This is implemented as a Linux PCI quirk fixup.

The sharing unit is the PCIe switch, i.e. the IO space of the endpoint
devices under the same switch will be shared. If there is more than
one switch, two areas of IO space will be used.

- Also provide the driver which arbitrates accesses to the multiplexed
PCIe IO space. qemu-dm will use this later.

Limitation:
The IO ports of IO-shared devices can't be accessed from dom0 Linux
device drivers. But this shouldn't be a big issue because the PCIe
specification discourages the use of IO space and recommends that IO
space be used only for bootable devices with ROM code. OS device
drivers should work without IO space access.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Keir Fraser <keir.fraser@citrix.com>
date Thu May 28 09:57:49 2009 +0100 (2009-05-28)
parents f77ac4979fae
children
line source
1 /*
2 * $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $
3 *
4 * Procfs interface for the PCI bus.
5 *
6 * Copyright (c) 1997--1999 Martin Mares <mj@ucw.cz>
7 */
9 #include <linux/init.h>
10 #include <linux/pci.h>
11 #include <linux/module.h>
12 #include <linux/proc_fs.h>
13 #include <linux/seq_file.h>
14 #include <linux/smp_lock.h>
16 #include <asm/uaccess.h>
17 #include <asm/byteorder.h>
18 #include "pci.h"
20 static int proc_initialized; /* = 0; set to 1 by pci_proc_init(), checked by pci_proc_attach_device() */
22 static loff_t
23 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
24 {
25 loff_t new = -1;
26 struct inode *inode = file->f_dentry->d_inode;
28 mutex_lock(&inode->i_mutex);
29 switch (whence) {
30 case 0:
31 new = off;
32 break;
33 case 1:
34 new = file->f_pos + off;
35 break;
36 case 2:
37 new = inode->i_size + off;
38 break;
39 }
40 if (new < 0 || new > inode->i_size)
41 new = -EINVAL;
42 else
43 file->f_pos = new;
44 mutex_unlock(&inode->i_mutex);
45 return new;
46 }
48 static ssize_t
49 proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
50 {
51 const struct inode *ino = file->f_dentry->d_inode;
52 const struct proc_dir_entry *dp = PDE(ino);
53 struct pci_dev *dev = dp->data;
54 unsigned int pos = *ppos;
55 unsigned int cnt, size;
57 /*
58 * Normal users can read only the standardized portion of the
59 * configuration space as several chips lock up when trying to read
60 * undefined locations (think of Intel PIIX4 as a typical example).
61 */
63 if (capable(CAP_SYS_ADMIN))
64 size = dev->cfg_size;
65 else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
66 size = 128;
67 else
68 size = 64;
70 if (pos >= size)
71 return 0;
72 if (nbytes >= size)
73 nbytes = size;
74 if (pos + nbytes > size)
75 nbytes = size - pos;
76 cnt = nbytes;
78 if (!access_ok(VERIFY_WRITE, buf, cnt))
79 return -EINVAL;
81 if ((pos & 1) && cnt) {
82 unsigned char val;
83 pci_user_read_config_byte(dev, pos, &val);
84 __put_user(val, buf);
85 buf++;
86 pos++;
87 cnt--;
88 }
90 if ((pos & 3) && cnt > 2) {
91 unsigned short val;
92 pci_user_read_config_word(dev, pos, &val);
93 __put_user(cpu_to_le16(val), (unsigned short __user *) buf);
94 buf += 2;
95 pos += 2;
96 cnt -= 2;
97 }
99 while (cnt >= 4) {
100 unsigned int val;
101 pci_user_read_config_dword(dev, pos, &val);
102 __put_user(cpu_to_le32(val), (unsigned int __user *) buf);
103 buf += 4;
104 pos += 4;
105 cnt -= 4;
106 }
108 if (cnt >= 2) {
109 unsigned short val;
110 pci_user_read_config_word(dev, pos, &val);
111 __put_user(cpu_to_le16(val), (unsigned short __user *) buf);
112 buf += 2;
113 pos += 2;
114 cnt -= 2;
115 }
117 if (cnt) {
118 unsigned char val;
119 pci_user_read_config_byte(dev, pos, &val);
120 __put_user(val, buf);
121 buf++;
122 pos++;
123 cnt--;
124 }
126 *ppos = pos;
127 return nbytes;
128 }
130 static ssize_t
131 proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos)
132 {
133 const struct inode *ino = file->f_dentry->d_inode;
134 const struct proc_dir_entry *dp = PDE(ino);
135 struct pci_dev *dev = dp->data;
136 int pos = *ppos;
137 int size = dev->cfg_size;
138 int cnt;
140 if (pos >= size)
141 return 0;
142 if (nbytes >= size)
143 nbytes = size;
144 if (pos + nbytes > size)
145 nbytes = size - pos;
146 cnt = nbytes;
148 if (!access_ok(VERIFY_READ, buf, cnt))
149 return -EINVAL;
151 if ((pos & 1) && cnt) {
152 unsigned char val;
153 __get_user(val, buf);
154 pci_user_write_config_byte(dev, pos, val);
155 buf++;
156 pos++;
157 cnt--;
158 }
160 if ((pos & 3) && cnt > 2) {
161 unsigned short val;
162 __get_user(val, (unsigned short __user *) buf);
163 pci_user_write_config_word(dev, pos, le16_to_cpu(val));
164 buf += 2;
165 pos += 2;
166 cnt -= 2;
167 }
169 while (cnt >= 4) {
170 unsigned int val;
171 __get_user(val, (unsigned int __user *) buf);
172 pci_user_write_config_dword(dev, pos, le32_to_cpu(val));
173 buf += 4;
174 pos += 4;
175 cnt -= 4;
176 }
178 if (cnt >= 2) {
179 unsigned short val;
180 __get_user(val, (unsigned short __user *) buf);
181 pci_user_write_config_word(dev, pos, le16_to_cpu(val));
182 buf += 2;
183 pos += 2;
184 cnt -= 2;
185 }
187 if (cnt) {
188 unsigned char val;
189 __get_user(val, buf);
190 pci_user_write_config_byte(dev, pos, val);
191 buf++;
192 pos++;
193 cnt--;
194 }
196 *ppos = pos;
197 return nbytes;
198 }
200 struct pci_filp_private {
201 enum pci_mmap_state mmap_state;
202 int write_combine;
203 };
205 static int proc_bus_pci_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
206 {
207 const struct proc_dir_entry *dp = PDE(inode);
208 struct pci_dev *dev = dp->data;
209 #ifdef HAVE_PCI_MMAP
210 struct pci_filp_private *fpriv = file->private_data;
211 #endif /* HAVE_PCI_MMAP */
212 int ret = 0;
214 switch (cmd) {
215 case PCIIOC_CONTROLLER:
216 ret = pci_domain_nr(dev->bus);
217 break;
219 #ifdef HAVE_PCI_MMAP
220 case PCIIOC_MMAP_IS_IO:
221 fpriv->mmap_state = pci_mmap_io;
222 break;
224 case PCIIOC_MMAP_IS_MEM:
225 fpriv->mmap_state = pci_mmap_mem;
226 break;
228 case PCIIOC_WRITE_COMBINE:
229 if (arg)
230 fpriv->write_combine = 1;
231 else
232 fpriv->write_combine = 0;
233 break;
235 #endif /* HAVE_PCI_MMAP */
237 default:
238 ret = -EINVAL;
239 break;
240 };
242 return ret;
243 }
245 #ifdef HAVE_PCI_MMAP
246 static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
247 {
248 struct inode *inode = file->f_dentry->d_inode;
249 const struct proc_dir_entry *dp = PDE(inode);
250 struct pci_dev *dev = dp->data;
251 struct pci_filp_private *fpriv = file->private_data;
252 int ret;
254 if (!capable(CAP_SYS_RAWIO))
255 return -EPERM;
257 ret = pci_mmap_page_range(dev, vma,
258 fpriv->mmap_state,
259 fpriv->write_combine);
260 if (ret < 0)
261 return ret;
263 return 0;
264 }
266 static int proc_bus_pci_open(struct inode *inode, struct file *file)
267 {
268 struct pci_filp_private *fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL);
270 if (!fpriv)
271 return -ENOMEM;
273 fpriv->mmap_state = pci_mmap_io;
274 fpriv->write_combine = 0;
276 file->private_data = fpriv;
278 return 0;
279 }
281 static int proc_bus_pci_release(struct inode *inode, struct file *file)
282 {
283 kfree(file->private_data);
284 file->private_data = NULL;
286 return 0;
287 }
288 #endif /* HAVE_PCI_MMAP */
290 static struct file_operations proc_bus_pci_operations = {
291 .llseek = proc_bus_pci_lseek,
292 .read = proc_bus_pci_read,
293 .write = proc_bus_pci_write,
294 .ioctl = proc_bus_pci_ioctl,
295 #ifdef HAVE_PCI_MMAP
296 .open = proc_bus_pci_open,
297 .release = proc_bus_pci_release,
298 .mmap = proc_bus_pci_mmap,
299 #ifdef HAVE_ARCH_PCI_GET_UNMAPPED_AREA
300 .get_unmapped_area = get_pci_unmapped_area,
301 #endif /* HAVE_ARCH_PCI_GET_UNMAPPED_AREA */
302 #endif /* HAVE_PCI_MMAP */
303 };
305 /* iterator */
306 static void *pci_seq_start(struct seq_file *m, loff_t *pos)
307 {
308 struct pci_dev *dev = NULL;
309 loff_t n = *pos;
311 for_each_pci_dev(dev) {
312 if (!n--)
313 break;
314 }
315 return dev;
316 }
318 static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos)
319 {
320 struct pci_dev *dev = v;
322 (*pos)++;
323 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
324 return dev;
325 }
327 static void pci_seq_stop(struct seq_file *m, void *v)
328 {
329 if (v) {
330 struct pci_dev *dev = v;
331 pci_dev_put(dev);
332 }
333 }
335 static int show_device(struct seq_file *m, void *v)
336 {
337 const struct pci_dev *dev = v;
338 const struct pci_driver *drv;
339 int i;
341 if (dev == NULL)
342 return 0;
344 drv = pci_dev_driver(dev);
345 seq_printf(m, "%02x%02x\t%04x%04x\t%x",
346 dev->bus->number,
347 dev->devfn,
348 dev->vendor,
349 dev->device,
350 dev->irq);
352 /* only print standard and ROM resources to preserve compatibility */
353 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
354 resource_size_t start, end;
355 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
356 seq_printf(m, "\t%16llx",
357 (unsigned long long)(start |
358 (dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
359 }
360 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
361 resource_size_t start, end;
362 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
363 seq_printf(m, "\t%16llx",
364 dev->resource[i].start < dev->resource[i].end ?
365 (unsigned long long)(end - start) + 1 : 0);
366 }
367 seq_putc(m, '\t');
368 if (drv)
369 seq_printf(m, "%s", drv->name);
370 seq_putc(m, '\n');
371 return 0;
372 }
374 static struct seq_operations proc_bus_pci_devices_op = {
375 .start = pci_seq_start,
376 .next = pci_seq_next,
377 .stop = pci_seq_stop,
378 .show = show_device
379 };
381 static struct proc_dir_entry *proc_bus_pci_dir; /* "pci" directory under proc_bus, created in pci_proc_init() */
383 int pci_proc_attach_device(struct pci_dev *dev)
384 {
385 struct pci_bus *bus = dev->bus;
386 struct proc_dir_entry *e;
387 char name[16];
389 if (!proc_initialized)
390 return -EACCES;
392 if (!bus->procdir) {
393 if (pci_proc_domain(bus)) {
394 sprintf(name, "%04x:%02x", pci_domain_nr(bus),
395 bus->number);
396 } else {
397 sprintf(name, "%02x", bus->number);
398 }
399 bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
400 if (!bus->procdir)
401 return -ENOMEM;
402 }
404 sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
405 e = create_proc_entry(name, S_IFREG | S_IRUGO | S_IWUSR, bus->procdir);
406 if (!e)
407 return -ENOMEM;
408 e->proc_fops = &proc_bus_pci_operations;
409 e->data = dev;
410 e->size = dev->cfg_size;
411 dev->procent = e;
413 return 0;
414 }
416 int pci_proc_detach_device(struct pci_dev *dev)
417 {
418 struct proc_dir_entry *e;
420 if ((e = dev->procent)) {
421 if (atomic_read(&e->count))
422 return -EBUSY;
423 remove_proc_entry(e->name, dev->bus->procdir);
424 dev->procent = NULL;
425 }
426 return 0;
427 }
#if 0
/*
 * Compiled out.  Would create the "<bus>" proc directory for @bus under
 * proc_bus_pci_dir if it does not exist yet.  Returns 0 on success,
 * -EACCES before pci_proc_init() has run, -ENOMEM if proc_mkdir() fails.
 */
int pci_proc_attach_bus(struct pci_bus* bus)
{
	char name[16];

	if (!proc_initialized)
		return -EACCES;

	if (bus->procdir)
		return 0;

	sprintf(name, "%02x", bus->number);
	bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
	if (!bus->procdir)
		return -ENOMEM;

	return 0;
}
#endif /* 0 */
448 int pci_proc_detach_bus(struct pci_bus* bus)
449 {
450 struct proc_dir_entry *de = bus->procdir;
451 if (de)
452 remove_proc_entry(de->name, proc_bus_pci_dir);
453 return 0;
454 }
456 static int proc_bus_pci_dev_open(struct inode *inode, struct file *file)
457 {
458 return seq_open(file, &proc_bus_pci_devices_op);
459 }
460 static struct file_operations proc_bus_pci_dev_operations = {
461 .open = proc_bus_pci_dev_open,
462 .read = seq_read,
463 .llseek = seq_lseek,
464 .release = seq_release,
465 };
467 static int __init pci_proc_init(void)
468 {
469 struct proc_dir_entry *entry;
470 struct pci_dev *dev = NULL;
471 proc_bus_pci_dir = proc_mkdir("pci", proc_bus);
472 entry = create_proc_entry("devices", 0, proc_bus_pci_dir);
473 if (entry)
474 entry->proc_fops = &proc_bus_pci_dev_operations;
475 proc_initialized = 1;
476 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
477 pci_proc_attach_device(dev);
478 }
479 return 0;
480 }
482 __initcall(pci_proc_init); /* register pci_proc_init() as an initcall */
484 #ifdef CONFIG_HOTPLUG /* the symbols below are exported only with CONFIG_HOTPLUG */
485 EXPORT_SYMBOL(pci_proc_attach_device);
486 EXPORT_SYMBOL(pci_proc_detach_bus);
487 #endif /* CONFIG_HOTPLUG */