ia64/xen-unstable

changeset 17976:7f7d0e7aa01b

Add management and locking of PCI device structures

Add functions for managing pci_dev structures. Create a list
containing all current pci_devs. Remove msi_pdev_list. Create a
read-write lock protecting all pci_dev lists. Add spinlocks for
pci_dev access. Do necessary modifications to MSI code.

Signed-off-by: Espen Skoglund <espen.skoglund@netronome.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jul 04 17:52:24 2008 +0100 (2008-07-04)
parents bd7f2a120f94
children 183ca809e1d7
files xen/arch/x86/i8259.c xen/arch/x86/msi.c xen/arch/x86/physdev.c xen/drivers/passthrough/Makefile xen/drivers/passthrough/amd/pci_amd_iommu.c xen/drivers/passthrough/iommu.c xen/drivers/passthrough/pci.c xen/drivers/passthrough/vtd/iommu.c xen/include/asm-x86/msi.h xen/include/xen/iommu.h xen/include/xen/pci.h
line diff
     1.1 --- a/xen/arch/x86/i8259.c	Fri Jul 04 17:51:42 2008 +0100
     1.2 +++ b/xen/arch/x86/i8259.c	Fri Jul 04 17:52:24 2008 +0100
     1.3 @@ -382,7 +382,6 @@ void __devinit init_8259A(int auto_eoi)
     1.4  
     1.5  static struct irqaction cascade = { no_action, "cascade", NULL};
     1.6  
     1.7 -extern struct list_head msi_pdev_list;
     1.8  void __init init_IRQ(void)
     1.9  {
    1.10      int i;
    1.11 @@ -419,7 +418,5 @@ void __init init_IRQ(void)
    1.12      outb(LATCH >> 8, PIT_CH0);     /* MSB */
    1.13  
    1.14      setup_irq(2, &cascade);
    1.15 -
    1.16 -    INIT_LIST_HEAD(&msi_pdev_list);
    1.17  }
    1.18  
     2.1 --- a/xen/arch/x86/msi.c	Fri Jul 04 17:51:42 2008 +0100
     2.2 +++ b/xen/arch/x86/msi.c	Fri Jul 04 17:52:24 2008 +0100
     2.3 @@ -29,21 +29,6 @@
     2.4  
     2.5  extern int msi_irq_enable;
     2.6  
     2.7 -/* PCI-dev list with MSI/MSIX capabilities */
     2.8 -DEFINE_SPINLOCK(msi_pdev_lock);
     2.9 -struct list_head msi_pdev_list;
    2.10 -
    2.11 -struct pci_dev *get_msi_pdev(u8 bus, u8 devfn)
    2.12 -{
    2.13 -    struct pci_dev *pdev = NULL;
    2.14 -
    2.15 -    list_for_each_entry(pdev, &msi_pdev_list, msi_dev_list)
    2.16 -        if ( pdev->bus == bus && pdev->devfn == devfn )
    2.17 -            return pdev;
    2.18 -
    2.19 -    return NULL;
    2.20 -}
    2.21 -
    2.22  /* bitmap indicate which fixed map is free */
    2.23  DEFINE_SPINLOCK(msix_fixmap_lock);
    2.24  DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES);
    2.25 @@ -112,10 +97,8 @@ static void msi_compose_msg(struct pci_d
    2.26      }
    2.27  }
    2.28  
    2.29 -void read_msi_msg(unsigned int irq, struct msi_msg *msg)
    2.30 +static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
    2.31  {
    2.32 -    struct msi_desc *entry = irq_desc[irq].msi_desc;
    2.33 -
    2.34      switch ( entry->msi_attrib.type )
    2.35      {
    2.36      case PCI_CAP_ID_MSI:
    2.37 @@ -147,7 +130,7 @@ void read_msi_msg(unsigned int irq, stru
    2.38      {
    2.39          void __iomem *base;
    2.40          base = entry->mask_base +
    2.41 -            entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
    2.42 +	    entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
    2.43  
    2.44          msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
    2.45          msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
    2.46 @@ -164,9 +147,6 @@ void read_msi_msg(unsigned int irq, stru
    2.47  
    2.48  static int set_vector_msi(struct msi_desc *entry)
    2.49  {
    2.50 -    irq_desc_t *desc;
    2.51 -    unsigned long flags;
    2.52 -
    2.53      if ( entry->vector >= NR_VECTORS )
    2.54      {
    2.55          dprintk(XENLOG_ERR, "Trying to install msi data for Vector %d\n",
    2.56 @@ -174,19 +154,12 @@ static int set_vector_msi(struct msi_des
    2.57          return -EINVAL;
    2.58      }
    2.59  
    2.60 -    desc = &irq_desc[entry->vector];
    2.61 -    spin_lock_irqsave(&desc->lock, flags);
    2.62 -    desc->msi_desc = entry;
    2.63 -    spin_unlock_irqrestore(&desc->lock, flags);
    2.64 -
    2.65 +    irq_desc[entry->vector].msi_desc = entry;
    2.66      return 0;
    2.67  }
    2.68  
    2.69  static int unset_vector_msi(int vector)
    2.70  {
    2.71 -    irq_desc_t *desc;
    2.72 -    unsigned long flags;
    2.73 -
    2.74      if ( vector >= NR_VECTORS )
    2.75      {
    2.76          dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
    2.77 @@ -194,18 +167,12 @@ static int unset_vector_msi(int vector)
    2.78          return -EINVAL;
    2.79      }
    2.80  
    2.81 -    desc = &irq_desc[vector];
    2.82 -    spin_lock_irqsave(&desc->lock, flags);
    2.83 -    desc->msi_desc = NULL;
    2.84 -    spin_unlock_irqrestore(&desc->lock, flags);
    2.85 -
    2.86 +    irq_desc[vector].msi_desc = NULL;
    2.87      return 0;
    2.88  }
    2.89  
    2.90 -void write_msi_msg(unsigned int irq, struct msi_msg *msg)
    2.91 +static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
    2.92  {
    2.93 -    struct msi_desc *entry = irq_desc[irq].msi_desc;
    2.94 -
    2.95      if ( vtd_enabled )
    2.96          msi_msg_write_remap_rte(entry, msg);
    2.97  
    2.98 @@ -254,6 +221,7 @@ void write_msi_msg(unsigned int irq, str
    2.99  
   2.100  void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
   2.101  {
   2.102 +    struct msi_desc *desc = irq_desc[irq].msi_desc;
   2.103      struct msi_msg msg;
   2.104      unsigned int dest;
   2.105  
   2.106 @@ -263,12 +231,18 @@ void set_msi_irq_affinity(unsigned int i
   2.107          mask = TARGET_CPUS;
   2.108      dest = cpu_mask_to_apicid(mask);
   2.109  
   2.110 -    read_msi_msg(irq, &msg);
   2.111 +    if ( !desc )
   2.112 +	return;
   2.113 +
    2.114 +    ASSERT(spin_is_locked(&irq_desc[irq].lock));
   2.115 +    spin_lock(&desc->dev->lock);
   2.116 +    read_msi_msg(desc, &msg);
   2.117  
   2.118      msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
   2.119      msg.address_lo |= MSI_ADDR_DEST_ID(dest);
   2.120  
   2.121 -    write_msi_msg(irq, &msg);
   2.122 +    write_msi_msg(desc, &msg);
   2.123 +    spin_unlock(&desc->dev->lock);
   2.124  }
   2.125  
   2.126  static void msi_set_enable(struct pci_dev *dev, int enable)
   2.127 @@ -290,7 +264,7 @@ static void msi_set_enable(struct pci_de
   2.128      }
   2.129  }
   2.130  
   2.131 -void msix_set_enable(struct pci_dev *dev, int enable)
   2.132 +static void msix_set_enable(struct pci_dev *dev, int enable)
   2.133  {
   2.134      int pos;
   2.135      u16 control;
   2.136 @@ -335,6 +309,7 @@ static void msi_set_mask_bit(unsigned in
   2.137  {
   2.138      struct msi_desc *entry = irq_desc[irq].msi_desc;
   2.139  
    2.140 +    ASSERT(spin_is_locked(&irq_desc[irq].lock));
   2.141      BUG_ON(!entry || !entry->dev);
   2.142      switch (entry->msi_attrib.type) {
   2.143      case PCI_CAP_ID_MSI:
   2.144 @@ -401,7 +376,7 @@ static int setup_msi_irq(struct pci_dev 
   2.145  
   2.146      msi_compose_msg(dev, desc->vector, &msg);
   2.147      set_vector_msi(desc);
   2.148 -    write_msi_msg(desc->vector, &msg);
   2.149 +    write_msi_msg(irq_desc[desc->vector].msi_desc, &msg);
   2.150  
   2.151      return 0;
   2.152  }
   2.153 @@ -415,8 +390,8 @@ static void msi_free_vector(int vector)
   2.154  {
   2.155      struct msi_desc *entry;
   2.156  
   2.157 +    ASSERT(spin_is_locked(&irq_desc[vector].lock));
   2.158      entry = irq_desc[vector].msi_desc;
   2.159 -
   2.160      teardown_msi_vector(vector);
   2.161  
   2.162      if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
   2.163 @@ -619,35 +594,22 @@ static int msix_capability_init(struct p
   2.164  static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
   2.165  {
   2.166      int status;
   2.167 -    struct pci_dev *dev;
   2.168 +    struct pci_dev *pdev;
   2.169  
   2.170 -    dev = get_msi_pdev(bus, devfn);
   2.171 -    if ( !dev )
   2.172 +    pdev = pci_lock_pdev(bus, devfn);
   2.173 +    if ( !pdev )
   2.174 +	return -ENODEV;
   2.175 +
   2.176 +    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) )
   2.177      {
   2.178 -        dev = xmalloc(struct pci_dev);
   2.179 -        if ( !dev )
   2.180 -            return -ENOMEM;
   2.181 -        dev->bus = bus;
   2.182 -        dev->devfn = devfn;
   2.183 -        INIT_LIST_HEAD(&dev->msi_list);
   2.184 -    }
   2.185 -
   2.186 -    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSI) )
   2.187 -    {
   2.188 +	spin_unlock(&pdev->lock);
   2.189          dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device \
   2.190              %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   2.191          return 0;
   2.192      }
   2.193  
   2.194 -    status = msi_capability_init(dev, vector);
   2.195 -
   2.196 -    if ( dev != get_msi_pdev(bus, devfn) )
   2.197 -    {
   2.198 -        spin_lock(&msi_pdev_lock);
   2.199 -        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
   2.200 -        spin_unlock(&msi_pdev_lock);
   2.201 -    }
   2.202 -
   2.203 +    status = msi_capability_init(pdev, vector);
   2.204 +    spin_unlock(&pdev->lock);
   2.205      return status;
   2.206  }
   2.207  
   2.208 @@ -660,6 +622,13 @@ static void __pci_disable_msi(int vector
   2.209      u8 bus, slot, func;
   2.210  
   2.211      entry = irq_desc[vector].msi_desc;
   2.212 +    if ( !entry )
   2.213 +	return;
   2.214 +    /*
   2.215 +     * Lock here is safe.  msi_desc can not be removed without holding
   2.216 +     * both irq_desc[].lock (which we do) and pdev->lock.
   2.217 +     */
   2.218 +    spin_lock(&entry->dev->lock);
   2.219      dev = entry->dev;
   2.220      bus = dev->bus;
   2.221      slot = PCI_SLOT(dev->devfn);
   2.222 @@ -674,6 +643,7 @@ static void __pci_disable_msi(int vector
   2.223      msi_free_vector(vector);
   2.224  
   2.225      pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
   2.226 +    spin_unlock(&dev->lock);
   2.227  }
   2.228  
   2.229  /**
   2.230 @@ -694,47 +664,35 @@ static void __pci_disable_msi(int vector
   2.231  static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
   2.232  {
   2.233      int status, pos, nr_entries;
   2.234 -    struct pci_dev *dev;
   2.235 +    struct pci_dev *pdev;
   2.236      u16 control;
   2.237      u8 slot = PCI_SLOT(devfn);
   2.238      u8 func = PCI_FUNC(devfn);
   2.239  
   2.240 +    pdev = pci_lock_pdev(bus, devfn);
   2.241 +    if ( !pdev )
   2.242 +	return -ENODEV;
   2.243 +
   2.244      pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
   2.245      control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
   2.246      nr_entries = multi_msix_capable(control);
   2.247      if (entry_nr > nr_entries)
   2.248 +    {
   2.249 +	spin_unlock(&pdev->lock);
   2.250          return -EINVAL;
   2.251 -
   2.252 -    /* Check whether driver already requested for MSI-X irqs */
   2.253 -    dev = get_msi_pdev(bus, devfn);
   2.254 -
   2.255 -    if ( !dev )
   2.256 -    {
   2.257 -        dev = xmalloc(struct pci_dev);
   2.258 -        if ( !dev )
   2.259 -            return -ENOMEM;
   2.260 -        dev->bus = bus;
   2.261 -        dev->devfn = devfn;
   2.262 -        INIT_LIST_HEAD(&dev->msi_list);
   2.263      }
   2.264  
   2.265 -    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSIX) )
   2.266 +    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) )
   2.267      {
   2.268 +	spin_unlock(&pdev->lock);
   2.269          dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \
   2.270                  device %02x:%02x.%01x.\n", vector, bus,
   2.271                  PCI_SLOT(devfn), PCI_FUNC(devfn));
   2.272          return 0;
   2.273      }
   2.274  
   2.275 -    status = msix_capability_init(dev, vector, entry_nr);
   2.276 -
   2.277 -    if ( dev != get_msi_pdev(bus, devfn) )
   2.278 -    {
   2.279 -        spin_lock(&msi_pdev_lock);
   2.280 -        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
   2.281 -        spin_unlock(&msi_pdev_lock);
   2.282 -    }
   2.283 -
   2.284 +    status = msix_capability_init(pdev, vector, entry_nr);
   2.285 +    spin_unlock(&pdev->lock);
   2.286      return status;
   2.287  }
   2.288  
   2.289 @@ -747,6 +705,13 @@ static void __pci_disable_msix(int vecto
   2.290      u8 bus, slot, func;
   2.291  
   2.292      entry = irq_desc[vector].msi_desc;
   2.293 +    if ( !entry )
   2.294 +	return;
   2.295 +    /*
   2.296 +     * Lock here is safe.  msi_desc can not be removed without holding
   2.297 +     * both irq_desc[].lock (which we do) and pdev->lock.
   2.298 +     */
   2.299 +    spin_lock(&entry->dev->lock);
   2.300      dev = entry->dev;
   2.301      bus = dev->bus;
   2.302      slot = PCI_SLOT(dev->devfn);
   2.303 @@ -761,10 +726,12 @@ static void __pci_disable_msix(int vecto
   2.304      msi_free_vector(vector);
   2.305  
   2.306      pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
   2.307 +    spin_unlock(&dev->lock);
   2.308  }
   2.309  
   2.310  int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
   2.311  {
   2.312 +    ASSERT(spin_is_locked(&irq_desc[vector].lock));
   2.313      if ( msi )
   2.314          return __pci_enable_msi(bus, devfn, vector);
   2.315      else
   2.316 @@ -773,9 +740,11 @@ int pci_enable_msi(u8 bus, u8 devfn, int
   2.317  
   2.318  void pci_disable_msi(int vector)
   2.319  {
   2.320 -    irq_desc_t *desc;
   2.321 +    irq_desc_t *desc = &irq_desc[vector];
   2.322 +    ASSERT(spin_is_locked(&desc->lock));
   2.323 +    if ( !desc->msi_desc )
   2.324 +	return;
   2.325  
   2.326 -    desc = &irq_desc[vector];
   2.327      if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
   2.328          __pci_disable_msi(vector);
   2.329      else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
    2.330 @@ -789,10 +758,17 @@ static void msi_free_vectors(struct pci_
    2.331      irq_desc_t *desc;
    2.332      unsigned long flags;
    2.333  
    2.334 +retry:
    2.335      list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
    2.336      {
    2.337          desc = &irq_desc[entry->vector];
    2.338  
    2.339 +	local_irq_save(flags);
    2.340 +	if ( !spin_trylock(&desc->lock) )
    2.341 +	{
    2.342 +	    local_irq_restore(flags);
    2.343 +	    goto retry;
    2.344 +	}
    2.345 +
    2.346 -        spin_lock_irqsave(&desc->lock, flags);
   2.347          if ( desc->handler == &pci_msi_type )
   2.348          {
   2.349 @@ -800,22 +777,17 @@ static void msi_free_vectors(struct pci_
   2.350              BUG_ON(desc->status & IRQ_GUEST);
   2.351              desc->handler = &no_irq_type;
   2.352          }
   2.353 -        spin_unlock_irqrestore(&desc->lock, flags);
   2.354  
   2.355          msi_free_vector(entry->vector);
   2.356 +        spin_unlock_irqrestore(&desc->lock, flags);
   2.357      }
   2.358  }
   2.359  
   2.360 -void pci_cleanup_msi(u8 bus, u8 devfn)
   2.361 +void pci_cleanup_msi(struct pci_dev *pdev)
   2.362  {
   2.363 -    struct pci_dev *dev = get_msi_pdev(bus, devfn);
   2.364 -
   2.365 -    if ( !dev )
   2.366 -        return;
   2.367 -
   2.368      /* Disable MSI and/or MSI-X */
   2.369 -    msi_set_enable(dev, 0);
   2.370 -    msix_set_enable(dev, 0);
   2.371 -    msi_free_vectors(dev);
   2.372 +    msi_set_enable(pdev, 0);
   2.373 +    msix_set_enable(pdev, 0);
   2.374 +    msi_free_vectors(pdev);
   2.375  }
   2.376  
     3.1 --- a/xen/arch/x86/physdev.c	Fri Jul 04 17:51:42 2008 +0100
     3.2 +++ b/xen/arch/x86/physdev.c	Fri Jul 04 17:52:24 2008 +0100
     3.3 @@ -114,12 +114,12 @@ static int map_domain_pirq(struct domain
     3.4              gdprintk(XENLOG_G_ERR, "Map vector %x to msi while it is in use\n",
     3.5                       vector);
     3.6          desc->handler = &pci_msi_type;
     3.7 -        spin_unlock_irqrestore(&desc->lock, flags);
     3.8  
     3.9          ret = pci_enable_msi(map->msi_info.bus,
    3.10  		                     map->msi_info.devfn, vector,
    3.11  							 map->msi_info.entry_nr,
    3.12  							 map->msi_info.msi);
    3.13 +        spin_unlock_irqrestore(&desc->lock, flags);
    3.14          if ( ret )
    3.15              goto done;
    3.16      }
    3.17 @@ -161,10 +161,10 @@ static int unmap_domain_pirq(struct doma
    3.18          irq_desc_t *desc;
    3.19  
    3.20          desc = &irq_desc[vector];
    3.21 +        spin_lock_irqsave(&desc->lock, flags);
    3.22          if ( desc->msi_desc )
    3.23              pci_disable_msi(vector);
    3.24  
    3.25 -        spin_lock_irqsave(&desc->lock, flags);
    3.26          if ( desc->handler == &pci_msi_type )
    3.27          {
    3.28              /* MSI is not shared, so should be released already */
     4.1 --- a/xen/drivers/passthrough/Makefile	Fri Jul 04 17:51:42 2008 +0100
     4.2 +++ b/xen/drivers/passthrough/Makefile	Fri Jul 04 17:52:24 2008 +0100
     4.3 @@ -3,3 +3,4 @@ subdir-$(x86) += amd
     4.4  
     4.5  obj-y += iommu.o
     4.6  obj-y += io.o
     4.7 +obj-y += pci.o
     5.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Jul 04 17:51:42 2008 +0100
     5.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Jul 04 17:52:24 2008 +0100
     5.3 @@ -298,6 +298,7 @@ static void amd_iommu_setup_dom0_devices
     5.4      u32 l;
     5.5      int bdf;
     5.6  
     5.7 +    write_lock(&pcidevs_lock);
     5.8      for ( bus = 0; bus < 256; bus++ )
     5.9      {
    5.10          for ( dev = 0; dev < 32; dev++ )
    5.11 @@ -310,10 +311,9 @@ static void amd_iommu_setup_dom0_devices
    5.12                       (l == 0x0000ffff) || (l == 0xffff0000) )
    5.13                      continue;
    5.14  
    5.15 -                pdev = xmalloc(struct pci_dev);
    5.16 -                pdev->bus = bus;
    5.17 -                pdev->devfn = PCI_DEVFN(dev, func);
    5.18 -                list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
    5.19 +                pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
    5.20 +                pdev->domain = d;
    5.21 +                list_add(&pdev->domain_list, &d->arch.pdev_list);
    5.22  
    5.23                  bdf = (bus << 8) | pdev->devfn;
    5.24                  /* supported device? */
    5.25 @@ -325,6 +325,7 @@ static void amd_iommu_setup_dom0_devices
    5.26              }
    5.27          }
    5.28      }
    5.29 +    write_unlock(&pcidevs_lock);
    5.30  }
    5.31  
    5.32  int amd_iov_detect(void)
    5.33 @@ -493,38 +494,37 @@ static int reassign_device( struct domai
    5.34      struct amd_iommu *iommu;
    5.35      int bdf;
    5.36  
    5.37 -    for_each_pdev ( source, pdev )
    5.38 -    {
    5.39 -        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
    5.40 -            continue;
    5.41 +    pdev = pci_lock_domain_pdev(source, bus, devfn);
    5.42 +    if ( !pdev )
    5.43 +	return -ENODEV;
    5.44  
    5.45 -        pdev->bus = bus;
    5.46 -        pdev->devfn = devfn;
    5.47 -
    5.48 -        bdf = (bus << 8) | devfn;
    5.49 -        /* supported device? */
    5.50 -        iommu = (bdf < ivrs_bdf_entries) ?
    5.51 -            find_iommu_for_device(bus, pdev->devfn) : NULL;
    5.52 +    bdf = (bus << 8) | devfn;
    5.53 +    /* supported device? */
    5.54 +    iommu = (bdf < ivrs_bdf_entries) ?
    5.55 +	find_iommu_for_device(bus, pdev->devfn) : NULL;
    5.56  
    5.57 -        if ( !iommu )
    5.58 -        {
    5.59 -            amd_iov_error("Fail to find iommu."
    5.60 -                     " %x:%x.%x cannot be assigned to domain %d\n", 
    5.61 -                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
    5.62 -            return -ENODEV;
    5.63 -        }
    5.64 +    if ( !iommu )
    5.65 +    {
    5.66 +	spin_unlock(&pdev->lock);
    5.67 +	amd_iov_error("Fail to find iommu."
    5.68 +		      " %x:%x.%x cannot be assigned to domain %d\n", 
    5.69 +		      bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
    5.70 +	return -ENODEV;
    5.71 +    }
    5.72  
    5.73 -        amd_iommu_disable_domain_device(source, iommu, bdf);
    5.74 -        /* Move pci device from the source domain to target domain. */
    5.75 -        list_move(&pdev->domain_list, &target->arch.pdev_list);
    5.76 +    amd_iommu_disable_domain_device(source, iommu, bdf);
    5.77  
    5.78 -        amd_iommu_setup_domain_device(target, iommu, bdf);
    5.79 -        amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
    5.80 +    write_lock(&pcidevs_lock);
    5.81 +    list_move(&pdev->domain_list, &target->arch.pdev_list);
    5.82 +    write_unlock(&pcidevs_lock);
    5.83 +    pdev->domain = target;
    5.84 +
    5.85 +    amd_iommu_setup_domain_device(target, iommu, bdf);
    5.86 +    amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
    5.87                   bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
    5.88                   source->domain_id, target->domain_id);
    5.89  
    5.90 -        break;
    5.91 -    }
    5.92 +    spin_unlock(&pdev->lock);
    5.93      return 0;
    5.94  }
    5.95  
    5.96 @@ -552,14 +552,16 @@ static int amd_iommu_assign_device(struc
    5.97  static void release_domain_devices(struct domain *d)
    5.98  {
    5.99      struct pci_dev *pdev;
   5.100 +    u8 bus, devfn;
   5.101  
   5.102 -    while ( has_arch_pdevs(d) )
   5.103 +    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
   5.104      {
   5.105 -        pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
   5.106          pdev_flr(pdev->bus, pdev->devfn);
   5.107 +	bus = pdev->bus; devfn = pdev->devfn;
   5.108 +	spin_unlock(&pdev->lock);
   5.109          amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id,
   5.110 -                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
   5.111 -        reassign_device(d, dom0, pdev->bus, pdev->devfn);
   5.112 +		     bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   5.113 +        reassign_device(d, dom0, bus, devfn);
   5.114      }
   5.115  }
   5.116  
   5.117 @@ -619,11 +621,11 @@ static void amd_iommu_domain_destroy(str
   5.118      release_domain_devices(d);
   5.119  }
   5.120  
   5.121 -static void amd_iommu_return_device(
   5.122 +static int amd_iommu_return_device(
   5.123      struct domain *s, struct domain *t, u8 bus, u8 devfn)
   5.124  {
   5.125      pdev_flr(bus, devfn);
   5.126 -    reassign_device(s, t, bus, devfn);
   5.127 +    return reassign_device(s, t, bus, devfn);
   5.128  }
   5.129  
   5.130  static int amd_iommu_group_id(u8 bus, u8 devfn)
     6.1 --- a/xen/drivers/passthrough/iommu.c	Fri Jul 04 17:51:42 2008 +0100
     6.2 +++ b/xen/drivers/passthrough/iommu.c	Fri Jul 04 17:52:24 2008 +0100
     6.3 @@ -240,6 +240,7 @@ int iommu_get_device_group(struct domain
     6.4  
     6.5      group_id = ops->get_device_group_id(bus, devfn);
     6.6  
     6.7 +    read_lock(&pcidevs_lock);
     6.8      for_each_pdev( d, pdev )
     6.9      {
    6.10          if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
    6.11 @@ -252,10 +253,14 @@ int iommu_get_device_group(struct domain
    6.12              bdf |= (pdev->bus & 0xff) << 16;
    6.13              bdf |= (pdev->devfn & 0xff) << 8;
    6.14              if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
    6.15 +            {
    6.16 +                read_unlock(&pcidevs_lock);
    6.17                  return -1;
    6.18 +            }
    6.19              i++;
    6.20          }
    6.21      }
    6.22 +    read_unlock(&pcidevs_lock);
    6.23  
    6.24      return i;
    6.25  }
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xen/drivers/passthrough/pci.c	Fri Jul 04 17:52:24 2008 +0100
     7.3 @@ -0,0 +1,124 @@
     7.4 +/*
     7.5 + * Copyright (C) 2008,  Netronome Systems, Inc.
     7.6 + *                
     7.7 + * This program is free software; you can redistribute it and/or modify it
     7.8 + * under the terms and conditions of the GNU General Public License,
     7.9 + * version 2, as published by the Free Software Foundation.
    7.10 + *
    7.11 + * This program is distributed in the hope it will be useful, but WITHOUT
    7.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    7.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    7.14 + * more details.
    7.15 + *
    7.16 + * You should have received a copy of the GNU General Public License along with
    7.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    7.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    7.19 + */
    7.20 +
    7.21 +#include <xen/sched.h>
    7.22 +#include <xen/pci.h>
    7.23 +#include <xen/list.h>
    7.24 +#include <xen/prefetch.h>
    7.25 +#include <xen/keyhandler.h>
    7.26 +
    7.27 +
    7.28 +LIST_HEAD(alldevs_list);
    7.29 +rwlock_t pcidevs_lock = RW_LOCK_UNLOCKED;
    7.30 +
    7.31 +struct pci_dev *alloc_pdev(u8 bus, u8 devfn)
    7.32 +{
    7.33 +    struct pci_dev *pdev;
    7.34 +
    7.35 +    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
    7.36 +        if ( pdev->bus == bus && pdev->devfn == devfn )
    7.37 +	    return pdev;
    7.38 +
    7.39 +    pdev = xmalloc(struct pci_dev);
    7.40 +    if ( !pdev )
    7.41 +	return NULL;
    7.42 +
    7.43 +    *((u8*) &pdev->bus) = bus;
    7.44 +    *((u8*) &pdev->devfn) = devfn;
    7.45 +    pdev->domain = NULL;
    7.46 +    spin_lock_init(&pdev->lock);
    7.47 +    INIT_LIST_HEAD(&pdev->msi_list);
    7.48 +    list_add(&pdev->alldevs_list, &alldevs_list);
    7.49 +
    7.50 +    return pdev;
    7.51 +}
    7.52 +
    7.53 +void free_pdev(struct pci_dev *pdev)
    7.54 +{
    7.55 +    list_del(&pdev->alldevs_list);
    7.56 +    xfree(pdev);
    7.57 +}
    7.58 +
    7.59 +struct pci_dev *pci_lock_pdev(int bus, int devfn)
    7.60 +{
    7.61 +    struct pci_dev *pdev;
    7.62 +
    7.63 +    read_lock(&pcidevs_lock);
    7.64 +    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
    7.65 +        if ( (pdev->bus == bus || bus == -1) &&
    7.66 +	     (pdev->devfn == devfn || devfn == -1) )
    7.67 +	{
    7.68 +	    spin_lock(&pdev->lock);
    7.69 +	    read_unlock(&pcidevs_lock);
    7.70 +	    return pdev;
    7.71 +	}
    7.72 +    read_unlock(&pcidevs_lock);
    7.73 +
    7.74 +    return NULL;
    7.75 +}
    7.76 +
    7.77 +struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
    7.78 +{
    7.79 +    struct pci_dev *pdev;
    7.80 +
    7.81 +    read_lock(&pcidevs_lock);
    7.82 +    list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
    7.83 +    {
    7.84 +	spin_lock(&pdev->lock);
    7.85 +        if ( (pdev->bus == bus || bus == -1) &&
    7.86 +	     (pdev->devfn == devfn || devfn == -1) &&
    7.87 +	     (pdev->domain == d) )
    7.88 +	{
    7.89 +	    read_unlock(&pcidevs_lock);
    7.90 +	    return pdev;
    7.91 +	}
    7.92 +	spin_unlock(&pdev->lock);
    7.93 +    }
    7.94 +    read_unlock(&pcidevs_lock);
    7.95 +
    7.96 +    return NULL;
    7.97 +}
    7.98 +
    7.99 +static void dump_pci_devices(unsigned char ch)
   7.100 +{
   7.101 +    struct pci_dev *pdev;
   7.102 +    struct msi_desc *msi;
   7.103 +
   7.104 +    printk("==== PCI devices ====\n");
   7.105 +    read_lock(&pcidevs_lock);
   7.106 +
   7.107 +    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
   7.108 +    {
   7.109 +	spin_lock(&pdev->lock);
   7.110 +        printk("%02x:%02x.%x - dom %-3d - MSIs < ",
   7.111 +               pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
   7.112 +               pdev->domain ? pdev->domain->domain_id : -1);
   7.113 +	list_for_each_entry ( msi, &pdev->msi_list, list )
   7.114 +	    printk("%d ", msi->vector);
   7.115 +	printk(">\n");
   7.116 +	spin_unlock(&pdev->lock);
   7.117 +    }
   7.118 +
   7.119 +    read_unlock(&pcidevs_lock);
   7.120 +}
   7.121 +
   7.122 +static int __init setup_dump_pcidevs(void)
   7.123 +{
   7.124 +    register_keyhandler('P', dump_pci_devices, "dump PCI devices");
   7.125 +    return 0;
   7.126 +}
   7.127 +__initcall(setup_dump_pcidevs);
     8.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Fri Jul 04 17:51:42 2008 +0100
     8.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Jul 04 17:52:24 2008 +0100
     8.3 @@ -1345,7 +1345,7 @@ static int domain_context_unmap(u8 bus, 
     8.4      return ret;
     8.5  }
     8.6  
     8.7 -void reassign_device_ownership(
     8.8 +static int reassign_device_ownership(
     8.9      struct domain *source,
    8.10      struct domain *target,
    8.11      u8 bus, u8 devfn)
    8.12 @@ -1353,61 +1353,62 @@ void reassign_device_ownership(
    8.13      struct hvm_iommu *source_hd = domain_hvm_iommu(source);
    8.14      struct pci_dev *pdev;
    8.15      struct acpi_drhd_unit *drhd;
    8.16 -    struct iommu *iommu;
    8.17 -    int status;
    8.18 -    int found = 0;
    8.19 +    struct iommu *pdev_iommu;
    8.20 +    int ret, found = 0;
    8.21 +
    8.22 +    if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
    8.23 +        return -ENODEV;
    8.24  
    8.25      pdev_flr(bus, devfn);
    8.26 -
    8.27 -    for_each_pdev( source, pdev )
    8.28 -        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
    8.29 -            goto found;
    8.30 -
    8.31 -    return;
    8.32 -
    8.33 -found:
    8.34      drhd = acpi_find_matched_drhd_unit(bus, devfn);
    8.35 -    iommu = drhd->iommu;
    8.36 +    pdev_iommu = drhd->iommu;
    8.37      domain_context_unmap(bus, devfn);
    8.38  
    8.39 -    /* Move pci device from the source domain to target domain. */
    8.40 +    write_lock(&pcidevs_lock);
    8.41      list_move(&pdev->domain_list, &target->arch.pdev_list);
    8.42 +    write_unlock(&pcidevs_lock);
    8.43 +    pdev->domain = target;
    8.44  
    8.45 +    ret = domain_context_mapping(target, bus, devfn);
    8.46 +    spin_unlock(&pdev->lock);
    8.47 +
    8.48 +    read_lock(&pcidevs_lock);
    8.49      for_each_pdev ( source, pdev )
    8.50      {
    8.51          drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
    8.52 -        if ( drhd->iommu == iommu )
    8.53 +        if ( drhd->iommu == pdev_iommu )
    8.54          {
    8.55              found = 1;
    8.56              break;
    8.57          }
    8.58      }
    8.59 +    read_unlock(&pcidevs_lock);
    8.60  
    8.61      if ( !found )
    8.62 -        clear_bit(iommu->index, &source_hd->iommu_bitmap);
    8.63 +        clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
    8.64  
    8.65 -    status = domain_context_mapping(target, bus, devfn);
    8.66 -    if ( status != 0 )
    8.67 -        gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
    8.68 +    return ret;
    8.69  }
    8.70  
    8.71  void return_devices_to_dom0(struct domain *d)
    8.72  {
    8.73      struct pci_dev *pdev;
    8.74  
    8.75 -    while ( has_arch_pdevs(d) )
    8.76 +    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
    8.77      {
    8.78 -        pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
    8.79 -        pci_cleanup_msi(pdev->bus, pdev->devfn);
    8.80 +        pci_cleanup_msi(pdev);
    8.81 +        spin_unlock(&pdev->lock);
    8.82          reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
    8.83      }
    8.84  
    8.85  #ifdef VTD_DEBUG
    8.86 +    read_lock(&pcidevs_lock);
    8.87      for_each_pdev ( dom0, pdev )
    8.88          dprintk(XENLOG_INFO VTDPREFIX,
    8.89                  "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
    8.90                  dom0->domain_id, pdev->bus,
    8.91                  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
    8.92 +    read_unlock(&pcidevs_lock);
    8.93  #endif
    8.94  }
    8.95  
    8.96 @@ -1568,11 +1569,7 @@ static int iommu_prepare_rmrr_dev(struct
    8.97          return ret;
    8.98  
    8.99      if ( domain_context_mapped(bus, devfn) == 0 )
   8.100 -    {
   8.101          ret = domain_context_mapping(d, bus, devfn);
   8.102 -        if ( !ret )
   8.103 -            return 0;
   8.104 -    }
   8.105  
   8.106      return ret;
   8.107  }
   8.108 @@ -1586,6 +1583,7 @@ static void setup_dom0_devices(struct do
   8.109  
   8.110      hd = domain_hvm_iommu(d);
   8.111  
   8.112 +    write_lock(&pcidevs_lock);
   8.113      for ( bus = 0; bus < 256; bus++ )
   8.114      {
   8.115          for ( dev = 0; dev < 32; dev++ )
   8.116 @@ -1597,10 +1595,10 @@ static void setup_dom0_devices(struct do
   8.117                  if ( (l == 0xffffffff) || (l == 0x00000000) ||
   8.118                       (l == 0x0000ffff) || (l == 0xffff0000) )
   8.119                      continue;
   8.120 -                pdev = xmalloc(struct pci_dev);
   8.121 -                pdev->bus = bus;
   8.122 -                pdev->devfn = PCI_DEVFN(dev, func);
   8.123 -                list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
   8.124 +
   8.125 +                pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
   8.126 +                pdev->domain = d;
   8.127 +                list_add(&pdev->domain_list, &d->arch.pdev_list);
   8.128  
   8.129                  ret = domain_context_mapping(d, pdev->bus, pdev->devfn);
   8.130                  if ( ret != 0 )
   8.131 @@ -1609,6 +1607,7 @@ static void setup_dom0_devices(struct do
   8.132              }
   8.133          }
   8.134      }
   8.135 +    write_unlock(&pcidevs_lock);
   8.136  }
   8.137  
   8.138  void clear_fault_bits(struct iommu *iommu)
   8.139 @@ -1737,9 +1736,11 @@ int device_assigned(u8 bus, u8 devfn)
   8.140  {
   8.141      struct pci_dev *pdev;
   8.142  
   8.143 -    for_each_pdev( dom0, pdev )
   8.144 -        if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
   8.145 -            return 0;
   8.146 +    if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
   8.147 +    {
   8.148 +        spin_unlock(&pdev->lock);
   8.149 +        return 0;
   8.150 +    }
   8.151  
   8.152      return 1;
   8.153  }
   8.154 @@ -1751,9 +1752,11 @@ int intel_iommu_assign_device(struct dom
   8.155      u16 bdf;
   8.156  
   8.157      if ( list_empty(&acpi_drhd_units) )
   8.158 -        return ret;
   8.159 +        return -ENODEV;
   8.160  
   8.161 -    reassign_device_ownership(dom0, d, bus, devfn);
   8.162 +    ret = reassign_device_ownership(dom0, d, bus, devfn);
   8.163 +    if ( ret )
   8.164 +        return ret;
   8.165  
   8.166      /* Setup rmrr identity mapping */
   8.167      for_each_rmrr_device( rmrr, bdf, i )
     9.1 --- a/xen/include/asm-x86/msi.h	Fri Jul 04 17:51:42 2008 +0100
     9.2 +++ b/xen/include/asm-x86/msi.h	Fri Jul 04 17:52:24 2008 +0100
     9.3 @@ -63,12 +63,10 @@ struct msi_msg {
     9.4  /* Helper functions */
     9.5  extern void mask_msi_irq(unsigned int irq);
     9.6  extern void unmask_msi_irq(unsigned int irq);
     9.7 -extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
     9.8 -extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
     9.9  extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
    9.10  extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
    9.11  extern void pci_disable_msi(int vector);
    9.12 -extern void pci_cleanup_msi(u8 bus, u8 devfn);
    9.13 +extern void pci_cleanup_msi(struct pci_dev *pdev);
    9.14  
    9.15  struct msi_desc {
    9.16  	struct {
    10.1 --- a/xen/include/xen/iommu.h	Fri Jul 04 17:51:42 2008 +0100
    10.2 +++ b/xen/include/xen/iommu.h	Fri Jul 04 17:52:24 2008 +0100
    10.3 @@ -56,6 +56,8 @@ struct iommu {
    10.4      struct intel_iommu *intel;
    10.5  };
    10.6  
    10.7 +int iommu_add_device(u8 bus, u8 devfn);
    10.8 +void iommu_remove_device(u8 bus, u8 devfn);
    10.9  int iommu_domain_init(struct domain *d);
   10.10  void iommu_domain_destroy(struct domain *d);
   10.11  int device_assigned(u8 bus, u8 devfn);
   10.12 @@ -63,9 +65,6 @@ int assign_device(struct domain *d, u8 b
   10.13  void deassign_device(struct domain *d, u8 bus, u8 devfn);
   10.14  int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
   10.15      XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
   10.16 -void reassign_device_ownership(struct domain *source,
   10.17 -                               struct domain *target,
   10.18 -                               u8 bus, u8 devfn);
   10.19  int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
   10.20  int iommu_unmap_page(struct domain *d, unsigned long gfn);
   10.21  void iommu_domain_teardown(struct domain *d);
   10.22 @@ -99,8 +98,8 @@ struct iommu_ops {
   10.23      void (*teardown)(struct domain *d);
   10.24      int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn);
   10.25      int (*unmap_page)(struct domain *d, unsigned long gfn);
   10.26 -    void (*reassign_device)(struct domain *s, struct domain *t,
   10.27 -                            u8 bus, u8 devfn);
   10.28 +    int (*reassign_device)(struct domain *s, struct domain *t,
   10.29 +			   u8 bus, u8 devfn);
   10.30      int (*get_device_group_id)(u8 bus, u8 devfn);
   10.31  };
   10.32  
    11.1 --- a/xen/include/xen/pci.h	Fri Jul 04 17:51:42 2008 +0100
    11.2 +++ b/xen/include/xen/pci.h	Fri Jul 04 17:52:24 2008 +0100
    11.3 @@ -10,6 +10,7 @@
    11.4  #include <xen/config.h>
    11.5  #include <xen/types.h>
    11.6  #include <xen/list.h>
    11.7 +#include <xen/spinlock.h>
    11.8  
    11.9  /*
   11.10   * The PCI interface treats multi-function devices as independent
   11.11 @@ -29,16 +30,32 @@
   11.12  #define PCI_BDF2(b,df)  (((b & 0xff) << 8) | (df & 0xff))
   11.13  
   11.14  struct pci_dev {
   11.15 +    struct list_head alldevs_list;
   11.16      struct list_head domain_list;
   11.17 -    struct list_head msi_dev_list;
   11.18 -    u8 bus;
   11.19 -    u8 devfn;
   11.20      struct list_head msi_list;
   11.21 +    struct domain *domain;
   11.22 +    const u8 bus;
   11.23 +    const u8 devfn;
   11.24 +    spinlock_t lock;
   11.25  };
   11.26  
   11.27  #define for_each_pdev(domain, pdev) \
   11.28      list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
   11.29  
   11.30 +/*
   11.31 + * The pcidevs_lock write-lock must be held when doing alloc_pdev() or
   11.32 + * free_pdev().  Never de-reference pdev without holding pdev->lock or
    11.33 + * pcidevs_lock.  Always acquire pcidevs_lock before pdev->lock when
   11.34 + * doing free_pdev().
   11.35 + */
   11.36 +
   11.37 +extern rwlock_t pcidevs_lock;
   11.38 +
   11.39 +struct pci_dev *alloc_pdev(u8 bus, u8 devfn);
   11.40 +void free_pdev(struct pci_dev *pdev);
   11.41 +struct pci_dev *pci_lock_pdev(int bus, int devfn);
   11.42 +struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
   11.43 +
   11.44  
   11.45  uint8_t pci_conf_read8(
   11.46      unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);