ia64/xen-unstable

changeset 18916:2941b1a97c60

Re-enable MSI support

MSI support is currently disabled because of locking issues. This patch
cleans up the locking around MSI setup and teardown so that MSI can be
re-enabled.
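
The resulting caller-side lock ordering, shown here as a minimal sketch
distilled from the physdev.c and irq.c hunks below (d, pirq, vector and
msi are illustrative locals, not code copied from the changeset):

    read_lock(&pcidevs_lock);     /* protects the pci_dev list and msi_desc lists  */
    spin_lock(&d->event_lock);    /* serializes d->arch.pirq_vector / vector_pirq  */
    /* map_domain_pirq() takes irq_desc[vector].lock internally; for MSI it
     * first calls pci_enable_msi(), which now only constructs the msi_desc
     * (leaving the interrupt masked) while pcidevs_lock is held. */
    ret = map_domain_pirq(d, pirq, vector, MAP_PIRQ_TYPE_MSI, &msi);
    spin_unlock(&d->event_lock);
    read_unlock(&pcidevs_lock);

The per-device pdev->lock is removed entirely (see the xen/include/xen/pci.h
and xen/drivers/passthrough/pci.c hunks); pcidevs_lock now covers the device
list and each device's msi_list instead.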

Signed-off-by: Jiang Yunhong <yunhong.jiang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Dec 11 11:48:19 2008 +0000 (2008-12-11)
parents c15244125a69
children b56d5fe594ae
files xen/arch/x86/domctl.c xen/arch/x86/irq.c xen/arch/x86/msi.c xen/arch/x86/physdev.c xen/arch/x86/x86_64/asm-offsets.c xen/common/domain.c xen/drivers/passthrough/amd/pci_amd_iommu.c xen/drivers/passthrough/iommu.c xen/drivers/passthrough/pci.c xen/drivers/passthrough/vtd/iommu.c xen/include/asm-x86/msi.h xen/include/xen/iommu.h xen/include/xen/pci.h
line diff
     1.1 --- a/xen/arch/x86/domctl.c	Thu Dec 11 11:40:10 2008 +0000
     1.2 +++ b/xen/arch/x86/domctl.c	Thu Dec 11 11:48:19 2008 +0000
     1.3 @@ -665,14 +665,6 @@ long arch_do_domctl(
     1.4          }
     1.5  
     1.6          ret = -EINVAL;
     1.7 -        if ( device_assigned(bus, devfn) )
     1.8 -        {
     1.9 -            gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
    1.10 -                     "%x:%x:%x already assigned, or non-existent\n",
    1.11 -                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
    1.12 -            put_domain(d);
    1.13 -            break;
    1.14 -        }
    1.15  
    1.16          ret = assign_device(d, bus, devfn);
    1.17          if ( ret )
    1.18 @@ -715,15 +707,8 @@ long arch_do_domctl(
    1.19              put_domain(d);
    1.20              break;
    1.21          }
    1.22 -
    1.23 -        if ( !device_assigned(bus, devfn) )
    1.24 -        {
    1.25 -            put_domain(d);
    1.26 -            break;
    1.27 -        }
    1.28 -
    1.29          ret = 0;
    1.30 -        deassign_device(d, bus, devfn);
    1.31 +        ret = deassign_device(d, bus, devfn);
    1.32          gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
    1.33              bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
    1.34  
     2.1 --- a/xen/arch/x86/irq.c	Thu Dec 11 11:40:10 2008 +0000
     2.2 +++ b/xen/arch/x86/irq.c	Thu Dec 11 11:48:19 2008 +0000
     2.3 @@ -847,12 +847,11 @@ int map_domain_pirq(
     2.4      int old_vector, old_pirq;
     2.5      irq_desc_t *desc;
     2.6      unsigned long flags;
     2.7 -
     2.8 -    ASSERT(spin_is_locked(&d->event_lock));
     2.9 +    struct msi_desc *msi_desc;
    2.10 +    struct pci_dev *pdev = NULL;
    2.11  
    2.12 -    /* XXX Until pcidev and msi locking is fixed. */
    2.13 -    if ( type == MAP_PIRQ_TYPE_MSI )
    2.14 -        return -EINVAL;
    2.15 +    ASSERT(spin_is_locked(&pcidevs_lock));
    2.16 +    ASSERT(spin_is_locked(&d->event_lock));
    2.17  
    2.18      if ( !IS_PRIV(current->domain) )
    2.19          return -EPERM;
    2.20 @@ -884,25 +883,35 @@ int map_domain_pirq(
    2.21      }
    2.22  
    2.23      desc = &irq_desc[vector];
    2.24 -    spin_lock_irqsave(&desc->lock, flags);
    2.25  
    2.26      if ( type == MAP_PIRQ_TYPE_MSI )
    2.27      {
    2.28          struct msi_info *msi = (struct msi_info *)data;
    2.29 -        if ( desc->handler != &no_irq_type )
    2.30 -            dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
    2.31 -                    d->domain_id, vector);
    2.32 -        desc->handler = &pci_msi_type;
    2.33 -        ret = pci_enable_msi(msi);
    2.34 +
    2.35 +        pdev = pci_get_pdev(msi->bus, msi->devfn);
    2.36 +        ret = pci_enable_msi(msi, &msi_desc);
    2.37          if ( ret )
    2.38              goto done;
    2.39 +
    2.40 +        spin_lock_irqsave(&desc->lock, flags);
    2.41 +
    2.42 +        if ( desc->handler != &no_irq_type )
    2.43 +            dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
    2.44 +              d->domain_id, vector);
    2.45 +        desc->handler = &pci_msi_type;
    2.46 +        d->arch.pirq_vector[pirq] = vector;
    2.47 +        d->arch.vector_pirq[vector] = pirq;
    2.48 +        setup_msi_irq(pdev, msi_desc);
    2.49 +        spin_unlock_irqrestore(&desc->lock, flags);
    2.50 +    } else
    2.51 +    {
    2.52 +        spin_lock_irqsave(&desc->lock, flags);
    2.53 +        d->arch.pirq_vector[pirq] = vector;
    2.54 +        d->arch.vector_pirq[vector] = pirq;
    2.55 +        spin_unlock_irqrestore(&desc->lock, flags);
    2.56      }
    2.57  
    2.58 -    d->arch.pirq_vector[pirq] = vector;
    2.59 -    d->arch.vector_pirq[vector] = pirq;
    2.60 -
    2.61   done:
    2.62 -    spin_unlock_irqrestore(&desc->lock, flags);
    2.63      return ret;
    2.64  }
    2.65  
    2.66 @@ -913,6 +922,7 @@ int unmap_domain_pirq(struct domain *d, 
    2.67      irq_desc_t *desc;
    2.68      int vector, ret = 0;
    2.69      bool_t forced_unbind;
    2.70 +    struct msi_desc *msi_desc = NULL;
    2.71  
    2.72      if ( (pirq < 0) || (pirq >= NR_IRQS) )
    2.73          return -EINVAL;
    2.74 @@ -920,6 +930,7 @@ int unmap_domain_pirq(struct domain *d, 
    2.75      if ( !IS_PRIV(current->domain) )
    2.76          return -EINVAL;
    2.77  
    2.78 +    ASSERT(spin_is_locked(&pcidevs_lock));
    2.79      ASSERT(spin_is_locked(&d->event_lock));
    2.80  
    2.81      vector = d->arch.pirq_vector[pirq];
    2.82 @@ -937,18 +948,19 @@ int unmap_domain_pirq(struct domain *d, 
    2.83                  d->domain_id, pirq);
    2.84  
    2.85      desc = &irq_desc[vector];
    2.86 +
    2.87 +    if ( (msi_desc = desc->msi_desc) != NULL )
    2.88 +        pci_disable_msi(msi_desc);
    2.89 +
    2.90      spin_lock_irqsave(&desc->lock, flags);
    2.91  
    2.92      BUG_ON(vector != d->arch.pirq_vector[pirq]);
    2.93  
    2.94 -    if ( desc->msi_desc )
    2.95 -        pci_disable_msi(vector);
    2.96 +    if ( msi_desc )
    2.97 +        teardown_msi_vector(vector);
    2.98  
    2.99      if ( desc->handler == &pci_msi_type )
   2.100 -    {
   2.101          desc->handler = &no_irq_type;
   2.102 -        free_irq_vector(vector);
   2.103 -    }
   2.104  
   2.105      if ( !forced_unbind )
   2.106      {
   2.107 @@ -962,6 +974,11 @@ int unmap_domain_pirq(struct domain *d, 
   2.108      }
   2.109  
   2.110      spin_unlock_irqrestore(&desc->lock, flags);
   2.111 +    if (msi_desc)
   2.112 +    {
   2.113 +        msi_free_vector(msi_desc);
   2.114 +        free_irq_vector(vector);
   2.115 +    }
   2.116  
   2.117      ret = irq_deny_access(d, pirq);
   2.118      if ( ret )
   2.119 @@ -976,6 +993,7 @@ void free_domain_pirqs(struct domain *d)
   2.120  {
   2.121      int i;
   2.122  
   2.123 +    read_lock(&pcidevs_lock);
   2.124      spin_lock(&d->event_lock);
   2.125  
   2.126      for ( i = 0; i < NR_IRQS; i++ )
   2.127 @@ -983,6 +1001,7 @@ void free_domain_pirqs(struct domain *d)
   2.128              unmap_domain_pirq(d, i);
   2.129  
   2.130      spin_unlock(&d->event_lock);
   2.131 +    read_unlock(&pcidevs_lock);
   2.132  }
   2.133  
   2.134  extern void dump_ioapic_irq_info(void);
     3.1 --- a/xen/arch/x86/msi.c	Thu Dec 11 11:40:10 2008 +0000
     3.2 +++ b/xen/arch/x86/msi.c	Thu Dec 11 11:48:19 2008 +0000
     3.3 @@ -153,6 +153,8 @@ static int set_vector_msi(struct msi_des
     3.4  
     3.5  static int unset_vector_msi(int vector)
     3.6  {
     3.7 +    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     3.8 +
     3.9      if ( vector >= NR_VECTORS )
    3.10      {
    3.11          dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
    3.12 @@ -161,6 +163,7 @@ static int unset_vector_msi(int vector)
    3.13      }
    3.14  
    3.15      irq_desc[vector].msi_desc = NULL;
    3.16 +
    3.17      return 0;
    3.18  }
    3.19  
    3.20 @@ -228,14 +231,12 @@ void set_msi_affinity(unsigned int vecto
    3.21          return;
    3.22  
    3.23      ASSERT(spin_is_locked(&irq_desc[vector].lock));
    3.24 -    spin_lock(&desc->dev->lock);
    3.25      read_msi_msg(desc, &msg);
    3.26  
    3.27      msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
    3.28      msg.address_lo |= MSI_ADDR_DEST_ID(dest);
    3.29  
    3.30      write_msi_msg(desc, &msg);
    3.31 -    spin_unlock(&desc->dev->lock);
    3.32  }
    3.33  
    3.34  static void msi_set_enable(struct pci_dev *dev, int enable)
    3.35 @@ -369,7 +370,7 @@ static struct msi_desc* alloc_msi_entry(
    3.36      return entry;
    3.37  }
    3.38  
    3.39 -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
    3.40 +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
    3.41  {
    3.42      struct msi_msg msg;
    3.43  
    3.44 @@ -380,19 +381,13 @@ static int setup_msi_irq(struct pci_dev 
    3.45      return 0;
    3.46  }
    3.47  
    3.48 -static void teardown_msi_vector(int vector)
    3.49 +void teardown_msi_vector(int vector)
    3.50  {
    3.51      unset_vector_msi(vector);
    3.52  }
    3.53  
    3.54 -static void msi_free_vector(int vector)
    3.55 +int msi_free_vector(struct msi_desc *entry)
    3.56  {
    3.57 -    struct msi_desc *entry;
    3.58 -
    3.59 -    ASSERT(spin_is_locked(&irq_desc[vector].lock));
    3.60 -    entry = irq_desc[vector].msi_desc;
    3.61 -    teardown_msi_vector(vector);
    3.62 -
    3.63      if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
    3.64      {
    3.65          unsigned long start;
    3.66 @@ -407,6 +402,7 @@ static void msi_free_vector(int vector)
    3.67      }
    3.68      list_del(&entry->list);
    3.69      xfree(entry);
    3.70 +    return 0;
    3.71  }
    3.72  
    3.73  static struct msi_desc *find_msi_entry(struct pci_dev *dev,
    3.74 @@ -433,15 +429,18 @@ static struct msi_desc *find_msi_entry(s
    3.75   * multiple messages. A return of zero indicates the successful setup
    3.76   * of an entry zero with the new MSI irq or non-zero for otherwise.
    3.77   **/
    3.78 -static int msi_capability_init(struct pci_dev *dev, int vector)
    3.79 +static int msi_capability_init(struct pci_dev *dev,
    3.80 +                               int vector,
    3.81 +                               struct msi_desc **desc)
    3.82  {
    3.83      struct msi_desc *entry;
    3.84 -    int pos, ret;
    3.85 +    int pos;
    3.86      u16 control;
    3.87      u8 bus = dev->bus;
    3.88      u8 slot = PCI_SLOT(dev->devfn);
    3.89      u8 func = PCI_FUNC(dev->devfn);
    3.90  
    3.91 +    ASSERT(spin_is_locked(&pcidevs_lock));
    3.92      pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSI);
    3.93      control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
    3.94      /* MSI Entry Initialization */
    3.95 @@ -477,14 +476,7 @@ static int msi_capability_init(struct pc
    3.96      }
    3.97      list_add_tail(&entry->list, &dev->msi_list);
    3.98  
    3.99 -    /* Configure MSI capability structure */
   3.100 -    ret = setup_msi_irq(dev, entry);
   3.101 -    if ( ret )
   3.102 -    {
   3.103 -        msi_free_vector(vector);
   3.104 -        return ret;
   3.105 -    }
   3.106 -
   3.107 +    *desc = entry;
   3.108      /* Restore the original MSI enabled bits  */
   3.109      pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
   3.110  
   3.111 @@ -501,7 +493,9 @@ static int msi_capability_init(struct pc
   3.112   * single MSI-X irq. A return of zero indicates the successful setup of
   3.113   * requested MSI-X entries with allocated irqs or non-zero for otherwise.
   3.114   **/
   3.115 -static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi)
   3.116 +static int msix_capability_init(struct pci_dev *dev,
   3.117 +                                struct msi_info *msi,
   3.118 +                                struct msi_desc **desc)
   3.119  {
   3.120      struct msi_desc *entry;
   3.121      int pos;
   3.122 @@ -515,6 +509,9 @@ static int msix_capability_init(struct p
   3.123      u8 slot = PCI_SLOT(dev->devfn);
   3.124      u8 func = PCI_FUNC(dev->devfn);
   3.125  
   3.126 +    ASSERT(spin_is_locked(&pcidevs_lock));
   3.127 +    ASSERT(desc);
   3.128 +
   3.129      pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
   3.130      control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
   3.131      msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
   3.132 @@ -550,9 +547,13 @@ static int msix_capability_init(struct p
   3.133  
   3.134      list_add_tail(&entry->list, &dev->msi_list);
   3.135  
   3.136 -    setup_msi_irq(dev, entry);
   3.137 +    /* Mask interrupt here */
   3.138 +    writel(1, entry->mask_base + entry->msi_attrib.entry_nr
   3.139 +                * PCI_MSIX_ENTRY_SIZE
   3.140 +                + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
   3.141  
   3.142 -    /* Set MSI-X enabled bits */
   3.143 +    *desc = entry;
   3.144 +    /* Restore MSI-X enabled bits */
   3.145      pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
   3.146  
   3.147      return 0;
   3.148 @@ -568,45 +569,35 @@ static int msix_capability_init(struct p
   3.149   * indicates the successful setup of an entry zero with the new MSI
   3.150   * irq or non-zero for otherwise.
   3.151   **/
   3.152 -static int __pci_enable_msi(struct msi_info *msi)
   3.153 +static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
   3.154  {
   3.155      int status;
   3.156      struct pci_dev *pdev;
   3.157  
   3.158 -    pdev = pci_lock_pdev(msi->bus, msi->devfn);
   3.159 +    ASSERT(spin_is_locked(&pcidevs_lock));
   3.160 +    pdev = pci_get_pdev(msi->bus, msi->devfn);
   3.161      if ( !pdev )
   3.162          return -ENODEV;
   3.163  
   3.164      if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
   3.165      {
   3.166 -        spin_unlock(&pdev->lock);
   3.167          dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
   3.168                  "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
   3.169                  PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
   3.170          return 0;
   3.171      }
   3.172  
   3.173 -    status = msi_capability_init(pdev, msi->vector);
   3.174 -    spin_unlock(&pdev->lock);
   3.175 +    status = msi_capability_init(pdev, msi->vector, desc);
   3.176      return status;
   3.177  }
   3.178  
   3.179 -static void __pci_disable_msi(int vector)
   3.180 +static void __pci_disable_msi(struct msi_desc *entry)
   3.181  {
   3.182 -    struct msi_desc *entry;
   3.183      struct pci_dev *dev;
   3.184      int pos;
   3.185      u16 control;
   3.186      u8 bus, slot, func;
   3.187  
   3.188 -    entry = irq_desc[vector].msi_desc;
   3.189 -    if ( !entry )
   3.190 -        return;
   3.191 -    /*
   3.192 -     * Lock here is safe.  msi_desc can not be removed without holding
   3.193 -     * both irq_desc[].lock (which we do) and pdev->lock.
   3.194 -     */
   3.195 -    spin_lock(&entry->dev->lock);
   3.196      dev = entry->dev;
   3.197      bus = dev->bus;
   3.198      slot = PCI_SLOT(dev->devfn);
   3.199 @@ -618,10 +609,6 @@ static void __pci_disable_msi(int vector
   3.200  
   3.201      BUG_ON(list_empty(&dev->msi_list));
   3.202  
   3.203 -    msi_free_vector(vector);
   3.204 -
   3.205 -    pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
   3.206 -    spin_unlock(&dev->lock);
   3.207  }
   3.208  
   3.209  /**
   3.210 @@ -639,7 +626,7 @@ static void __pci_disable_msi(int vector
   3.211   * of irqs available. Driver should use the returned value to re-send
   3.212   * its request.
   3.213   **/
   3.214 -static int __pci_enable_msix(struct msi_info *msi)
   3.215 +static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
   3.216  {
   3.217      int status, pos, nr_entries;
   3.218      struct pci_dev *pdev;
   3.219 @@ -647,7 +634,8 @@ static int __pci_enable_msix(struct msi_
   3.220      u8 slot = PCI_SLOT(msi->devfn);
   3.221      u8 func = PCI_FUNC(msi->devfn);
   3.222  
   3.223 -    pdev = pci_lock_pdev(msi->bus, msi->devfn);
   3.224 +    ASSERT(spin_is_locked(&pcidevs_lock));
   3.225 +    pdev = pci_get_pdev(msi->bus, msi->devfn);
   3.226      if ( !pdev )
   3.227          return -ENODEV;
   3.228  
   3.229 @@ -655,41 +643,27 @@ static int __pci_enable_msix(struct msi_
   3.230      control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
   3.231      nr_entries = multi_msix_capable(control);
   3.232      if (msi->entry_nr >= nr_entries)
   3.233 -    {
   3.234 -        spin_unlock(&pdev->lock);
   3.235          return -EINVAL;
   3.236 -    }
   3.237  
   3.238      if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
   3.239      {
   3.240 -        spin_unlock(&pdev->lock);
   3.241          dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
   3.242                  "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
   3.243                  PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
   3.244          return 0;
   3.245      }
   3.246  
   3.247 -    status = msix_capability_init(pdev, msi);
   3.248 -    spin_unlock(&pdev->lock);
   3.249 +    status = msix_capability_init(pdev, msi, desc);
   3.250      return status;
   3.251  }
   3.252  
   3.253 -static void __pci_disable_msix(int vector)
   3.254 +static void __pci_disable_msix(struct msi_desc *entry)
   3.255  {
   3.256 -    struct msi_desc *entry;
   3.257      struct pci_dev *dev;
   3.258      int pos;
   3.259      u16 control;
   3.260      u8 bus, slot, func;
   3.261  
   3.262 -    entry = irq_desc[vector].msi_desc;
   3.263 -    if ( !entry )
   3.264 -        return;
   3.265 -    /*
   3.266 -     * Lock here is safe.  msi_desc can not be removed without holding
   3.267 -     * both irq_desc[].lock (which we do) and pdev->lock.
   3.268 -     */
   3.269 -    spin_lock(&entry->dev->lock);
   3.270      dev = entry->dev;
   3.271      bus = dev->bus;
   3.272      slot = PCI_SLOT(dev->devfn);
   3.273 @@ -701,50 +675,51 @@ static void __pci_disable_msix(int vecto
   3.274  
   3.275      BUG_ON(list_empty(&dev->msi_list));
   3.276  
   3.277 -    msi_free_vector(vector);
   3.278 +    writel(1, entry->mask_base + entry->msi_attrib.entry_nr
   3.279 +      * PCI_MSIX_ENTRY_SIZE
   3.280 +      + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
   3.281  
   3.282      pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
   3.283 -    spin_unlock(&dev->lock);
   3.284  }
   3.285  
   3.286 -int pci_enable_msi(struct msi_info *msi)
   3.287 +/*
   3.288 + * Notice: only construct the msi_desc
   3.289 + * no change to irq_desc here, and the interrupt is masked
   3.290 + */
   3.291 +int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
   3.292  {
   3.293 -    ASSERT(spin_is_locked(&irq_desc[msi->vector].lock));
   3.294 +    ASSERT(spin_is_locked(&pcidevs_lock));
   3.295  
   3.296 -    return  msi->table_base ? __pci_enable_msix(msi) :
   3.297 -        __pci_enable_msi(msi);
   3.298 +    return  msi->table_base ? __pci_enable_msix(msi, desc) :
   3.299 +        __pci_enable_msi(msi, desc);
   3.300  }
   3.301  
   3.302 -void pci_disable_msi(int vector)
   3.303 +/*
   3.304 + * Device only, no irq_desc
   3.305 + */
   3.306 +void pci_disable_msi(struct msi_desc *msi_desc)
   3.307  {
   3.308 -    irq_desc_t *desc = &irq_desc[vector];
   3.309 -    ASSERT(spin_is_locked(&desc->lock));
   3.310 -    if ( !desc->msi_desc )
   3.311 -        return;
   3.312 -
   3.313 -    if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
   3.314 -        __pci_disable_msi(vector);
   3.315 -    else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
   3.316 -        __pci_disable_msix(vector);
   3.317 +    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
   3.318 +        __pci_disable_msi(msi_desc);
   3.319 +    else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
   3.320 +        __pci_disable_msix(msi_desc);
   3.321  }
   3.322  
   3.323  static void msi_free_vectors(struct pci_dev* dev)
   3.324  {
   3.325      struct msi_desc *entry, *tmp;
   3.326      irq_desc_t *desc;
   3.327 -    unsigned long flags;
   3.328 +    unsigned long flags, vector;
   3.329  
   3.330 - retry:
   3.331      list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
   3.332      {
   3.333 -        desc = &irq_desc[entry->vector];
   3.334 +        vector = entry->vector;
   3.335 +        desc = &irq_desc[vector];
   3.336 +        pci_disable_msi(entry);
   3.337  
   3.338 -        local_irq_save(flags);
   3.339 -        if ( !spin_trylock(&desc->lock) )
   3.340 -        {
   3.341 -            local_irq_restore(flags);
   3.342 -            goto retry;
   3.343 -        }
   3.344 +        spin_lock_irqsave(&desc->lock, flags);
   3.345 +
   3.346 +        teardown_msi_vector(vector);
   3.347  
   3.348          if ( desc->handler == &pci_msi_type )
   3.349          {
   3.350 @@ -753,8 +728,8 @@ static void msi_free_vectors(struct pci_
   3.351              desc->handler = &no_irq_type;
   3.352          }
   3.353  
   3.354 -        msi_free_vector(entry->vector);
   3.355          spin_unlock_irqrestore(&desc->lock, flags);
   3.356 +        msi_free_vector(entry);
   3.357      }
   3.358  }
   3.359  
     4.1 --- a/xen/arch/x86/physdev.c	Thu Dec 11 11:40:10 2008 +0000
     4.2 +++ b/xen/arch/x86/physdev.c	Thu Dec 11 11:48:19 2008 +0000
     4.3 @@ -100,6 +100,7 @@ static int physdev_map_pirq(struct physd
     4.4              goto free_domain;
     4.5      }
     4.6  
     4.7 +    read_lock(&pcidevs_lock);
     4.8      /* Verify or get pirq. */
     4.9      spin_lock(&d->event_lock);
    4.10      if ( map->pirq < 0 )
    4.11 @@ -147,6 +148,7 @@ static int physdev_map_pirq(struct physd
    4.12  
    4.13  done:
    4.14      spin_unlock(&d->event_lock);
    4.15 +    read_unlock(&pcidevs_lock);
    4.16      if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) )
    4.17          free_irq_vector(vector);
    4.18  free_domain:
    4.19 @@ -170,9 +172,11 @@ static int physdev_unmap_pirq(struct phy
    4.20      if ( d == NULL )
    4.21          return -ESRCH;
    4.22  
    4.23 +    read_lock(&pcidevs_lock);
    4.24      spin_lock(&d->event_lock);
    4.25      ret = unmap_domain_pirq(d, unmap->pirq);
    4.26      spin_unlock(&d->event_lock);
    4.27 +    read_unlock(&pcidevs_lock);
    4.28  
    4.29      rcu_unlock_domain(d);
    4.30  
    4.31 @@ -341,10 +345,12 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
    4.32  
    4.33          irq_op.vector = assign_irq_vector(irq);
    4.34  
    4.35 +        read_lock(&pcidevs_lock);
    4.36          spin_lock(&dom0->event_lock);
    4.37          ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector,
    4.38                                MAP_PIRQ_TYPE_GSI, NULL);
    4.39          spin_unlock(&dom0->event_lock);
    4.40 +        read_unlock(&pcidevs_lock);
    4.41  
    4.42          if ( copy_to_guest(arg, &irq_op, 1) != 0 )
    4.43              ret = -EFAULT;
     5.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Thu Dec 11 11:40:10 2008 +0000
     5.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Thu Dec 11 11:48:19 2008 +0000
     5.3 @@ -60,6 +60,8 @@ void __dummy__(void)
     5.4      DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
     5.5      BLANK();
     5.6  
     5.7 +    OFFSET(irq_caps_offset, struct domain, irq_caps);
     5.8 +    OFFSET(next_in_list_offset, struct domain, next_in_list);
     5.9      OFFSET(VCPU_processor, struct vcpu, processor);
    5.10      OFFSET(VCPU_domain, struct vcpu, domain);
    5.11      OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
     6.1 --- a/xen/common/domain.c	Thu Dec 11 11:40:10 2008 +0000
     6.2 +++ b/xen/common/domain.c	Thu Dec 11 11:48:19 2008 +0000
     6.3 @@ -558,12 +558,12 @@ static void complete_domain_destroy(stru
     6.4          sched_destroy_vcpu(v);
     6.5      }
     6.6  
     6.7 -    rangeset_domain_destroy(d);
     6.8 -
     6.9      grant_table_destroy(d);
    6.10  
    6.11      arch_domain_destroy(d);
    6.12  
    6.13 +    rangeset_domain_destroy(d);
    6.14 +
    6.15      sched_destroy_domain(d);
    6.16  
    6.17      /* Free page used by xen oprofile buffer. */
     7.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Dec 11 11:40:10 2008 +0000
     7.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Dec 11 11:48:19 2008 +0000
     7.3 @@ -282,9 +282,13 @@ static int reassign_device( struct domai
     7.4      struct amd_iommu *iommu;
     7.5      int bdf;
     7.6  
     7.7 -    pdev = pci_lock_domain_pdev(source, bus, devfn);
     7.8 +    read_lock(&pcidevs_lock);
     7.9 +    pdev = pci_get_pdev_by_domain(source, bus, devfn);
    7.10      if ( !pdev )
    7.11 -	return -ENODEV;
    7.12 +    {
    7.13 +        read_unlock(&pcidevs_lock);
    7.14 +        return -ENODEV;
    7.15 +    }
    7.16  
    7.17      bdf = (bus << 8) | devfn;
    7.18      /* supported device? */
    7.19 @@ -293,8 +297,8 @@ static int reassign_device( struct domai
    7.20  
    7.21      if ( !iommu )
    7.22      {
    7.23 -	spin_unlock(&pdev->lock);
    7.24 -	amd_iov_error("Fail to find iommu."
    7.25 +        read_unlock(&pcidevs_lock);
    7.26 +        amd_iov_error("Fail to find iommu."
    7.27  		      " %x:%x.%x cannot be assigned to domain %d\n", 
    7.28  		      bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
    7.29  	return -ENODEV;
    7.30 @@ -302,9 +306,7 @@ static int reassign_device( struct domai
    7.31  
    7.32      amd_iommu_disable_domain_device(source, iommu, bdf);
    7.33  
    7.34 -    write_lock(&pcidevs_lock);
    7.35      list_move(&pdev->domain_list, &target->arch.pdev_list);
    7.36 -    write_unlock(&pcidevs_lock);
    7.37      pdev->domain = target;
    7.38  
    7.39      amd_iommu_setup_domain_device(target, iommu, bdf);
    7.40 @@ -312,7 +314,7 @@ static int reassign_device( struct domai
    7.41                   bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
    7.42                   source->domain_id, target->domain_id);
    7.43  
    7.44 -    spin_unlock(&pdev->lock);
    7.45 +    read_unlock(&pcidevs_lock);
    7.46      return 0;
    7.47  }
    7.48  
     8.1 --- a/xen/drivers/passthrough/iommu.c	Thu Dec 11 11:40:10 2008 +0000
     8.2 +++ b/xen/drivers/passthrough/iommu.c	Thu Dec 11 11:48:19 2008 +0000
     8.3 @@ -83,9 +83,12 @@ int iommu_domain_init(struct domain *dom
     8.4  int iommu_add_device(struct pci_dev *pdev)
     8.5  {
     8.6      struct hvm_iommu *hd;
     8.7 +
     8.8      if ( !pdev->domain )
     8.9          return -EINVAL;
    8.10  
    8.11 +    ASSERT(spin_is_locked(&pcidevs_lock));
    8.12 +
    8.13      hd = domain_hvm_iommu(pdev->domain);
    8.14      if ( !iommu_enabled || !hd->platform_ops )
    8.15          return 0;
    8.16 @@ -109,20 +112,24 @@ int iommu_remove_device(struct pci_dev *
    8.17  int assign_device(struct domain *d, u8 bus, u8 devfn)
    8.18  {
    8.19      struct hvm_iommu *hd = domain_hvm_iommu(d);
    8.20 -    int rc;
    8.21 +    int rc = 0;
    8.22  
    8.23      if ( !iommu_enabled || !hd->platform_ops )
    8.24          return 0;
    8.25  
    8.26 +    read_lock(&pcidevs_lock);
    8.27      if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) )
    8.28 -        return rc;
    8.29 +        goto done;
    8.30  
    8.31      if ( has_arch_pdevs(d) && !is_hvm_domain(d) && !need_iommu(d) )
    8.32      {
    8.33          d->need_iommu = 1;
    8.34 -        return iommu_populate_page_table(d);
    8.35 +        rc = iommu_populate_page_table(d);
    8.36 +        goto done;
    8.37      }
    8.38 -    return 0;
    8.39 +done:    
    8.40 +    read_unlock(&pcidevs_lock);
    8.41 +    return rc;
    8.42  }
    8.43  
    8.44  static int iommu_populate_page_table(struct domain *d)
    8.45 @@ -204,12 +211,29 @@ int iommu_unmap_page(struct domain *d, u
    8.46      return hd->platform_ops->unmap_page(d, gfn);
    8.47  }
    8.48  
    8.49 -void deassign_device(struct domain *d, u8 bus, u8 devfn)
    8.50 +int  deassign_device(struct domain *d, u8 bus, u8 devfn)
    8.51  {
    8.52      struct hvm_iommu *hd = domain_hvm_iommu(d);
    8.53 +    struct pci_dev *pdev = NULL;
    8.54  
    8.55      if ( !iommu_enabled || !hd->platform_ops )
    8.56 -        return;
    8.57 +        return -EINVAL;
    8.58 +
    8.59 +    read_lock(&pcidevs_lock);
    8.60 +    pdev = pci_get_pdev(bus, devfn);
    8.61 +    if (!pdev)
    8.62 +    {
    8.63 +        read_unlock(&pcidevs_lock);
    8.64 +        return -ENODEV;
    8.65 +    }
    8.66 +
    8.67 +    if (pdev->domain != d)
    8.68 +    {
    8.69 +        read_unlock(&pcidevs_lock);
    8.70 +        gdprintk(XENLOG_ERR VTDPREFIX,
    8.71 +                "IOMMU: deassign a device not owned\n");
    8.72 +       return -EINVAL;
    8.73 +    }
    8.74  
    8.75      hd->platform_ops->reassign_device(d, dom0, bus, devfn);
    8.76  
    8.77 @@ -218,6 +242,10 @@ void deassign_device(struct domain *d, u
    8.78          d->need_iommu = 0;
    8.79          hd->platform_ops->teardown(d);
    8.80      }
    8.81 +
    8.82 +    read_unlock(&pcidevs_lock);
    8.83 +
    8.84 +    return 0;
    8.85  }
    8.86  
    8.87  static int iommu_setup(void)
     9.1 --- a/xen/drivers/passthrough/pci.c	Thu Dec 11 11:40:10 2008 +0000
     9.2 +++ b/xen/drivers/passthrough/pci.c	Thu Dec 11 11:48:19 2008 +0000
     9.3 @@ -41,11 +41,11 @@ struct pci_dev *alloc_pdev(u8 bus, u8 de
     9.4      pdev = xmalloc(struct pci_dev);
     9.5      if ( !pdev )
     9.6          return NULL;
     9.7 +    memset(pdev, 0, sizeof(struct pci_dev));
     9.8  
     9.9      *((u8*) &pdev->bus) = bus;
    9.10      *((u8*) &pdev->devfn) = devfn;
    9.11      pdev->domain = NULL;
    9.12 -    spin_lock_init(&pdev->lock);
    9.13      INIT_LIST_HEAD(&pdev->msi_list);
    9.14      list_add(&pdev->alldevs_list, &alldevs_list);
    9.15  
    9.16 @@ -58,42 +58,35 @@ void free_pdev(struct pci_dev *pdev)
    9.17      xfree(pdev);
    9.18  }
    9.19  
    9.20 -struct pci_dev *pci_lock_pdev(int bus, int devfn)
    9.21 +struct pci_dev *pci_get_pdev(int bus, int devfn)
    9.22  {
    9.23 -    struct pci_dev *pdev;
    9.24 +    struct pci_dev *pdev = NULL;
    9.25  
    9.26 -    read_lock(&pcidevs_lock);
    9.27 +    ASSERT(spin_is_locked(&pcidevs_lock));
    9.28 +
    9.29      list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
    9.30          if ( (pdev->bus == bus || bus == -1) &&
    9.31               (pdev->devfn == devfn || devfn == -1) )
    9.32 -    {
    9.33 -        spin_lock(&pdev->lock);
    9.34 -        read_unlock(&pcidevs_lock);
    9.35 -        return pdev;
    9.36 -    }
    9.37 -    read_unlock(&pcidevs_lock);
    9.38 +        {
    9.39 +            return pdev;
    9.40 +        }
    9.41  
    9.42      return NULL;
    9.43  }
    9.44  
    9.45 -struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
    9.46 +struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn)
    9.47  {
    9.48 -    struct pci_dev *pdev;
    9.49 +    struct pci_dev *pdev = NULL;
    9.50 +
    9.51 +    ASSERT(spin_is_locked(&pcidevs_lock));
    9.52  
    9.53 -    read_lock(&pcidevs_lock);
    9.54 -    list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
    9.55 -    {
    9.56 -        spin_lock(&pdev->lock);
    9.57 -        if ( (pdev->bus == bus || bus == -1) &&
    9.58 -             (pdev->devfn == devfn || devfn == -1) &&
    9.59 -             (pdev->domain == d) )
    9.60 -        {
    9.61 -            read_unlock(&pcidevs_lock);
    9.62 -            return pdev;
    9.63 -        }
    9.64 -        spin_unlock(&pdev->lock);
    9.65 -    }
    9.66 -    read_unlock(&pcidevs_lock);
    9.67 +    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
    9.68 +         if ( (pdev->bus == bus || bus == -1) &&
    9.69 +              (pdev->devfn == devfn || devfn == -1) &&
    9.70 +              (pdev->domain == d) )
    9.71 +         {
    9.72 +             return pdev;
    9.73 +         }
    9.74  
    9.75      return NULL;
    9.76  }
    9.77 @@ -109,24 +102,20 @@ int pci_add_device(u8 bus, u8 devfn)
    9.78          goto out;
    9.79  
    9.80      ret = 0;
    9.81 -    spin_lock(&pdev->lock);
    9.82      if ( !pdev->domain )
    9.83      {
    9.84          pdev->domain = dom0;
    9.85          ret = iommu_add_device(pdev);
    9.86          if ( ret )
    9.87 -        {
    9.88 -            spin_unlock(&pdev->lock);
    9.89              goto out;
    9.90 -        }
    9.91 +
    9.92          list_add(&pdev->domain_list, &dom0->arch.pdev_list);
    9.93      }
    9.94 -    spin_unlock(&pdev->lock);
    9.95 -    printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
    9.96 -           PCI_SLOT(devfn), PCI_FUNC(devfn));
    9.97  
    9.98  out:
    9.99      write_unlock(&pcidevs_lock);
   9.100 +    printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
   9.101 +           PCI_SLOT(devfn), PCI_FUNC(devfn));
   9.102      return ret;
   9.103  }
   9.104  
   9.105 @@ -139,7 +128,6 @@ int pci_remove_device(u8 bus, u8 devfn)
   9.106      list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
   9.107          if ( pdev->bus == bus && pdev->devfn == devfn )
   9.108          {
   9.109 -            spin_lock(&pdev->lock);
   9.110              ret = iommu_remove_device(pdev);
   9.111              if ( pdev->domain )
   9.112                  list_del(&pdev->domain_list);
   9.113 @@ -199,14 +187,15 @@ void pci_release_devices(struct domain *
   9.114      struct pci_dev *pdev;
   9.115      u8 bus, devfn;
   9.116  
   9.117 +    read_lock(&pcidevs_lock);
   9.118      pci_clean_dpci_irqs(d);
   9.119 -    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
   9.120 +    while ( (pdev = pci_get_pdev_by_domain(d, -1, -1)) )
   9.121      {
   9.122          pci_cleanup_msi(pdev);
   9.123          bus = pdev->bus; devfn = pdev->devfn;
   9.124 -        spin_unlock(&pdev->lock);
   9.125          deassign_device(d, bus, devfn);
   9.126      }
   9.127 +    read_unlock(&pcidevs_lock);
   9.128  }
   9.129  
   9.130  #ifdef SUPPORT_MSI_REMAPPING
   9.131 @@ -220,14 +209,12 @@ static void dump_pci_devices(unsigned ch
   9.132  
   9.133      list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
   9.134      {
   9.135 -        spin_lock(&pdev->lock);
   9.136          printk("%02x:%02x.%x - dom %-3d - MSIs < ",
   9.137                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
   9.138                 pdev->domain ? pdev->domain->domain_id : -1);
   9.139          list_for_each_entry ( msi, &pdev->msi_list, list )
   9.140                 printk("%d ", msi->vector);
   9.141          printk(">\n");
   9.142 -        spin_unlock(&pdev->lock);
   9.143      }
   9.144  
   9.145      read_unlock(&pcidevs_lock);
    10.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Thu Dec 11 11:40:10 2008 +0000
    10.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Thu Dec 11 11:48:19 2008 +0000
    10.3 @@ -49,15 +49,14 @@ static void setup_dom0_rmrr(struct domai
    10.4  static void context_set_domain_id(struct context_entry *context,
    10.5                                    struct domain *d)
    10.6  {
    10.7 -    unsigned long flags;
    10.8      domid_t iommu_domid = domain_iommu_domid(d);
    10.9  
   10.10      if ( iommu_domid == 0 )
   10.11      {
   10.12 -        spin_lock_irqsave(&domid_bitmap_lock, flags);
   10.13 +        spin_lock(&domid_bitmap_lock);
   10.14          iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
   10.15          set_bit(iommu_domid, domid_bitmap);
   10.16 -        spin_unlock_irqrestore(&domid_bitmap_lock, flags);
   10.17 +        spin_unlock(&domid_bitmap_lock);
   10.18          d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
   10.19      }
   10.20  
   10.21 @@ -140,10 +139,9 @@ int nr_iommus;
   10.22  static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
   10.23  {
   10.24      struct root_entry *root, *root_entries;
   10.25 -    unsigned long flags;
   10.26      u64 maddr;
   10.27  
   10.28 -    spin_lock_irqsave(&iommu->lock, flags);
   10.29 +    ASSERT(spin_is_locked(&iommu->lock));
   10.30      root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
   10.31      root = &root_entries[bus];
   10.32      if ( !root_present(*root) )
   10.33 @@ -152,7 +150,6 @@ static u64 bus_to_context_maddr(struct i
   10.34          if ( maddr == 0 )
   10.35          {
   10.36              unmap_vtd_domain_page(root_entries);
   10.37 -            spin_unlock_irqrestore(&iommu->lock, flags);
   10.38              return 0;
   10.39          }
   10.40          set_root_value(*root, maddr);
   10.41 @@ -161,36 +158,9 @@ static u64 bus_to_context_maddr(struct i
   10.42      }
   10.43      maddr = (u64) get_context_addr(*root);
   10.44      unmap_vtd_domain_page(root_entries);
   10.45 -    spin_unlock_irqrestore(&iommu->lock, flags);
   10.46      return maddr;
   10.47  }
   10.48  
   10.49 -static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
   10.50 -{
   10.51 -    struct root_entry *root, *root_entries;
   10.52 -    struct context_entry *context;
   10.53 -    u64 context_maddr;
   10.54 -    int ret;
   10.55 -    unsigned long flags;
   10.56 -
   10.57 -    spin_lock_irqsave(&iommu->lock, flags);
   10.58 -    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
   10.59 -    root = &root_entries[bus];
   10.60 -    if ( !root_present(*root) )
   10.61 -    {
   10.62 -        ret = 0;
   10.63 -        goto out;
   10.64 -    }
   10.65 -    context_maddr = get_context_addr(*root);
   10.66 -    context = (struct context_entry *)map_vtd_domain_page(context_maddr);
   10.67 -    ret = context_present(context[devfn]);
   10.68 -    unmap_vtd_domain_page(context);
   10.69 - out:
   10.70 -    unmap_vtd_domain_page(root_entries);
   10.71 -    spin_unlock_irqrestore(&iommu->lock, flags);
   10.72 -    return ret;
   10.73 -}
   10.74 -
   10.75  static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
   10.76  {
   10.77      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   10.78 @@ -198,12 +168,11 @@ static u64 addr_to_dma_page_maddr(struct
   10.79      struct dma_pte *parent, *pte = NULL;
   10.80      int level = agaw_to_level(hd->agaw);
   10.81      int offset;
   10.82 -    unsigned long flags;
   10.83      u64 pte_maddr = 0, maddr;
   10.84      u64 *vaddr = NULL;
   10.85  
   10.86      addr &= (((u64)1) << addr_width) - 1;
   10.87 -    spin_lock_irqsave(&hd->mapping_lock, flags);
   10.88 +    ASSERT(spin_is_locked(&hd->mapping_lock));
   10.89      if ( hd->pgd_maddr == 0 )
   10.90          if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) )
   10.91              goto out;
   10.92 @@ -252,7 +221,6 @@ static u64 addr_to_dma_page_maddr(struct
   10.93  
   10.94      unmap_vtd_domain_page(parent);
   10.95   out:
   10.96 -    spin_unlock_irqrestore(&hd->mapping_lock, flags);
   10.97      return pte_maddr;
   10.98  }
   10.99  
  10.100 @@ -536,22 +504,30 @@ static void dma_pte_clear_one(struct dom
  10.101      struct dma_pte *page = NULL, *pte = NULL;
  10.102      u64 pg_maddr;
  10.103  
  10.104 +    spin_lock(&hd->mapping_lock);
  10.105      /* get last level pte */
  10.106      pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
  10.107      if ( pg_maddr == 0 )
  10.108 +    {
  10.109 +        spin_unlock(&hd->mapping_lock);
  10.110          return;
  10.111 +    }
  10.112 +
  10.113      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
  10.114      pte = page + address_level_offset(addr, 1);
  10.115  
  10.116      if ( !dma_pte_present(*pte) )
  10.117      {
  10.118 +        spin_unlock(&hd->mapping_lock);
  10.119          unmap_vtd_domain_page(page);
  10.120          return;
  10.121      }
  10.122  
  10.123      dma_clear_pte(*pte); 
  10.124 +    spin_unlock(&hd->mapping_lock);
  10.125      iommu_flush_cache_entry(pte);
  10.126  
  10.127 +    /* No need pcidevs_lock here since do that on assign/deassign device*/
  10.128      for_each_drhd_unit ( drhd )
  10.129      {
  10.130          iommu = drhd->iommu;
  10.131 @@ -598,16 +574,18 @@ static int iommu_set_root_entry(struct i
  10.132      unsigned long flags;
  10.133      s_time_t start_time;
  10.134  
  10.135 -    spin_lock_irqsave(&iommu->register_lock, flags);
  10.136 +    spin_lock(&iommu->lock);
  10.137  
  10.138      if ( iommu->root_maddr == 0 )
  10.139          iommu->root_maddr = alloc_pgtable_maddr(NULL);
  10.140      if ( iommu->root_maddr == 0 )
  10.141      {
  10.142 -        spin_unlock_irqrestore(&iommu->register_lock, flags);
  10.143 +        spin_unlock(&iommu->lock);
  10.144          return -ENOMEM;
  10.145      }
  10.146  
  10.147 +    spin_unlock(&iommu->lock);
  10.148 +    spin_lock_irqsave(&iommu->register_lock, flags);
  10.149      dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
  10.150      cmd = iommu->gcmd | DMA_GCMD_SRTP;
  10.151      dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
  10.152 @@ -742,9 +720,7 @@ static void iommu_page_fault(int vector,
  10.153      dprintk(XENLOG_WARNING VTDPREFIX,
  10.154              "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
  10.155  
  10.156 -    spin_lock_irqsave(&iommu->register_lock, flags);
  10.157      fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
  10.158 -    spin_unlock_irqrestore(&iommu->register_lock, flags);
  10.159  
  10.160      iommu_fault_status(fault_status);
  10.161  
  10.162 @@ -1057,21 +1033,30 @@ static int domain_context_mapping_one(
  10.163  {
  10.164      struct hvm_iommu *hd = domain_hvm_iommu(domain);
  10.165      struct context_entry *context, *context_entries;
  10.166 -    unsigned long flags;
  10.167      u64 maddr, pgd_maddr;
  10.168 +    struct pci_dev *pdev = NULL;
  10.169      int agaw;
  10.170  
  10.171 +    ASSERT(spin_is_locked(&pcidevs_lock));
  10.172 +    spin_lock(&iommu->lock);
  10.173      maddr = bus_to_context_maddr(iommu, bus);
  10.174      context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
  10.175      context = &context_entries[devfn];
  10.176  
  10.177      if ( context_present(*context) )
  10.178      {
  10.179 +        int res = 0;
  10.180 +
  10.181 +        pdev = pci_get_pdev(bus, devfn);
  10.182 +        if (!pdev)
  10.183 +            res = -ENODEV;
  10.184 +        else if (pdev->domain != domain)
  10.185 +            res = -EINVAL;
  10.186          unmap_vtd_domain_page(context_entries);
  10.187 -        return 0;
  10.188 +        spin_unlock(&iommu->lock);
  10.189 +        return res;
  10.190      }
  10.191  
  10.192 -    spin_lock_irqsave(&iommu->lock, flags);
  10.193      if ( iommu_passthrough &&
  10.194           ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
  10.195      {
  10.196 @@ -1080,6 +1065,8 @@ static int domain_context_mapping_one(
  10.197      }
  10.198      else
  10.199      {
  10.200 +        spin_lock(&hd->mapping_lock);
  10.201 +
  10.202          /* Ensure we have pagetables allocated down to leaf PTE. */
  10.203          if ( hd->pgd_maddr == 0 )
  10.204          {
  10.205 @@ -1087,8 +1074,9 @@ static int domain_context_mapping_one(
  10.206              if ( hd->pgd_maddr == 0 )
  10.207              {
  10.208              nomem:
  10.209 +                spin_unlock(&hd->mapping_lock);
  10.210 +                spin_unlock(&iommu->lock);
  10.211                  unmap_vtd_domain_page(context_entries);
  10.212 -                spin_unlock_irqrestore(&iommu->lock, flags);
  10.213                  return -ENOMEM;
  10.214              }
  10.215          }
  10.216 @@ -1108,6 +1096,7 @@ static int domain_context_mapping_one(
  10.217  
  10.218          context_set_address_root(*context, pgd_maddr);
  10.219          context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
  10.220 +        spin_unlock(&hd->mapping_lock);
  10.221      }
  10.222  
  10.223      /*
  10.224 @@ -1119,8 +1108,7 @@ static int domain_context_mapping_one(
  10.225      context_set_fault_enable(*context);
  10.226      context_set_present(*context);
  10.227      iommu_flush_cache_entry(context);
  10.228 -
  10.229 -    unmap_vtd_domain_page(context_entries);
  10.230 +    spin_unlock(&iommu->lock);
  10.231  
  10.232      /* Context entry was previously non-present (with domid 0). */
  10.233      if ( iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
  10.234 @@ -1130,7 +1118,8 @@ static int domain_context_mapping_one(
  10.235          iommu_flush_iotlb_dsi(iommu, 0, 1);
  10.236  
  10.237      set_bit(iommu->index, &hd->iommu_bitmap);
  10.238 -    spin_unlock_irqrestore(&iommu->lock, flags);
  10.239 +
  10.240 +    unmap_vtd_domain_page(context_entries);
  10.241  
  10.242      return 0;
  10.243  }
  10.244 @@ -1174,17 +1163,15 @@ int pdev_type(u8 bus, u8 devfn)
  10.245  }
  10.246  
  10.247  #define MAX_BUSES 256
  10.248 +static DEFINE_SPINLOCK(bus2bridge_lock);
  10.249  static struct { u8 map, bus, devfn; } bus2bridge[MAX_BUSES];
  10.250  
  10.251 -static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
  10.252 +static int _find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
  10.253  {
  10.254      int cnt = 0;
  10.255      *secbus = *bus;
  10.256  
  10.257 -    if ( *bus == 0 )
  10.258 -        /* assume integrated PCI devices in RC have valid requester-id */
  10.259 -        return 1;
  10.260 -
  10.261 +    ASSERT(spin_is_locked(&bus2bridge_lock));
  10.262      if ( !bus2bridge[*bus].map )
  10.263          return 0;
  10.264  
  10.265 @@ -1200,6 +1187,21 @@ static int find_pcie_endpoint(u8 *bus, u
  10.266      return 1;
  10.267  }
  10.268  
  10.269 +static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
  10.270 +{
  10.271 +    int ret = 0;
  10.272 +
  10.273 +    if ( *bus == 0 )
  10.274 +        /* assume integrated PCI devices in RC have valid requester-id */
  10.275 +        return 1;
  10.276 +
  10.277 +    spin_lock(&bus2bridge_lock);
  10.278 +    ret = _find_pcie_endpoint(bus, devfn, secbus);
  10.279 +    spin_unlock(&bus2bridge_lock);
  10.280 +
  10.281 +    return ret;
  10.282 +}
  10.283 +
  10.284  static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn)
  10.285  {
  10.286      struct acpi_drhd_unit *drhd;
  10.287 @@ -1212,6 +1214,8 @@ static int domain_context_mapping(struct
  10.288      if ( !drhd )
  10.289          return -ENODEV;
  10.290  
  10.291 +    ASSERT(spin_is_locked(&pcidevs_lock));
  10.292 +
  10.293      type = pdev_type(bus, devfn);
  10.294      switch ( type )
  10.295      {
  10.296 @@ -1226,12 +1230,14 @@ static int domain_context_mapping(struct
  10.297          if ( type == DEV_TYPE_PCIe_BRIDGE )
  10.298              break;
  10.299  
  10.300 +        spin_lock(&bus2bridge_lock);
  10.301          for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
  10.302          {
  10.303              bus2bridge[sec_bus].map = 1;
  10.304              bus2bridge[sec_bus].bus =  bus;
  10.305              bus2bridge[sec_bus].devfn =  devfn;
  10.306          }
  10.307 +        spin_unlock(&bus2bridge_lock);
  10.308          break;
  10.309  
  10.310      case DEV_TYPE_PCIe_ENDPOINT:
  10.311 @@ -1290,20 +1296,22 @@ static int domain_context_unmap_one(
  10.312      u8 bus, u8 devfn)
  10.313  {
  10.314      struct context_entry *context, *context_entries;
  10.315 -    unsigned long flags;
  10.316      u64 maddr;
  10.317  
  10.318 +    ASSERT(spin_is_locked(&pcidevs_lock));
  10.319 +    spin_lock(&iommu->lock);
  10.320 +
  10.321      maddr = bus_to_context_maddr(iommu, bus);
  10.322      context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
  10.323      context = &context_entries[devfn];
  10.324  
  10.325      if ( !context_present(*context) )
  10.326      {
  10.327 +        spin_unlock(&iommu->lock);
  10.328          unmap_vtd_domain_page(context_entries);
  10.329          return 0;
  10.330      }
  10.331  
  10.332 -    spin_lock_irqsave(&iommu->lock, flags);
  10.333      context_clear_present(*context);
  10.334      context_clear_entry(*context);
  10.335      iommu_flush_cache_entry(context);
  10.336 @@ -1315,8 +1323,8 @@ static int domain_context_unmap_one(
  10.337      else
  10.338          iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
  10.339  
  10.340 +    spin_unlock(&iommu->lock);
  10.341      unmap_vtd_domain_page(context_entries);
  10.342 -    spin_unlock_irqrestore(&iommu->lock, flags);
  10.343  
  10.344      return 0;
  10.345  }
  10.346 @@ -1380,7 +1388,10 @@ static int reassign_device_ownership(
  10.347      struct iommu *pdev_iommu;
  10.348      int ret, found = 0;
  10.349  
  10.350 -    if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
  10.351 +    ASSERT(spin_is_locked(&pcidevs_lock));
  10.352 +    pdev = pci_get_pdev_by_domain(source, bus, devfn);
  10.353 +
  10.354 +    if (!pdev)
  10.355          return -ENODEV;
  10.356  
  10.357      drhd = acpi_find_matched_drhd_unit(bus, devfn);
  10.358 @@ -1391,14 +1402,9 @@ static int reassign_device_ownership(
  10.359      if ( ret )
  10.360          return ret;
  10.361  
  10.362 -    write_lock(&pcidevs_lock);
  10.363      list_move(&pdev->domain_list, &target->arch.pdev_list);
  10.364 -    write_unlock(&pcidevs_lock);
  10.365      pdev->domain = target;
  10.366  
  10.367 -    spin_unlock(&pdev->lock);
  10.368 -
  10.369 -    read_lock(&pcidevs_lock);
  10.370      for_each_pdev ( source, pdev )
  10.371      {
  10.372          drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
  10.373 @@ -1408,7 +1414,6 @@ static int reassign_device_ownership(
  10.374              break;
  10.375          }
  10.376      }
  10.377 -    read_unlock(&pcidevs_lock);
  10.378  
  10.379      if ( !found )
  10.380          clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
  10.381 @@ -1423,20 +1428,13 @@ void iommu_domain_teardown(struct domain
  10.382      if ( list_empty(&acpi_drhd_units) )
  10.383          return;
  10.384  
  10.385 +    ASSERT(spin_is_locked(&pcidevs_lock));
  10.386 +    spin_lock(&hd->mapping_lock);
  10.387      iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
  10.388      hd->pgd_maddr = 0;
  10.389 -    iommu_domid_release(d);
  10.390 -}
  10.391 +    spin_unlock(&hd->mapping_lock);
  10.392  
  10.393 -static int domain_context_mapped(u8 bus, u8 devfn)
  10.394 -{
  10.395 -    struct acpi_drhd_unit *drhd;
  10.396 -
  10.397 -    for_each_drhd_unit ( drhd )
  10.398 -        if ( device_context_mapped(drhd->iommu, bus, devfn) )
  10.399 -            return 1;
  10.400 -
  10.401 -    return 0;
  10.402 +    iommu_domid_release(d);
  10.403  }
  10.404  
  10.405  int intel_iommu_map_page(
  10.406 @@ -1457,17 +1455,27 @@ int intel_iommu_map_page(
  10.407           ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
  10.408          return 0;
  10.409  
  10.410 +    spin_lock(&hd->mapping_lock);
  10.411 +
  10.412      pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
  10.413      if ( pg_maddr == 0 )
  10.414 +    {
  10.415 +        spin_unlock(&hd->mapping_lock);
  10.416          return -ENOMEM;
  10.417 +    }
  10.418      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
  10.419      pte = page + (gfn & LEVEL_MASK);
  10.420      pte_present = dma_pte_present(*pte);
  10.421      dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
  10.422      dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
  10.423      iommu_flush_cache_entry(pte);
  10.424 +    spin_unlock(&hd->mapping_lock);
  10.425      unmap_vtd_domain_page(page);
  10.426  
  10.427 +    /*
  10.428 +     * No need pcideves_lock here because we have flush
  10.429 +     * when assign/deassign device
  10.430 +     */
  10.431      for_each_drhd_unit ( drhd )
  10.432      {
  10.433          iommu = drhd->iommu;
  10.434 @@ -1510,6 +1518,7 @@ static int iommu_prepare_rmrr_dev(struct
  10.435      u64 base, end;
  10.436      unsigned long base_pfn, end_pfn;
  10.437  
  10.438 +    ASSERT(spin_is_locked(&pcidevs_lock));
  10.439      ASSERT(rmrr->base_address < rmrr->end_address);
  10.440      
  10.441      base = rmrr->base_address & PAGE_MASK_4K;
  10.442 @@ -1523,8 +1532,7 @@ static int iommu_prepare_rmrr_dev(struct
  10.443          base_pfn++;
  10.444      }
  10.445  
  10.446 -    if ( domain_context_mapped(bus, devfn) == 0 )
  10.447 -        ret = domain_context_mapping(d, bus, devfn);
  10.448 +    ret = domain_context_mapping(d, bus, devfn);
  10.449  
  10.450      return ret;
  10.451  }
  10.452 @@ -1535,6 +1543,8 @@ static int intel_iommu_add_device(struct
  10.453      u16 bdf;
  10.454      int ret, i;
  10.455  
  10.456 +    ASSERT(spin_is_locked(&pcidevs_lock));
  10.457 +
  10.458      if ( !pdev->domain )
  10.459          return -EINVAL;
  10.460  
  10.461 @@ -1689,6 +1699,7 @@ static void setup_dom0_rmrr(struct domai
  10.462      u16 bdf;
  10.463      int ret, i;
  10.464  
  10.465 +    read_lock(&pcidevs_lock);
  10.466      for_each_rmrr_device ( rmrr, bdf, i )
  10.467      {
  10.468          ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf));
  10.469 @@ -1696,6 +1707,7 @@ static void setup_dom0_rmrr(struct domai
  10.470              gdprintk(XENLOG_ERR VTDPREFIX,
  10.471                       "IOMMU: mapping reserved region failed\n");
  10.472      }
  10.473 +    read_unlock(&pcidevs_lock);
  10.474  }
  10.475  
  10.476  int intel_vtd_setup(void)
  10.477 @@ -1748,27 +1760,43 @@ int device_assigned(u8 bus, u8 devfn)
  10.478  {
  10.479      struct pci_dev *pdev;
  10.480  
  10.481 -    if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
  10.482 +    read_lock(&pcidevs_lock);
  10.483 +    pdev = pci_get_pdev_by_domain(dom0, bus, devfn);
  10.484 +    if (!pdev)
  10.485      {
  10.486 -        spin_unlock(&pdev->lock);
  10.487 -        return 0;
  10.488 +        read_unlock(&pcidevs_lock);
  10.489 +        return -1;
  10.490      }
  10.491  
  10.492 -    return 1;
  10.493 +    read_unlock(&pcidevs_lock);
  10.494 +    return 0;
  10.495  }
  10.496  
  10.497  int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
  10.498  {
  10.499      struct acpi_rmrr_unit *rmrr;
  10.500      int ret = 0, i;
  10.501 +    struct pci_dev *pdev;
  10.502      u16 bdf;
  10.503  
  10.504      if ( list_empty(&acpi_drhd_units) )
  10.505          return -ENODEV;
  10.506  
  10.507 +    ASSERT(spin_is_locked(&pcidevs_lock));
  10.508 +    pdev = pci_get_pdev(bus, devfn);
  10.509 +    if (!pdev)
  10.510 +        return -ENODEV;
  10.511 +
  10.512 +    if (pdev->domain != dom0)
  10.513 +    {
  10.514 +        gdprintk(XENLOG_ERR VTDPREFIX,
  10.515 +                "IOMMU: assign a assigned device\n");
  10.516 +       return -EBUSY;
  10.517 +    }
  10.518 +
  10.519      ret = reassign_device_ownership(dom0, d, bus, devfn);
  10.520      if ( ret )
  10.521 -        return ret;
  10.522 +        goto done;
  10.523  
  10.524      /* Setup rmrr identity mapping */
  10.525      for_each_rmrr_device( rmrr, bdf, i )
  10.526 @@ -1779,16 +1807,20 @@ int intel_iommu_assign_device(struct dom
  10.527               * ignore USB RMRR temporarily.
  10.528               */
  10.529              if ( is_usb_device(bus, devfn) )
  10.530 -                return 0;
  10.531 +            {
  10.532 +                ret = 0;
  10.533 +                goto done;
  10.534 +            }
  10.535  
  10.536              ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn);
  10.537              if ( ret )
  10.538                  gdprintk(XENLOG_ERR VTDPREFIX,
  10.539                           "IOMMU: mapping reserved region failed\n");
  10.540 -            return ret;
  10.541 +            goto done; 
  10.542          }
  10.543      }
  10.544  
  10.545 +done:
  10.546      return ret;
  10.547  }
  10.548  
    11.1 --- a/xen/include/asm-x86/msi.h	Thu Dec 11 11:40:10 2008 +0000
    11.2 +++ b/xen/include/asm-x86/msi.h	Thu Dec 11 11:48:19 2008 +0000
    11.3 @@ -68,13 +68,17 @@ struct msi_msg {
    11.4  	u32	data;		/* 16 bits of msi message data */
    11.5  };
    11.6  
    11.7 +struct msi_desc;
    11.8  /* Helper functions */
    11.9  extern void mask_msi_vector(unsigned int vector);
   11.10  extern void unmask_msi_vector(unsigned int vector);
   11.11  extern void set_msi_affinity(unsigned int vector, cpumask_t mask);
   11.12 -extern int pci_enable_msi(struct msi_info *msi);
   11.13 -extern void pci_disable_msi(int vector);
   11.14 +extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
   11.15 +extern void pci_disable_msi(struct msi_desc *desc);
   11.16  extern void pci_cleanup_msi(struct pci_dev *pdev);
   11.17 +extern int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
   11.18 +extern void teardown_msi_vector(int vector);
   11.19 +extern int msi_free_vector(struct msi_desc *entry);
   11.20  
   11.21  struct msi_desc {
   11.22  	struct {
    12.1 --- a/xen/include/xen/iommu.h	Thu Dec 11 11:40:10 2008 +0000
    12.2 +++ b/xen/include/xen/iommu.h	Thu Dec 11 11:48:19 2008 +0000
    12.3 @@ -62,7 +62,7 @@ int iommu_domain_init(struct domain *d);
    12.4  void iommu_domain_destroy(struct domain *d);
    12.5  int device_assigned(u8 bus, u8 devfn);
    12.6  int assign_device(struct domain *d, u8 bus, u8 devfn);
    12.7 -void deassign_device(struct domain *d, u8 bus, u8 devfn);
    12.8 +int deassign_device(struct domain *d, u8 bus, u8 devfn);
    12.9  int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
   12.10      XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
   12.11  int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
    13.1 --- a/xen/include/xen/pci.h	Thu Dec 11 11:40:10 2008 +0000
    13.2 +++ b/xen/include/xen/pci.h	Thu Dec 11 11:48:19 2008 +0000
    13.3 @@ -36,7 +36,6 @@ struct pci_dev {
    13.4      struct domain *domain;
    13.5      const u8 bus;
    13.6      const u8 devfn;
    13.7 -    spinlock_t lock;
    13.8  };
    13.9  
   13.10  #define for_each_pdev(domain, pdev) \
   13.11 @@ -59,6 +58,8 @@ struct pci_dev *pci_lock_domain_pdev(str
   13.12  void pci_release_devices(struct domain *d);
   13.13  int pci_add_device(u8 bus, u8 devfn);
   13.14  int pci_remove_device(u8 bus, u8 devfn);
   13.15 +struct pci_dev *pci_get_pdev(int bus, int devfn);
   13.16 +struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
   13.17  
   13.18  uint8_t pci_conf_read8(
   13.19      unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);