ia64/xen-unstable

changeset 19217:2e6de0f50f3f

passthrough: fix MSI-X table fixmap allocation

Currently, a fixmap page is allocated for the MSI-X table once per vector,
so the available fixmap pages are depleted when assigning devices
with a large number of vectors. This patch fixes that, and also fixes a bug
that prevents MSI-X tables spanning multiple pages from working properly.

MSI-X table fixmap pages are now allocated per device: if the table
entries of two MSI-X vectors share the same page, that page is only
mapped into the fixmap once. A reference count is maintained per page so
that it can be unmapped once all the vectors using it are freed.

This patch also changes the meaning of msi_desc->mask_base from the virtual
address of the start of the MSI-X table to the virtual address of the target
entry. The former is currently buggy (it always maps only the first page,
but MSI-X supports up to 2048 entries) and can't handle separately
allocated pages.

Signed-off-by: Qing He <qing.he@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Feb 17 11:06:16 2009 +0000 (2009-02-17)
parents cb8ece5d7647
children 261ccf78bea6
files xen/arch/x86/msi.c xen/drivers/passthrough/pci.c xen/include/asm-x86/fixmap.h xen/include/asm-x86/msi.h xen/include/xen/pci.h
line diff
     1.1 --- a/xen/arch/x86/msi.c	Tue Feb 17 11:04:08 2009 +0000
     1.2 +++ b/xen/arch/x86/msi.c	Tue Feb 17 11:06:16 2009 +0000
     1.3 @@ -29,17 +29,17 @@
     1.4  
     1.5  /* bitmap indicate which fixed map is free */
     1.6  DEFINE_SPINLOCK(msix_fixmap_lock);
     1.7 -DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES);
     1.8 +DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
     1.9  
    1.10  static int msix_fixmap_alloc(void)
    1.11  {
    1.12 -    int i, rc = -1;
    1.13 +    int i, rc = -ENOMEM;
    1.14  
    1.15      spin_lock(&msix_fixmap_lock);
    1.16 -    for ( i = 0; i < MAX_MSIX_PAGES; i++ )
    1.17 +    for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
    1.18          if ( !test_bit(i, &msix_fixmap_pages) )
    1.19              break;
    1.20 -    if ( i == MAX_MSIX_PAGES )
    1.21 +    if ( i == FIX_MSIX_MAX_PAGES )
    1.22          goto out;
    1.23      rc = FIX_MSIX_IO_RESERV_BASE + i;
    1.24      set_bit(i, &msix_fixmap_pages);
    1.25 @@ -51,8 +51,66 @@ static int msix_fixmap_alloc(void)
    1.26  
    1.27  static void msix_fixmap_free(int idx)
    1.28  {
    1.29 +    spin_lock(&msix_fixmap_lock);
    1.30      if ( idx >= FIX_MSIX_IO_RESERV_BASE )
    1.31          clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
    1.32 +    spin_unlock(&msix_fixmap_lock);
    1.33 +}
    1.34 +
    1.35 +static int msix_get_fixmap(struct pci_dev *dev, unsigned long table_paddr,
    1.36 +                           unsigned long entry_paddr)
    1.37 +{
    1.38 +    int nr_page, idx;
    1.39 +
    1.40 +    nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);
    1.41 +
    1.42 +    if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
    1.43 +        return -EINVAL;
    1.44 +
    1.45 +    spin_lock(&dev->msix_table_lock);
    1.46 +    if ( dev->msix_table_refcnt[nr_page]++ == 0 )
    1.47 +    {
    1.48 +        idx = msix_fixmap_alloc();
    1.49 +        if ( idx < 0 )
    1.50 +        {
    1.51 +            dev->msix_table_refcnt[nr_page]--;
    1.52 +            goto out;
    1.53 +        }
    1.54 +        set_fixmap_nocache(idx, entry_paddr);
    1.55 +        dev->msix_table_idx[nr_page] = idx;
    1.56 +    }
    1.57 +    else
    1.58 +        idx = dev->msix_table_idx[nr_page];
    1.59 +
    1.60 + out:
    1.61 +    spin_unlock(&dev->msix_table_lock);
    1.62 +    return idx;
    1.63 +}
    1.64 +
    1.65 +static void msix_put_fixmap(struct pci_dev *dev, int idx)
    1.66 +{
    1.67 +    int i;
    1.68 +    unsigned long start;
    1.69 +
    1.70 +    spin_lock(&dev->msix_table_lock);
    1.71 +    for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
    1.72 +    {
    1.73 +        if ( dev->msix_table_idx[i] == idx )
    1.74 +            break;
    1.75 +    }
    1.76 +    if ( i == MAX_MSIX_TABLE_PAGES )
    1.77 +        goto out;
    1.78 +
    1.79 +    if ( --dev->msix_table_refcnt[i] == 0 )
    1.80 +    {
    1.81 +        start = fix_to_virt(idx);
    1.82 +        destroy_xen_mappings(start, start + PAGE_SIZE);
    1.83 +        msix_fixmap_free(idx);
    1.84 +        dev->msix_table_idx[i] = 0;
    1.85 +    }
    1.86 +
    1.87 + out:
    1.88 +    spin_unlock(&dev->msix_table_lock);
    1.89  }
    1.90  
    1.91  /*
    1.92 @@ -122,8 +180,7 @@ static void read_msi_msg(struct msi_desc
    1.93      case PCI_CAP_ID_MSIX:
    1.94      {
    1.95          void __iomem *base;
    1.96 -        base = entry->mask_base +
    1.97 -            entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
    1.98 +        base = entry->mask_base;
    1.99  
   1.100          msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
   1.101          msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
   1.102 @@ -199,8 +256,7 @@ static void write_msi_msg(struct msi_des
   1.103      case PCI_CAP_ID_MSIX:
   1.104      {
   1.105          void __iomem *base;
   1.106 -        base = entry->mask_base +
   1.107 -            entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
   1.108 +        base = entry->mask_base;
   1.109  
   1.110          writel(msg->address_lo,
   1.111                 base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
   1.112 @@ -288,8 +344,7 @@ static void msix_flush_writes(unsigned i
   1.113          break;
   1.114      case PCI_CAP_ID_MSIX:
   1.115      {
   1.116 -        int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
   1.117 -            PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
   1.118 +        int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
   1.119          readl(entry->mask_base + offset);
   1.120          break;
   1.121      }
   1.122 @@ -330,8 +385,7 @@ static void msi_set_mask_bit(unsigned in
   1.123          break;
   1.124      case PCI_CAP_ID_MSIX:
   1.125      {
   1.126 -        int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
   1.127 -            PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
   1.128 +        int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
   1.129          writel(flag, entry->mask_base + offset);
   1.130          readl(entry->mask_base + offset);
   1.131          break;
   1.132 @@ -392,13 +446,10 @@ int msi_free_vector(struct msi_desc *ent
   1.133      {
   1.134          unsigned long start;
   1.135  
   1.136 -        writel(1, entry->mask_base + entry->msi_attrib.entry_nr
   1.137 -               * PCI_MSIX_ENTRY_SIZE
   1.138 -               + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
   1.139 +        writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
   1.140  
   1.141          start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
   1.142 -        msix_fixmap_free(virt_to_fix(start));
   1.143 -        destroy_xen_mappings(start, start + PAGE_SIZE);
   1.144 +        msix_put_fixmap(entry->dev, virt_to_fix(start));
   1.145      }
   1.146      list_del(&entry->list);
   1.147      xfree(entry);
   1.148 @@ -500,8 +551,8 @@ static int msix_capability_init(struct p
   1.149      struct msi_desc *entry;
   1.150      int pos;
   1.151      u16 control;
   1.152 -    unsigned long phys_addr;
   1.153 -    u32 table_offset;
   1.154 +    unsigned long table_paddr, entry_paddr;
   1.155 +    u32 table_offset, entry_offset;
   1.156      u8 bir;
   1.157      void __iomem *base;
   1.158      int idx;
   1.159 @@ -525,15 +576,17 @@ static int msix_capability_init(struct p
   1.160      table_offset = pci_conf_read32(bus, slot, func, msix_table_offset_reg(pos));
   1.161      bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
   1.162      table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
   1.163 -    phys_addr = msi->table_base + table_offset;
   1.164 -    idx = msix_fixmap_alloc();
   1.165 +    entry_offset = msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
   1.166 +
   1.167 +    table_paddr = msi->table_base + table_offset;
   1.168 +    entry_paddr = table_paddr + entry_offset;
   1.169 +    idx = msix_get_fixmap(dev, table_paddr, entry_paddr);
   1.170      if ( idx < 0 )
   1.171      {
   1.172          xfree(entry);
   1.173 -        return -ENOMEM;
   1.174 +        return idx;
   1.175      }
   1.176 -    set_fixmap_nocache(idx, phys_addr);
   1.177 -    base = (void *)(fix_to_virt(idx) + (phys_addr & ((1UL << PAGE_SHIFT) - 1)));
   1.178 +    base = (void *)(fix_to_virt(idx) + (entry_paddr & ((1UL << PAGE_SHIFT) - 1)));
   1.179  
   1.180      entry->msi_attrib.type = PCI_CAP_ID_MSIX;
   1.181      entry->msi_attrib.is_64 = 1;
   1.182 @@ -548,9 +601,7 @@ static int msix_capability_init(struct p
   1.183      list_add_tail(&entry->list, &dev->msi_list);
   1.184  
   1.185      /* Mask interrupt here */
   1.186 -    writel(1, entry->mask_base + entry->msi_attrib.entry_nr
   1.187 -                * PCI_MSIX_ENTRY_SIZE
   1.188 -                + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
   1.189 +    writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
   1.190  
   1.191      *desc = entry;
   1.192      /* Restore MSI-X enabled bits */
   1.193 @@ -675,9 +726,7 @@ static void __pci_disable_msix(struct ms
   1.194  
   1.195      BUG_ON(list_empty(&dev->msi_list));
   1.196  
   1.197 -    writel(1, entry->mask_base + entry->msi_attrib.entry_nr
   1.198 -      * PCI_MSIX_ENTRY_SIZE
   1.199 -      + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
   1.200 +    writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
   1.201  
   1.202      pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
   1.203  }
     2.1 --- a/xen/drivers/passthrough/pci.c	Tue Feb 17 11:04:08 2009 +0000
     2.2 +++ b/xen/drivers/passthrough/pci.c	Tue Feb 17 11:06:16 2009 +0000
     2.3 @@ -48,6 +48,7 @@ struct pci_dev *alloc_pdev(u8 bus, u8 de
     2.4      pdev->domain = NULL;
     2.5      INIT_LIST_HEAD(&pdev->msi_list);
     2.6      list_add(&pdev->alldevs_list, &alldevs_list);
     2.7 +    spin_lock_init(&pdev->msix_table_lock);
     2.8  
     2.9      return pdev;
    2.10  }
     3.1 --- a/xen/include/asm-x86/fixmap.h	Tue Feb 17 11:04:08 2009 +0000
     3.2 +++ b/xen/include/asm-x86/fixmap.h	Tue Feb 17 11:06:16 2009 +0000
     3.3 @@ -50,7 +50,7 @@ enum fixed_addresses {
     3.4      FIX_IOMMU_MMIO_END = FIX_IOMMU_MMIO_BASE_0 + IOMMU_PAGES -1,
     3.5      FIX_TBOOT_SHARED_BASE,
     3.6      FIX_MSIX_IO_RESERV_BASE,
     3.7 -    FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + MAX_MSIX_PAGES -1,
     3.8 +    FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1,
     3.9      __end_of_fixed_addresses
    3.10  };
    3.11  
     4.1 --- a/xen/include/asm-x86/msi.h	Tue Feb 17 11:04:08 2009 +0000
     4.2 +++ b/xen/include/asm-x86/msi.h	Tue Feb 17 11:06:16 2009 +0000
     4.3 @@ -49,9 +49,9 @@
     4.4  
     4.5  /* MAX fixed pages reserved for mapping MSIX tables. */
     4.6  #if defined(__x86_64__)
     4.7 -#define MAX_MSIX_PAGES              512
     4.8 +#define FIX_MSIX_MAX_PAGES              512
     4.9  #else
    4.10 -#define MAX_MSIX_PAGES              32
    4.11 +#define FIX_MSIX_MAX_PAGES              32
    4.12  #endif
    4.13  
    4.14  struct msi_info {
    4.15 @@ -93,7 +93,7 @@ struct msi_desc {
    4.16  
    4.17  	struct list_head list;
    4.18  
    4.19 -	void __iomem *mask_base;
    4.20 +	void __iomem *mask_base;        /* va for the entry in mask table */
    4.21  	struct pci_dev *dev;
    4.22  	int vector;
    4.23  
     5.1 --- a/xen/include/xen/pci.h	Tue Feb 17 11:04:08 2009 +0000
     5.2 +++ b/xen/include/xen/pci.h	Tue Feb 17 11:06:16 2009 +0000
     5.3 @@ -29,10 +29,16 @@
     5.4  #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
     5.5  #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
     5.6  
     5.7 +#define MAX_MSIX_TABLE_PAGES    8    /* 2048 entries */
     5.8  struct pci_dev {
     5.9      struct list_head alldevs_list;
    5.10      struct list_head domain_list;
    5.11 +
    5.12      struct list_head msi_list;
    5.13 +    int msix_table_refcnt[MAX_MSIX_TABLE_PAGES];
    5.14 +    int msix_table_idx[MAX_MSIX_TABLE_PAGES];
    5.15 +    spinlock_t msix_table_lock;
    5.16 +
    5.17      struct domain *domain;
    5.18      const u8 bus;
    5.19      const u8 devfn;