ia64/xen-unstable

changeset 19248:9bc5799566be

hvm: passthrough MSI-X mask bit acceleration

Add a new parameter to DOMCTL_bind_pt_irq to allow Xen to know the
guest physical address of MSI-X table. Also add a new MMIO intercept
handler to intercept that gpa in order to handle MSI-X vector mask
bit operation in the hypervisor. This reduces the load of device model
considerably if the guest does mask and unmask frequently.

Signed-off-by: Qing He <qing.he@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Mar 02 10:26:37 2009 +0000 (2009-03-02)
parents d0df93e627bc
children 5255784eb0d7
files tools/libxc/xc_domain.c tools/libxc/xenctrl.h xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/intercept.c xen/arch/x86/hvm/vmsi.c xen/arch/x86/msi.c xen/drivers/passthrough/io.c xen/include/asm-x86/hvm/domain.h xen/include/asm-x86/msi.h xen/include/public/domctl.h xen/include/xen/pci.h
line diff
     1.1 --- a/tools/libxc/xc_domain.c	Mon Mar 02 10:23:50 2009 +0000
     1.2 +++ b/tools/libxc/xc_domain.c	Mon Mar 02 10:26:37 2009 +0000
     1.3 @@ -920,7 +920,8 @@ int xc_domain_update_msi_irq(
     1.4      uint32_t domid,
     1.5      uint32_t gvec,
     1.6      uint32_t pirq,
     1.7 -    uint32_t gflags)
     1.8 +    uint32_t gflags,
     1.9 +    uint64_t gtable)
    1.10  {
    1.11      int rc;
    1.12      xen_domctl_bind_pt_irq_t *bind;
    1.13 @@ -936,6 +937,7 @@ int xc_domain_update_msi_irq(
    1.14      bind->machine_irq = pirq;
    1.15      bind->u.msi.gvec = gvec;
    1.16      bind->u.msi.gflags = gflags;
    1.17 +    bind->u.msi.gtable = gtable;
    1.18  
    1.19      rc = do_domctl(xc_handle, &domctl);
    1.20      return rc;
     2.1 --- a/tools/libxc/xenctrl.h	Mon Mar 02 10:23:50 2009 +0000
     2.2 +++ b/tools/libxc/xenctrl.h	Mon Mar 02 10:26:37 2009 +0000
     2.3 @@ -1092,7 +1092,8 @@ int xc_domain_update_msi_irq(
     2.4      uint32_t domid,
     2.5      uint32_t gvec,
     2.6      uint32_t pirq,
     2.7 -    uint32_t gflags);
     2.8 +    uint32_t gflags,
     2.9 +    uint64_t gtable);
    2.10  
    2.11  int xc_domain_unbind_msi_irq(int xc_handle,
    2.12                               uint32_t domid,
     3.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Mar 02 10:23:50 2009 +0000
     3.2 +++ b/xen/arch/x86/hvm/hvm.c	Mon Mar 02 10:26:37 2009 +0000
     3.3 @@ -308,6 +308,9 @@ int hvm_domain_initialise(struct domain 
     3.4      spin_lock_init(&d->arch.hvm_domain.irq_lock);
     3.5      spin_lock_init(&d->arch.hvm_domain.uc_lock);
     3.6  
     3.7 +    INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
     3.8 +    spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
     3.9 +
    3.10      hvm_init_guest_time(d);
    3.11  
    3.12      d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
    3.13 @@ -348,11 +351,15 @@ int hvm_domain_initialise(struct domain 
    3.14      return rc;
    3.15  }
    3.16  
    3.17 +extern void msixtbl_pt_cleanup(struct domain *d);
    3.18 +
    3.19  void hvm_domain_relinquish_resources(struct domain *d)
    3.20  {
    3.21      hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    3.22      hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
    3.23  
    3.24 +    msixtbl_pt_cleanup(d);
    3.25 +
    3.26      /* Stop all asynchronous timer actions. */
    3.27      rtc_deinit(d);
    3.28      if ( d->vcpu[0] != NULL )
     4.1 --- a/xen/arch/x86/hvm/intercept.c	Mon Mar 02 10:23:50 2009 +0000
     4.2 +++ b/xen/arch/x86/hvm/intercept.c	Mon Mar 02 10:26:37 2009 +0000
     4.3 @@ -35,14 +35,16 @@
     4.4  extern struct hvm_mmio_handler hpet_mmio_handler;
     4.5  extern struct hvm_mmio_handler vlapic_mmio_handler;
     4.6  extern struct hvm_mmio_handler vioapic_mmio_handler;
     4.7 +extern struct hvm_mmio_handler msixtbl_mmio_handler;
     4.8  
     4.9 -#define HVM_MMIO_HANDLER_NR 3
    4.10 +#define HVM_MMIO_HANDLER_NR 4
    4.11  
    4.12  static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
    4.13  {
    4.14      &hpet_mmio_handler,
    4.15      &vlapic_mmio_handler,
    4.16 -    &vioapic_mmio_handler
    4.17 +    &vioapic_mmio_handler,
    4.18 +    &msixtbl_mmio_handler
    4.19  };
    4.20  
    4.21  static int hvm_mmio_access(struct vcpu *v,
     5.1 --- a/xen/arch/x86/hvm/vmsi.c	Mon Mar 02 10:23:50 2009 +0000
     5.2 +++ b/xen/arch/x86/hvm/vmsi.c	Mon Mar 02 10:26:37 2009 +0000
     5.3 @@ -193,3 +193,283 @@ int vmsi_deliver(struct domain *d, int p
     5.4      return 1;
     5.5  }
     5.6  
     5.7 +/* MSI-X mask bit hypervisor interception */
     5.8 +struct msixtbl_entry
     5.9 +{
    5.10 +    struct list_head list;
    5.11 +    atomic_t refcnt;    /* how many bind_pt_irq called for the device */
    5.12 +
    5.13 +    /* TODO: resolve the potential race by destruction of pdev */
    5.14 +    struct pci_dev *pdev;
    5.15 +    unsigned long gtable;       /* gpa of msix table */
    5.16 +    unsigned long table_len;
    5.17 +    unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
    5.18 +
    5.19 +    struct rcu_head rcu;
    5.20 +};
    5.21 +
    5.22 +static struct msixtbl_entry *msixtbl_find_entry(
    5.23 +    struct vcpu *v, unsigned long addr)
    5.24 +{
    5.25 +    struct msixtbl_entry *entry;
    5.26 +    struct domain *d = v->domain;
    5.27 +
    5.28 +    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
    5.29 +        if ( addr >= entry->gtable &&
    5.30 +             addr < entry->gtable + entry->table_len )
    5.31 +            return entry;
    5.32 +
    5.33 +    return NULL;
    5.34 +}
    5.35 +
    5.36 +static void __iomem *msixtbl_addr_to_virt(
    5.37 +    struct msixtbl_entry *entry, unsigned long addr)
    5.38 +{
    5.39 +    int idx, nr_page;
    5.40 +
    5.41 +    if ( !entry )
    5.42 +        return NULL;
    5.43 +
    5.44 +    nr_page = (addr >> PAGE_SHIFT) -
    5.45 +              (entry->gtable >> PAGE_SHIFT);
    5.46 +
    5.47 +    if ( !entry->pdev )
    5.48 +        return NULL;
    5.49 +
    5.50 +    idx = entry->pdev->msix_table_idx[nr_page];
    5.51 +    if ( !idx )
    5.52 +        return NULL;
    5.53 +
    5.54 +    return (void *)(fix_to_virt(idx) +
    5.55 +                    (addr & ((1UL << PAGE_SHIFT) - 1)));
    5.56 +}
    5.57 +
    5.58 +static int msixtbl_read(
    5.59 +    struct vcpu *v, unsigned long address,
    5.60 +    unsigned long len, unsigned long *pval)
    5.61 +{
    5.62 +    unsigned long offset;
    5.63 +    struct msixtbl_entry *entry;
    5.64 +    void *virt;
    5.65 +    int r = X86EMUL_UNHANDLEABLE;
    5.66 +
    5.67 +    rcu_read_lock();
    5.68 +
    5.69 +    if ( len != 4 )
    5.70 +        goto out;
    5.71 +
    5.72 +    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
    5.73 +    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET)
    5.74 +        goto out;
    5.75 +
    5.76 +    entry = msixtbl_find_entry(v, address);
    5.77 +    virt = msixtbl_addr_to_virt(entry, address);
    5.78 +    if ( !virt )
    5.79 +        goto out;
    5.80 +
    5.81 +    *pval = readl(virt);
    5.82 +    r = X86EMUL_OKAY;
    5.83 +
    5.84 +out:
    5.85 +    rcu_read_unlock();
    5.86 +    return r;
    5.87 +}
    5.88 +
    5.89 +static int msixtbl_write(struct vcpu *v, unsigned long address,
    5.90 +                        unsigned long len, unsigned long val)
    5.91 +{
    5.92 +    unsigned long offset;
    5.93 +    struct msixtbl_entry *entry;
    5.94 +    void *virt;
    5.95 +    int nr_entry;
    5.96 +    int r = X86EMUL_UNHANDLEABLE;
    5.97 +
    5.98 +    rcu_read_lock();
    5.99 +
   5.100 +    if ( len != 4 )
   5.101 +        goto out;
   5.102 +
   5.103 +    entry = msixtbl_find_entry(v, address);
   5.104 +    nr_entry = (address - entry->gtable) % PCI_MSIX_ENTRY_SIZE;
   5.105 +
   5.106 +    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
   5.107 +    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET)
   5.108 +    {
   5.109 +        set_bit(nr_entry, &entry->table_flags);
   5.110 +        goto out;
   5.111 +    }
   5.112 +
   5.113 +    /* exit to device model if address/data has been modified */
   5.114 +    if ( test_and_clear_bit(nr_entry, &entry->table_flags) )
   5.115 +        goto out;
   5.116 +
   5.117 +    virt = msixtbl_addr_to_virt(entry, address);
   5.118 +    if ( !virt )
   5.119 +        goto out;
   5.120 +
   5.121 +    writel(val, virt);
   5.122 +    r = X86EMUL_OKAY;
   5.123 +
   5.124 +out:
   5.125 +    rcu_read_unlock();
   5.126 +    return r;
   5.127 +}
   5.128 +
   5.129 +static int msixtbl_range(struct vcpu *v, unsigned long addr)
   5.130 +{
   5.131 +    struct msixtbl_entry *entry;
   5.132 +    void *virt;
   5.133 +
   5.134 +    rcu_read_lock();
   5.135 +
   5.136 +    entry = msixtbl_find_entry(v, addr);
   5.137 +    virt = msixtbl_addr_to_virt(entry, addr);
   5.138 +
   5.139 +    rcu_read_unlock();
   5.140 +
   5.141 +    return !!virt;
   5.142 +}
   5.143 +
   5.144 +struct hvm_mmio_handler msixtbl_mmio_handler = {
   5.145 +    .check_handler = msixtbl_range,
   5.146 +    .read_handler = msixtbl_read,
   5.147 +    .write_handler = msixtbl_write
   5.148 +};
   5.149 +
   5.150 +static struct msixtbl_entry *add_msixtbl_entry(struct domain *d,
   5.151 +                                               struct pci_dev *pdev,
   5.152 +                                               uint64_t gtable)
   5.153 +{
   5.154 +    struct msixtbl_entry *entry;
   5.155 +    u32 len;
   5.156 +
   5.157 +    entry = xmalloc(struct msixtbl_entry);
   5.158 +    if ( !entry )
   5.159 +        return NULL;
   5.160 +
   5.161 +    memset(entry, 0, sizeof(struct msixtbl_entry));
   5.162 +        
   5.163 +    INIT_LIST_HEAD(&entry->list);
   5.164 +    INIT_RCU_HEAD(&entry->rcu);
   5.165 +    atomic_set(&entry->refcnt, 0);
   5.166 +
   5.167 +    len = pci_msix_get_table_len(pdev);
   5.168 +    entry->table_len = len;
   5.169 +    entry->pdev = pdev;
   5.170 +    entry->gtable = (unsigned long) gtable;
   5.171 +
   5.172 +    list_add_rcu(&entry->list, &d->arch.hvm_domain.msixtbl_list);
   5.173 +
   5.174 +    return entry;
   5.175 +}
   5.176 +
   5.177 +static void free_msixtbl_entry(struct rcu_head *rcu)
   5.178 +{
   5.179 +    struct msixtbl_entry *entry;
   5.180 +
   5.181 +    entry = container_of (rcu, struct msixtbl_entry, rcu);
   5.182 +
   5.183 +    xfree(entry);
   5.184 +}
   5.185 +
   5.186 +static void del_msixtbl_entry(struct msixtbl_entry *entry)
   5.187 +{
   5.188 +    list_del_rcu(&entry->list);
   5.189 +    call_rcu(&entry->rcu, free_msixtbl_entry);
   5.190 +}
   5.191 +
   5.192 +int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
   5.193 +{
   5.194 +    irq_desc_t *irq_desc;
   5.195 +    struct msi_desc *msi_desc;
   5.196 +    struct pci_dev *pdev;
   5.197 +    struct msixtbl_entry *entry;
   5.198 +    int r = -EINVAL;
   5.199 +
   5.200 +    /* pcidevs_lock already held */
   5.201 +    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
   5.202 +
   5.203 +    if ( irq_desc->handler != &pci_msi_type )
   5.204 +        goto out;
   5.205 +
   5.206 +    msi_desc = irq_desc->msi_desc;
   5.207 +    if ( !msi_desc )
   5.208 +        goto out;
   5.209 +
   5.210 +    pdev = msi_desc->dev;
   5.211 +
   5.212 +    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
   5.213 +
   5.214 +    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
   5.215 +        if ( pdev == entry->pdev )
   5.216 +            goto found;
   5.217 +
   5.218 +    entry = add_msixtbl_entry(d, pdev, gtable);
   5.219 +    if ( !entry )
   5.220 +    {
   5.221 +        spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
   5.222 +        goto out;
   5.223 +    }
   5.224 +
   5.225 +found:
   5.226 +    atomic_inc(&entry->refcnt);
   5.227 +
   5.228 +    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
   5.229 +
   5.230 +out:
   5.231 +    spin_unlock_irq(&irq_desc->lock);
   5.232 +    return r;
   5.233 +
   5.234 +}
   5.235 +
   5.236 +void msixtbl_pt_unregister(struct domain *d, int pirq)
   5.237 +{
   5.238 +    irq_desc_t *irq_desc;
   5.239 +    struct msi_desc *msi_desc;
   5.240 +    struct pci_dev *pdev;
   5.241 +    struct msixtbl_entry *entry;
   5.242 +
   5.243 +    /* pcidevs_lock already held */
   5.244 +    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
   5.245 +
   5.246 +    if ( irq_desc->handler != &pci_msi_type )
   5.247 +        goto out;
   5.248 +
   5.249 +    msi_desc = irq_desc->msi_desc;
   5.250 +    if ( !msi_desc )
   5.251 +        goto out;
   5.252 +
   5.253 +    pdev = msi_desc->dev;
   5.254 +
   5.255 +    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
   5.256 +
   5.257 +    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
   5.258 +        if ( pdev == entry->pdev )
   5.259 +            goto found;
   5.260 +
   5.261 +    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
   5.262 +
   5.263 +
   5.264 +out:
   5.265 +    spin_unlock(&irq_desc->lock);
   5.266 +    return;
   5.267 +
   5.268 +found:
   5.269 +    if ( !atomic_dec_and_test(&entry->refcnt) )
   5.270 +        del_msixtbl_entry(entry);
   5.271 +
   5.272 +    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
   5.273 +    spin_unlock(&irq_desc->lock);
   5.274 +}
   5.275 +void msixtbl_pt_cleanup(struct domain *d, int pirq)
   5.276 +{
   5.277 +    struct msixtbl_entry *entry, *temp;
   5.278 +
   5.279 +    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
   5.280 +
   5.281 +    list_for_each_entry_safe( entry, temp,
   5.282 +                              &d->arch.hvm_domain.msixtbl_list, list )
   5.283 +        del_msixtbl_entry(entry);
   5.284 +
   5.285 +    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
   5.286 +}
     6.1 --- a/xen/arch/x86/msi.c	Mon Mar 02 10:23:50 2009 +0000
     6.2 +++ b/xen/arch/x86/msi.c	Mon Mar 02 10:26:37 2009 +0000
     6.3 @@ -839,3 +839,23 @@ int pci_restore_msi_state(struct pci_dev
     6.4      return 0;
     6.5  }
     6.6  
     6.7 +unsigned int pci_msix_get_table_len(struct pci_dev *pdev)
     6.8 +{
     6.9 +    int pos;
    6.10 +    u16 control;
    6.11 +    u8 bus, slot, func;
    6.12 +    unsigned int len;
    6.13 +
    6.14 +    bus = pdev->bus;
    6.15 +    slot = PCI_SLOT(pdev->devfn);
    6.16 +    func = PCI_FUNC(pdev->devfn);
    6.17 +
    6.18 +    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
    6.19 +    if ( !pos )
    6.20 +        return 0;
    6.21 +
    6.22 +    control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
    6.23 +    len = msix_table_size(control) * PCI_MSIX_ENTRY_SIZE;
    6.24 +
    6.25 +    return len;
    6.26 +}
     7.1 --- a/xen/drivers/passthrough/io.c	Mon Mar 02 10:23:50 2009 +0000
     7.2 +++ b/xen/drivers/passthrough/io.c	Mon Mar 02 10:26:37 2009 +0000
     7.3 @@ -58,6 +58,9 @@ static void pt_irq_time_out(void *data)
     7.4      pirq_guest_eoi(irq_map->dom, machine_gsi);
     7.5  }
     7.6  
     7.7 +extern int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable);
     7.8 +extern void msixtbl_pt_unregister(struct domain *d, int pirq);
     7.9 +
    7.10  int pt_irq_create_bind_vtd(
    7.11      struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
    7.12  {
    7.13 @@ -115,6 +118,8 @@ int pt_irq_create_bind_vtd(
    7.14                  spin_unlock(&d->event_lock);
    7.15                  return rc;
    7.16              }
    7.17 +            if ( pt_irq_bind->u.msi.gtable )
    7.18 +                msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable);
    7.19          }
    7.20          else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
    7.21                  ||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
    7.22 @@ -259,6 +264,7 @@ int pt_irq_destroy_bind_vtd(
    7.23          if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
    7.24          {
    7.25              pirq_guest_unbind(d, machine_gsi);
    7.26 +            msixtbl_pt_unregister(d, machine_gsi);
    7.27              if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
    7.28                  kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
    7.29              hvm_irq_dpci->mirq[machine_gsi].dom   = NULL;
     8.1 --- a/xen/include/asm-x86/hvm/domain.h	Mon Mar 02 10:23:50 2009 +0000
     8.2 +++ b/xen/include/asm-x86/hvm/domain.h	Mon Mar 02 10:26:37 2009 +0000
     8.3 @@ -75,6 +75,10 @@ struct hvm_domain {
     8.4      /* Pass-through */
     8.5      struct hvm_iommu       hvm_iommu;
     8.6  
     8.7 +    /* hypervisor intercepted msix table */
     8.8 +    struct list_head       msixtbl_list;
     8.9 +    spinlock_t             msixtbl_list_lock;
    8.10 +
    8.11      struct viridian_domain viridian;
    8.12  
    8.13      bool_t                 hap_enabled;
     9.1 --- a/xen/include/asm-x86/msi.h	Mon Mar 02 10:23:50 2009 +0000
     9.2 +++ b/xen/include/asm-x86/msi.h	Mon Mar 02 10:26:37 2009 +0000
     9.3 @@ -81,6 +81,8 @@ extern void teardown_msi_vector(int vect
     9.4  extern int msi_free_vector(struct msi_desc *entry);
     9.5  extern int pci_restore_msi_state(struct pci_dev *pdev);
     9.6  
     9.7 +extern unsigned int pci_msix_get_table_len(struct pci_dev *pdev);
     9.8 +
     9.9  struct msi_desc {
    9.10  	struct {
    9.11  		__u8	type	: 5; 	/* {0: unused, 5h:MSI, 11h:MSI-X} */
    10.1 --- a/xen/include/public/domctl.h	Mon Mar 02 10:23:50 2009 +0000
    10.2 +++ b/xen/include/public/domctl.h	Mon Mar 02 10:26:37 2009 +0000
    10.3 @@ -485,6 +485,7 @@ struct xen_domctl_bind_pt_irq {
    10.4          struct {
    10.5              uint8_t gvec;
    10.6              uint32_t gflags;
    10.7 +            uint64_t gtable;
    10.8          } msi;
    10.9      } u;
   10.10  };
    11.1 --- a/xen/include/xen/pci.h	Mon Mar 02 10:23:50 2009 +0000
    11.2 +++ b/xen/include/xen/pci.h	Mon Mar 02 10:26:37 2009 +0000
    11.3 @@ -29,7 +29,8 @@
    11.4  #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
    11.5  #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
    11.6  
    11.7 -#define MAX_MSIX_TABLE_PAGES    8    /* 2048 entries */
    11.8 +#define MAX_MSIX_TABLE_ENTRIES  2048
    11.9 +#define MAX_MSIX_TABLE_PAGES    8
   11.10  struct pci_dev {
   11.11      struct list_head alldevs_list;
   11.12      struct list_head domain_list;