ia64/xen-unstable
changeset 19248:9bc5799566be
hvm: passthrough MSI-X mask bit acceleration
Add a new parameter to DOMCTL_bind_pt_irq to let Xen know the
guest physical address of the MSI-X table. Also add a new MMIO
intercept handler that intercepts that gpa so that MSI-X vector mask
bit operations are handled in the hypervisor. This reduces the load
on the device model considerably if the guest masks and unmasks
frequently.
Signed-off-by: Qing He <qing.he@intel.com>
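For context, the operation being accelerated is the guest driver toggling a per-vector mask bit: a 4-byte access to the vector control dword of that vector's 16-byte table entry. A minimal sketch of that access follows; the two PCI_MSIX_ENTRY_* constants match those used in vmsi.c below, while the function name and PCI_MSIX_VECTOR_BITMASK are illustrative, not part of this patch.

    /* Masking MSI-X vector 'nr' = setting bit 0 of the vector control
     * dword in its 16-byte table entry (PCI Local Bus Spec 3.0). */
    #define PCI_MSIX_ENTRY_SIZE                16
    #define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET  12
    #define PCI_MSIX_VECTOR_BITMASK            (1u << 0)  /* illustrative name */

    static inline void mask_msix_vector(void __iomem *table, int nr)
    {
        void __iomem *ctrl = table + nr * PCI_MSIX_ENTRY_SIZE
                                   + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;

        writel(readl(ctrl) | PCI_MSIX_VECTOR_BITMASK, ctrl); /* 4-byte write */
    }

Before this patch every such access trapped out to the device model; with it, Xen completes vector control reads and writes itself and only exits to the device model when the entry's address/data fields have been modified.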
author    Keir Fraser <keir.fraser@citrix.com>
date      Mon Mar 02 10:26:37 2009 +0000 (2009-03-02)
parents   d0df93e627bc
children  5255784eb0d7
files     tools/libxc/xc_domain.c tools/libxc/xenctrl.h xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/intercept.c xen/arch/x86/hvm/vmsi.c xen/arch/x86/msi.c xen/drivers/passthrough/io.c xen/include/asm-x86/hvm/domain.h xen/include/asm-x86/msi.h xen/include/public/domctl.h xen/include/xen/pci.h
line diff
--- a/tools/libxc/xc_domain.c  Mon Mar 02 10:23:50 2009 +0000
+++ b/tools/libxc/xc_domain.c  Mon Mar 02 10:26:37 2009 +0000
@@ -920,7 +920,8 @@ int xc_domain_update_msi_irq(
     uint32_t domid,
     uint32_t gvec,
     uint32_t pirq,
-    uint32_t gflags)
+    uint32_t gflags,
+    uint64_t gtable)
 {
     int rc;
     xen_domctl_bind_pt_irq_t *bind;
@@ -936,6 +937,7 @@ int xc_domain_update_msi_irq(
     bind->machine_irq = pirq;
     bind->u.msi.gvec = gvec;
     bind->u.msi.gflags = gflags;
+    bind->u.msi.gtable = gtable;
 
     rc = do_domctl(xc_handle, &domctl);
     return rc;
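A hypothetical toolstack-side caller of the extended function, assuming the device model has already read the device's MSI-X capability and knows where the guest maps the table (all names and values illustrative):

    /* Pass the MSI-X table gpa so Xen can intercept mask-bit accesses.
     * gtable == 0 keeps the old behaviour: pt_irq_create_bind_vtd()
     * (see the io.c hunk below) only registers interception for a
     * non-zero address. */
    static int bind_guest_msix(int xc_handle, uint32_t domid, uint32_t gvec,
                               uint32_t pirq, uint32_t gflags,
                               uint64_t bar_gpa, uint32_t table_offset)
    {
        uint64_t gtable = bar_gpa + table_offset;   /* gpa of MSI-X table */

        return xc_domain_update_msi_irq(xc_handle, domid, gvec, pirq,
                                        gflags, gtable);
    }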
--- a/tools/libxc/xenctrl.h  Mon Mar 02 10:23:50 2009 +0000
+++ b/tools/libxc/xenctrl.h  Mon Mar 02 10:26:37 2009 +0000
@@ -1092,7 +1092,8 @@ int xc_domain_update_msi_irq(
     uint32_t domid,
     uint32_t gvec,
     uint32_t pirq,
-    uint32_t gflags);
+    uint32_t gflags,
+    uint64_t gtable);
 
 int xc_domain_unbind_msi_irq(int xc_handle,
                              uint32_t domid,
--- a/xen/arch/x86/hvm/hvm.c  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/arch/x86/hvm/hvm.c  Mon Mar 02 10:26:37 2009 +0000
@@ -308,6 +308,9 @@ int hvm_domain_initialise(struct domain
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
     spin_lock_init(&d->arch.hvm_domain.uc_lock);
 
+    INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
+    spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
+
     hvm_init_guest_time(d);
 
     d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
@@ -348,11 +351,15 @@ int hvm_domain_initialise(struct domain
     return rc;
 }
 
+extern void msixtbl_pt_cleanup(struct domain *d);
+
 void hvm_domain_relinquish_resources(struct domain *d)
 {
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
 
+    msixtbl_pt_cleanup(d);
+
     /* Stop all asynchronous timer actions. */
     rtc_deinit(d);
     if ( d->vcpu[0] != NULL )
--- a/xen/arch/x86/hvm/intercept.c  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/arch/x86/hvm/intercept.c  Mon Mar 02 10:26:37 2009 +0000
@@ -35,14 +35,16 @@
 extern struct hvm_mmio_handler hpet_mmio_handler;
 extern struct hvm_mmio_handler vlapic_mmio_handler;
 extern struct hvm_mmio_handler vioapic_mmio_handler;
+extern struct hvm_mmio_handler msixtbl_mmio_handler;
 
-#define HVM_MMIO_HANDLER_NR 3
+#define HVM_MMIO_HANDLER_NR 4
 
 static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
 {
     &hpet_mmio_handler,
     &vlapic_mmio_handler,
-    &vioapic_mmio_handler
+    &vioapic_mmio_handler,
+    &msixtbl_mmio_handler
 };
 
 static int hvm_mmio_access(struct vcpu *v,
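For reference, the dispatch these handlers plug into works roughly as follows; this is a sketch of the intercept.c logic of this era (function name is illustrative), not part of the diff:

    /* Offer the MMIO access to each registered handler; the first whose
     * check_handler claims the address gets to emulate the access. */
    static int hvm_mmio_intercept_sketch(struct vcpu *v, ioreq_t *p)
    {
        int i;

        for ( i = 0; i < HVM_MMIO_HANDLER_NR; i++ )
            if ( hvm_mmio_handlers[i]->check_handler(v, p->addr) )
                return hvm_mmio_access(
                    v, p,
                    hvm_mmio_handlers[i]->read_handler,
                    hvm_mmio_handlers[i]->write_handler);

        return X86EMUL_UNHANDLEABLE;
    }

This is why msixtbl_range() below only needs to answer "is this address inside a registered, mapped MSI-X table?" for the new handler to be consulted.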
--- a/xen/arch/x86/hvm/vmsi.c  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/arch/x86/hvm/vmsi.c  Mon Mar 02 10:26:37 2009 +0000
@@ -193,3 +193,283 @@ int vmsi_deliver(struct domain *d, int p
     return 1;
 }
 
+/* MSI-X mask bit hypervisor interception */
+struct msixtbl_entry
+{
+    struct list_head list;
+    atomic_t refcnt;    /* how many bind_pt_irq called for the device */
+
+    /* TODO: resolve the potential race by destruction of pdev */
+    struct pci_dev *pdev;
+    unsigned long gtable;        /* gpa of msix table */
+    unsigned long table_len;
+    unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+
+    struct rcu_head rcu;
+};
+
+static struct msixtbl_entry *msixtbl_find_entry(
+    struct vcpu *v, unsigned long addr)
+{
+    struct msixtbl_entry *entry;
+    struct domain *d = v->domain;
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( addr >= entry->gtable &&
+             addr < entry->gtable + entry->table_len )
+            return entry;
+
+    return NULL;
+}
+
+static void __iomem *msixtbl_addr_to_virt(
+    struct msixtbl_entry *entry, unsigned long addr)
+{
+    int idx, nr_page;
+
+    if ( !entry )
+        return NULL;
+
+    nr_page = (addr >> PAGE_SHIFT) -
+              (entry->gtable >> PAGE_SHIFT);
+
+    if ( !entry->pdev )
+        return NULL;
+
+    idx = entry->pdev->msix_table_idx[nr_page];
+    if ( !idx )
+        return NULL;
+
+    return (void *)(fix_to_virt(idx) +
+                    (addr & ((1UL << PAGE_SHIFT) - 1)));
+}
+
+static int msixtbl_read(
+    struct vcpu *v, unsigned long address,
+    unsigned long len, unsigned long *pval)
+{
+    unsigned long offset;
+    struct msixtbl_entry *entry;
+    void *virt;
+    int r = X86EMUL_UNHANDLEABLE;
+
+    rcu_read_lock();
+
+    if ( len != 4 )
+        goto out;
+
+    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
+        goto out;
+
+    entry = msixtbl_find_entry(v, address);
+    virt = msixtbl_addr_to_virt(entry, address);
+    if ( !virt )
+        goto out;
+
+    *pval = readl(virt);
+    r = X86EMUL_OKAY;
+
+out:
+    rcu_read_unlock();
+    return r;
+}
+
+static int msixtbl_write(struct vcpu *v, unsigned long address,
+                         unsigned long len, unsigned long val)
+{
+    unsigned long offset;
+    struct msixtbl_entry *entry;
+    void *virt;
+    int nr_entry;
+    int r = X86EMUL_UNHANDLEABLE;
+
+    rcu_read_lock();
+
+    if ( len != 4 )
+        goto out;
+
+    entry = msixtbl_find_entry(v, address);
+    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+
+    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
+    {
+        set_bit(nr_entry, &entry->table_flags);
+        goto out;
+    }
+
+    /* exit to device model if address/data has been modified */
+    if ( test_and_clear_bit(nr_entry, &entry->table_flags) )
+        goto out;
+
+    virt = msixtbl_addr_to_virt(entry, address);
+    if ( !virt )
+        goto out;
+
+    writel(val, virt);
+    r = X86EMUL_OKAY;
+
+out:
+    rcu_read_unlock();
+    return r;
+}
+
+static int msixtbl_range(struct vcpu *v, unsigned long addr)
+{
+    struct msixtbl_entry *entry;
+    void *virt;
+
+    rcu_read_lock();
+
+    entry = msixtbl_find_entry(v, addr);
+    virt = msixtbl_addr_to_virt(entry, addr);
+
+    rcu_read_unlock();
+
+    return !!virt;
+}
+
+struct hvm_mmio_handler msixtbl_mmio_handler = {
+    .check_handler = msixtbl_range,
+    .read_handler  = msixtbl_read,
+    .write_handler = msixtbl_write
+};
+
+static struct msixtbl_entry *add_msixtbl_entry(struct domain *d,
+                                               struct pci_dev *pdev,
+                                               uint64_t gtable)
+{
+    struct msixtbl_entry *entry;
+    u32 len;
+
+    entry = xmalloc(struct msixtbl_entry);
+    if ( !entry )
+        return NULL;
+
+    memset(entry, 0, sizeof(struct msixtbl_entry));
+
+    INIT_LIST_HEAD(&entry->list);
+    INIT_RCU_HEAD(&entry->rcu);
+    atomic_set(&entry->refcnt, 0);
+
+    len = pci_msix_get_table_len(pdev);
+    entry->table_len = len;
+    entry->pdev = pdev;
+    entry->gtable = (unsigned long) gtable;
+
+    list_add_rcu(&entry->list, &d->arch.hvm_domain.msixtbl_list);
+
+    return entry;
+}
+
+static void free_msixtbl_entry(struct rcu_head *rcu)
+{
+    struct msixtbl_entry *entry;
+
+    entry = container_of(rcu, struct msixtbl_entry, rcu);
+
+    xfree(entry);
+}
+
+static void del_msixtbl_entry(struct msixtbl_entry *entry)
+{
+    list_del_rcu(&entry->list);
+    call_rcu(&entry->rcu, free_msixtbl_entry);
+}
+
+int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
+{
+    irq_desc_t *irq_desc;
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev;
+    struct msixtbl_entry *entry;
+    int r = -EINVAL;
+
+    /* pcidevs_lock already held */
+    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+
+    if ( irq_desc->handler != &pci_msi_type )
+        goto out;
+
+    msi_desc = irq_desc->msi_desc;
+    if ( !msi_desc )
+        goto out;
+
+    pdev = msi_desc->dev;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( pdev == entry->pdev )
+            goto found;
+
+    entry = add_msixtbl_entry(d, pdev, gtable);
+    if ( !entry )
+    {
+        spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+        goto out;
+    }
+
+found:
+    atomic_inc(&entry->refcnt);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+out:
+    spin_unlock_irq(&irq_desc->lock);
+    return r;
+}
+
+void msixtbl_pt_unregister(struct domain *d, int pirq)
+{
+    irq_desc_t *irq_desc;
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev;
+    struct msixtbl_entry *entry;
+
+    /* pcidevs_lock already held */
+    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+
+    if ( irq_desc->handler != &pci_msi_type )
+        goto out;
+
+    msi_desc = irq_desc->msi_desc;
+    if ( !msi_desc )
+        goto out;
+
+    pdev = msi_desc->dev;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( pdev == entry->pdev )
+            goto found;
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+out:
+    spin_unlock(&irq_desc->lock);
+    return;
+
+found:
+    if ( !atomic_dec_and_test(&entry->refcnt) )
+        del_msixtbl_entry(entry);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+    spin_unlock(&irq_desc->lock);
+}
+
+void msixtbl_pt_cleanup(struct domain *d, int pirq)
+{
+    struct msixtbl_entry *entry, *temp;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry_safe( entry, temp,
+                              &d->arch.hvm_domain.msixtbl_list, list )
+        del_msixtbl_entry(entry);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+}
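The layout that makes the offset checks above work: each MSI-X table entry is 16 bytes, and only its last dword carries the mask bit, so msixtbl_read/msixtbl_write complete exactly those 4-byte, dword-aligned vector control accesses in Xen, while a write to any other dword marks the entry (set_bit) so that the next vector control write also exits to the device model. Per the PCI spec (this struct is illustrative, not from the patch):

    struct msix_table_entry_layout {
        uint32_t msg_addr_lo;    /* offset  0 */
        uint32_t msg_addr_hi;    /* offset  4 */
        uint32_t msg_data;       /* offset  8 */
        uint32_t vector_ctrl;    /* offset 12: bit 0 is the mask bit */
    };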
--- a/xen/arch/x86/msi.c  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/arch/x86/msi.c  Mon Mar 02 10:26:37 2009 +0000
@@ -839,3 +839,23 @@ int pci_restore_msi_state(struct pci_dev
     return 0;
 }
 
+unsigned int pci_msix_get_table_len(struct pci_dev *pdev)
+{
+    int pos;
+    u16 control;
+    u8 bus, slot, func;
+    unsigned int len;
+
+    bus = pdev->bus;
+    slot = PCI_SLOT(pdev->devfn);
+    func = PCI_FUNC(pdev->devfn);
+
+    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+    if ( !pos )
+        return 0;
+
+    control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
+    len = msix_table_size(control) * PCI_MSIX_ENTRY_SIZE;
+
+    return len;
+}
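A worked example of the size computation, assuming msix_table_size() expands to the usual N-1 decoding of the Message Control register's Table Size field (bits 10:0):

    u16 control = 0x0007;                         /* example register value   */
    unsigned int nr  = (control & 0x7ff) + 1;     /* 8 vectors (N-1 encoded)  */
    unsigned int len = nr * PCI_MSIX_ENTRY_SIZE;  /* 8 * 16 = 128 bytes       */

This length is what bounds msixtbl_find_entry()'s address-range check above.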
--- a/xen/drivers/passthrough/io.c  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/drivers/passthrough/io.c  Mon Mar 02 10:26:37 2009 +0000
@@ -58,6 +58,9 @@ static void pt_irq_time_out(void *data)
     pirq_guest_eoi(irq_map->dom, machine_gsi);
 }
 
+extern int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable);
+extern int msixtbl_pt_unregister(struct domain *d, int pirq);
+
 int pt_irq_create_bind_vtd(
     struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
@@ -115,6 +118,8 @@ int pt_irq_create_bind_vtd(
                 spin_unlock(&d->event_lock);
                 return rc;
             }
+            if ( pt_irq_bind->u.msi.gtable )
+                msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable);
         }
         else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
                 ||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
@@ -259,6 +264,7 @@ int pt_irq_destroy_bind_vtd(
         if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
         {
             pirq_guest_unbind(d, machine_gsi);
+            msixtbl_pt_unregister(d, machine_gsi);
             if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
                 kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
             hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
--- a/xen/include/asm-x86/hvm/domain.h  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/include/asm-x86/hvm/domain.h  Mon Mar 02 10:26:37 2009 +0000
@@ -75,6 +75,10 @@ struct hvm_domain {
     /* Pass-through */
     struct hvm_iommu hvm_iommu;
 
+    /* hypervisor intercepted msix table */
+    struct list_head msixtbl_list;
+    spinlock_t msixtbl_list_lock;
+
     struct viridian_domain viridian;
 
     bool_t hap_enabled;
--- a/xen/include/asm-x86/msi.h  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/include/asm-x86/msi.h  Mon Mar 02 10:26:37 2009 +0000
@@ -81,6 +81,8 @@ extern void teardown_msi_vector(int vect
 extern int msi_free_vector(struct msi_desc *entry);
 extern int pci_restore_msi_state(struct pci_dev *pdev);
 
+extern unsigned int pci_msix_get_table_len(struct pci_dev *pdev);
+
 struct msi_desc {
     struct {
         __u8 type : 5;  /* {0: unused, 5h:MSI, 11h:MSI-X} */
--- a/xen/include/public/domctl.h  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/include/public/domctl.h  Mon Mar 02 10:26:37 2009 +0000
@@ -485,6 +485,7 @@ struct xen_domctl_bind_pt_irq {
         struct {
             uint8_t gvec;
             uint32_t gflags;
+            uint64_t gtable;
         } msi;
     } u;
 };
--- a/xen/include/xen/pci.h  Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/include/xen/pci.h  Mon Mar 02 10:26:37 2009 +0000
@@ -29,7 +29,8 @@
 #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
 #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
 
-#define MAX_MSIX_TABLE_PAGES    8    /* 2048 entries */
+#define MAX_MSIX_TABLE_ENTRIES  2048
+#define MAX_MSIX_TABLE_PAGES    8
 struct pci_dev {
     struct list_head alldevs_list;
     struct list_head domain_list;
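The two constants are consistent with each other: 2048 entries at 16 bytes per MSI-X table entry occupy 32 KiB, which is exactly 8 pages of 4 KiB. The new MAX_MSIX_TABLE_ENTRIES is what sizes the per-entry table_flags bitmap in struct msixtbl_entry in the vmsi.c hunk above.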