ia64/xen-unstable
changeset 15916:f4bbd3f327e4
Intel VT-d specific changes in arch/x86/hvm/vmx/vtd.
Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Signed-off-by: Guy Zana <guy@neocleus.com>
author | kfraser@localhost.localdomain
date | Fri Sep 14 16:40:49 2007 +0100 (2007-09-14)
parents | acfa9290746f
children | babe17e7a4ee
files | xen/arch/x86/hvm/vmx/vtd/Makefile xen/arch/x86/hvm/vmx/vtd/dmar.c xen/arch/x86/hvm/vmx/vtd/dmar.h xen/arch/x86/hvm/vmx/vtd/intel-iommu.c xen/arch/x86/hvm/vmx/vtd/io.c xen/arch/x86/hvm/vmx/vtd/msi.h xen/arch/x86/hvm/vmx/vtd/pci-direct.h xen/arch/x86/hvm/vmx/vtd/pci_regs.h xen/arch/x86/hvm/vmx/vtd/utils.c
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/xen/arch/x86/hvm/vmx/vtd/Makefile Fri Sep 14 16:40:49 2007 +0100
1.3 @@ -0,0 +1,4 @@
1.4 +obj-y += intel-iommu.o
1.5 +obj-y += dmar.o
1.6 +obj-y += utils.o
1.7 +obj-y += io.o
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.c Fri Sep 14 16:40:49 2007 +0100 2.3 @@ -0,0 +1,494 @@ 2.4 +/* 2.5 + * Copyright (c) 2006, Intel Corporation. 2.6 + * 2.7 + * This program is free software; you can redistribute it and/or modify it 2.8 + * under the terms and conditions of the GNU General Public License, 2.9 + * version 2, as published by the Free Software Foundation. 2.10 + * 2.11 + * This program is distributed in the hope it will be useful, but WITHOUT 2.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 2.13 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 2.14 + * more details. 2.15 + * 2.16 + * You should have received a copy of the GNU General Public License along with 2.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 2.18 + * Place - Suite 330, Boston, MA 02111-1307 USA. 2.19 + * 2.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com> 2.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com> 2.22 + * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen 2.23 + */ 2.24 + 2.25 +#include <xen/init.h> 2.26 +#include <xen/bitmap.h> 2.27 +#include <xen/kernel.h> 2.28 +#include <xen/acpi.h> 2.29 +#include <xen/mm.h> 2.30 +#include <xen/xmalloc.h> 2.31 +#include <asm/string.h> 2.32 +#include "dmar.h" 2.33 +#include "pci-direct.h" 2.34 +#include "pci_regs.h" 2.35 + 2.36 +#undef PREFIX 2.37 +#define PREFIX VTDPREFIX "ACPI DMAR:" 2.38 +#define DEBUG 2.39 + 2.40 +#define MIN_SCOPE_LEN (sizeof(struct acpi_pci_path) + sizeof(struct acpi_dev_scope)) 2.41 + 2.42 +LIST_HEAD(acpi_drhd_units); 2.43 +LIST_HEAD(acpi_rmrr_units); 2.44 +LIST_HEAD(acpi_atsr_units); 2.45 +LIST_HEAD(acpi_ioapic_units); 2.46 + 2.47 +u8 dmar_host_address_width; 2.48 + 2.49 +static int __init acpi_register_drhd_unit(struct acpi_drhd_unit *drhd) 2.50 +{ 2.51 + /* 2.52 + * add INCLUDE_ALL at the tail, so scan the list will find it at 2.53 + * the very end. 2.54 + */ 2.55 + if (drhd->include_all) 2.56 + list_add_tail(&drhd->list, &acpi_drhd_units); 2.57 + else 2.58 + list_add(&drhd->list, &acpi_drhd_units); 2.59 + return 0; 2.60 +} 2.61 + 2.62 +static int __init acpi_register_rmrr_unit(struct acpi_rmrr_unit *rmrr) 2.63 +{ 2.64 + list_add(&rmrr->list, &acpi_rmrr_units); 2.65 + return 0; 2.66 +} 2.67 + 2.68 +static int acpi_pci_device_match(struct pci_dev *devices, int cnt, 2.69 + struct pci_dev *dev) 2.70 +{ 2.71 + int i; 2.72 + 2.73 + for (i = 0; i < cnt; i++) { 2.74 + if ((dev->bus == devices->bus) && 2.75 + (dev->devfn == devices->devfn)) 2.76 + return 1; 2.77 + devices++; 2.78 + } 2.79 + return 0; 2.80 +} 2.81 + 2.82 +static int __init acpi_register_atsr_unit(struct acpi_atsr_unit *atsr) 2.83 +{ 2.84 + /* 2.85 + * add ALL_PORTS at the tail, so scan the list will find it at 2.86 + * the very end. 
2.87 + */ 2.88 + if (atsr->all_ports) 2.89 + list_add_tail(&atsr->list, &acpi_atsr_units); 2.90 + else 2.91 + list_add(&atsr->list, &acpi_atsr_units); 2.92 + return 0; 2.93 +} 2.94 + 2.95 +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev) 2.96 +{ 2.97 + struct acpi_drhd_unit *drhd; 2.98 + struct acpi_drhd_unit *include_all_drhd; 2.99 + 2.100 + include_all_drhd = NULL; 2.101 + list_for_each_entry(drhd, &acpi_drhd_units, list) { 2.102 + if (drhd->include_all) 2.103 + include_all_drhd = drhd; 2.104 + if (acpi_pci_device_match(drhd->devices, 2.105 + drhd->devices_cnt, dev)) 2.106 + { 2.107 + gdprintk(XENLOG_INFO VTDPREFIX, 2.108 + "acpi_find_matched_drhd_unit: drhd->address = %lx\n", 2.109 + drhd->address); 2.110 + return drhd; 2.111 + } 2.112 + } 2.113 + 2.114 + if (include_all_drhd) { 2.115 + gdprintk(XENLOG_INFO VTDPREFIX, 2.116 + "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n", 2.117 + include_all_drhd->address); 2.118 + return include_all_drhd;; 2.119 + } 2.120 + 2.121 + return(NULL); 2.122 +} 2.123 + 2.124 +struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev) 2.125 +{ 2.126 + struct acpi_rmrr_unit *rmrr; 2.127 + 2.128 + list_for_each_entry(rmrr, &acpi_rmrr_units, list) { 2.129 + if (acpi_pci_device_match(rmrr->devices, 2.130 + rmrr->devices_cnt, dev)) 2.131 + goto out; 2.132 + } 2.133 + rmrr = NULL; 2.134 +out: 2.135 + return rmrr; 2.136 +} 2.137 + 2.138 +struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev) 2.139 +{ 2.140 + struct acpi_atsr_unit *atsru; 2.141 + struct acpi_atsr_unit *all_ports_atsru; 2.142 + 2.143 + all_ports_atsru = NULL; 2.144 + list_for_each_entry(atsru, &acpi_atsr_units, list) { 2.145 + if (atsru->all_ports) 2.146 + all_ports_atsru = atsru; 2.147 + if (acpi_pci_device_match(atsru->devices, atsru->devices_cnt, dev)) 2.148 + return atsru; 2.149 + } 2.150 + if (all_ports_atsru) { 2.151 + gdprintk(XENLOG_INFO VTDPREFIX, 2.152 + "acpi_find_matched_atsr_unit: all_ports_atsru\n"); 2.153 + return all_ports_atsru;; 2.154 + } 2.155 + return(NULL); 2.156 +} 2.157 + 2.158 +static int __init acpi_parse_dev_scope(void *start, void *end, int *cnt, 2.159 + struct pci_dev **devices) 2.160 +{ 2.161 + struct acpi_dev_scope *scope; 2.162 + u8 bus, sub_bus, sec_bus; 2.163 + struct acpi_pci_path *path; 2.164 + struct acpi_ioapic_unit *acpi_ioapic_unit = NULL; 2.165 + int count, dev_count=0; 2.166 + struct pci_dev *pdev; 2.167 + u8 dev, func; 2.168 + u32 l; 2.169 + void *tmp; 2.170 + 2.171 + *cnt = 0; 2.172 + tmp = start; 2.173 + while (start < end) { 2.174 + scope = start; 2.175 + if (scope->length < MIN_SCOPE_LEN || 2.176 + (scope->dev_type != ACPI_DEV_ENDPOINT && 2.177 + scope->dev_type != ACPI_DEV_P2PBRIDGE)) { 2.178 + printk(KERN_WARNING PREFIX "Invalid device scope\n"); 2.179 + return -EINVAL; 2.180 + } 2.181 + (*cnt)++; 2.182 + start += scope->length; 2.183 + } 2.184 + 2.185 + start = tmp; 2.186 + while (start < end) { 2.187 + scope = start; 2.188 + path = (struct acpi_pci_path *)(scope + 1); 2.189 + count = (scope->length - sizeof(struct acpi_dev_scope)) 2.190 + /sizeof(struct acpi_pci_path); 2.191 + bus = scope->start_bus; 2.192 + 2.193 + while (--count) { 2.194 + bus = read_pci_config_byte(bus, path->dev, 2.195 + path->fn, PCI_SECONDARY_BUS); 2.196 + path++; 2.197 + } 2.198 + 2.199 + if (scope->dev_type == ACPI_DEV_ENDPOINT) { 2.200 + printk(KERN_WARNING PREFIX 2.201 + "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn); 2.202 + dev_count++; 2.203 + } else if (scope->dev_type == 
ACPI_DEV_P2PBRIDGE) { 2.204 + printk(KERN_WARNING PREFIX 2.205 + "found bridge: bdf = %x:%x:%x\n", bus, path->dev, path->fn); 2.206 + 2.207 + sec_bus = read_pci_config_byte(bus, path->dev, 2.208 + path->fn, PCI_SECONDARY_BUS); 2.209 + sub_bus = read_pci_config_byte(bus, path->dev, 2.210 + path->fn, PCI_SUBORDINATE_BUS); 2.211 + while (sec_bus <= sub_bus) { 2.212 + for (dev = 0; dev < 32; dev++) { 2.213 + for (func = 0; func < 8; func++) { 2.214 + l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID); 2.215 + 2.216 + /* some broken boards return 0 or ~0 if a slot is empty: */ 2.217 + if (l == 0xffffffff || l == 0x00000000 || 2.218 + l == 0x0000ffff || l == 0xffff0000) 2.219 + break; 2.220 + dev_count++; 2.221 + } 2.222 + } 2.223 + sec_bus++; 2.224 + } 2.225 + } else if (scope->dev_type == ACPI_DEV_IOAPIC) { 2.226 + printk(KERN_WARNING PREFIX 2.227 + "found IOAPIC: bdf = %x:%x:%x\n", bus, path->dev, path->fn); 2.228 + dev_count++; 2.229 + } else { 2.230 + printk(KERN_WARNING PREFIX 2.231 + "found MSI HPET: bdf = %x:%x:%x\n", bus, path->dev, path->fn); 2.232 + dev_count++; 2.233 + } 2.234 + 2.235 + start += scope->length; 2.236 + } 2.237 + 2.238 + *cnt = dev_count; 2.239 + *devices = xmalloc_array(struct pci_dev, *cnt); 2.240 + if (!*devices) 2.241 + return -ENOMEM; 2.242 + memset(*devices, 0, sizeof(struct pci_dev) * (*cnt)); 2.243 + 2.244 + pdev = *devices; 2.245 + start = tmp; 2.246 + while (start < end) { 2.247 + scope = start; 2.248 + path = (struct acpi_pci_path *)(scope + 1); 2.249 + count = (scope->length - sizeof(struct acpi_dev_scope)) 2.250 + /sizeof(struct acpi_pci_path); 2.251 + bus = scope->start_bus; 2.252 + 2.253 + while (--count) { 2.254 + bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS); 2.255 + path++; 2.256 + } 2.257 + 2.258 + if (scope->dev_type == ACPI_DEV_ENDPOINT) { 2.259 + printk(KERN_WARNING PREFIX 2.260 + "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn); 2.261 + 2.262 + pdev->bus = bus; 2.263 + pdev->devfn = PCI_DEVFN(path->dev, path->fn); 2.264 + pdev++; 2.265 + } else if (scope->dev_type == ACPI_DEV_P2PBRIDGE) { 2.266 + printk(KERN_WARNING PREFIX 2.267 + "found bridge: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn); 2.268 + 2.269 + sec_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS); 2.270 + sub_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SUBORDINATE_BUS); 2.271 + 2.272 + while (sec_bus <= sub_bus) { 2.273 + for (dev = 0; dev < 32; dev++) { 2.274 + for (func = 0; func < 8; func++) { 2.275 + l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID); 2.276 + 2.277 + /* some broken boards return 0 or ~0 if a slot is empty: */ 2.278 + if (l == 0xffffffff || l == 0x00000000 || 2.279 + l == 0x0000ffff || l == 0xffff0000) 2.280 + break; 2.281 + 2.282 + pdev->bus = sec_bus; 2.283 + pdev->devfn = PCI_DEVFN(dev, func); 2.284 + pdev++; 2.285 + } 2.286 + } 2.287 + sec_bus++; 2.288 + } 2.289 + } else if (scope->dev_type == ACPI_DEV_IOAPIC) { 2.290 + acpi_ioapic_unit = xmalloc(struct acpi_ioapic_unit); 2.291 + acpi_ioapic_unit->apic_id = scope->enum_id; 2.292 + acpi_ioapic_unit->ioapic.bdf.bus = bus; 2.293 + acpi_ioapic_unit->ioapic.bdf.dev = path->dev; 2.294 + acpi_ioapic_unit->ioapic.bdf.func = path->fn; 2.295 + list_add(&acpi_ioapic_unit->list, &acpi_ioapic_units); 2.296 + printk(KERN_WARNING PREFIX 2.297 + "found IOAPIC: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn); 2.298 + } else { 2.299 + printk(KERN_WARNING PREFIX 2.300 + "found MSI HPET: bus = %x dev = %x func = %x\n", bus, 
path->dev, path->fn); 2.301 + } 2.302 + 2.303 + start += scope->length; 2.304 + } 2.305 + 2.306 + return 0; 2.307 +} 2.308 + 2.309 +static int __init 2.310 +acpi_parse_one_drhd(struct acpi_dmar_entry_header *header) 2.311 +{ 2.312 + struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header; 2.313 + struct acpi_drhd_unit *dmaru; 2.314 + int ret = 0; 2.315 + static int include_all; 2.316 + 2.317 + dmaru = xmalloc(struct acpi_drhd_unit); 2.318 + if (!dmaru) 2.319 + return -ENOMEM; 2.320 + memset(dmaru, 0, sizeof(struct acpi_drhd_unit)); 2.321 + 2.322 + dmaru->address = drhd->address; 2.323 + dmaru->include_all = drhd->flags & 1; /* BIT0: INCLUDE_ALL */ 2.324 + printk(KERN_WARNING PREFIX "dmaru->address = %lx\n", dmaru->address); 2.325 + 2.326 + if (!dmaru->include_all) { 2.327 + ret = acpi_parse_dev_scope((void *)(drhd + 1), 2.328 + ((void *)drhd) + header->length, 2.329 + &dmaru->devices_cnt, &dmaru->devices); 2.330 + } 2.331 + else { 2.332 + printk(KERN_WARNING PREFIX "found INCLUDE_ALL\n"); 2.333 + /* Only allow one INCLUDE_ALL */ 2.334 + if (include_all) { 2.335 + printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL " 2.336 + "device scope is allowed\n"); 2.337 + ret = -EINVAL; 2.338 + } 2.339 + include_all = 1; 2.340 + } 2.341 + 2.342 + if (ret) 2.343 + xfree(dmaru); 2.344 + else 2.345 + acpi_register_drhd_unit(dmaru); 2.346 + return ret; 2.347 +} 2.348 + 2.349 +static int __init 2.350 +acpi_parse_one_rmrr(struct acpi_dmar_entry_header *header) 2.351 +{ 2.352 + struct acpi_table_rmrr *rmrr = (struct acpi_table_rmrr *)header; 2.353 + struct acpi_rmrr_unit *rmrru; 2.354 + int ret = 0; 2.355 + 2.356 + rmrru = xmalloc(struct acpi_rmrr_unit); 2.357 + if (!rmrru) 2.358 + return -ENOMEM; 2.359 + memset(rmrru, 0, sizeof(struct acpi_rmrr_unit)); 2.360 + 2.361 +#ifdef VTD_DEBUG 2.362 + gdprintk(XENLOG_INFO VTDPREFIX, 2.363 + "acpi_parse_one_rmrr: base = %lx end = %lx\n", 2.364 + rmrr->base_address, rmrr->end_address); 2.365 +#endif 2.366 + 2.367 + rmrru->base_address = rmrr->base_address; 2.368 + rmrru->end_address = rmrr->end_address; 2.369 + ret = acpi_parse_dev_scope((void *)(rmrr + 1), 2.370 + ((void*)rmrr) + header->length, 2.371 + &rmrru->devices_cnt, &rmrru->devices); 2.372 + 2.373 + if (ret || (rmrru->devices_cnt == 0)) 2.374 + xfree(rmrru); 2.375 + else 2.376 + acpi_register_rmrr_unit(rmrru); 2.377 + return ret; 2.378 +} 2.379 + 2.380 +static int __init 2.381 +acpi_parse_one_atsr(struct acpi_dmar_entry_header *header) 2.382 +{ 2.383 + struct acpi_table_atsr *atsr = (struct acpi_table_atsr *)header; 2.384 + struct acpi_atsr_unit *atsru; 2.385 + int ret = 0; 2.386 + static int all_ports; 2.387 + 2.388 + atsru = xmalloc(struct acpi_atsr_unit); 2.389 + if (!atsru) 2.390 + return -ENOMEM; 2.391 + memset(atsru, 0, sizeof(struct acpi_atsr_unit)); 2.392 + 2.393 + atsru->all_ports = atsr->flags & 1; /* BIT0: ALL_PORTS */ 2.394 + if (!atsru->all_ports) { 2.395 + ret = acpi_parse_dev_scope((void *)(atsr + 1), 2.396 + ((void *)atsr) + header->length, 2.397 + &atsru->devices_cnt, &atsru->devices); 2.398 + } 2.399 + else { 2.400 + printk(KERN_WARNING PREFIX "found ALL_PORTS\n"); 2.401 + /* Only allow one ALL_PORTS */ 2.402 + if (all_ports) { 2.403 + printk(KERN_WARNING PREFIX "Only one ALL_PORTS " 2.404 + "device scope is allowed\n"); 2.405 + ret = -EINVAL; 2.406 + } 2.407 + all_ports = 1; 2.408 + } 2.409 + 2.410 + if (ret) 2.411 + xfree(atsr); 2.412 + else 2.413 + acpi_register_atsr_unit(atsru); 2.414 + return ret; 2.415 +} 2.416 + 2.417 +static void __init 2.418 +acpi_table_print_dmar_entry(struct 
acpi_dmar_entry_header *header) 2.419 +{ 2.420 + struct acpi_table_drhd *drhd; 2.421 + struct acpi_table_rmrr *rmrr; 2.422 + 2.423 + switch (header->type) { 2.424 + case ACPI_DMAR_DRHD: 2.425 + drhd = (struct acpi_table_drhd *)header; 2.426 + break; 2.427 + case ACPI_DMAR_RMRR: 2.428 + rmrr = (struct acpi_table_rmrr *)header; 2.429 + break; 2.430 + } 2.431 +} 2.432 + 2.433 +static int __init 2.434 +acpi_parse_dmar(unsigned long phys_addr, unsigned long size) 2.435 +{ 2.436 + struct acpi_table_dmar *dmar = NULL; 2.437 + struct acpi_dmar_entry_header *entry_header; 2.438 + int ret = 0; 2.439 + 2.440 + if (!phys_addr || !size) 2.441 + return -EINVAL; 2.442 + 2.443 + dmar = (struct acpi_table_dmar *)__acpi_map_table(phys_addr, size); 2.444 + if (!dmar) { 2.445 + printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); 2.446 + return -ENODEV; 2.447 + } 2.448 + 2.449 + if (!dmar->haw) { 2.450 + printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n"); 2.451 + return -EINVAL; 2.452 + } 2.453 + 2.454 + dmar_host_address_width = dmar->haw; 2.455 + printk (KERN_INFO PREFIX "Host address width %d\n", 2.456 + dmar_host_address_width); 2.457 + 2.458 + entry_header = (struct acpi_dmar_entry_header *)(dmar + 1); 2.459 + while (((unsigned long)entry_header) < (((unsigned long)dmar) + size)) { 2.460 + acpi_table_print_dmar_entry(entry_header); 2.461 + 2.462 + switch (entry_header->type) { 2.463 + case ACPI_DMAR_DRHD: 2.464 + printk (KERN_INFO PREFIX "found ACPI_DMAR_DRHD\n"); 2.465 + ret = acpi_parse_one_drhd(entry_header); 2.466 + break; 2.467 + case ACPI_DMAR_RMRR: 2.468 + printk (KERN_INFO PREFIX "found ACPI_DMAR_RMRR\n"); 2.469 + ret = acpi_parse_one_rmrr(entry_header); 2.470 + break; 2.471 + case ACPI_DMAR_ATSR: 2.472 + printk (KERN_INFO PREFIX "found ACPI_DMAR_RMRR\n"); 2.473 + ret = acpi_parse_one_atsr(entry_header); 2.474 + break; 2.475 + default: 2.476 + printk(KERN_WARNING PREFIX "Unknown DMAR structure type\n"); 2.477 + ret = -EINVAL; 2.478 + break; 2.479 + } 2.480 + if (ret) 2.481 + break; 2.482 + 2.483 + entry_header = ((void *)entry_header + entry_header->length); 2.484 + } 2.485 + return ret; 2.486 +} 2.487 + 2.488 +int acpi_dmar_init(void) 2.489 +{ 2.490 + acpi_table_parse(ACPI_DMAR, acpi_parse_dmar); 2.491 + if (list_empty(&acpi_drhd_units)) { 2.492 + printk(KERN_ERR PREFIX "No DMAR devices found\n"); 2.493 + return -ENODEV; 2.494 + } else 2.495 + vtd_enabled = 1; 2.496 + return 0; 2.497 +}
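For orientation, here is a minimal sketch of how the DRHD lookup added in dmar.c is meant to be consumed by the rest of this changeset. The wrapper function name and its domain argument are illustrative assumptions; only acpi_find_matched_drhd_unit(), the drhd->iommu field, and domain_context_mapping() (defined later in intel-iommu.c) come from the patch itself.

```c
/* Illustrative flow only, not part of this changeset. */
static int example_assign_device(struct domain *d, struct pci_dev *pdev)
{
    struct acpi_drhd_unit *drhd;

    /* Falls back to the INCLUDE_ALL unit, which is deliberately kept at
     * the tail of acpi_drhd_units so specific matches are found first. */
    drhd = acpi_find_matched_drhd_unit(pdev);
    if (!drhd)
        return -ENODEV;

    /* Program a context entry on the IOMMU that decodes this device. */
    return domain_context_mapping(d, drhd->iommu, pdev);
}
```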
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.h Fri Sep 14 16:40:49 2007 +0100 3.3 @@ -0,0 +1,90 @@ 3.4 +/* 3.5 + * Copyright (c) 2006, Intel Corporation. 3.6 + * 3.7 + * This program is free software; you can redistribute it and/or modify it 3.8 + * under the terms and conditions of the GNU General Public License, 3.9 + * version 2, as published by the Free Software Foundation. 3.10 + * 3.11 + * This program is distributed in the hope it will be useful, but WITHOUT 3.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 3.13 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 3.14 + * more details. 3.15 + * 3.16 + * You should have received a copy of the GNU General Public License along with 3.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 3.18 + * Place - Suite 330, Boston, MA 02111-1307 USA. 3.19 + * 3.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com> 3.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com> 3.22 + */ 3.23 + 3.24 +#ifndef _DMAR_H_ 3.25 +#define _DMAR_H_ 3.26 + 3.27 +#include <xen/list.h> 3.28 +#include <asm/iommu.h> 3.29 + 3.30 +extern u8 dmar_host_address_width; 3.31 + 3.32 +struct acpi_drhd_unit { 3.33 + struct list_head list; 3.34 + unsigned long address; /* register base address of the unit */ 3.35 + struct pci_dev *devices; /* target devices */ 3.36 + int devices_cnt; 3.37 + u8 include_all:1; 3.38 + struct iommu *iommu; 3.39 +}; 3.40 + 3.41 +struct acpi_rmrr_unit { 3.42 + struct list_head list; 3.43 + unsigned long base_address; 3.44 + unsigned long end_address; 3.45 + struct pci_dev *devices; /* target devices */ 3.46 + int devices_cnt; 3.47 + u8 allow_all:1; 3.48 +}; 3.49 + 3.50 +struct acpi_atsr_unit { 3.51 + struct list_head list; 3.52 + struct pci_dev *devices; /* target devices */ 3.53 + int devices_cnt; 3.54 + u8 all_ports:1; 3.55 +}; 3.56 + 3.57 +#define for_each_iommu(domain, iommu) \ 3.58 + list_for_each_entry(iommu, \ 3.59 + &(domain->arch.hvm_domain.hvm_iommu.iommu_list), list) 3.60 + 3.61 +#define for_each_pdev(domain, pdev) \ 3.62 + list_for_each_entry(pdev, \ 3.63 + &(domain->arch.hvm_domain.hvm_iommu.pdev_list), list) 3.64 + 3.65 +#define for_each_drhd_unit(drhd) \ 3.66 + list_for_each_entry(drhd, &acpi_drhd_units, list) 3.67 +#define for_each_rmrr_device(rmrr, pdev) \ 3.68 + list_for_each_entry(rmrr, &acpi_rmrr_units, list) { \ 3.69 + int _i; \ 3.70 + for (_i = 0; _i < rmrr->devices_cnt; _i++) { \ 3.71 + pdev = &(rmrr->devices[_i]); 3.72 +#define end_for_each_rmrr_device(rmrr, pdev) \ 3.73 + } \ 3.74 + } 3.75 + 3.76 +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev); 3.77 +struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev); 3.78 + 3.79 +/* This one is for interrupt remapping */ 3.80 +struct acpi_ioapic_unit { 3.81 + struct list_head list; 3.82 + int apic_id; 3.83 + union { 3.84 + u16 info; 3.85 + struct { 3.86 + u16 bus: 8, 3.87 + dev: 5, 3.88 + func: 3; 3.89 + }bdf; 3.90 + }ioapic; 3.91 +}; 3.92 + 3.93 +#endif // _DMAR_H_
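A usage sketch for the paired iteration macros declared in dmar.h may help: for_each_rmrr_device() opens two nested loops (over RMRR units and over each unit's target devices) and leaves the braces open, and end_for_each_rmrr_device() supplies the matching closing braces, so the two must always appear together. The body below and the handle_rmrr_mapping() helper are hypothetical placeholders.

```c
/* Illustrative only: visiting every (RMRR region, target device) pair. */
struct acpi_rmrr_unit *rmrr;
struct pci_dev *pdev;

for_each_rmrr_device(rmrr, pdev)
    /* pdev points at rmrr->devices[_i] for this iteration. */
    handle_rmrr_mapping(d, rmrr, pdev);   /* hypothetical helper */
end_for_each_rmrr_device(rmrr, pdev)
```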
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c Fri Sep 14 16:40:49 2007 +0100 4.3 @@ -0,0 +1,1927 @@ 4.4 +/* 4.5 + * Copyright (c) 2006, Intel Corporation. 4.6 + * 4.7 + * This program is free software; you can redistribute it and/or modify it 4.8 + * under the terms and conditions of the GNU General Public License, 4.9 + * version 2, as published by the Free Software Foundation. 4.10 + * 4.11 + * This program is distributed in the hope it will be useful, but WITHOUT 4.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 4.13 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 4.14 + * more details. 4.15 + * 4.16 + * You should have received a copy of the GNU General Public License along with 4.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 4.18 + * Place - Suite 330, Boston, MA 02111-1307 USA. 4.19 + * 4.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com> 4.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com> 4.22 + * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen 4.23 + */ 4.24 + 4.25 +#include <xen/init.h> 4.26 +#include <xen/irq.h> 4.27 +#include <xen/spinlock.h> 4.28 +#include <xen/sched.h> 4.29 +#include <xen/xmalloc.h> 4.30 +#include <xen/domain_page.h> 4.31 +#include <asm/delay.h> 4.32 +#include <asm/string.h> 4.33 +#include <asm/iommu.h> 4.34 +#include <asm/hvm/vmx/intel-iommu.h> 4.35 +#include "dmar.h" 4.36 +#include "pci-direct.h" 4.37 +#include "pci_regs.h" 4.38 +#include "msi.h" 4.39 + 4.40 +extern void print_iommu_regs(struct acpi_drhd_unit *drhd); 4.41 +extern void print_vtd_entries(struct domain *d, int bus, int devfn, 4.42 + unsigned long gmfn); 4.43 +extern void (*interrupt[])(void); 4.44 + 4.45 +#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */ 4.46 + 4.47 +#define time_after(a,b) \ 4.48 + (typecheck(unsigned long, a) && \ 4.49 + typecheck(unsigned long, b) && \ 4.50 + ((long)(b) - (long)(a) < 0)) 4.51 + 4.52 +unsigned int x86_clflush_size; 4.53 +void clflush_cache_range(void *adr, int size) 4.54 +{ 4.55 + int i; 4.56 + for (i = 0; i < size; i += x86_clflush_size) 4.57 + clflush(adr + i); 4.58 +} 4.59 + 4.60 +static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size) 4.61 +{ 4.62 + if (!ecap_coherent(iommu->ecap)) 4.63 + clflush_cache_range(addr, size); 4.64 +} 4.65 + 4.66 +#define iommu_flush_cache_entry(iommu, addr) \ 4.67 + __iommu_flush_cache(iommu, addr, 8) 4.68 +#define iommu_flush_cache_page(iommu, addr) \ 4.69 + __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K) 4.70 + 4.71 +int nr_iommus; 4.72 +/* context entry handling */ 4.73 +static struct context_entry * device_to_context_entry(struct iommu *iommu, 4.74 + u8 bus, u8 devfn) 4.75 +{ 4.76 + struct root_entry *root; 4.77 + struct context_entry *context; 4.78 + unsigned long phy_addr; 4.79 + unsigned long flags; 4.80 + 4.81 + spin_lock_irqsave(&iommu->lock, flags); 4.82 + root = &iommu->root_entry[bus]; 4.83 + if (!root_present(*root)) { 4.84 + phy_addr = (unsigned long) alloc_xenheap_page(); 4.85 + if (!phy_addr) { 4.86 + spin_unlock_irqrestore(&iommu->lock, flags); 4.87 + return NULL; 4.88 + } 4.89 + memset((void *) phy_addr, 0, PAGE_SIZE); 4.90 + iommu_flush_cache_page(iommu, (void *)phy_addr); 4.91 + phy_addr = virt_to_maddr((void *)phy_addr); 4.92 + set_root_value(*root, phy_addr); 4.93 + set_root_present(*root); 4.94 + iommu_flush_cache_entry(iommu, root); 4.95 + } 4.96 + phy_addr = (unsigned long) get_context_addr(*root); 4.97 + context = 
(struct context_entry *)maddr_to_virt(phy_addr); 4.98 + spin_unlock_irqrestore(&iommu->lock, flags); 4.99 + return &context[devfn]; 4.100 +} 4.101 + 4.102 +static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn) 4.103 +{ 4.104 + struct root_entry *root; 4.105 + struct context_entry *context; 4.106 + unsigned long phy_addr; 4.107 + int ret; 4.108 + unsigned long flags; 4.109 + 4.110 + spin_lock_irqsave(&iommu->lock, flags); 4.111 + root = &iommu->root_entry[bus]; 4.112 + if (!root_present(*root)) { 4.113 + ret = 0; 4.114 + goto out; 4.115 + } 4.116 + phy_addr = get_context_addr(*root); 4.117 + context = (struct context_entry *)maddr_to_virt(phy_addr); 4.118 + ret = context_present(context[devfn]); 4.119 +out: 4.120 + spin_unlock_irqrestore(&iommu->lock, flags); 4.121 + return ret; 4.122 +} 4.123 + 4.124 +/* page table handling */ 4.125 +#define LEVEL_STRIDE (9) 4.126 +#define LEVEL_MASK ((1 << LEVEL_STRIDE) - 1) 4.127 +#define agaw_to_level(val) ((val) + 2) 4.128 +#define agaw_to_width(val) (30 + val * LEVEL_STRIDE) 4.129 +#define width_to_agaw(w) ((w - 30)/LEVEL_STRIDE) 4.130 +#define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) 4.131 +#define address_level_offset(addr, level) \ 4.132 + ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) 4.133 +#define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) 4.134 +#define level_size(l) (1 << level_to_offset_bits(l)) 4.135 +#define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) 4.136 +static struct dma_pte * addr_to_dma_pte(struct domain *domain, u64 addr) 4.137 +{ 4.138 + struct hvm_iommu *hd = domain_hvm_iommu(domain); 4.139 + struct acpi_drhd_unit *drhd; 4.140 + struct iommu *iommu; 4.141 + int addr_width = agaw_to_width(hd->agaw); 4.142 + struct dma_pte *parent, *pte = NULL, *pgd; 4.143 + int level = agaw_to_level(hd->agaw); 4.144 + int offset; 4.145 + unsigned long flags; 4.146 + 4.147 + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); 4.148 + iommu = drhd->iommu; 4.149 + 4.150 + addr &= (((u64)1) << addr_width) - 1; 4.151 + spin_lock_irqsave(&hd->mapping_lock, flags); 4.152 + if (!hd->pgd) { 4.153 + pgd = (struct dma_pte *)alloc_xenheap_page(); 4.154 + if (!pgd && !hd->pgd) { 4.155 + spin_unlock_irqrestore(&hd->mapping_lock, flags); 4.156 + return NULL; 4.157 + } 4.158 + memset((u8*)pgd, 0, PAGE_SIZE); 4.159 + if (!hd->pgd) 4.160 + hd->pgd = pgd; 4.161 + else /* somebody is fast */ 4.162 + free_xenheap_page((void *) pgd); 4.163 + } 4.164 + parent = hd->pgd; 4.165 + while (level > 0) { 4.166 + u8 *tmp; 4.167 + offset = address_level_offset(addr, level); 4.168 + pte = &parent[offset]; 4.169 + if (level == 1) 4.170 + break; 4.171 + if (dma_pte_addr(*pte) == 0) { 4.172 + tmp = alloc_xenheap_page(); 4.173 + if (tmp == NULL) 4.174 + gdprintk(XENLOG_ERR VTDPREFIX, 4.175 + "addr_to_dma_pte: tmp == NULL\n"); 4.176 + 4.177 + memset(tmp, 0, PAGE_SIZE); 4.178 + iommu_flush_cache_page(iommu, tmp); 4.179 + 4.180 + if (!tmp && dma_pte_addr(*pte) == 0) { 4.181 + spin_unlock_irqrestore(&hd->mapping_lock, flags); 4.182 + return NULL; 4.183 + } 4.184 + if (dma_pte_addr(*pte) == 0) { 4.185 + dma_set_pte_addr(*pte, 4.186 + virt_to_maddr(tmp)); 4.187 + /* 4.188 + * high level table always sets r/w, last level 4.189 + * page table control read/write 4.190 + */ 4.191 + dma_set_pte_readable(*pte); 4.192 + dma_set_pte_writable(*pte); 4.193 + iommu_flush_cache_entry(iommu, pte); 4.194 + } else /* somebody is fast */ 4.195 + free_xenheap_page(tmp); 4.196 + } 4.197 + parent = 
maddr_to_virt(dma_pte_addr(*pte)); 4.198 + level--; 4.199 + } 4.200 + spin_unlock_irqrestore(&hd->mapping_lock, flags); 4.201 + return pte; 4.202 +} 4.203 + 4.204 +/* return address's pte at specific level */ 4.205 +static struct dma_pte *dma_addr_level_pte(struct domain *domain, u64 addr, 4.206 + int level) 4.207 +{ 4.208 + struct hvm_iommu *hd = domain_hvm_iommu(domain); 4.209 + struct dma_pte *parent, *pte = NULL; 4.210 + int total = agaw_to_level(hd->agaw); 4.211 + int offset; 4.212 + 4.213 + parent = hd->pgd; 4.214 + while (level <= total) { 4.215 + offset = address_level_offset(addr, total); 4.216 + pte = &parent[offset]; 4.217 + if (level == total) 4.218 + return pte; 4.219 + 4.220 + if (dma_pte_addr(*pte) == 0) 4.221 + break; 4.222 + parent = maddr_to_virt(dma_pte_addr(*pte)); 4.223 + total--; 4.224 + } 4.225 + return NULL; 4.226 +} 4.227 + 4.228 +static void iommu_flush_write_buffer(struct iommu *iommu) 4.229 +{ 4.230 + u32 val; 4.231 + unsigned long flag; 4.232 + unsigned long start_time; 4.233 + 4.234 + if (!cap_rwbf(iommu->cap)) 4.235 + return; 4.236 + val = iommu->gcmd | DMA_GCMD_WBF; 4.237 + 4.238 + spin_lock_irqsave(&iommu->register_lock, flag); 4.239 + dmar_writel(iommu->reg, DMAR_GCMD_REG, val); 4.240 + 4.241 + /* Make sure hardware complete it */ 4.242 + start_time = jiffies; 4.243 + while (1) { 4.244 + val = dmar_readl(iommu->reg, DMAR_GSTS_REG); 4.245 + if (!(val & DMA_GSTS_WBFS)) 4.246 + break; 4.247 + if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) 4.248 + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); 4.249 + cpu_relax(); 4.250 + } 4.251 + spin_unlock_irqrestore(&iommu->register_lock, flag); 4.252 +} 4.253 + 4.254 +/* return value determine if we need a write buffer flush */ 4.255 +static int __iommu_flush_context(struct iommu *iommu, 4.256 + u16 did, u16 source_id, u8 function_mask, u64 type, 4.257 + int non_present_entry_flush) 4.258 +{ 4.259 + u64 val = 0; 4.260 + unsigned long flag; 4.261 + unsigned long start_time; 4.262 + 4.263 + /* 4.264 + * In the non-present entry flush case, if hardware doesn't cache 4.265 + * non-present entry we do nothing and if hardware cache non-present 4.266 + * entry, we flush entries of domain 0 (the domain id is used to cache 4.267 + * any non-present entries) 4.268 + */ 4.269 + if (non_present_entry_flush) { 4.270 + if (!cap_caching_mode(iommu->cap)) 4.271 + return 1; 4.272 + else 4.273 + did = 0; 4.274 + } 4.275 + 4.276 + /* use register invalidation */ 4.277 + switch (type) 4.278 + { 4.279 + case DMA_CCMD_GLOBAL_INVL: 4.280 + val = DMA_CCMD_GLOBAL_INVL; 4.281 + break; 4.282 + case DMA_CCMD_DOMAIN_INVL: 4.283 + val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); 4.284 + break; 4.285 + case DMA_CCMD_DEVICE_INVL: 4.286 + val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) 4.287 + |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask); 4.288 + break; 4.289 + default: 4.290 + BUG(); 4.291 + } 4.292 + val |= DMA_CCMD_ICC; 4.293 + 4.294 + spin_lock_irqsave(&iommu->register_lock, flag); 4.295 + dmar_writeq(iommu->reg, DMAR_CCMD_REG, val); 4.296 + 4.297 + /* Make sure hardware complete it */ 4.298 + start_time = jiffies; 4.299 + while (1) { 4.300 + val = dmar_readq(iommu->reg, DMAR_CCMD_REG); 4.301 + if (!(val & DMA_CCMD_ICC)) 4.302 + break; 4.303 + if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) 4.304 + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); 4.305 + cpu_relax(); 4.306 + } 4.307 + spin_unlock_irqrestore(&iommu->register_lock, flag); 4.308 + /* flush context entry will implictly 
flush write buffer */ 4.309 + return 0; 4.310 +} 4.311 + 4.312 +static int inline iommu_flush_context_global(struct iommu *iommu, 4.313 + int non_present_entry_flush) 4.314 +{ 4.315 + return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, 4.316 + non_present_entry_flush); 4.317 +} 4.318 + 4.319 +static int inline iommu_flush_context_domain(struct iommu *iommu, u16 did, 4.320 + int non_present_entry_flush) 4.321 +{ 4.322 + return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL, 4.323 + non_present_entry_flush); 4.324 +} 4.325 + 4.326 +static int inline iommu_flush_context_device(struct iommu *iommu, 4.327 + u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush) 4.328 +{ 4.329 + return __iommu_flush_context(iommu, did, source_id, function_mask, 4.330 + DMA_CCMD_DEVICE_INVL, non_present_entry_flush); 4.331 +} 4.332 + 4.333 +/* return value determine if we need a write buffer flush */ 4.334 +static int __iommu_flush_iotlb(struct iommu *iommu, u16 did, 4.335 + u64 addr, unsigned int size_order, u64 type, 4.336 + int non_present_entry_flush) 4.337 +{ 4.338 + int tlb_offset = ecap_iotlb_offset(iommu->ecap); 4.339 + u64 val = 0, val_iva = 0; 4.340 + unsigned long flag; 4.341 + unsigned long start_time; 4.342 + 4.343 + /* 4.344 + * In the non-present entry flush case, if hardware doesn't cache 4.345 + * non-present entry we do nothing and if hardware cache non-present 4.346 + * entry, we flush entries of domain 0 (the domain id is used to cache 4.347 + * any non-present entries) 4.348 + */ 4.349 + if (non_present_entry_flush) { 4.350 + if (!cap_caching_mode(iommu->cap)) 4.351 + return 1; 4.352 + else 4.353 + did = 0; 4.354 + } 4.355 + 4.356 + /* use register invalidation */ 4.357 + switch (type) { 4.358 + case DMA_TLB_GLOBAL_FLUSH: 4.359 + /* global flush doesn't need set IVA_REG */ 4.360 + val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; 4.361 + break; 4.362 + case DMA_TLB_DSI_FLUSH: 4.363 + val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 4.364 + break; 4.365 + case DMA_TLB_PSI_FLUSH: 4.366 + val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 4.367 + /* Note: always flush non-leaf currently */ 4.368 + val_iva = size_order | addr; 4.369 + break; 4.370 + default: 4.371 + BUG(); 4.372 + } 4.373 + /* Note: set drain read/write */ 4.374 +#if 0 4.375 + /* 4.376 + * This is probably to be super secure.. Looks like we can 4.377 + * ignore it without any impact. 
4.378 + */ 4.379 + if (cap_read_drain(iommu->cap)) 4.380 + val |= DMA_TLB_READ_DRAIN; 4.381 +#endif 4.382 + if (cap_write_drain(iommu->cap)) 4.383 + val |= DMA_TLB_WRITE_DRAIN; 4.384 + 4.385 + spin_lock_irqsave(&iommu->register_lock, flag); 4.386 + /* Note: Only uses first TLB reg currently */ 4.387 + if (val_iva) 4.388 + dmar_writeq(iommu->reg, tlb_offset, val_iva); 4.389 + dmar_writeq(iommu->reg, tlb_offset + 8, val); 4.390 + 4.391 + /* Make sure hardware complete it */ 4.392 + start_time = jiffies; 4.393 + while (1) { 4.394 + val = dmar_readq(iommu->reg, tlb_offset + 8); 4.395 + if (!(val & DMA_TLB_IVT)) 4.396 + break; 4.397 + if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) 4.398 + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); 4.399 + cpu_relax(); 4.400 + } 4.401 + spin_unlock_irqrestore(&iommu->register_lock, flag); 4.402 + 4.403 + /* check IOTLB invalidation granularity */ 4.404 + if (DMA_TLB_IAIG(val) == 0) 4.405 + printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n"); 4.406 + if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) 4.407 + printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n", 4.408 + (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val)); 4.409 + /* flush context entry will implictly flush write buffer */ 4.410 + return 0; 4.411 +} 4.412 + 4.413 +static int inline iommu_flush_iotlb_global(struct iommu *iommu, 4.414 + int non_present_entry_flush) 4.415 +{ 4.416 + return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 4.417 + non_present_entry_flush); 4.418 +} 4.419 + 4.420 +static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did, 4.421 + int non_present_entry_flush) 4.422 +{ 4.423 + return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, 4.424 + non_present_entry_flush); 4.425 +} 4.426 + 4.427 +static int inline get_alignment(u64 base, unsigned int size) 4.428 +{ 4.429 + int t = 0; 4.430 + u64 end; 4.431 + 4.432 + end = base + size - 1; 4.433 + while (base != end) { 4.434 + t++; 4.435 + base >>= 1; 4.436 + end >>= 1; 4.437 + } 4.438 + return t; 4.439 +} 4.440 + 4.441 +static int inline iommu_flush_iotlb_psi(struct iommu *iommu, u16 did, 4.442 + u64 addr, unsigned int pages, int non_present_entry_flush) 4.443 +{ 4.444 + unsigned int align; 4.445 + 4.446 + BUG_ON(addr & (~PAGE_MASK_4K)); 4.447 + BUG_ON(pages == 0); 4.448 + 4.449 + /* Fallback to domain selective flush if no PSI support */ 4.450 + if (!cap_pgsel_inv(iommu->cap)) 4.451 + return iommu_flush_iotlb_dsi(iommu, did, 4.452 + non_present_entry_flush); 4.453 + 4.454 + /* 4.455 + * PSI requires page size is 2 ^ x, and the base address is naturally 4.456 + * aligned to the size 4.457 + */ 4.458 + align = get_alignment(addr >> PAGE_SHIFT_4K, pages); 4.459 + /* Fallback to domain selective flush if size is too big */ 4.460 + if (align > cap_max_amask_val(iommu->cap)) 4.461 + return iommu_flush_iotlb_dsi(iommu, did, 4.462 + non_present_entry_flush); 4.463 + 4.464 + addr >>= PAGE_SHIFT_4K + align; 4.465 + addr <<= PAGE_SHIFT_4K + align; 4.466 + 4.467 + return __iommu_flush_iotlb(iommu, did, addr, align, 4.468 + DMA_TLB_PSI_FLUSH, non_present_entry_flush); 4.469 +} 4.470 + 4.471 +void flush_all(void) 4.472 +{ 4.473 + struct acpi_drhd_unit *drhd; 4.474 + struct iommu *iommu; 4.475 + int i = 0; 4.476 + 4.477 + wbinvd(); 4.478 + for_each_drhd_unit(drhd) { 4.479 + iommu = drhd->iommu; 4.480 + iommu_flush_context_global(iommu, 0); 4.481 + iommu_flush_iotlb_global(iommu, 0); 4.482 + i++; 4.483 + } 4.484 +} 4.485 + 4.486 +/* clear one page's page table */ 4.487 
+static void dma_pte_clear_one(struct domain *domain, u64 addr) 4.488 +{ 4.489 + struct acpi_drhd_unit *drhd; 4.490 + struct iommu *iommu; 4.491 + struct dma_pte *pte = NULL; 4.492 + 4.493 + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); 4.494 + 4.495 + /* get last level pte */ 4.496 + pte = dma_addr_level_pte(domain, addr, 1); 4.497 + 4.498 + if (pte) { 4.499 + dma_clear_pte(*pte); 4.500 + iommu_flush_cache_entry(drhd->iommu, pte); 4.501 + 4.502 + for_each_drhd_unit(drhd) { 4.503 + iommu = drhd->iommu; 4.504 + if (cap_caching_mode(iommu->cap)) 4.505 + { 4.506 + iommu_flush_iotlb_psi(iommu, domain->domain_id, addr, 1, 0); 4.507 + } 4.508 + else if (cap_rwbf(iommu->cap)) 4.509 + iommu_flush_write_buffer(iommu); 4.510 + } 4.511 + } 4.512 +} 4.513 + 4.514 +/* clear last level pte, a tlb flush should be followed */ 4.515 +static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end) 4.516 +{ 4.517 + struct hvm_iommu *hd = domain_hvm_iommu(domain); 4.518 + int addr_width = agaw_to_width(hd->agaw); 4.519 + 4.520 + start &= (((u64)1) << addr_width) - 1; 4.521 + end &= (((u64)1) << addr_width) - 1; 4.522 + /* in case it's partial page */ 4.523 + start = PAGE_ALIGN_4K(start); 4.524 + end &= PAGE_MASK_4K; 4.525 + 4.526 + /* we don't need lock here, nobody else touches the iova range */ 4.527 + while (start < end) { 4.528 + dma_pte_clear_one(domain, start); 4.529 + start += PAGE_SIZE_4K; 4.530 + } 4.531 +} 4.532 + 4.533 +/* free page table pages. last level pte should already be cleared */ 4.534 +// static void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end) 4.535 +void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end) 4.536 +{ 4.537 + struct acpi_drhd_unit *drhd; 4.538 + struct hvm_iommu *hd = domain_hvm_iommu(domain); 4.539 + struct iommu *iommu; 4.540 + int addr_width = agaw_to_width(hd->agaw); 4.541 + struct dma_pte *pte; 4.542 + int total = agaw_to_level(hd->agaw); 4.543 + int level; 4.544 + u32 tmp; 4.545 + 4.546 + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); 4.547 + iommu = drhd->iommu; 4.548 + 4.549 + start &= (((u64)1) << addr_width) - 1; 4.550 + end &= (((u64)1) << addr_width) - 1; 4.551 + 4.552 + /* we don't need lock here, nobody else touches the iova range */ 4.553 + level = 2; 4.554 + while (level <= total) { 4.555 + tmp = align_to_level(start, level); 4.556 + if (tmp >= end || (tmp + level_size(level) > end)) 4.557 + return; 4.558 + 4.559 + while (tmp < end) { 4.560 + pte = dma_addr_level_pte(domain, tmp, level); 4.561 + if (pte) { 4.562 + free_xenheap_page((void *) maddr_to_virt(dma_pte_addr(*pte))); 4.563 + dma_clear_pte(*pte); 4.564 + iommu_flush_cache_entry(iommu, pte); 4.565 + } 4.566 + tmp += level_size(level); 4.567 + } 4.568 + level++; 4.569 + } 4.570 + /* free pgd */ 4.571 + if (start == 0 && end == ((((u64)1) << addr_width) - 1)) { 4.572 + free_xenheap_page((void *)hd->pgd); 4.573 + hd->pgd = NULL; 4.574 + } 4.575 +} 4.576 + 4.577 +/* iommu handling */ 4.578 +static int iommu_set_root_entry(struct iommu *iommu) 4.579 +{ 4.580 + void *addr; 4.581 + u32 cmd, sts; 4.582 + struct root_entry *root; 4.583 + unsigned long flags; 4.584 + 4.585 + if (iommu == NULL) 4.586 + gdprintk(XENLOG_ERR VTDPREFIX, 4.587 + "iommu_set_root_entry: iommu == NULL\n"); 4.588 + 4.589 + spin_lock_irqsave(&iommu->lock, flags); 4.590 + if (!iommu->root_entry) { 4.591 + spin_unlock_irqrestore(&iommu->lock, flags); 4.592 + root = (struct root_entry *)alloc_xenheap_page(); 4.593 + memset((u8*)root, 0, PAGE_SIZE); 4.594 + 
iommu_flush_cache_page(iommu, root); 4.595 + spin_lock_irqsave(&iommu->lock, flags); 4.596 + 4.597 + if (!root && !iommu->root_entry) { 4.598 + spin_unlock_irqrestore(&iommu->lock, flags); 4.599 + return -ENOMEM; 4.600 + } 4.601 + 4.602 + if (!iommu->root_entry) 4.603 + iommu->root_entry = root; 4.604 + else /* somebody is fast */ 4.605 + free_xenheap_page((void *)root); 4.606 + } 4.607 + spin_unlock_irqrestore(&iommu->lock, flags); 4.608 + 4.609 + addr = iommu->root_entry; 4.610 + spin_lock_irqsave(&iommu->register_lock, flags); 4.611 + dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr)); 4.612 + cmd = iommu->gcmd | DMA_GCMD_SRTP; 4.613 + dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd); 4.614 + 4.615 + /* Make sure hardware complete it */ 4.616 + while (1) { 4.617 + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); 4.618 + if (sts & DMA_GSTS_RTPS) 4.619 + break; 4.620 + cpu_relax(); 4.621 + } 4.622 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.623 + 4.624 + return 0; 4.625 +} 4.626 + 4.627 +static int iommu_enable_translation(struct iommu *iommu) 4.628 +{ 4.629 + u32 sts; 4.630 + unsigned long flags; 4.631 + 4.632 + dprintk(XENLOG_INFO VTDPREFIX, 4.633 + "iommu_enable_translation: enabling vt-d translation\n"); 4.634 + spin_lock_irqsave(&iommu->register_lock, flags); 4.635 + iommu->gcmd |= DMA_GCMD_TE; 4.636 + dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd); 4.637 + /* Make sure hardware complete it */ 4.638 + while (1) { 4.639 + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); 4.640 + if (sts & DMA_GSTS_TES) { 4.641 + break; 4.642 + } 4.643 + cpu_relax(); 4.644 + } 4.645 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.646 + return 0; 4.647 +} 4.648 + 4.649 +int iommu_disable_translation(struct iommu *iommu) 4.650 +{ 4.651 + u32 sts; 4.652 + unsigned long flags; 4.653 + 4.654 + spin_lock_irqsave(&iommu->register_lock, flags); 4.655 + iommu->gcmd &= ~ DMA_GCMD_TE; 4.656 + dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd); 4.657 + 4.658 + /* Make sure hardware complete it */ 4.659 + while(1) { 4.660 + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); 4.661 + if (!(sts & DMA_GSTS_TES)) 4.662 + break; 4.663 + cpu_relax(); 4.664 + } 4.665 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.666 + return 0; 4.667 +} 4.668 + 4.669 +static struct iommu *vector_to_iommu[NR_VECTORS]; 4.670 +static int iommu_page_fault_do_one(struct iommu *iommu, int type, 4.671 + u8 fault_reason, u16 source_id, u32 addr) 4.672 +{ 4.673 + dprintk(XENLOG_WARNING VTDPREFIX, 4.674 + "iommu_page_fault:%s: DEVICE %x:%x.%x addr %x REASON %x\n", 4.675 + (type ? 
"DMA Read" : "DMA Write"), 4.676 + (source_id >> 8), PCI_SLOT(source_id & 0xFF), 4.677 + PCI_FUNC(source_id & 0xFF), addr, fault_reason); 4.678 + 4.679 + print_vtd_entries(current->domain, (source_id >> 8),(source_id & 0xff), 4.680 + (addr >> PAGE_SHIFT)); 4.681 + return 0; 4.682 +} 4.683 + 4.684 +#define PRIMARY_FAULT_REG_LEN (16) 4.685 +static void iommu_page_fault(int vector, void *dev_id, 4.686 + struct cpu_user_regs *regs) 4.687 +{ 4.688 + struct iommu *iommu = dev_id; 4.689 + int reg, fault_index; 4.690 + u32 fault_status; 4.691 + unsigned long flags; 4.692 + 4.693 + dprintk(XENLOG_WARNING VTDPREFIX, 4.694 + "iommu_page_fault: iommu->reg = %p\n", iommu->reg); 4.695 + 4.696 + spin_lock_irqsave(&iommu->register_lock, flags); 4.697 + fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG); 4.698 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.699 + 4.700 + /* FIXME: ignore advanced fault log */ 4.701 + if (!(fault_status & DMA_FSTS_PPF)) 4.702 + return; 4.703 + fault_index = dma_fsts_fault_record_index(fault_status); 4.704 + reg = cap_fault_reg_offset(iommu->cap); 4.705 + while (1) { 4.706 + u8 fault_reason; 4.707 + u16 source_id; 4.708 + u32 guest_addr; 4.709 + int type; 4.710 + u32 data; 4.711 + 4.712 + /* highest 32 bits */ 4.713 + spin_lock_irqsave(&iommu->register_lock, flags); 4.714 + data = dmar_readl(iommu->reg, reg + 4.715 + fault_index * PRIMARY_FAULT_REG_LEN + 12); 4.716 + if (!(data & DMA_FRCD_F)) { 4.717 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.718 + break; 4.719 + } 4.720 + 4.721 + fault_reason = dma_frcd_fault_reason(data); 4.722 + type = dma_frcd_type(data); 4.723 + 4.724 + data = dmar_readl(iommu->reg, reg + 4.725 + fault_index * PRIMARY_FAULT_REG_LEN + 8); 4.726 + source_id = dma_frcd_source_id(data); 4.727 + 4.728 + guest_addr = dmar_readq(iommu->reg, reg + 4.729 + fault_index * PRIMARY_FAULT_REG_LEN); 4.730 + guest_addr = dma_frcd_page_addr(guest_addr); 4.731 + /* clear the fault */ 4.732 + dmar_writel(iommu->reg, reg + 4.733 + fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F); 4.734 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.735 + 4.736 + iommu_page_fault_do_one(iommu, type, fault_reason, 4.737 + source_id, guest_addr); 4.738 + 4.739 + fault_index++; 4.740 + if (fault_index > cap_num_fault_regs(iommu->cap)) 4.741 + fault_index = 0; 4.742 + } 4.743 + /* clear primary fault overflow */ 4.744 + if (fault_status & DMA_FSTS_PFO) { 4.745 + spin_lock_irqsave(&iommu->register_lock, flags); 4.746 + dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO); 4.747 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.748 + } 4.749 + return; 4.750 +} 4.751 + 4.752 +static void dma_msi_unmask(unsigned int vector) 4.753 +{ 4.754 + struct iommu *iommu = vector_to_iommu[vector]; 4.755 + unsigned long flags; 4.756 + 4.757 + /* unmask it */ 4.758 + spin_lock_irqsave(&iommu->register_lock, flags); 4.759 + dmar_writel(iommu->reg, DMAR_FECTL_REG, 0); 4.760 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.761 +} 4.762 + 4.763 +static void dma_msi_mask(unsigned int vector) 4.764 +{ 4.765 + unsigned long flags; 4.766 + struct iommu *iommu = vector_to_iommu[vector]; 4.767 + 4.768 + /* mask it */ 4.769 + spin_lock_irqsave(&iommu->register_lock, flags); 4.770 + dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM); 4.771 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.772 +} 4.773 + 4.774 +static unsigned int dma_msi_startup(unsigned int vector) 4.775 +{ 4.776 + dma_msi_unmask(vector); 4.777 + return 0; 4.778 +} 4.779 + 4.780 
+static void dma_msi_end(unsigned int vector) 4.781 +{ 4.782 + dma_msi_unmask(vector); 4.783 + ack_APIC_irq(); 4.784 +} 4.785 + 4.786 +static void dma_msi_data_init(struct iommu *iommu, int vector) 4.787 +{ 4.788 + u32 msi_data = 0; 4.789 + unsigned long flags; 4.790 + 4.791 + /* Fixed, edge, assert mode. Follow MSI setting */ 4.792 + msi_data |= vector & 0xff; 4.793 + msi_data |= 1 << 14; 4.794 + 4.795 + spin_lock_irqsave(&iommu->register_lock, flags); 4.796 + dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data); 4.797 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.798 +} 4.799 + 4.800 +static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu) 4.801 +{ 4.802 + u64 msi_address; 4.803 + unsigned long flags; 4.804 + 4.805 + /* Physical, dedicated cpu. Follow MSI setting */ 4.806 + msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8)); 4.807 + msi_address |= MSI_PHYSICAL_MODE << 2; 4.808 + msi_address |= MSI_REDIRECTION_HINT_MODE << 3; 4.809 + msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT; 4.810 + 4.811 + spin_lock_irqsave(&iommu->register_lock, flags); 4.812 + dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address); 4.813 + dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32)); 4.814 + spin_unlock_irqrestore(&iommu->register_lock, flags); 4.815 +} 4.816 + 4.817 +static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest) 4.818 +{ 4.819 + struct iommu *iommu = vector_to_iommu[vector]; 4.820 + dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest))); 4.821 +} 4.822 + 4.823 +static struct hw_interrupt_type dma_msi_type = { 4.824 + .typename = "DMA_MSI", 4.825 + .startup = dma_msi_startup, 4.826 + .shutdown = dma_msi_mask, 4.827 + .enable = dma_msi_unmask, 4.828 + .disable = dma_msi_mask, 4.829 + .ack = dma_msi_mask, 4.830 + .end = dma_msi_end, 4.831 + .set_affinity = dma_msi_set_affinity, 4.832 +}; 4.833 + 4.834 +int iommu_set_interrupt(struct iommu *iommu) 4.835 +{ 4.836 + int vector, ret; 4.837 + unsigned long flags; 4.838 + 4.839 + vector = assign_irq_vector(AUTO_ASSIGN); 4.840 + vector_to_iommu[vector] = iommu; 4.841 + 4.842 + /* VT-d fault is a MSI, make irq == vector */ 4.843 + irq_vector[vector] = vector; 4.844 + vector_irq[vector] = vector; 4.845 + 4.846 + if (!vector) { 4.847 + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n"); 4.848 + return -EINVAL; 4.849 + } 4.850 + 4.851 + spin_lock_irqsave(&irq_desc[vector].lock, flags); 4.852 + irq_desc[vector].handler = &dma_msi_type; 4.853 + spin_unlock_irqrestore(&irq_desc[vector].lock, flags); 4.854 + set_intr_gate(vector, interrupt[vector]); 4.855 + ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu); 4.856 + if (ret) 4.857 + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n"); 4.858 + return vector; 4.859 +} 4.860 + 4.861 +struct iommu *iommu_alloc(void *hw_data) 4.862 +{ 4.863 + struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data; 4.864 + struct iommu *iommu; 4.865 + 4.866 + if (nr_iommus > MAX_IOMMUS) { 4.867 + gdprintk(XENLOG_ERR VTDPREFIX, 4.868 + "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus); 4.869 + return NULL; 4.870 + } 4.871 + 4.872 + iommu = xmalloc(struct iommu); 4.873 + if (!iommu) 4.874 + return NULL; 4.875 + memset(iommu, 0, sizeof(struct iommu)); 4.876 + 4.877 + set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address); 4.878 + iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus); 4.879 + dprintk(XENLOG_INFO VTDPREFIX, 4.880 + "iommu_alloc: iommu->reg = %p drhd->address = %lx\n", 4.881 + iommu->reg, 
drhd->address); 4.882 + nr_iommus++; 4.883 + 4.884 + if (!iommu->reg) { 4.885 + printk(KERN_ERR VTDPREFIX "IOMMU: can't mapping the region\n"); 4.886 + goto error; 4.887 + } 4.888 + 4.889 + iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG); 4.890 + iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG); 4.891 + 4.892 + spin_lock_init(&iommu->lock); 4.893 + spin_lock_init(&iommu->register_lock); 4.894 + 4.895 + drhd->iommu = iommu; 4.896 + return iommu; 4.897 +error: 4.898 + xfree(iommu); 4.899 + return NULL; 4.900 +} 4.901 + 4.902 +static void free_iommu(struct iommu *iommu) 4.903 +{ 4.904 + if (!iommu) 4.905 + return; 4.906 + if (iommu->root_entry) 4.907 + free_xenheap_page((void *)iommu->root_entry); 4.908 + if (iommu->reg) 4.909 + iounmap(iommu->reg); 4.910 + free_irq(iommu->vector); 4.911 + xfree(iommu); 4.912 +} 4.913 + 4.914 +#define guestwidth_to_adjustwidth(gaw) ({ \ 4.915 + int agaw; \ 4.916 + int r = (gaw - 12) % 9; \ 4.917 + if (r == 0) \ 4.918 + agaw = gaw; \ 4.919 + else \ 4.920 + agaw = gaw + 9 - r; \ 4.921 + if (agaw > 64) \ 4.922 + agaw = 64; \ 4.923 + agaw; }) 4.924 +int iommu_domain_init(struct domain *domain) 4.925 +{ 4.926 + struct hvm_iommu *hd = domain_hvm_iommu(domain); 4.927 + struct iommu *iommu = NULL; 4.928 + int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH; 4.929 + int adjust_width, agaw; 4.930 + unsigned long sagaw; 4.931 + struct acpi_drhd_unit *drhd; 4.932 + 4.933 + if (list_empty(&acpi_drhd_units)) 4.934 + return 0; 4.935 + spin_lock_init(&hd->mapping_lock); 4.936 + spin_lock_init(&hd->iommu_list_lock); 4.937 + INIT_LIST_HEAD(&hd->pdev_list); 4.938 + 4.939 + for_each_drhd_unit(drhd) { 4.940 + if (drhd->iommu) 4.941 + iommu = drhd->iommu; 4.942 + else 4.943 + iommu = iommu_alloc(drhd); 4.944 + } 4.945 + 4.946 + /* calculate AGAW */ 4.947 + if (guest_width > cap_mgaw(iommu->cap)) 4.948 + guest_width = cap_mgaw(iommu->cap); 4.949 + adjust_width = guestwidth_to_adjustwidth(guest_width); 4.950 + agaw = width_to_agaw(adjust_width); 4.951 + /* FIXME: hardware doesn't support it, choose a bigger one? 
*/ 4.952 + sagaw = cap_sagaw(iommu->cap); 4.953 + if (!test_bit(agaw, &sagaw)) { 4.954 + gdprintk(XENLOG_ERR VTDPREFIX, 4.955 + "IOMMU: hardware doesn't support the agaw\n"); 4.956 + agaw = find_next_bit(&sagaw, 5, agaw); 4.957 + if (agaw >= 5) 4.958 + return -ENODEV; 4.959 + } 4.960 + hd->agaw = agaw; 4.961 + return 0; 4.962 +} 4.963 + 4.964 +static int domain_context_mapping_one( 4.965 + struct domain *domain, 4.966 + struct iommu *iommu, 4.967 + u8 bus, u8 devfn) 4.968 +{ 4.969 + struct hvm_iommu *hd = domain_hvm_iommu(domain); 4.970 + struct context_entry *context; 4.971 + unsigned long flags; 4.972 + int ret = 0; 4.973 + 4.974 + context = device_to_context_entry(iommu, bus, devfn); 4.975 + if (!context) { 4.976 + gdprintk(XENLOG_INFO VTDPREFIX, 4.977 + "domain_context_mapping_one:context == NULL:bdf = %x:%x:%x \n", 4.978 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 4.979 + return -ENOMEM; 4.980 + } 4.981 + spin_lock_irqsave(&iommu->lock, flags); 4.982 + if (context_present(*context)) { 4.983 + spin_unlock_irqrestore(&iommu->lock, flags); 4.984 + gdprintk(XENLOG_INFO VTDPREFIX, 4.985 + "domain_context_mapping_one:context present:bdf=%x:%x:%x\n", 4.986 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 4.987 + return 0; 4.988 + } 4.989 + 4.990 +#ifdef VTD_DEBUG 4.991 + dprintk(XENLOG_INFO VTDPREFIX, 4.992 + "context_mapping_one_1-%x:%x:%x-*context = %lx %lx\n", 4.993 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), context->hi, context->lo); 4.994 +#endif 4.995 + 4.996 + /* 4.997 + * domain_id 0 is not valid on Intel's IOMMU, force domain_id to 4.998 + * be 1 based as required by intel's iommu hw. 4.999 + */ 4.1000 + context_set_domain_id(*context, domain->domain_id); 4.1001 + context_set_address_width(*context, hd->agaw); 4.1002 + 4.1003 + if (ecap_pass_thru(iommu->ecap)) 4.1004 + context_set_translation_type(*context, CONTEXT_TT_PASS_THRU); 4.1005 + else { 4.1006 + context_set_address_root(*context, virt_to_maddr(hd->pgd)); 4.1007 + context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); 4.1008 + } 4.1009 + 4.1010 + context_set_fault_enable(*context); 4.1011 + context_set_present(*context); 4.1012 + iommu_flush_cache_entry(iommu, context); 4.1013 + 4.1014 +#ifdef VTD_DEBUG 4.1015 + dprintk(XENLOG_INFO VTDPREFIX, 4.1016 + "context_mapping_one_2-%x:%x:%x-*context=%lx %lx hd->pgd = %p\n", 4.1017 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 4.1018 + context->hi, context->lo, hd->pgd); 4.1019 +#endif 4.1020 + 4.1021 + if (iommu_flush_context_device(iommu, domain->domain_id, 4.1022 + (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1)) 4.1023 + iommu_flush_write_buffer(iommu); 4.1024 + else 4.1025 + iommu_flush_iotlb_dsi(iommu, domain->domain_id, 0); 4.1026 + spin_unlock_irqrestore(&iommu->lock, flags); 4.1027 + return ret; 4.1028 +} 4.1029 + 4.1030 +static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap) 4.1031 +{ 4.1032 + u8 id; 4.1033 + int ttl = 48; 4.1034 + 4.1035 + while (ttl--) { 4.1036 + pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos); 4.1037 + if (pos < 0x40) 4.1038 + break; 4.1039 + pos &= ~3; 4.1040 + id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 4.1041 + pos + PCI_CAP_LIST_ID); 4.1042 + 4.1043 + if (id == 0xff) 4.1044 + break; 4.1045 + if (id == cap) 4.1046 + return pos; 4.1047 + pos += PCI_CAP_LIST_NEXT; 4.1048 + } 4.1049 + return 0; 4.1050 +} 4.1051 + 4.1052 +#define PCI_BASE_CLASS_BRIDGE 0x06 4.1053 +#define PCI_CLASS_BRIDGE_PCI 0x0604 4.1054 + 4.1055 +#define DEV_TYPE_PCIe_ENDPOINT 1 4.1056 +#define DEV_TYPE_PCI_BRIDGE 2 4.1057 
+#define DEV_TYPE_PCI 3 4.1058 + 4.1059 +int pdev_type(struct pci_dev *dev) 4.1060 +{ 4.1061 + u16 class_device; 4.1062 + u16 status; 4.1063 + 4.1064 + class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn), 4.1065 + PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE); 4.1066 + if (class_device == PCI_CLASS_BRIDGE_PCI) 4.1067 + return DEV_TYPE_PCI_BRIDGE; 4.1068 + 4.1069 + status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn), 4.1070 + PCI_FUNC(dev->devfn), PCI_STATUS); 4.1071 + 4.1072 + if (!(status & PCI_STATUS_CAP_LIST)) 4.1073 + return DEV_TYPE_PCI; 4.1074 + 4.1075 + if (__pci_find_next_cap(dev->bus, dev->devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP)) 4.1076 + return DEV_TYPE_PCIe_ENDPOINT; 4.1077 + 4.1078 + return DEV_TYPE_PCI; 4.1079 +} 4.1080 + 4.1081 +#define MAX_BUSES 256 4.1082 +struct pci_dev bus2bridge[MAX_BUSES]; 4.1083 + 4.1084 +static int domain_context_mapping( 4.1085 + struct domain *domain, 4.1086 + struct iommu *iommu, 4.1087 + struct pci_dev *pdev) 4.1088 +{ 4.1089 + int ret = 0; 4.1090 + int dev, func, sec_bus, sub_bus; 4.1091 + u32 type; 4.1092 + 4.1093 + type = pdev_type(pdev); 4.1094 + if (type == DEV_TYPE_PCI_BRIDGE) { 4.1095 + sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn), 4.1096 + PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS); 4.1097 + 4.1098 + if (bus2bridge[sec_bus].bus == 0) { 4.1099 + bus2bridge[sec_bus].bus = pdev->bus; 4.1100 + bus2bridge[sec_bus].devfn = pdev->devfn; 4.1101 + } 4.1102 + 4.1103 + sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn), 4.1104 + PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS); 4.1105 + 4.1106 + if (sec_bus != sub_bus) { 4.1107 + dprintk(XENLOG_INFO VTDPREFIX, 4.1108 + "context_mapping: nested PCI bridge not supported\n"); 4.1109 + dprintk(XENLOG_INFO VTDPREFIX, 4.1110 + " bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n", 4.1111 + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 4.1112 + sec_bus, sub_bus); 4.1113 + } 4.1114 + } 4.1115 + 4.1116 + if (type == DEV_TYPE_PCIe_ENDPOINT) { 4.1117 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1118 + "domain_context_mapping:PCIe : bdf = %x:%x:%x\n", 4.1119 + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 4.1120 + ret = domain_context_mapping_one(domain, iommu, 4.1121 + (u8)(pdev->bus), (u8) (pdev->devfn)); 4.1122 + } 4.1123 + 4.1124 + /* PCI devices */ 4.1125 + if (type == DEV_TYPE_PCI) { 4.1126 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1127 + "domain_context_mapping:PCI: bdf = %x:%x:%x\n", 4.1128 + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 4.1129 + 4.1130 + if (pdev->bus == 0) 4.1131 + ret = domain_context_mapping_one(domain, iommu, 4.1132 + (u8)(pdev->bus), (u8) (pdev->devfn)); 4.1133 + else { 4.1134 + if (bus2bridge[pdev->bus].bus != 0) 4.1135 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1136 + "domain_context_mapping:bus2bridge[pdev->bus].bus==0\n"); 4.1137 + 4.1138 + ret = domain_context_mapping_one(domain, iommu, 4.1139 + (u8)(bus2bridge[pdev->bus].bus), 4.1140 + (u8)(bus2bridge[pdev->bus].devfn)); 4.1141 + 4.1142 + /* now map everything behind the PCI bridge */ 4.1143 + for (dev = 0; dev < 32; dev++) { 4.1144 + for (func = 0; func < 8; func++) { 4.1145 + ret = domain_context_mapping_one(domain, iommu, 4.1146 + pdev->bus, (u8)PCI_DEVFN(dev, func)); 4.1147 + if (ret) 4.1148 + return ret; 4.1149 + } 4.1150 + } 4.1151 + } 4.1152 + } 4.1153 + return ret; 4.1154 +} 4.1155 + 4.1156 +static int domain_context_unmap_one( 4.1157 + struct domain *domain, 4.1158 + struct iommu *iommu, 4.1159 + u8 bus, u8 devfn) 4.1160 +{ 4.1161 + struct context_entry *context; 4.1162 
+ unsigned long flags; 4.1163 + 4.1164 + context = device_to_context_entry(iommu, bus, devfn); 4.1165 + if (!context) { 4.1166 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1167 + "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n", 4.1168 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 4.1169 + return -ENOMEM; 4.1170 + } 4.1171 + spin_lock_irqsave(&iommu->lock, flags); 4.1172 + if (!context_present(*context)) { 4.1173 + spin_unlock_irqrestore(&iommu->lock, flags); 4.1174 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1175 + "domain_context_unmap_one-%x:%x:%x- context NOT present:return\n", 4.1176 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 4.1177 + return 0; 4.1178 + } 4.1179 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1180 + "domain_context_unmap_one_1:bdf = %x:%x:%x\n", 4.1181 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 4.1182 + 4.1183 + context_clear_present(*context); 4.1184 + context_clear_entry(*context); 4.1185 + iommu_flush_cache_entry(iommu, context); 4.1186 + iommu_flush_context_global(iommu, 0); 4.1187 + iommu_flush_iotlb_global(iommu, 0); 4.1188 + spin_unlock_irqrestore(&iommu->lock, flags); 4.1189 + 4.1190 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1191 + "domain_context_unmap_one_2:bdf = %x:%x:%x\n", 4.1192 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 4.1193 + 4.1194 + return 0; 4.1195 +} 4.1196 + 4.1197 +static int domain_context_unmap( 4.1198 + struct domain *domain, 4.1199 + struct iommu *iommu, 4.1200 + struct pci_dev *pdev) 4.1201 +{ 4.1202 + int ret = 0; 4.1203 + int dev, func, sec_bus, sub_bus; 4.1204 + u32 type; 4.1205 + 4.1206 + type = pdev_type(pdev); 4.1207 + if (type == DEV_TYPE_PCI_BRIDGE) { 4.1208 + sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn), 4.1209 + PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS); 4.1210 + sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn), 4.1211 + PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS); 4.1212 + 4.1213 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1214 + "domain_context_unmap:BRIDGE:%x:%x:%x sec_bus=%x sub_bus=%x\n", 4.1215 + pdev->bus, PCI_SLOT(pdev->devfn), 4.1216 + PCI_FUNC(pdev->devfn), sec_bus, sub_bus); 4.1217 + } 4.1218 + 4.1219 + if (type == DEV_TYPE_PCIe_ENDPOINT) { 4.1220 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1221 + "domain_context_unmap:PCIe : bdf = %x:%x:%x\n", 4.1222 + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 4.1223 + ret = domain_context_unmap_one(domain, iommu, 4.1224 + (u8)(pdev->bus), (u8) (pdev->devfn)); 4.1225 + } 4.1226 + 4.1227 + /* PCI devices */ 4.1228 + if (type == DEV_TYPE_PCI) { 4.1229 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1230 + "domain_context_unmap:PCI: bdf = %x:%x:%x\n", 4.1231 + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 4.1232 + if (pdev->bus == 0) 4.1233 + ret = domain_context_unmap_one(domain, iommu, 4.1234 + (u8)(pdev->bus), (u8) (pdev->devfn)); 4.1235 + else { 4.1236 + if (bus2bridge[pdev->bus].bus != 0) 4.1237 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1238 + "domain_context_mapping:bus2bridge[pdev->bus].bus==0\n"); 4.1239 + 4.1240 + ret = domain_context_unmap_one(domain, iommu, 4.1241 + (u8)(bus2bridge[pdev->bus].bus), 4.1242 + (u8)(bus2bridge[pdev->bus].devfn)); 4.1243 + 4.1244 + /* now map everything behind the PCI bridge */ 4.1245 + for (dev = 0; dev < 32; dev++) { 4.1246 + for (func = 0; func < 8; func++) { 4.1247 + ret = domain_context_unmap_one(domain, iommu, 4.1248 + pdev->bus, (u8)PCI_DEVFN(dev, func)); 4.1249 + if (ret) 4.1250 + return ret; 4.1251 + } 4.1252 + } 4.1253 + } 4.1254 + } 4.1255 + return ret; 4.1256 +} 4.1257 + 4.1258 +void reassign_device_ownership( 4.1259 + struct domain 
*source, 4.1260 + struct domain *target, 4.1261 + u8 bus, u8 devfn) 4.1262 +{ 4.1263 + struct hvm_iommu *source_hd = domain_hvm_iommu(source); 4.1264 + struct hvm_iommu *target_hd = domain_hvm_iommu(target); 4.1265 + struct pci_dev *pdev; 4.1266 + struct acpi_drhd_unit *drhd; 4.1267 + struct iommu *iommu; 4.1268 + int status; 4.1269 + unsigned long flags; 4.1270 + 4.1271 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1272 + "reassign_device-%x:%x:%x- source = %d target = %d\n", 4.1273 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 4.1274 + source->domain_id, target->domain_id); 4.1275 + 4.1276 + for_each_pdev(source, pdev) { 4.1277 + if ( (pdev->bus != bus) || (pdev->devfn != devfn) ) 4.1278 + continue; 4.1279 + 4.1280 + pdev->bus = bus; 4.1281 + pdev->devfn = devfn; 4.1282 + drhd = acpi_find_matched_drhd_unit(pdev); 4.1283 + iommu = drhd->iommu; 4.1284 + domain_context_unmap(source, iommu, pdev); 4.1285 + 4.1286 + /* 4.1287 + * move pci device from the source domain to target domain. 4.1288 + */ 4.1289 + spin_lock_irqsave(&source_hd->iommu_list_lock, flags); 4.1290 + spin_lock_irqsave(&target_hd->iommu_list_lock, flags); 4.1291 + list_move(&pdev->list, &target_hd->pdev_list); 4.1292 + spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); 4.1293 + spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); 4.1294 + 4.1295 + status = domain_context_mapping(target, iommu, pdev); 4.1296 + if (status != 0) 4.1297 + gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n"); 4.1298 + 4.1299 + /* 4.1300 + * We are done. 4.1301 + */ 4.1302 + break; 4.1303 + } 4.1304 +} 4.1305 + 4.1306 +void return_devices_to_dom0(struct domain *d) 4.1307 +{ 4.1308 + struct hvm_iommu *hd = domain_hvm_iommu(d); 4.1309 + struct pci_dev *pdev; 4.1310 + 4.1311 + while (!list_empty(&hd->pdev_list)) { 4.1312 + pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list); 4.1313 + dprintk(XENLOG_INFO VTDPREFIX, 4.1314 + "return_devices_to_dom0: bdf = %x:%x:%x\n", 4.1315 + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 4.1316 + reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn); 4.1317 + } 4.1318 + 4.1319 +#ifdef VTD_DEBUG 4.1320 + for_each_pdev(dom0, pdev) { 4.1321 + dprintk(XENLOG_INFO VTDPREFIX, 4.1322 + "return_devices_to_dom0:%x: bdf = %x:%x:%x\n", 4.1323 + dom0->domain_id, pdev->bus, 4.1324 + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 4.1325 + } 4.1326 +#endif 4.1327 +} 4.1328 + 4.1329 +void iommu_domain_teardown(struct domain *d) 4.1330 +{ 4.1331 + if (list_empty(&acpi_drhd_units)) 4.1332 + return; 4.1333 + 4.1334 +#if CONFIG_PAGING_LEVELS == 3 4.1335 + { 4.1336 + struct hvm_iommu *hd = domain_hvm_iommu(d); 4.1337 + int level = agaw_to_level(hd->agaw); 4.1338 + struct dma_pte *pgd = NULL; 4.1339 + 4.1340 + switch (level) 4.1341 + { 4.1342 + case VTD_PAGE_TABLE_LEVEL_3: 4.1343 + if ( hd->pgd ) 4.1344 + free_xenheap_page((void *)hd->pgd); 4.1345 + break; 4.1346 + case VTD_PAGE_TABLE_LEVEL_4: 4.1347 + if ( hd->pgd ) 4.1348 + { 4.1349 + pgd = hd->pgd; 4.1350 + if ( pgd[0].val != 0 ) 4.1351 + free_xenheap_page((void*)maddr_to_virt( 4.1352 + dma_pte_addr(pgd[0]))); 4.1353 + } 4.1354 + break; 4.1355 + default: 4.1356 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1357 + "Unsupported p2m table sharing level!\n"); 4.1358 + break; 4.1359 + } 4.1360 + } 4.1361 +#endif 4.1362 + return_devices_to_dom0(d); 4.1363 +} 4.1364 + 4.1365 +static int domain_context_mapped(struct domain *domain, struct pci_dev *pdev) 4.1366 +{ 4.1367 + struct acpi_drhd_unit *drhd; 4.1368 + struct iommu *iommu; 4.1369 + int ret; 4.1370 + 4.1371 + 
for_each_drhd_unit(drhd) { 4.1372 + iommu = drhd->iommu; 4.1373 + ret = device_context_mapped(iommu, pdev->bus, pdev->devfn); 4.1374 + if (ret) 4.1375 + return ret; 4.1376 + } 4.1377 + return 0; 4.1378 +} 4.1379 + 4.1380 +int iommu_map_page(struct domain *d, paddr_t gfn, paddr_t mfn) 4.1381 +{ 4.1382 + struct acpi_drhd_unit *drhd; 4.1383 + struct iommu *iommu; 4.1384 + struct dma_pte *pte = NULL; 4.1385 + 4.1386 + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); 4.1387 + iommu = drhd->iommu; 4.1388 + 4.1389 + /* do nothing if dom0 and iommu supports pass thru */ 4.1390 + if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0)) 4.1391 + return 0; 4.1392 + 4.1393 + pte = addr_to_dma_pte(d, gfn << PAGE_SHIFT_4K); 4.1394 + if (!pte) 4.1395 + return -ENOMEM; 4.1396 + dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K); 4.1397 + dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE); 4.1398 + iommu_flush_cache_entry(iommu, pte); 4.1399 + 4.1400 + for_each_drhd_unit(drhd) { 4.1401 + iommu = drhd->iommu; 4.1402 + if (cap_caching_mode(iommu->cap)) 4.1403 + iommu_flush_iotlb_psi(iommu, d->domain_id, 4.1404 + gfn << PAGE_SHIFT_4K, 1, 0); 4.1405 + else if (cap_rwbf(iommu->cap)) 4.1406 + iommu_flush_write_buffer(iommu); 4.1407 + } 4.1408 + return 0; 4.1409 +} 4.1410 + 4.1411 +int iommu_unmap_page(struct domain *d, dma_addr_t gfn) 4.1412 +{ 4.1413 + struct acpi_drhd_unit *drhd; 4.1414 + struct iommu *iommu; 4.1415 + struct dma_pte *pte = NULL; 4.1416 + 4.1417 + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); 4.1418 + iommu = drhd->iommu; 4.1419 + 4.1420 + /* do nothing if dom0 and iommu supports pass thru */ 4.1421 + if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0)) 4.1422 + return 0; 4.1423 + 4.1424 + /* get last level pte */ 4.1425 + pte = dma_addr_level_pte(d, gfn << PAGE_SHIFT_4K, 1); 4.1426 + dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K); 4.1427 + 4.1428 + return 0; 4.1429 +} 4.1430 + 4.1431 +int iommu_page_mapping(struct domain *domain, dma_addr_t iova, 4.1432 + void *hpa, size_t size, int prot) 4.1433 +{ 4.1434 + struct acpi_drhd_unit *drhd; 4.1435 + struct iommu *iommu; 4.1436 + unsigned long start_pfn, end_pfn; 4.1437 + struct dma_pte *pte = NULL; 4.1438 + int index; 4.1439 + 4.1440 + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); 4.1441 + iommu = drhd->iommu; 4.1442 + if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 4.1443 + return -EINVAL; 4.1444 + iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K; 4.1445 + start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K); 4.1446 + end_pfn = (unsigned long) 4.1447 + ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K); 4.1448 + index = 0; 4.1449 + while (start_pfn < end_pfn) { 4.1450 + pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index); 4.1451 + if (!pte) 4.1452 + return -ENOMEM; 4.1453 + dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); 4.1454 + dma_set_pte_prot(*pte, prot); 4.1455 + iommu_flush_cache_entry(iommu, pte); 4.1456 + start_pfn++; 4.1457 + index++; 4.1458 + } 4.1459 + 4.1460 + for_each_drhd_unit(drhd) { 4.1461 + iommu = drhd->iommu; 4.1462 + if (cap_caching_mode(iommu->cap)) 4.1463 + iommu_flush_iotlb_psi(iommu, domain->domain_id, iova, size, 0); 4.1464 + else if (cap_rwbf(iommu->cap)) 4.1465 + iommu_flush_write_buffer(iommu); 4.1466 + } 4.1467 + return 0; 4.1468 +} 4.1469 + 4.1470 +int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size) 4.1471 +{ 4.1472 + struct dma_pte *pte = NULL; 4.1473 + 4.1474 + /* get last level pte */ 4.1475 + pte = 
dma_addr_level_pte(domain, addr, 1); 4.1476 + dma_pte_clear_range(domain, addr, addr + size); 4.1477 + 4.1478 + return 0; 4.1479 +} 4.1480 + 4.1481 +void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry) 4.1482 +{ 4.1483 + struct acpi_drhd_unit *drhd; 4.1484 + struct iommu *iommu = NULL; 4.1485 + struct dma_pte *pte = (struct dma_pte *) p2m_entry; 4.1486 + 4.1487 + for_each_drhd_unit(drhd) { 4.1488 + iommu = drhd->iommu; 4.1489 + if (cap_caching_mode(iommu->cap)) 4.1490 + iommu_flush_iotlb_psi(iommu, d->domain_id, 4.1491 + gfn << PAGE_SHIFT_4K, 1, 0); 4.1492 + else if (cap_rwbf(iommu->cap)) 4.1493 + iommu_flush_write_buffer(iommu); 4.1494 + } 4.1495 + iommu_flush_cache_entry(iommu, pte); 4.1496 +} 4.1497 + 4.1498 +int 4.1499 +prepare_device(struct domain *domain, struct pci_dev dev) 4.1500 +{ 4.1501 + return 0; 4.1502 +} 4.1503 + 4.1504 +static int iommu_prepare_rmrr_dev( 4.1505 + struct domain *d, 4.1506 + struct acpi_rmrr_unit *rmrr, 4.1507 + struct pci_dev *pdev) 4.1508 +{ 4.1509 + struct acpi_drhd_unit *drhd; 4.1510 + unsigned long size; 4.1511 + int ret; 4.1512 + 4.1513 + /* page table init */ 4.1514 + size = rmrr->end_address - rmrr->base_address + 1; 4.1515 + ret = iommu_page_mapping(d, rmrr->base_address, 4.1516 + (void *)rmrr->base_address, size, 4.1517 + DMA_PTE_READ|DMA_PTE_WRITE); 4.1518 + if (ret) 4.1519 + return ret; 4.1520 + 4.1521 + if (domain_context_mapped(d, pdev) == 0) { 4.1522 + drhd = acpi_find_matched_drhd_unit(pdev); 4.1523 + ret = domain_context_mapping(d, drhd->iommu, pdev); 4.1524 + if (!ret) 4.1525 + return 0; 4.1526 + } 4.1527 + return ret; 4.1528 +} 4.1529 + 4.1530 +void __init setup_dom0_devices(void) 4.1531 +{ 4.1532 + struct hvm_iommu *hd = domain_hvm_iommu(dom0); 4.1533 + struct acpi_drhd_unit *drhd; 4.1534 + struct pci_dev *pdev; 4.1535 + int bus, dev, func; 4.1536 + u32 l; 4.1537 + u8 hdr_type; 4.1538 + int ret; 4.1539 + 4.1540 +#ifdef DEBUG_VTD_CONTEXT_ENTRY 4.1541 + for (bus = 0; bus < 256; bus++) { 4.1542 + for (dev = 0; dev < 32; dev++) { 4.1543 + for (func = 0; func < 8; func++) { 4.1544 + struct context_entry *context; 4.1545 + struct pci_dev device; 4.1546 + 4.1547 + device.bus = bus; 4.1548 + device.devfn = PCI_DEVFN(dev, func); 4.1549 + drhd = acpi_find_matched_drhd_unit(&device); 4.1550 + context = device_to_context_entry(drhd->iommu, 4.1551 + bus, PCI_DEVFN(dev, func)); 4.1552 + if ((context->lo != 0) || (context->hi != 0)) 4.1553 + dprintk(XENLOG_INFO VTDPREFIX, 4.1554 + "setup_dom0_devices-%x:%x:%x- context not 0\n", 4.1555 + bus, dev, func); 4.1556 + } 4.1557 + } 4.1558 + } 4.1559 +#endif 4.1560 + 4.1561 + for (bus = 0; bus < 256; bus++) { 4.1562 + for (dev = 0; dev < 32; dev++) { 4.1563 + for (func = 0; func < 8; func++) { 4.1564 + l = read_pci_config(bus, dev, func, PCI_VENDOR_ID); 4.1565 + /* some broken boards return 0 or ~0 if a slot is empty: */ 4.1566 + if (l == 0xffffffff || l == 0x00000000 || 4.1567 + l == 0x0000ffff || l == 0xffff0000) 4.1568 + continue; 4.1569 + pdev = xmalloc(struct pci_dev); 4.1570 + pdev->bus = bus; 4.1571 + pdev->devfn = PCI_DEVFN(dev, func); 4.1572 + list_add_tail(&pdev->list, &hd->pdev_list); 4.1573 + 4.1574 + drhd = acpi_find_matched_drhd_unit(pdev); 4.1575 + ret = domain_context_mapping(dom0, drhd->iommu, pdev); 4.1576 + if (ret != 0) 4.1577 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1578 + "domain_context_mapping failed\n"); 4.1579 + 4.1580 + hdr_type = read_pci_config(bus, dev, func, PCI_HEADER_TYPE); 4.1581 + // if ((hdr_type & 0x8) == 0) 4.1582 + // break; 4.1583 + } 4.1584 + } 4.1585 + } 4.1586 + 
for_each_pdev(dom0, pdev) { 4.1587 + dprintk(XENLOG_INFO VTDPREFIX, 4.1588 + "setup_dom0_devices: bdf = %x:%x:%x\n", 4.1589 + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 4.1590 + } 4.1591 +} 4.1592 + 4.1593 +void clear_fault_bit(struct iommu *iommu) 4.1594 +{ 4.1595 + u64 val; 4.1596 + 4.1597 + val = dmar_readq( 4.1598 + iommu->reg, 4.1599 + cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8); 4.1600 + dmar_writeq( 4.1601 + iommu->reg, 4.1602 + cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8, 4.1603 + val); 4.1604 + dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO); 4.1605 +} 4.1606 + 4.1607 +/* 4.1608 + * Called from ACPI discovery code, once all DMAR's and RMRR's are done 4.1609 + * scanning, we need to run through and initialize as much of it as necessary 4.1610 + */ 4.1611 +int vtd_enable = 1; 4.1612 +static void setup_vtd_enable(char *s) 4.1613 +{ 4.1614 + if ( !strcmp(s, "0") ) 4.1615 + vtd_enable = 0; 4.1616 + else if ( !strcmp(s, "1") ) 4.1617 + vtd_enable = 1; 4.1618 + else 4.1619 + dprintk(XENLOG_INFO VTDPREFIX, 4.1620 + "Unknown vtd_enable value specified: '%s'\n", s); 4.1621 + dprintk(XENLOG_INFO VTDPREFIX, "vtd_enable = %x\n", vtd_enable); 4.1622 +} 4.1623 +custom_param("vtd", setup_vtd_enable); 4.1624 + 4.1625 +static int init_vtd_hw(void) 4.1626 +{ 4.1627 + struct acpi_drhd_unit *drhd; 4.1628 + struct iommu *iommu; 4.1629 + int ret; 4.1630 + 4.1631 + for_each_drhd_unit(drhd) { 4.1632 + iommu = drhd->iommu; 4.1633 + ret = iommu_set_root_entry(iommu); 4.1634 + if (ret) { 4.1635 + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n"); 4.1636 + return -EIO; 4.1637 + } 4.1638 + } 4.1639 + return 0; 4.1640 +} 4.1641 + 4.1642 +static int enable_vtd_translation(void) 4.1643 +{ 4.1644 + struct acpi_drhd_unit *drhd; 4.1645 + struct iommu *iommu; 4.1646 + int vector = 0; 4.1647 + 4.1648 + for_each_drhd_unit(drhd) { 4.1649 + iommu = drhd->iommu; 4.1650 + vector = iommu_set_interrupt(iommu); 4.1651 + dma_msi_data_init(iommu, vector); 4.1652 + dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map))); 4.1653 + iommu->vector = vector; 4.1654 + clear_fault_bit(iommu); 4.1655 + if (vtd_enable && iommu_enable_translation(iommu)) 4.1656 + return -EIO; 4.1657 + } 4.1658 + return 0; 4.1659 +} 4.1660 + 4.1661 +static void setup_dom0_rmrr(void) 4.1662 +{ 4.1663 + struct acpi_rmrr_unit *rmrr; 4.1664 + struct pci_dev *pdev; 4.1665 + int ret; 4.1666 + 4.1667 + for_each_rmrr_device(rmrr, pdev) 4.1668 + ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev); 4.1669 + if (ret) 4.1670 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1671 + "IOMMU: mapping reserved region failed\n"); 4.1672 + end_for_each_rmrr_device(rmrr, pdev) 4.1673 +} 4.1674 + 4.1675 +int iommu_setup(void) 4.1676 +{ 4.1677 + struct hvm_iommu *hd = domain_hvm_iommu(dom0); 4.1678 + struct acpi_drhd_unit *drhd; 4.1679 + struct iommu *iommu; 4.1680 + 4.1681 + if (list_empty(&acpi_drhd_units)) 4.1682 + return 0; 4.1683 + 4.1684 + INIT_LIST_HEAD(&hd->pdev_list); 4.1685 + 4.1686 + /* start from scratch */ 4.1687 + flush_all(); 4.1688 + 4.1689 + /* setup clflush size */ 4.1690 + x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8; 4.1691 + 4.1692 + /* 4.1693 + * allocate IO page directory page for the domain. 
4.1694 + */ 4.1695 + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); 4.1696 + iommu = drhd->iommu; 4.1697 + 4.1698 + hd->pgd = (struct dma_pte *)alloc_xenheap_page(); 4.1699 + memset((u8*)hd->pgd, 0, PAGE_SIZE); 4.1700 + 4.1701 + if (init_vtd_hw()) 4.1702 + goto error; 4.1703 + setup_dom0_devices(); 4.1704 + setup_dom0_rmrr(); 4.1705 + if (enable_vtd_translation()) 4.1706 + goto error; 4.1707 + 4.1708 + return 0; 4.1709 + 4.1710 +error: 4.1711 + printk("iommu_setup() failed\n"); 4.1712 + for_each_drhd_unit(drhd) { 4.1713 + iommu = drhd->iommu; 4.1714 + free_iommu(iommu); 4.1715 + } 4.1716 + return -EIO; 4.1717 +} 4.1718 + 4.1719 +int assign_device(struct domain *d, u8 bus, u8 devfn) 4.1720 +{ 4.1721 + struct hvm_iommu *hd = domain_hvm_iommu(d); 4.1722 + struct acpi_rmrr_unit *rmrr; 4.1723 + struct pci_dev *pdev; 4.1724 + int ret = 0; 4.1725 + 4.1726 + if (list_empty(&acpi_drhd_units)) 4.1727 + return ret; 4.1728 + 4.1729 + dprintk(XENLOG_INFO VTDPREFIX, 4.1730 + "assign_device: bus = %x dev = %x func = %x\n", 4.1731 + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 4.1732 + 4.1733 + reassign_device_ownership(dom0, d, bus, devfn); 4.1734 + 4.1735 + /* setup rmrr identify mapping just once per domain */ 4.1736 + if (list_empty(&hd->pdev_list)) 4.1737 + for_each_rmrr_device(rmrr, pdev) 4.1738 + ret = iommu_prepare_rmrr_dev(d, rmrr, pdev); 4.1739 + if (ret) 4.1740 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1741 + "IOMMU: mapping reserved region failed\n"); 4.1742 + end_for_each_rmrr_device(rmrr, pdev) 4.1743 + return ret; 4.1744 +} 4.1745 + 4.1746 +void iommu_set_pgd(struct domain *d) 4.1747 +{ 4.1748 + struct hvm_iommu *hd = domain_hvm_iommu(d); 4.1749 + unsigned long p2m_table; 4.1750 + 4.1751 + if (hd->pgd) { 4.1752 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1753 + "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd); 4.1754 + hd->pgd = NULL; 4.1755 + } 4.1756 + p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table)); 4.1757 + 4.1758 +#if CONFIG_PAGING_LEVELS == 3 4.1759 + if ( !hd->pgd ) 4.1760 + { 4.1761 + int level = agaw_to_level(hd->agaw); 4.1762 + struct dma_pte *pmd = NULL; 4.1763 + struct dma_pte *pgd = NULL; 4.1764 + struct dma_pte *pte = NULL; 4.1765 + l3_pgentry_t *l3e; 4.1766 + unsigned long flags; 4.1767 + int i; 4.1768 + 4.1769 + spin_lock_irqsave(&hd->mapping_lock, flags); 4.1770 + if (!hd->pgd) { 4.1771 + pgd = (struct dma_pte *)alloc_xenheap_page(); 4.1772 + memset((u8*)pgd, 0, PAGE_SIZE); 4.1773 + if (!hd->pgd) 4.1774 + hd->pgd = pgd; 4.1775 + else /* somebody is fast */ 4.1776 + free_xenheap_page((void *) pgd); 4.1777 + } 4.1778 + 4.1779 + l3e = map_domain_page(p2m_table); 4.1780 + switch(level) 4.1781 + { 4.1782 + case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */ 4.1783 + /* We only support 8 entries for the PAE L3 p2m table */ 4.1784 + for ( i = 0; i < 8 ; i++ ) 4.1785 + { 4.1786 + /* Don't create new L2 entry, use ones from p2m table */ 4.1787 + pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW; 4.1788 + } 4.1789 + break; 4.1790 + 4.1791 + case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */ 4.1792 + /* We allocate one more page for the top vtd page table. 
*/ 4.1793 + pmd = (struct dma_pte *)alloc_xenheap_page(); 4.1794 + memset((u8*)pmd, 0, PAGE_SIZE); 4.1795 + pte = &pgd[0]; 4.1796 + dma_set_pte_addr(*pte, virt_to_maddr(pmd)); 4.1797 + dma_set_pte_readable(*pte); 4.1798 + dma_set_pte_writable(*pte); 4.1799 + 4.1800 + for ( i = 0; i < 8; i++ ) 4.1801 + { 4.1802 + /* Don't create new L2 entry, use ones from p2m table */ 4.1803 + pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW; 4.1804 + } 4.1805 + break; 4.1806 + default: 4.1807 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1808 + "iommu_set_pgd:Unsupported p2m table sharing level!\n"); 4.1809 + break; 4.1810 + } 4.1811 + unmap_domain_page(l3e); 4.1812 + spin_unlock_irqrestore(&hd->mapping_lock, flags); 4.1813 + } 4.1814 +#elif CONFIG_PAGING_LEVELS == 4 4.1815 + if ( !hd->pgd ) 4.1816 + { 4.1817 + int level = agaw_to_level(hd->agaw); 4.1818 + l3_pgentry_t *l3e; 4.1819 + mfn_t pgd_mfn; 4.1820 + 4.1821 + switch (level) 4.1822 + { 4.1823 + case VTD_PAGE_TABLE_LEVEL_3: 4.1824 + l3e = map_domain_page(p2m_table); 4.1825 + if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) 4.1826 + { 4.1827 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1828 + "iommu_set_pgd: second level wasn't there\n"); 4.1829 + unmap_domain_page(l3e); 4.1830 + return; 4.1831 + } 4.1832 + pgd_mfn = _mfn(l3e_get_pfn(*l3e)); 4.1833 + unmap_domain_page(l3e); 4.1834 + hd->pgd = maddr_to_virt(pagetable_get_paddr( 4.1835 + pagetable_from_mfn(pgd_mfn))); 4.1836 + break; 4.1837 + 4.1838 + case VTD_PAGE_TABLE_LEVEL_4: 4.1839 + pgd_mfn = _mfn(p2m_table); 4.1840 + hd->pgd = maddr_to_virt(pagetable_get_paddr( 4.1841 + pagetable_from_mfn(pgd_mfn))); 4.1842 + break; 4.1843 + default: 4.1844 + gdprintk(XENLOG_ERR VTDPREFIX, 4.1845 + "iommu_set_pgd:Unsupported p2m table sharing level!\n"); 4.1846 + break; 4.1847 + } 4.1848 + } 4.1849 +#endif 4.1850 + gdprintk(XENLOG_INFO VTDPREFIX, 4.1851 + "iommu_set_pgd: hd->pgd = %p\n", hd->pgd); 4.1852 +} 4.1853 + 4.1854 + 4.1855 +u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS]; 4.1856 +int iommu_suspend(void) 4.1857 +{ 4.1858 + struct acpi_drhd_unit *drhd; 4.1859 + struct iommu *iommu; 4.1860 + int i = 0; 4.1861 + 4.1862 + if (!vtd_enable) 4.1863 + return 0; 4.1864 + 4.1865 + flush_all(); 4.1866 + for_each_drhd_unit(drhd) { 4.1867 + iommu = drhd->iommu; 4.1868 + iommu_state[DMAR_RTADDR_REG * i] = 4.1869 + (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG); 4.1870 + iommu_state[DMAR_FECTL_REG * i] = 4.1871 + (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG); 4.1872 + iommu_state[DMAR_FEDATA_REG * i] = 4.1873 + (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG); 4.1874 + iommu_state[DMAR_FEADDR_REG * i] = 4.1875 + (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG); 4.1876 + iommu_state[DMAR_FEUADDR_REG * i] = 4.1877 + (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG); 4.1878 + iommu_state[DMAR_PLMBASE_REG * i] = 4.1879 + (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG); 4.1880 + iommu_state[DMAR_PLMLIMIT_REG * i] = 4.1881 + (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG); 4.1882 + iommu_state[DMAR_PHMBASE_REG * i] = 4.1883 + (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG); 4.1884 + iommu_state[DMAR_PHMLIMIT_REG * i] = 4.1885 + (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG); 4.1886 + i++; 4.1887 + } 4.1888 + 4.1889 + return 0; 4.1890 +} 4.1891 + 4.1892 +int iommu_resume(void) 4.1893 +{ 4.1894 + struct acpi_drhd_unit *drhd; 4.1895 + struct iommu *iommu; 4.1896 + int i = 0; 4.1897 + 4.1898 + if (!vtd_enable) 4.1899 + return 0; 4.1900 + 4.1901 + flush_all(); 4.1902 + 4.1903 + init_vtd_hw(); 4.1904 + for_each_drhd_unit(drhd) { 4.1905 + iommu = drhd->iommu; 4.1906 
+ dmar_writeq( iommu->reg, DMAR_RTADDR_REG, 4.1907 + (u64) iommu_state[DMAR_RTADDR_REG * i]); 4.1908 + dmar_writel(iommu->reg, DMAR_FECTL_REG, 4.1909 + (u32) iommu_state[DMAR_FECTL_REG * i]); 4.1910 + dmar_writel(iommu->reg, DMAR_FEDATA_REG, 4.1911 + (u32) iommu_state[DMAR_FEDATA_REG * i]); 4.1912 + dmar_writel(iommu->reg, DMAR_FEADDR_REG, 4.1913 + (u32) iommu_state[DMAR_FEADDR_REG * i]); 4.1914 + dmar_writel(iommu->reg, DMAR_FEUADDR_REG, 4.1915 + (u32) iommu_state[DMAR_FEUADDR_REG * i]); 4.1916 + dmar_writel(iommu->reg, DMAR_PLMBASE_REG, 4.1917 + (u32) iommu_state[DMAR_PLMBASE_REG * i]); 4.1918 + dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG, 4.1919 + (u32) iommu_state[DMAR_PLMLIMIT_REG * i]); 4.1920 + dmar_writeq(iommu->reg, DMAR_PHMBASE_REG, 4.1921 + (u64) iommu_state[DMAR_PHMBASE_REG * i]); 4.1922 + dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG, 4.1923 + (u64) iommu_state[DMAR_PHMLIMIT_REG * i]); 4.1924 + 4.1925 + if (iommu_enable_translation(iommu)) 4.1926 + return -EIO; 4.1927 + i++; 4.1928 + } 4.1929 + return 0; 4.1930 +}
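Editor's note: the mapping routines above (iommu_map_page, iommu_page_mapping) rely on addr_to_dma_pte(), defined earlier in this file, which walks a multi-level VT-d page table where each level below the 4K page offset resolves 9 address bits; VTD_PAGE_TABLE_LEVEL_3 and _4 therefore correspond to 39-bit and 48-bit guest address widths. The following is a standalone sketch of that index arithmetic only, not Xen code; the helper name level_index() is mine.

/* Standalone sketch of the VT-d page-table index arithmetic assumed by
 * addr_to_dma_pte(): each level resolves 9 bits above the 4K page offset,
 * so a 3-level table covers 39 bits and a 4-level table covers 48 bits.
 * Illustration only -- not part of the Xen sources. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT_4K   12
#define LEVEL_STRIDE    9            /* 512 entries per table page */
#define LEVEL_MASK      0x1ffULL     /* index within one table     */

static unsigned int level_index(uint64_t addr, int level)
{
    /* level 1 is the leaf table holding the 4K mappings */
    return (addr >> (PAGE_SHIFT_4K + LEVEL_STRIDE * (level - 1))) & LEVEL_MASK;
}

int main(void)
{
    uint64_t gpa = 0x12345678000ULL;  /* sample guest-physical address        */
    int levels = 3;                   /* VTD_PAGE_TABLE_LEVEL_3, 39-bit agaw  */
    int level;

    for (level = levels; level >= 1; level--)
        printf("level %d index = %u\n", level, level_index(gpa, level));
    return 0;
}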
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/xen/arch/x86/hvm/vmx/vtd/io.c Fri Sep 14 16:40:49 2007 +0100 5.3 @@ -0,0 +1,120 @@ 5.4 +/* 5.5 + * Copyright (c) 2006, Intel Corporation. 5.6 + * 5.7 + * This program is free software; you can redistribute it and/or modify it 5.8 + * under the terms and conditions of the GNU General Public License, 5.9 + * version 2, as published by the Free Software Foundation. 5.10 + * 5.11 + * This program is distributed in the hope it will be useful, but WITHOUT 5.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 5.13 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 5.14 + * more details. 5.15 + * 5.16 + * You should have received a copy of the GNU General Public License along with 5.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 5.18 + * Place - Suite 330, Boston, MA 02111-1307 USA. 5.19 + * 5.20 + * Copyright (C) Allen Kay <allen.m.kay@intel.com> 5.21 + * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com> 5.22 + */ 5.23 + 5.24 +#include <xen/init.h> 5.25 +#include <xen/config.h> 5.26 +#include <xen/init.h> 5.27 +#include <xen/mm.h> 5.28 +#include <xen/lib.h> 5.29 +#include <xen/errno.h> 5.30 +#include <xen/trace.h> 5.31 +#include <xen/event.h> 5.32 +#include <xen/hypercall.h> 5.33 +#include <asm/current.h> 5.34 +#include <asm/cpufeature.h> 5.35 +#include <asm/processor.h> 5.36 +#include <asm/msr.h> 5.37 +#include <asm/apic.h> 5.38 +#include <asm/paging.h> 5.39 +#include <asm/shadow.h> 5.40 +#include <asm/p2m.h> 5.41 +#include <asm/hvm/hvm.h> 5.42 +#include <asm/hvm/support.h> 5.43 +#include <asm/hvm/vpt.h> 5.44 +#include <asm/hvm/vpic.h> 5.45 +#include <asm/hvm/vlapic.h> 5.46 +#include <public/sched.h> 5.47 +#include <xen/iocap.h> 5.48 +#include <public/hvm/ioreq.h> 5.49 + 5.50 +int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq) 5.51 +{ 5.52 + uint32_t device, intx; 5.53 + uint32_t link, isa_irq; 5.54 + struct hvm_irq *hvm_irq; 5.55 + 5.56 + if (!vtd_enabled || (d == dom0)) 5.57 + return 0; 5.58 + 5.59 + if (d->arch.hvm_domain.irq.mirq[mirq].valid) 5.60 + { 5.61 + device = d->arch.hvm_domain.irq.mirq[mirq].device; 5.62 + intx = d->arch.hvm_domain.irq.mirq[mirq].intx; 5.63 + link = hvm_pci_intx_link(device, intx); 5.64 + hvm_irq = &d->arch.hvm_domain.irq; 5.65 + isa_irq = hvm_irq->pci_link.route[link]; 5.66 + 5.67 + if ( !d->arch.hvm_domain.irq.girq[isa_irq].valid ) 5.68 + { 5.69 + d->arch.hvm_domain.irq.girq[isa_irq].valid = 1; 5.70 + d->arch.hvm_domain.irq.girq[isa_irq].device = device; 5.71 + d->arch.hvm_domain.irq.girq[isa_irq].intx = intx; 5.72 + d->arch.hvm_domain.irq.girq[isa_irq].machine_gsi = mirq; 5.73 + } 5.74 + 5.75 + if ( !test_and_set_bit(mirq, d->arch.hvm_domain.irq.dirq_mask) ) 5.76 + { 5.77 + vcpu_kick(d->vcpu[0]); 5.78 + return 1; 5.79 + } 5.80 + else 5.81 + dprintk(XENLOG_INFO, "Want to pending mirq, but failed\n"); 5.82 + } 5.83 + return 0; 5.84 +} 5.85 + 5.86 +void hvm_dpci_eoi(unsigned int guest_gsi, union vioapic_redir_entry *ent) 5.87 +{ 5.88 + struct domain *d = current->domain; 5.89 + uint32_t device, intx, machine_gsi; 5.90 + irq_desc_t *desc; 5.91 + 5.92 + if (d->arch.hvm_domain.irq.girq[guest_gsi].valid) 5.93 + { 5.94 + device = d->arch.hvm_domain.irq.girq[guest_gsi].device; 5.95 + intx = d->arch.hvm_domain.irq.girq[guest_gsi].intx; 5.96 + machine_gsi = d->arch.hvm_domain.irq.girq[guest_gsi].machine_gsi; 5.97 + gdprintk(XENLOG_INFO, "hvm_dpci_eoi:: device %x intx %x\n", 5.98 + device, intx); 5.99 + 
hvm_pci_intx_deassert(d, device, intx); 5.100 + if ( (ent == NULL) || (ent && ent->fields.mask == 0) ) { 5.101 + desc = &irq_desc[irq_to_vector(machine_gsi)]; 5.102 + desc->handler->end(irq_to_vector(machine_gsi)); 5.103 + } 5.104 + } 5.105 +} 5.106 + 5.107 +int release_devices(struct domain *d) 5.108 +{ 5.109 + struct hvm_domain *hd = &d->arch.hvm_domain; 5.110 + uint32_t i; 5.111 + int ret = 0; 5.112 + 5.113 + if (!vtd_enabled) 5.114 + return ret; 5.115 + 5.116 + /* unbind irq */ 5.117 + for (i = 0; i < NR_IRQS; i++) { 5.118 + if (hd->irq.mirq[i].valid) 5.119 + ret = pirq_guest_unbind(d, i); 5.120 + } 5.121 + iommu_domain_teardown(d); 5.122 + return ret; 5.123 +}
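Editor's note: hvm_do_IRQ_dpci() above maps the asserted machine IRQ back to the guest by going device/INTx -> PCI link -> guest GSI via hvm_pci_intx_link(), which is not part of this changeset. The sketch below assumes the conventional (device + pin) mod 4 "barber-pole" swizzle that Xen's helper is understood to use; the function name pci_intx_link() is mine and the formula should be treated as an assumption.

/* Sketch of the device/INTx -> PCI link swizzle assumed for
 * hvm_pci_intx_link(): link = (slot + pin) mod 4.  Illustration only. */
#include <stdio.h>

static unsigned int pci_intx_link(unsigned int device, unsigned int intx)
{
    return (device + intx) & 3;
}

int main(void)
{
    /* e.g. device 3 asserting INTA (intx 0) and INTB (intx 1) */
    printf("dev 3, INTA -> link %u\n", pci_intx_link(3, 0));  /* 3 */
    printf("dev 3, INTB -> link %u\n", pci_intx_link(3, 1));  /* 0 */
    return 0;
}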
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/xen/arch/x86/hvm/vmx/vtd/msi.h Fri Sep 14 16:40:49 2007 +0100 6.3 @@ -0,0 +1,128 @@ 6.4 +/* 6.5 + * Copyright (C) 2003-2004 Intel 6.6 + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) 6.7 + */ 6.8 + 6.9 +#ifndef MSI_H 6.10 +#define MSI_H 6.11 + 6.12 +/* 6.13 + * Assume the maximum number of hot plug slots supported by the system is about 6.14 + * ten. The worstcase is that each of these slots is hot-added with a device, 6.15 + * which has two MSI/MSI-X capable functions. To avoid any MSI-X driver, which 6.16 + * attempts to request all available vectors, NR_HP_RESERVED_VECTORS is defined 6.17 + * as below to ensure at least one message is assigned to each detected MSI/ 6.18 + * MSI-X device function. 6.19 + */ 6.20 +#define NR_HP_RESERVED_VECTORS 20 6.21 + 6.22 +extern int vector_irq[NR_VECTORS]; 6.23 +extern void (*interrupt[NR_IRQS])(void); 6.24 +extern int pci_vector_resources(int last, int nr_released); 6.25 + 6.26 +/* 6.27 + * MSI-X Address Register 6.28 + */ 6.29 +#define PCI_MSIX_FLAGS_QSIZE 0x7FF 6.30 +#define PCI_MSIX_FLAGS_ENABLE (1 << 15) 6.31 +#define PCI_MSIX_FLAGS_BIRMASK (7 << 0) 6.32 +#define PCI_MSIX_FLAGS_BITMASK (1 << 0) 6.33 + 6.34 +#define PCI_MSIX_ENTRY_SIZE 16 6.35 +#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0 6.36 +#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4 6.37 +#define PCI_MSIX_ENTRY_DATA_OFFSET 8 6.38 +#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12 6.39 + 6.40 +#define msi_control_reg(base) (base + PCI_MSI_FLAGS) 6.41 +#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO) 6.42 +#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI) 6.43 +#define msi_data_reg(base, is64bit) \ 6.44 + ( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 ) 6.45 +#define msi_mask_bits_reg(base, is64bit) \ 6.46 + ( (is64bit == 1) ? 
base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) 6.47 +#define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE 6.48 +#define multi_msi_capable(control) \ 6.49 + (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1)) 6.50 +#define multi_msi_enable(control, num) \ 6.51 + control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE); 6.52 +#define is_64bit_address(control) (control & PCI_MSI_FLAGS_64BIT) 6.53 +#define is_mask_bit_support(control) (control & PCI_MSI_FLAGS_MASKBIT) 6.54 +#define msi_enable(control, num) multi_msi_enable(control, num); \ 6.55 + control |= PCI_MSI_FLAGS_ENABLE 6.56 + 6.57 +#define msix_table_offset_reg(base) (base + 0x04) 6.58 +#define msix_pba_offset_reg(base) (base + 0x08) 6.59 +#define msix_enable(control) control |= PCI_MSIX_FLAGS_ENABLE 6.60 +#define msix_disable(control) control &= ~PCI_MSIX_FLAGS_ENABLE 6.61 +#define msix_table_size(control) ((control & PCI_MSIX_FLAGS_QSIZE)+1) 6.62 +#define multi_msix_capable msix_table_size 6.63 +#define msix_unmask(address) (address & ~PCI_MSIX_FLAGS_BITMASK) 6.64 +#define msix_mask(address) (address | PCI_MSIX_FLAGS_BITMASK) 6.65 +#define msix_is_pending(address) (address & PCI_MSIX_FLAGS_PENDMASK) 6.66 + 6.67 +/* 6.68 + * MSI Defined Data Structures 6.69 + */ 6.70 +#define MSI_ADDRESS_HEADER 0xfee 6.71 +#define MSI_ADDRESS_HEADER_SHIFT 12 6.72 +#define MSI_ADDRESS_HEADER_MASK 0xfff000 6.73 +#define MSI_ADDRESS_DEST_ID_MASK 0xfff0000f 6.74 +#define MSI_TARGET_CPU_MASK 0xff 6.75 +#define MSI_TARGET_CPU_SHIFT 12 6.76 +#define MSI_DELIVERY_MODE 0 6.77 +#define MSI_LEVEL_MODE 1 /* Edge always assert */ 6.78 +#define MSI_TRIGGER_MODE 0 /* MSI is edge sensitive */ 6.79 +#define MSI_PHYSICAL_MODE 0 6.80 +#define MSI_LOGICAL_MODE 1 6.81 +#define MSI_REDIRECTION_HINT_MODE 0 6.82 + 6.83 +#define __LITTLE_ENDIAN_BITFIELD 1 6.84 + 6.85 +struct msg_data { 6.86 +#if defined(__LITTLE_ENDIAN_BITFIELD) 6.87 + __u32 vector : 8; 6.88 + __u32 delivery_mode : 3; /* 000b: FIXED | 001b: lowest prior */ 6.89 + __u32 reserved_1 : 3; 6.90 + __u32 level : 1; /* 0: deassert | 1: assert */ 6.91 + __u32 trigger : 1; /* 0: edge | 1: level */ 6.92 + __u32 reserved_2 : 16; 6.93 +#elif defined(__BIG_ENDIAN_BITFIELD) 6.94 + __u32 reserved_2 : 16; 6.95 + __u32 trigger : 1; /* 0: edge | 1: level */ 6.96 + __u32 level : 1; /* 0: deassert | 1: assert */ 6.97 + __u32 reserved_1 : 3; 6.98 + __u32 delivery_mode : 3; /* 000b: FIXED | 001b: lowest prior */ 6.99 + __u32 vector : 8; 6.100 +#else 6.101 +#error "Bitfield endianness not defined! Check your byteorder.h" 6.102 +#endif 6.103 +} __attribute__ ((packed)); 6.104 + 6.105 +struct msg_address { 6.106 + union { 6.107 + struct { 6.108 +#if defined(__LITTLE_ENDIAN_BITFIELD) 6.109 + __u32 reserved_1 : 2; 6.110 + __u32 dest_mode : 1; /*0:physic | 1:logic */ 6.111 + __u32 redirection_hint: 1; /*0: dedicated CPU 6.112 + 1: lowest priority */ 6.113 + __u32 reserved_2 : 4; 6.114 + __u32 dest_id : 24; /* Destination ID */ 6.115 +#elif defined(__BIG_ENDIAN_BITFIELD) 6.116 + __u32 dest_id : 24; /* Destination ID */ 6.117 + __u32 reserved_2 : 4; 6.118 + __u32 redirection_hint: 1; /*0: dedicated CPU 6.119 + 1: lowest priority */ 6.120 + __u32 dest_mode : 1; /*0:physic | 1:logic */ 6.121 + __u32 reserved_1 : 2; 6.122 +#else 6.123 +#error "Bitfield endianness not defined! Check your byteorder.h" 6.124 +#endif 6.125 + }u; 6.126 + __u32 value; 6.127 + }lo_address; 6.128 + __u32 hi_address; 6.129 +} __attribute__ ((packed)); 6.130 + 6.131 +#endif /* MSI_H */
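Editor's note: the msg_address/msg_data structures above describe the standard x86 MSI message layout (0xFEE in address bits 31:20, destination APIC ID in bits 19:12, vector in data bits 7:0). The sketch below composes such a pair with plain shifts as an illustration of that layout; it is not Xen's dma_msi_addr_init()/dma_msi_data_init(), and the helper names are mine.

/* Standalone sketch of composing an x86 MSI address/data pair in the
 * layout the structures above describe.  Bit positions follow the
 * standard x86 MSI format; illustration only. */
#include <stdio.h>
#include <stdint.h>

static uint32_t msi_address(uint8_t dest_apic_id, int logical, int lowest_prio)
{
    return (0xFEEu << 20)                    /* fixed MSI address header */
         | ((uint32_t)dest_apic_id << 12)    /* destination ID           */
         | ((lowest_prio ? 1u : 0u) << 3)    /* redirection hint         */
         | ((logical ? 1u : 0u) << 2);       /* destination mode         */
}

static uint32_t msi_data(uint8_t vector)
{
    return vector;                           /* fixed delivery, edge-triggered */
}

int main(void)
{
    printf("addr = 0x%08x data = 0x%08x\n",
           msi_address(0 /* CPU0 APIC ID */, 0, 0), msi_data(0xb0));
    return 0;
}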
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/xen/arch/x86/hvm/vmx/vtd/pci-direct.h Fri Sep 14 16:40:49 2007 +0100 7.3 @@ -0,0 +1,48 @@ 7.4 +#ifndef ASM_PCI_DIRECT_H 7.5 +#define ASM_PCI_DIRECT_H 1 7.6 + 7.7 +#include <xen/types.h> 7.8 +#include <asm/io.h> 7.9 + 7.10 +/* Direct PCI access. This is used for PCI accesses in early boot before 7.11 + the PCI subsystem works. */ 7.12 + 7.13 +#define PDprintk(x...) 7.14 + 7.15 +static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) 7.16 +{ 7.17 + u32 v; 7.18 + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 7.19 + v = inl(0xcfc); 7.20 + if (v != 0xffffffff) 7.21 + PDprintk("%x reading 4 from %x: %x\n", slot, offset, v); 7.22 + return v; 7.23 +} 7.24 + 7.25 +static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) 7.26 +{ 7.27 + u8 v; 7.28 + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 7.29 + v = inb(0xcfc + (offset&3)); 7.30 + PDprintk("%x reading 1 from %x: %x\n", slot, offset, v); 7.31 + return v; 7.32 +} 7.33 + 7.34 +static inline u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) 7.35 +{ 7.36 + u16 v; 7.37 + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 7.38 + v = inw(0xcfc + (offset&2)); 7.39 + PDprintk("%x reading 2 from %x: %x\n", slot, offset, v); 7.40 + return v; 7.41 +} 7.42 + 7.43 +static inline void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, 7.44 + u32 val) 7.45 +{ 7.46 + PDprintk("%x writing to %x: %x\n", slot, offset, val); 7.47 + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 7.48 + outl(val, 0xcfc); 7.49 +} 7.50 + 7.51 +#endif
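Editor's note: the helpers in pci-direct.h use PCI configuration mechanism #1: a 32-bit address written to port 0xCF8 selects bus/slot/function/register (low address bits are ignored by the chipset), and the data is read from 0xCFC, with sub-dword reads offset by (offset & 3) or (offset & 2) into the data port. The sketch below shows only the address arithmetic, with no port I/O, so it runs anywhere; cf8_address() is my name for the encoding.

/* Standalone sketch of the config-mechanism-#1 address encoding used by
 * read_pci_config*() above: bit 31 enables the cycle, bits 23:16 select
 * the bus, 15:11 the device (slot), 10:8 the function, 7:0 the register
 * offset.  No port I/O -- illustration only. */
#include <stdio.h>
#include <stdint.h>

static uint32_t cf8_address(uint8_t bus, uint8_t slot, uint8_t func, uint8_t offset)
{
    return 0x80000000u | ((uint32_t)bus << 16) | ((uint32_t)slot << 11)
                       | ((uint32_t)func << 8) | offset;
}

int main(void)
{
    /* e.g. a 16-bit read of PCI_CLASS_DEVICE (0x0a) on bus 0, device 2, function 0 */
    uint8_t offset = 0x0a;
    printf("CONFIG_ADDRESS = 0x%08x, data port = 0x%x\n",
           cf8_address(0, 2, 0, offset), 0xcfc + (offset & 2));
    return 0;
}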
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/xen/arch/x86/hvm/vmx/vtd/pci_regs.h Fri Sep 14 16:40:49 2007 +0100 8.3 @@ -0,0 +1,449 @@ 8.4 +/* 8.5 + * pci_regs.h 8.6 + * 8.7 + * PCI standard defines 8.8 + * Copyright 1994, Drew Eckhardt 8.9 + * Copyright 1997--1999 Martin Mares <mj@ucw.cz> 8.10 + * 8.11 + * For more information, please consult the following manuals (look at 8.12 + * http://www.pcisig.com/ for how to get them): 8.13 + * 8.14 + * PCI BIOS Specification 8.15 + * PCI Local Bus Specification 8.16 + * PCI to PCI Bridge Specification 8.17 + * PCI System Design Guide 8.18 + */ 8.19 + 8.20 +#ifndef LINUX_PCI_REGS_H 8.21 +#define LINUX_PCI_REGS_H 8.22 + 8.23 +/* 8.24 + * Under PCI, each device has 256 bytes of configuration address space, 8.25 + * of which the first 64 bytes are standardized as follows: 8.26 + */ 8.27 +#define PCI_VENDOR_ID 0x00 /* 16 bits */ 8.28 +#define PCI_DEVICE_ID 0x02 /* 16 bits */ 8.29 +#define PCI_COMMAND 0x04 /* 16 bits */ 8.30 +#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ 8.31 +#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ 8.32 +#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ 8.33 +#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ 8.34 +#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ 8.35 +#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ 8.36 +#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ 8.37 +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ 8.38 +#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ 8.39 +#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ 8.40 +#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ 8.41 + 8.42 +#define PCI_STATUS 0x06 /* 16 bits */ 8.43 +#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ 8.44 +#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ 8.45 +#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ 8.46 +#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ 8.47 +#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ 8.48 +#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ 8.49 +#define PCI_STATUS_DEVSEL_FAST 0x000 8.50 +#define PCI_STATUS_DEVSEL_MEDIUM 0x200 8.51 +#define PCI_STATUS_DEVSEL_SLOW 0x400 8.52 +#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ 8.53 +#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ 8.54 +#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ 8.55 +#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ 8.56 +#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ 8.57 + 8.58 +#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 revision */ 8.59 +#define PCI_REVISION_ID 0x08 /* Revision ID */ 8.60 +#define PCI_CLASS_PROG 0x09 /* Reg. 
Level Programming Interface */ 8.61 +#define PCI_CLASS_DEVICE 0x0a /* Device class */ 8.62 + 8.63 +#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ 8.64 +#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ 8.65 +#define PCI_HEADER_TYPE 0x0e /* 8 bits */ 8.66 +#define PCI_HEADER_TYPE_NORMAL 0 8.67 +#define PCI_HEADER_TYPE_BRIDGE 1 8.68 +#define PCI_HEADER_TYPE_CARDBUS 2 8.69 + 8.70 +#define PCI_BIST 0x0f /* 8 bits */ 8.71 +#define PCI_BIST_CODE_MASK 0x0f /* Return result */ 8.72 +#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ 8.73 +#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ 8.74 + 8.75 +/* 8.76 + * Base addresses specify locations in memory or I/O space. 8.77 + * Decoded size can be determined by writing a value of 8.78 + * 0xffffffff to the register, and reading it back. Only 8.79 + * 1 bits are decoded. 8.80 + */ 8.81 +#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ 8.82 +#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ 8.83 +#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ 8.84 +#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ 8.85 +#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ 8.86 +#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ 8.87 +#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ 8.88 +#define PCI_BASE_ADDRESS_SPACE_IO 0x01 8.89 +#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 8.90 +#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 8.91 +#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ 8.92 +#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ 8.93 +#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ 8.94 +#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ 8.95 +#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) 8.96 +#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) 8.97 +/* bit 1 is reserved if address_space = 1 */ 8.98 + 8.99 +/* Header type 0 (normal devices) */ 8.100 +#define PCI_CARDBUS_CIS 0x28 8.101 +#define PCI_SUBSYSTEM_VENDOR_ID 0x2c 8.102 +#define PCI_SUBSYSTEM_ID 0x2e 8.103 +#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ 8.104 +#define PCI_ROM_ADDRESS_ENABLE 0x01 8.105 +#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) 8.106 + 8.107 +#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ 8.108 + 8.109 +/* 0x35-0x3b are reserved */ 8.110 +#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ 8.111 +#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ 8.112 +#define PCI_MIN_GNT 0x3e /* 8 bits */ 8.113 +#define PCI_MAX_LAT 0x3f /* 8 bits */ 8.114 + 8.115 +/* Header type 1 (PCI-to-PCI bridges) */ 8.116 +#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ 8.117 +#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ 8.118 +#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ 8.119 +#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ 8.120 +#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ 8.121 +#define PCI_IO_LIMIT 0x1d 8.122 +#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ 8.123 +#define PCI_IO_RANGE_TYPE_16 0x00 8.124 +#define PCI_IO_RANGE_TYPE_32 0x01 8.125 +#define PCI_IO_RANGE_MASK (~0x0fUL) 8.126 +#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ 8.127 +#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ 8.128 +#define PCI_MEMORY_LIMIT 0x22 8.129 +#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL 8.130 +#define PCI_MEMORY_RANGE_MASK (~0x0fUL) 8.131 +#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ 8.132 +#define PCI_PREF_MEMORY_LIMIT 0x26 8.133 
+#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL 8.134 +#define PCI_PREF_RANGE_TYPE_32 0x00 8.135 +#define PCI_PREF_RANGE_TYPE_64 0x01 8.136 +#define PCI_PREF_RANGE_MASK (~0x0fUL) 8.137 +#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ 8.138 +#define PCI_PREF_LIMIT_UPPER32 0x2c 8.139 +#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ 8.140 +#define PCI_IO_LIMIT_UPPER16 0x32 8.141 +/* 0x34 same as for htype 0 */ 8.142 +/* 0x35-0x3b is reserved */ 8.143 +#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ 8.144 +/* 0x3c-0x3d are same as for htype 0 */ 8.145 +#define PCI_BRIDGE_CONTROL 0x3e 8.146 +#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ 8.147 +#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ 8.148 +#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */ 8.149 +#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ 8.150 +#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ 8.151 +#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ 8.152 +#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ 8.153 + 8.154 +/* Header type 2 (CardBus bridges) */ 8.155 +#define PCI_CB_CAPABILITY_LIST 0x14 8.156 +/* 0x15 reserved */ 8.157 +#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ 8.158 +#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ 8.159 +#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ 8.160 +#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ 8.161 +#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ 8.162 +#define PCI_CB_MEMORY_BASE_0 0x1c 8.163 +#define PCI_CB_MEMORY_LIMIT_0 0x20 8.164 +#define PCI_CB_MEMORY_BASE_1 0x24 8.165 +#define PCI_CB_MEMORY_LIMIT_1 0x28 8.166 +#define PCI_CB_IO_BASE_0 0x2c 8.167 +#define PCI_CB_IO_BASE_0_HI 0x2e 8.168 +#define PCI_CB_IO_LIMIT_0 0x30 8.169 +#define PCI_CB_IO_LIMIT_0_HI 0x32 8.170 +#define PCI_CB_IO_BASE_1 0x34 8.171 +#define PCI_CB_IO_BASE_1_HI 0x36 8.172 +#define PCI_CB_IO_LIMIT_1 0x38 8.173 +#define PCI_CB_IO_LIMIT_1_HI 0x3a 8.174 +#define PCI_CB_IO_RANGE_MASK (~0x03UL) 8.175 +/* 0x3c-0x3d are same as for htype 0 */ 8.176 +#define PCI_CB_BRIDGE_CONTROL 0x3e 8.177 +#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ 8.178 +#define PCI_CB_BRIDGE_CTL_SERR 0x02 8.179 +#define PCI_CB_BRIDGE_CTL_ISA 0x04 8.180 +#define PCI_CB_BRIDGE_CTL_VGA 0x08 8.181 +#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 8.182 +#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ 8.183 +#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ 8.184 +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ 8.185 +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 8.186 +#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 8.187 +#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 8.188 +#define PCI_CB_SUBSYSTEM_ID 0x42 8.189 +#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ 8.190 +/* 0x48-0x7f reserved */ 8.191 + 8.192 +/* Capability lists */ 8.193 + 8.194 +#define PCI_CAP_LIST_ID 0 /* Capability ID */ 8.195 +#define PCI_CAP_ID_PM 0x01 /* Power Management */ 8.196 +#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ 8.197 +#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ 8.198 +#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ 8.199 +#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ 8.200 
+#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ 8.201 +#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ 8.202 +#define PCI_CAP_ID_HT_IRQCONF 0x08 /* HyperTransport IRQ Configuration */ 8.203 +#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ 8.204 +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ 8.205 +#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ 8.206 +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ 8.207 +#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ 8.208 +#define PCI_CAP_SIZEOF 4 8.209 + 8.210 +/* Power Management Registers */ 8.211 + 8.212 +#define PCI_PM_PMC 2 /* PM Capabilities Register */ 8.213 +#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ 8.214 +#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ 8.215 +#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ 8.216 +#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ 8.217 +#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxilliary power support mask */ 8.218 +#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ 8.219 +#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ 8.220 +#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ 8.221 +#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ 8.222 +#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ 8.223 +#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ 8.224 +#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ 8.225 +#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ 8.226 +#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ 8.227 +#define PCI_PM_CTRL 4 /* PM control and status register */ 8.228 +#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ 8.229 +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ 8.230 +#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ 8.231 +#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ 8.232 +#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ 8.233 +#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ 8.234 +#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ 8.235 +#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ 8.236 +#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ 8.237 +#define PCI_PM_DATA_REGISTER 7 /* (??) 
*/ 8.238 +#define PCI_PM_SIZEOF 8 8.239 + 8.240 +/* AGP registers */ 8.241 + 8.242 +#define PCI_AGP_VERSION 2 /* BCD version number */ 8.243 +#define PCI_AGP_RFU 3 /* Rest of capability flags */ 8.244 +#define PCI_AGP_STATUS 4 /* Status register */ 8.245 +#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ 8.246 +#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ 8.247 +#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ 8.248 +#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ 8.249 +#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ 8.250 +#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ 8.251 +#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ 8.252 +#define PCI_AGP_COMMAND 8 /* Control register */ 8.253 +#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ 8.254 +#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ 8.255 +#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ 8.256 +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ 8.257 +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ 8.258 +#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ 8.259 +#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ 8.260 +#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ 8.261 +#define PCI_AGP_SIZEOF 12 8.262 + 8.263 +/* Vital Product Data */ 8.264 + 8.265 +#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) */ 8.266 +#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */ 8.267 +#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ 8.268 +#define PCI_VPD_DATA 4 /* 32-bits of data returned here */ 8.269 + 8.270 +/* Slot Identification */ 8.271 + 8.272 +#define PCI_SID_ESR 2 /* Expansion Slot Register */ 8.273 +#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ 8.274 +#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ 8.275 +#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ 8.276 + 8.277 +/* Message Signalled Interrupts registers */ 8.278 + 8.279 +#define PCI_MSI_FLAGS 2 /* Various flags */ 8.280 +#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ 8.281 +#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ 8.282 +#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ 8.283 +#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ 8.284 +#define PCI_MSI_FLAGS_MASKBIT 0x100 /* 64-bit mask bits allowed */ 8.285 +#define PCI_MSI_RFU 3 /* Rest of capability flags */ 8.286 +#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ 8.287 +#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ 8.288 +#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ 8.289 +#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ 8.290 +#define PCI_MSI_MASK_BIT 16 /* Mask bits register */ 8.291 + 8.292 +/* CompactPCI Hotswap Register */ 8.293 + 8.294 +#define PCI_CHSWP_CSR 2 /* Control and Status Register */ 8.295 +#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ 8.296 +#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ 8.297 +#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ 8.298 +#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ 8.299 +#define PCI_CHSWP_PI 0x30 /* Programming Interface */ 8.300 +#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ 8.301 +#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ 8.302 + 8.303 +/* PCI-X 
registers */ 8.304 + 8.305 +#define PCI_X_CMD 2 /* Modes & Features */ 8.306 +#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ 8.307 +#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ 8.308 +#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ 8.309 +#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ 8.310 +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ 8.311 +#define PCI_X_STATUS 4 /* PCI-X capabilities */ 8.312 +#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ 8.313 +#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ 8.314 +#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ 8.315 +#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ 8.316 +#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ 8.317 +#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ 8.318 +#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ 8.319 +#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ 8.320 +#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ 8.321 +#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ 8.322 +#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ 8.323 +#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ 8.324 +#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ 8.325 + 8.326 +/* PCI Express capability registers */ 8.327 + 8.328 +#define PCI_EXP_FLAGS 2 /* Capabilities register */ 8.329 +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ 8.330 +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ 8.331 +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ 8.332 +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ 8.333 +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ 8.334 +#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ 8.335 +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ 8.336 +#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ 8.337 +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ 8.338 +#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ 8.339 +#define PCI_EXP_DEVCAP 4 /* Device capabilities */ 8.340 +#define PCI_EXP_DEVCAP_PAYLOAD 0x07 /* Max_Payload_Size */ 8.341 +#define PCI_EXP_DEVCAP_PHANTOM 0x18 /* Phantom functions */ 8.342 +#define PCI_EXP_DEVCAP_EXT_TAG 0x20 /* Extended tags */ 8.343 +#define PCI_EXP_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */ 8.344 +#define PCI_EXP_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */ 8.345 +#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ 8.346 +#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ 8.347 +#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ 8.348 +#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ 8.349 +#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ 8.350 +#define PCI_EXP_DEVCTL 8 /* Device Control */ 8.351 +#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ 8.352 +#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ 8.353 +#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ 8.354 +#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. 
*/ 8.355 +#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ 8.356 +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ 8.357 +#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ 8.358 +#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ 8.359 +#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ 8.360 +#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ 8.361 +#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ 8.362 +#define PCI_EXP_DEVSTA 10 /* Device Status */ 8.363 +#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */ 8.364 +#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */ 8.365 +#define PCI_EXP_DEVSTA_FED 0x04 /* Fatal Error Detected */ 8.366 +#define PCI_EXP_DEVSTA_URD 0x08 /* Unsupported Request Detected */ 8.367 +#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ 8.368 +#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ 8.369 +#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ 8.370 +#define PCI_EXP_LNKCTL 16 /* Link Control */ 8.371 +#define PCI_EXP_LNKSTA 18 /* Link Status */ 8.372 +#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ 8.373 +#define PCI_EXP_SLTCTL 24 /* Slot Control */ 8.374 +#define PCI_EXP_SLTSTA 26 /* Slot Status */ 8.375 +#define PCI_EXP_RTCTL 28 /* Root Control */ 8.376 +#define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ 8.377 +#define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ 8.378 +#define PCI_EXP_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */ 8.379 +#define PCI_EXP_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */ 8.380 +#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ 8.381 +#define PCI_EXP_RTCAP 30 /* Root Capabilities */ 8.382 +#define PCI_EXP_RTSTA 32 /* Root Status */ 8.383 + 8.384 +/* Extended Capabilities (PCI-X 2.0 and Express) */ 8.385 +#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) 8.386 +#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) 8.387 +#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) 8.388 + 8.389 +#define PCI_EXT_CAP_ID_ERR 1 8.390 +#define PCI_EXT_CAP_ID_VC 2 8.391 +#define PCI_EXT_CAP_ID_DSN 3 8.392 +#define PCI_EXT_CAP_ID_PWR 4 8.393 + 8.394 +/* Advanced Error Reporting */ 8.395 +#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ 8.396 +#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */ 8.397 +#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ 8.398 +#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ 8.399 +#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ 8.400 +#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ 8.401 +#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ 8.402 +#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ 8.403 +#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ 8.404 +#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ 8.405 +#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ 8.406 +#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ 8.407 +#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ 8.408 + /* Same bits as above */ 8.409 +#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ 8.410 + /* Same bits as above */ 8.411 +#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ 8.412 +#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ 8.413 +#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ 8.414 +#define 
PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ 8.415 +#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ 8.416 +#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ 8.417 +#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ 8.418 + /* Same bits as above */ 8.419 +#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ 8.420 +#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ 8.421 +#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ 8.422 +#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ 8.423 +#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ 8.424 +#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ 8.425 +#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ 8.426 +#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ 8.427 +#define PCI_ERR_ROOT_STATUS 48 8.428 +#define PCI_ERR_ROOT_COR_SRC 52 8.429 +#define PCI_ERR_ROOT_SRC 54 8.430 + 8.431 +/* Virtual Channel */ 8.432 +#define PCI_VC_PORT_REG1 4 8.433 +#define PCI_VC_PORT_REG2 8 8.434 +#define PCI_VC_PORT_CTRL 12 8.435 +#define PCI_VC_PORT_STATUS 14 8.436 +#define PCI_VC_RES_CAP 16 8.437 +#define PCI_VC_RES_CTRL 20 8.438 +#define PCI_VC_RES_STATUS 26 8.439 + 8.440 +/* Power Budgeting */ 8.441 +#define PCI_PWR_DSR 4 /* Data Select Register */ 8.442 +#define PCI_PWR_DATA 8 /* Data Register */ 8.443 +#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ 8.444 +#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ 8.445 +#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ 8.446 +#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ 8.447 +#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ 8.448 +#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ 8.449 +#define PCI_PWR_CAP 12 /* Capability */ 8.450 +#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ 8.451 + 8.452 +#endif /* LINUX_PCI_REGS_H */
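The PCI Express capability constants above are the kind of definitions the VT-d code consumes when it needs to know what sort of device or port it is dealing with. As a minimal sketch of that usage (not part of this changeset): the helper below walks a device's standard capability list, finds the PCI Express capability, and returns the port type encoded in PCI_EXP_FLAGS. It assumes the read_pci_config_byte()/read_pci_config_16() config-space accessors declared in pci-direct.h, and the standard PCI_CAPABILITY_LIST (0x34) and PCI_CAP_ID_EXP (0x10) definitions expected in the earlier part of this header; the function name itself is invented for illustration.

/*
 * Illustrative sketch only -- not part of this changeset.
 * Returns one of the PCI_EXP_TYPE_* values for bus:dev.fn,
 * or -1 if the device has no PCI Express capability.
 */
static int pcie_port_type(u8 bus, u8 dev, u8 fn)
{
    int ttl = 48;                           /* bound the capability walk */
    u8 pos = read_pci_config_byte(bus, dev, fn, PCI_CAPABILITY_LIST) & ~3;

    while (pos >= 0x40 && ttl--) {
        u8 id = read_pci_config_byte(bus, dev, fn, pos);      /* cap ID */

        if (id == 0xff)
            return -1;                      /* device not present */
        if (id == PCI_CAP_ID_EXP) {
            u16 flags = read_pci_config_16(bus, dev, fn, pos + PCI_EXP_FLAGS);
            return (flags & PCI_EXP_FLAGS_TYPE) >> 4;
        }
        pos = read_pci_config_byte(bus, dev, fn, pos + 1) & ~3; /* next ptr */
    }
    return -1;
}

A caller could then, for example, distinguish a root port (PCI_EXP_TYPE_ROOT_PORT) from an endpoint or a PCI/PCI-X bridge before deciding how to handle the device.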
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/xen/arch/x86/hvm/vmx/vtd/utils.c Fri Sep 14 16:40:49 2007 +0100 9.3 @@ -0,0 +1,302 @@ 9.4 +/* 9.5 + * Copyright (c) 2006, Intel Corporation. 9.6 + * 9.7 + * This program is free software; you can redistribute it and/or modify it 9.8 + * under the terms and conditions of the GNU General Public License, 9.9 + * version 2, as published by the Free Software Foundation. 9.10 + * 9.11 + * This program is distributed in the hope it will be useful, but WITHOUT 9.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 9.13 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 9.14 + * more details. 9.15 + * 9.16 + * You should have received a copy of the GNU General Public License along with 9.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 9.18 + * Place - Suite 330, Boston, MA 02111-1307 USA. 9.19 + * 9.20 + * Copyright (C) Allen Kay <allen.m.kay@intel.com> 9.21 + */ 9.22 + 9.23 +#include <xen/init.h> 9.24 +#include <xen/bitmap.h> 9.25 +#include <xen/irq.h> 9.26 +#include <xen/spinlock.h> 9.27 +#include <xen/sched.h> 9.28 +#include <asm/delay.h> 9.29 +#include <asm/iommu.h> 9.30 +#include <asm/hvm/vmx/intel-iommu.h> 9.31 +#include "dmar.h" 9.32 +#include "pci-direct.h" 9.33 +#include "pci_regs.h" 9.34 +#include "msi.h" 9.35 + 9.36 +#include <xen/mm.h> 9.37 +#include <xen/xmalloc.h> 9.38 + 9.39 +#if defined(__x86_64__) 9.40 +void print_iommu_regs(struct acpi_drhd_unit *drhd) 9.41 +{ 9.42 + struct iommu *iommu = drhd->iommu; 9.43 + 9.44 + printk("---- print_iommu_regs ----\n"); 9.45 + printk("print_iommu_regs: drhd->address = %lx\n", drhd->address); 9.46 + printk("print_iommu_regs: DMAR_VER_REG = %x\n", 9.47 + dmar_readl(iommu->reg,DMAR_VER_REG)); 9.48 + printk("print_iommu_regs: DMAR_CAP_REG = %lx\n", 9.49 + dmar_readq(iommu->reg,DMAR_CAP_REG)); 9.50 + printk("print_iommu_regs: n_fault_reg = %lx\n", 9.51 + cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG))); 9.52 + printk("print_iommu_regs: fault_recording_offset_l = %lx\n", 9.53 + cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))); 9.54 + printk("print_iommu_regs: fault_recording_offset_h = %lx\n", 9.55 + cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8); 9.56 + printk("print_iommu_regs: fault_recording_reg_l = %lx\n", 9.57 + dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)))); 9.58 + printk("print_iommu_regs: fault_recording_reg_h = %lx\n", 9.59 + dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8)); 9.60 + printk("print_iommu_regs: DMAR_ECAP_REG = %lx\n", 9.61 + dmar_readq(iommu->reg,DMAR_ECAP_REG)); 9.62 + printk("print_iommu_regs: DMAR_GCMD_REG = %x\n", 9.63 + dmar_readl(iommu->reg,DMAR_GCMD_REG)); 9.64 + printk("print_iommu_regs: DMAR_GSTS_REG = %x\n", 9.65 + dmar_readl(iommu->reg,DMAR_GSTS_REG)); 9.66 + printk("print_iommu_regs: DMAR_RTADDR_REG = %lx\n", 9.67 + dmar_readq(iommu->reg,DMAR_RTADDR_REG)); 9.68 + printk("print_iommu_regs: DMAR_CCMD_REG = %lx\n", 9.69 + dmar_readq(iommu->reg,DMAR_CCMD_REG)); 9.70 + printk("print_iommu_regs: DMAR_FSTS_REG = %x\n", 9.71 + dmar_readl(iommu->reg,DMAR_FSTS_REG)); 9.72 + printk("print_iommu_regs: DMAR_FECTL_REG = %x\n", 9.73 + dmar_readl(iommu->reg,DMAR_FECTL_REG)); 9.74 + printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n", 9.75 + dmar_readl(iommu->reg,DMAR_FEDATA_REG)); 9.76 + printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n", 9.77 + 
dmar_readl(iommu->reg,DMAR_FEADDR_REG)); 9.78 + printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n", 9.79 + dmar_readl(iommu->reg,DMAR_FEUADDR_REG)); 9.80 +} 9.81 + 9.82 +void print_vtd_entries(struct domain *d, int bus, int devfn, 9.83 + unsigned long gmfn) 9.84 +{ 9.85 + struct hvm_iommu *hd = domain_hvm_iommu(d); 9.86 + struct acpi_drhd_unit *drhd; 9.87 + struct iommu *iommu; 9.88 + struct context_entry *ctxt_entry; 9.89 + struct root_entry *root_entry; 9.90 + u64 *l3, *l2, *l1; 9.91 + u32 l3_index, l2_index, l1_index; 9.92 + u32 i = 0; 9.93 + 9.94 + printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn = %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn); 9.95 + 9.96 + for_each_drhd_unit(drhd) { 9.97 + printk("---- print_vtd_entries %d ----\n", i++); 9.98 + 9.99 + if (hd->pgd == NULL) { 9.100 + printk(" hg->pgd == NULL\n"); 9.101 + return; 9.102 + } 9.103 + 9.104 + iommu = drhd->iommu; 9.105 + root_entry = iommu->root_entry; 9.106 + printk(" hd->pgd = %p virt_to_maddr(hd->pgd) = %lx\n", 9.107 + hd->pgd, virt_to_maddr(hd->pgd)); 9.108 + 9.109 + printk(" root_entry = %p\n", root_entry); 9.110 + if (root_entry == NULL) { 9.111 + printk(" root_entry == NULL\n"); 9.112 + return; 9.113 + } 9.114 + 9.115 + printk(" root_entry[%x] = %lx\n", bus, root_entry[bus].val); 9.116 + printk(" maddr_to_virt(root_entry[%x]) = %p\n", 9.117 + bus, maddr_to_virt(root_entry[bus].val)); 9.118 + 9.119 + if (root_entry[bus].val == 0) { 9.120 + printk(" root_entry[%x].lo == 0\n", bus); 9.121 + return; 9.122 + } 9.123 + 9.124 + ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT); 9.125 + if (ctxt_entry == NULL) { 9.126 + printk(" ctxt_entry == NULL\n"); 9.127 + return; 9.128 + } 9.129 + 9.130 + if (ctxt_entry[devfn].lo == 0) { 9.131 + printk(" ctxt_entry[%x].lo == 0\n", devfn); 9.132 + return; 9.133 + } 9.134 + 9.135 + printk(" context = %p\n", ctxt_entry); 9.136 + printk(" context[%x] = %lx %lx\n", 9.137 + devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo); 9.138 + printk(" maddr_to_virt(context[%x].lo) = %p\n", 9.139 + devfn, maddr_to_virt(ctxt_entry[devfn].lo)); 9.140 + printk(" context[%x] = %lx\n", devfn, ctxt_entry[devfn].lo); 9.141 + 9.142 + l3 = maddr_to_virt(ctxt_entry[devfn].lo); 9.143 + l3 = (u64*)(((u64) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); 9.144 + printk(" l3 = %p\n", l3); 9.145 + if (l3 == NULL) return; 9.146 + 9.147 + l3_index = (gmfn >> 9 >> 9) & 0x1ff; 9.148 + printk(" l3_index = %x\n", l3_index); 9.149 + printk(" l3[%x] = %lx\n", l3_index, l3[l3_index]); 9.150 + 9.151 + l2 = maddr_to_virt(l3[l3_index]); 9.152 + l2 = (u64*)(((u64) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); 9.153 + printk(" l2 = %p\n", l2); 9.154 + if (l2 == NULL) return; 9.155 + 9.156 + l2_index = (gmfn >> 9) & 0x1ff; 9.157 + printk(" gmfn = %lx\n", gmfn); 9.158 + printk(" gmfn >> 9= %lx\n", gmfn >> 9); 9.159 + printk(" l2_index = %x\n", l2_index); 9.160 + printk(" l2[%x] = %lx\n", l2_index, l2[l2_index]); 9.161 + 9.162 + l1 = maddr_to_virt(l2[l2_index]); 9.163 + l1 = (u64*)(((u64) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); 9.164 + if (l1 == NULL) return; 9.165 + l1_index = gmfn & 0x1ff; 9.166 + printk(" l1 = %p\n", l1); 9.167 + printk(" l1_index = %x\n", l1_index); 9.168 + printk(" l1[%x] = %lx\n", l1_index, l1[l1_index]); 9.169 + } 9.170 +} 9.171 + 9.172 +#else // !m64 9.173 + 9.174 +void print_iommu_regs(struct acpi_drhd_unit *drhd) 9.175 +{ 9.176 + struct iommu *iommu = drhd->iommu; 9.177 + 9.178 + printk("---- print_iommu_regs ----\n"); 9.179 + 
printk("print_iommu_regs: drhd->address = %lx\n", drhd->address); 9.180 + printk("print_iommu_regs: DMAR_VER_REG = %x\n", 9.181 + dmar_readl(iommu->reg,DMAR_VER_REG)); 9.182 + printk("print_iommu_regs: DMAR_CAP_REG = %llx\n", 9.183 + dmar_readq(iommu->reg,DMAR_CAP_REG)); 9.184 + printk("print_iommu_regs: n_fault_reg = %llx\n", 9.185 + cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG))); 9.186 + printk("print_iommu_regs: fault_recording_offset_l = %llx\n", 9.187 + cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))); 9.188 + printk("print_iommu_regs: fault_recording_offset_h = %llx\n", 9.189 + cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8); 9.190 + printk("print_iommu_regs: fault_recording_reg_l = %llx\n", 9.191 + dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)))); 9.192 + printk("print_iommu_regs: fault_recording_reg_h = %llx\n", 9.193 + dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8)); 9.194 + printk("print_iommu_regs: DMAR_ECAP_REG = %llx\n", 9.195 + dmar_readq(iommu->reg,DMAR_ECAP_REG)); 9.196 + printk("print_iommu_regs: DMAR_GCMD_REG = %x\n", 9.197 + dmar_readl(iommu->reg,DMAR_GCMD_REG)); 9.198 + printk("print_iommu_regs: DMAR_GSTS_REG = %x\n", 9.199 + dmar_readl(iommu->reg,DMAR_GSTS_REG)); 9.200 + printk("print_iommu_regs: DMAR_RTADDR_REG = %llx\n", 9.201 + dmar_readq(iommu->reg,DMAR_RTADDR_REG)); 9.202 + printk("print_iommu_regs: DMAR_CCMD_REG = %llx\n", 9.203 + dmar_readq(iommu->reg,DMAR_CCMD_REG)); 9.204 + printk("print_iommu_regs: DMAR_FSTS_REG = %x\n", 9.205 + dmar_readl(iommu->reg,DMAR_FSTS_REG)); 9.206 + printk("print_iommu_regs: DMAR_FECTL_REG = %x\n", 9.207 + dmar_readl(iommu->reg,DMAR_FECTL_REG)); 9.208 + printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n", 9.209 + dmar_readl(iommu->reg,DMAR_FEDATA_REG)); 9.210 + printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n", 9.211 + dmar_readl(iommu->reg,DMAR_FEADDR_REG)); 9.212 + printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n", 9.213 + dmar_readl(iommu->reg,DMAR_FEUADDR_REG)); 9.214 +} 9.215 + 9.216 +void print_vtd_entries(struct domain *d, int bus, int devfn, 9.217 + unsigned long gmfn) 9.218 +{ 9.219 + struct hvm_iommu *hd = domain_hvm_iommu(d); 9.220 + struct acpi_drhd_unit *drhd; 9.221 + struct iommu *iommu; 9.222 + struct context_entry *ctxt_entry; 9.223 + struct root_entry *root_entry; 9.224 + u64 *l3, *l2, *l1; 9.225 + u32 l3_index, l2_index, l1_index; 9.226 + u32 i = 0; 9.227 + 9.228 + printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn = %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn); 9.229 + 9.230 + for_each_drhd_unit(drhd) { 9.231 + printk("---- print_vtd_entries %d ----\n", i++); 9.232 + 9.233 + if (hd->pgd == NULL) { 9.234 + printk(" hg->pgd == NULL\n"); 9.235 + return; 9.236 + } 9.237 + 9.238 + iommu = drhd->iommu; 9.239 + root_entry = iommu->root_entry; 9.240 + printk(" d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n", 9.241 + hd->pgd, virt_to_maddr(hd->pgd)); 9.242 + 9.243 + printk(" root_entry = %p\n", root_entry); 9.244 + if (root_entry == NULL) { 9.245 + printk(" root_entry == NULL\n"); 9.246 + return; 9.247 + } 9.248 + 9.249 + printk(" root_entry[%x] = %llx\n", bus, root_entry[bus].val); 9.250 + printk(" maddr_to_virt(root_entry[%x]) = %p\n", 9.251 + bus, maddr_to_virt(root_entry[bus].val)); 9.252 + 9.253 + if (root_entry[bus].val == 0) { 9.254 + printk(" root_entry[%x].lo == 0\n", bus); 9.255 + return; 9.256 + } 9.257 + 9.258 + ctxt_entry = 
maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT); 9.259 + if (ctxt_entry == NULL) { 9.260 + printk(" ctxt_entry == NULL\n"); 9.261 + return; 9.262 + } 9.263 + 9.264 + if (ctxt_entry[devfn].lo == 0) { 9.265 + printk(" ctxt_entry[%x].lo == 0\n", devfn); 9.266 + return; 9.267 + } 9.268 + 9.269 + printk(" context = %p\n", ctxt_entry); 9.270 + printk(" context[%x] = %llx %llx\n", 9.271 + devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo); 9.272 + printk(" maddr_to_virt(context[%x].lo) = %p\n", 9.273 + devfn, maddr_to_virt(ctxt_entry[devfn].lo)); 9.274 + printk(" context[%x] = %llx\n", devfn, ctxt_entry[devfn].lo); 9.275 + 9.276 + l3 = maddr_to_virt(ctxt_entry[devfn].lo); 9.277 + l3 = (u64*)(((u32) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); 9.278 + printk(" l3 = %p\n", l3); 9.279 + if (l3 == NULL) return; 9.280 + 9.281 + l3_index = (gmfn >> 9 >> 9) & 0x1ff; 9.282 + printk(" l3_index = %x\n", l3_index); 9.283 + printk(" l3[%x] = %llx\n", l3_index, l3[l3_index]); 9.284 + 9.285 + l2 = maddr_to_virt(l3[l3_index]); 9.286 + l2 = (u64*)(((u32) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); 9.287 + printk(" l2 = %p\n", l2); 9.288 + if (l2 == NULL) return; 9.289 + 9.290 + l2_index = (gmfn >> 9) & 0x1ff; 9.291 + printk(" gmfn = %lx\n", gmfn); 9.292 + printk(" gmfn >> 9= %lx\n", gmfn >> 9); 9.293 + printk(" l2_index = %x\n", l2_index); 9.294 + printk(" l2[%x] = %llx\n", l2_index, l2[l2_index]); 9.295 + 9.296 + l1 = maddr_to_virt(l2[l2_index]); 9.297 + l1 = (u64*)(((u32) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); 9.298 + if (l1 == NULL) return; 9.299 + l1_index = gmfn & 0x1ff; 9.300 + printk(" l1 = %p\n", l1); 9.301 + printk(" l1_index = %x\n", l1_index); 9.302 + printk(" l1[%x] = %llx\n", l1_index, l1[l1_index]); 9.303 + } 9.304 +} 9.305 +#endif // !m64
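print_vtd_entries() above traces the same root-entry, context-entry and page-table walk that the IOMMU hardware performs, assuming a 3-level table with 4K pages and 9 address bits per level. As a minimal sketch of the index arithmetic it prints (not part of this changeset; the macro and helper names are invented for illustration), the guest machine frame number splits into three 9-bit fields:

/*
 * Illustrative sketch only -- not part of this changeset.  With a 3-level
 * VT-d page table and 4K pages, level 3 selects an entry in the top table
 * and level 1 selects the leaf entry -- exactly the shifts used above:
 * (gmfn >> 9 >> 9) & 0x1ff, (gmfn >> 9) & 0x1ff, and gmfn & 0x1ff.
 */
#define VTD_LEVEL_STRIDE  9
#define VTD_LEVEL_MASK    ((1UL << VTD_LEVEL_STRIDE) - 1)   /* 0x1ff */

static unsigned int vtd_level_index(unsigned long gmfn, int level)
{
    return (gmfn >> (VTD_LEVEL_STRIDE * (level - 1))) & VTD_LEVEL_MASK;
}

Each non-leaf entry holds the machine address of the next 4K-aligned table, which is why print_vtd_entries() clears the low PAGE_SHIFT_4K bits of each looked-up value before dereferencing it.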