ia64/xen-unstable

changeset 15916:f4bbd3f327e4

Intel VT-d specific changes in arch/x86/hvm/vmx/vtd.

Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Signed-off-by: Guy Zana <guy@neocleus.com>
author kfraser@localhost.localdomain
date Fri Sep 14 16:40:49 2007 +0100 (2007-09-14)
parents acfa9290746f
children babe17e7a4ee
files xen/arch/x86/hvm/vmx/vtd/Makefile xen/arch/x86/hvm/vmx/vtd/dmar.c xen/arch/x86/hvm/vmx/vtd/dmar.h xen/arch/x86/hvm/vmx/vtd/intel-iommu.c xen/arch/x86/hvm/vmx/vtd/io.c xen/arch/x86/hvm/vmx/vtd/msi.h xen/arch/x86/hvm/vmx/vtd/pci-direct.h xen/arch/x86/hvm/vmx/vtd/pci_regs.h xen/arch/x86/hvm/vmx/vtd/utils.c
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/xen/arch/x86/hvm/vmx/vtd/Makefile	Fri Sep 14 16:40:49 2007 +0100
     1.3 @@ -0,0 +1,4 @@
     1.4 +obj-y += intel-iommu.o
     1.5 +obj-y += dmar.o
     1.6 +obj-y += utils.o
     1.7 +obj-y += io.o
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.c	Fri Sep 14 16:40:49 2007 +0100
     2.3 @@ -0,0 +1,494 @@
     2.4 +/*
     2.5 + * Copyright (c) 2006, Intel Corporation.
     2.6 + *
     2.7 + * This program is free software; you can redistribute it and/or modify it
     2.8 + * under the terms and conditions of the GNU General Public License,
     2.9 + * version 2, as published by the Free Software Foundation.
    2.10 + *
    2.11 + * This program is distributed in the hope it will be useful, but WITHOUT
    2.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    2.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    2.14 + * more details.
    2.15 + *
    2.16 + * You should have received a copy of the GNU General Public License along with
    2.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    2.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    2.19 + *
    2.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com>
    2.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com>
    2.22 + * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
    2.23 + */
    2.24 +
    2.25 +#include <xen/init.h>
    2.26 +#include <xen/bitmap.h>
    2.27 +#include <xen/kernel.h>
    2.28 +#include <xen/acpi.h>
    2.29 +#include <xen/mm.h>
    2.30 +#include <xen/xmalloc.h>
    2.31 +#include <asm/string.h>
    2.32 +#include "dmar.h"
    2.33 +#include "pci-direct.h"
    2.34 +#include "pci_regs.h"
    2.35 +
    2.36 +#undef PREFIX
    2.37 +#define PREFIX VTDPREFIX "ACPI DMAR:"
    2.38 +#define DEBUG
    2.39 +
    2.40 +#define MIN_SCOPE_LEN (sizeof(struct acpi_pci_path) + sizeof(struct acpi_dev_scope))
    2.41 +
    2.42 +LIST_HEAD(acpi_drhd_units);
    2.43 +LIST_HEAD(acpi_rmrr_units);
    2.44 +LIST_HEAD(acpi_atsr_units);
    2.45 +LIST_HEAD(acpi_ioapic_units);
    2.46 +
    2.47 +u8 dmar_host_address_width;
    2.48 +
    2.49 +static int __init acpi_register_drhd_unit(struct acpi_drhd_unit *drhd)
    2.50 +{
    2.51 +    /*
     2.52 +     * add INCLUDE_ALL at the tail, so a scan of the list will find
     2.53 +     * it at the very end.
    2.54 +     */
    2.55 +    if (drhd->include_all)
    2.56 +        list_add_tail(&drhd->list, &acpi_drhd_units);
    2.57 +    else
    2.58 +        list_add(&drhd->list, &acpi_drhd_units);
    2.59 +    return 0;
    2.60 +}
    2.61 +
    2.62 +static int __init acpi_register_rmrr_unit(struct acpi_rmrr_unit *rmrr)
    2.63 +{
    2.64 +    list_add(&rmrr->list, &acpi_rmrr_units);
    2.65 +    return 0;
    2.66 +}
    2.67 +
    2.68 +static int acpi_pci_device_match(struct pci_dev *devices, int cnt,
    2.69 +                 struct pci_dev *dev)
    2.70 +{
    2.71 +    int i;
    2.72 +
    2.73 +    for (i = 0; i < cnt; i++) {
    2.74 +        if ((dev->bus == devices->bus) &&
    2.75 +            (dev->devfn == devices->devfn))
    2.76 +            return 1;
    2.77 +        devices++;
    2.78 +    }
    2.79 +    return 0;
    2.80 +}
    2.81 +
    2.82 +static int __init acpi_register_atsr_unit(struct acpi_atsr_unit *atsr)
    2.83 +{
    2.84 +    /*
     2.85 +     * add ALL_PORTS at the tail, so a scan of the list will find
     2.86 +     * it at the very end.
    2.87 +     */
    2.88 +    if (atsr->all_ports)
    2.89 +        list_add_tail(&atsr->list, &acpi_atsr_units);
    2.90 +    else
    2.91 +        list_add(&atsr->list, &acpi_atsr_units);
    2.92 +    return 0;
    2.93 +}
    2.94 +
    2.95 +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev)
    2.96 +{
    2.97 +    struct acpi_drhd_unit *drhd;
    2.98 +    struct acpi_drhd_unit *include_all_drhd;
    2.99 +
   2.100 +    include_all_drhd = NULL;
   2.101 +    list_for_each_entry(drhd, &acpi_drhd_units, list) {
   2.102 +        if (drhd->include_all)
   2.103 +            include_all_drhd = drhd;
   2.104 +        if (acpi_pci_device_match(drhd->devices,
   2.105 +                        drhd->devices_cnt, dev))
   2.106 +        {
   2.107 +            gdprintk(XENLOG_INFO VTDPREFIX, 
   2.108 +                     "acpi_find_matched_drhd_unit: drhd->address = %lx\n",
   2.109 +                     drhd->address);
   2.110 +            return drhd;
   2.111 +        }
   2.112 +    }
   2.113 +
   2.114 +    if (include_all_drhd) {
   2.115 +        gdprintk(XENLOG_INFO VTDPREFIX, 
   2.116 +                 "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n",
   2.117 +                 include_all_drhd->address);
    2.118 +        return include_all_drhd;
   2.119 +    }
   2.120 +
    2.121 +    return NULL;
   2.122 +}
   2.123 +
   2.124 +struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev)
   2.125 +{
   2.126 +    struct acpi_rmrr_unit *rmrr;
   2.127 +
   2.128 +    list_for_each_entry(rmrr, &acpi_rmrr_units, list) {
   2.129 +        if (acpi_pci_device_match(rmrr->devices,
   2.130 +                        rmrr->devices_cnt, dev))
   2.131 +            goto out;
   2.132 +    }
   2.133 +    rmrr = NULL;
   2.134 +out:
   2.135 +    return rmrr;
   2.136 +}
   2.137 +
   2.138 +struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev)
   2.139 +{
   2.140 +    struct acpi_atsr_unit *atsru;
   2.141 +    struct acpi_atsr_unit *all_ports_atsru;
   2.142 +
   2.143 +    all_ports_atsru = NULL;
   2.144 +    list_for_each_entry(atsru, &acpi_atsr_units, list) {
   2.145 +        if (atsru->all_ports)
   2.146 +            all_ports_atsru = atsru;
   2.147 +        if (acpi_pci_device_match(atsru->devices, atsru->devices_cnt, dev))
   2.148 +            return atsru;
   2.149 +    }
   2.150 +    if (all_ports_atsru) {
   2.151 +        gdprintk(XENLOG_INFO VTDPREFIX, 
   2.152 +                 "acpi_find_matched_atsr_unit: all_ports_atsru\n");
    2.153 +        return all_ports_atsru;
   2.154 +    }
    2.155 +    return NULL;
   2.156 +}
   2.157 +
   2.158 +static int __init acpi_parse_dev_scope(void *start, void *end, int *cnt,
   2.159 +                       struct pci_dev **devices)
   2.160 +{
   2.161 +    struct acpi_dev_scope *scope;
   2.162 +    u8 bus, sub_bus, sec_bus;
   2.163 +    struct acpi_pci_path *path;
   2.164 +    struct acpi_ioapic_unit *acpi_ioapic_unit = NULL;
   2.165 +    int count, dev_count=0;
   2.166 +    struct pci_dev *pdev;
   2.167 +    u8 dev, func;
   2.168 +    u32 l;
   2.169 +    void *tmp;
   2.170 +
   2.171 +    *cnt = 0;
   2.172 +    tmp = start;
   2.173 +    while (start < end) {
   2.174 +        scope = start;
   2.175 +        if (scope->length < MIN_SCOPE_LEN ||
   2.176 +            (scope->dev_type != ACPI_DEV_ENDPOINT &&
   2.177 +            scope->dev_type != ACPI_DEV_P2PBRIDGE)) {
   2.178 +            printk(KERN_WARNING PREFIX "Invalid device scope\n");
   2.179 +            return -EINVAL;
   2.180 +        }
   2.181 +        (*cnt)++;
   2.182 +        start += scope->length;
   2.183 +    }
   2.184 +
   2.185 +    start = tmp;
   2.186 +    while (start < end) {
   2.187 +        scope = start;
   2.188 +        path = (struct acpi_pci_path *)(scope + 1);
   2.189 +        count = (scope->length - sizeof(struct acpi_dev_scope))
   2.190 +		    /sizeof(struct acpi_pci_path);
   2.191 +        bus = scope->start_bus;
   2.192 +
   2.193 +        while (--count) {
   2.194 +            bus = read_pci_config_byte(bus, path->dev,
   2.195 +                                       path->fn, PCI_SECONDARY_BUS);
   2.196 +            path++;
   2.197 +        }
   2.198 +
   2.199 +        if (scope->dev_type == ACPI_DEV_ENDPOINT) {
   2.200 +            printk(KERN_WARNING PREFIX
   2.201 +                "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
    2.202 +            dev_count++;
   2.203 +        } else if (scope->dev_type == ACPI_DEV_P2PBRIDGE) {
   2.204 +            printk(KERN_WARNING PREFIX
   2.205 +                "found bridge: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
   2.206 +
   2.207 +            sec_bus = read_pci_config_byte(bus, path->dev,
   2.208 +                                       path->fn, PCI_SECONDARY_BUS);
   2.209 +            sub_bus = read_pci_config_byte(bus, path->dev,
   2.210 +                                       path->fn, PCI_SUBORDINATE_BUS);
   2.211 +            while (sec_bus <= sub_bus) {
   2.212 +                for (dev = 0; dev < 32; dev++) {
   2.213 +                    for (func = 0; func < 8; func++) {
   2.214 +                        l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID);
   2.215 +
   2.216 +                        /* some broken boards return 0 or ~0 if a slot is empty: */
   2.217 +                        if (l == 0xffffffff || l == 0x00000000 ||
   2.218 +                            l == 0x0000ffff || l == 0xffff0000)
   2.219 +                            break;
   2.220 +                        dev_count++;
   2.221 +                    }
   2.222 +                }
   2.223 +                sec_bus++;
   2.224 +            }
   2.225 +        } else if (scope->dev_type == ACPI_DEV_IOAPIC) {
   2.226 +            printk(KERN_WARNING PREFIX
   2.227 +                "found IOAPIC: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
   2.228 +            dev_count++;
   2.229 +        } else {
   2.230 +            printk(KERN_WARNING PREFIX
   2.231 +                "found MSI HPET: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
   2.232 +            dev_count++;
   2.233 +        }
   2.234 +
   2.235 +        start += scope->length;
   2.236 +    }
   2.237 +
   2.238 +    *cnt = dev_count;
   2.239 +    *devices = xmalloc_array(struct pci_dev,  *cnt);
   2.240 +    if (!*devices)
   2.241 +        return -ENOMEM;
   2.242 +    memset(*devices, 0, sizeof(struct pci_dev) * (*cnt));
   2.243 +
   2.244 +    pdev = *devices;
   2.245 +    start = tmp;
   2.246 +    while (start < end) {
   2.247 +        scope = start;
   2.248 +        path = (struct acpi_pci_path *)(scope + 1);
   2.249 +        count = (scope->length - sizeof(struct acpi_dev_scope))
   2.250 +		    /sizeof(struct acpi_pci_path);
   2.251 +        bus = scope->start_bus;
   2.252 +
   2.253 +        while (--count) {
   2.254 +            bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS);
   2.255 +            path++;
   2.256 +        }
   2.257 +
   2.258 +        if (scope->dev_type == ACPI_DEV_ENDPOINT) {
   2.259 +            printk(KERN_WARNING PREFIX
   2.260 +                "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
   2.261 +
   2.262 +            pdev->bus = bus;
   2.263 +            pdev->devfn = PCI_DEVFN(path->dev, path->fn);
   2.264 +            pdev++;
   2.265 +        } else if (scope->dev_type == ACPI_DEV_P2PBRIDGE) {
   2.266 +            printk(KERN_WARNING PREFIX
   2.267 +                "found bridge: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
   2.268 +
   2.269 +            sec_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS);
   2.270 +            sub_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SUBORDINATE_BUS);
   2.271 +
   2.272 +            while (sec_bus <= sub_bus) {
   2.273 +                for (dev = 0; dev < 32; dev++) {
   2.274 +                    for (func = 0; func < 8; func++) {
   2.275 +                        l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID);
   2.276 +
   2.277 +                        /* some broken boards return 0 or ~0 if a slot is empty: */
   2.278 +                        if (l == 0xffffffff || l == 0x00000000 ||
   2.279 +                            l == 0x0000ffff || l == 0xffff0000)
   2.280 +                            break;
   2.281 +
   2.282 +                        pdev->bus = sec_bus;
   2.283 +                        pdev->devfn = PCI_DEVFN(dev, func);
   2.284 +                        pdev++;
   2.285 +                    }
   2.286 +                }
   2.287 +                sec_bus++;
   2.288 +            }
   2.289 +        } else if (scope->dev_type == ACPI_DEV_IOAPIC) {
   2.290 +            acpi_ioapic_unit = xmalloc(struct acpi_ioapic_unit);
   2.291 +            acpi_ioapic_unit->apic_id = scope->enum_id;
   2.292 +            acpi_ioapic_unit->ioapic.bdf.bus = bus;
   2.293 +            acpi_ioapic_unit->ioapic.bdf.dev = path->dev;
   2.294 +            acpi_ioapic_unit->ioapic.bdf.func = path->fn;
   2.295 +            list_add(&acpi_ioapic_unit->list, &acpi_ioapic_units);
   2.296 +            printk(KERN_WARNING PREFIX
   2.297 +                "found IOAPIC: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
   2.298 +        } else {
   2.299 +            printk(KERN_WARNING PREFIX
   2.300 +                "found MSI HPET: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
   2.301 +        }
   2.302 +        
   2.303 +        start += scope->length;
   2.304 +    }
   2.305 +
   2.306 +    return 0;
   2.307 +}
   2.308 +
   2.309 +static int __init
   2.310 +acpi_parse_one_drhd(struct acpi_dmar_entry_header *header)
   2.311 +{
   2.312 +    struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header;
   2.313 +    struct acpi_drhd_unit *dmaru;
   2.314 +    int ret = 0;
   2.315 +    static int include_all;
   2.316 +
   2.317 +    dmaru = xmalloc(struct acpi_drhd_unit);
   2.318 +    if (!dmaru)
   2.319 +        return -ENOMEM;
   2.320 +    memset(dmaru, 0, sizeof(struct acpi_drhd_unit));
   2.321 +
   2.322 +    dmaru->address = drhd->address;
   2.323 +    dmaru->include_all = drhd->flags & 1; /* BIT0: INCLUDE_ALL */
   2.324 +    printk(KERN_WARNING PREFIX "dmaru->address = %lx\n", dmaru->address);
   2.325 +
   2.326 +    if (!dmaru->include_all) {
   2.327 +        ret = acpi_parse_dev_scope((void *)(drhd + 1),
   2.328 +                ((void *)drhd) + header->length,
   2.329 +                &dmaru->devices_cnt, &dmaru->devices);
   2.330 +    }
   2.331 +    else {
   2.332 +        printk(KERN_WARNING PREFIX "found INCLUDE_ALL\n");
   2.333 +        /* Only allow one INCLUDE_ALL */
   2.334 +        if (include_all) {
   2.335 +            printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
   2.336 +                "device scope is allowed\n");
   2.337 +            ret = -EINVAL;
   2.338 +        }
   2.339 +        include_all = 1;
   2.340 +    }
   2.341 +
   2.342 +    if (ret)
   2.343 +        xfree(dmaru);
   2.344 +    else
   2.345 +        acpi_register_drhd_unit(dmaru);
   2.346 +    return ret;
   2.347 +}
   2.348 +
   2.349 +static int __init
   2.350 +acpi_parse_one_rmrr(struct acpi_dmar_entry_header *header)
   2.351 +{
   2.352 +    struct acpi_table_rmrr *rmrr = (struct acpi_table_rmrr *)header;
   2.353 +    struct acpi_rmrr_unit *rmrru;
   2.354 +    int ret = 0;
   2.355 +
   2.356 +    rmrru = xmalloc(struct acpi_rmrr_unit);
   2.357 +    if (!rmrru)
   2.358 +        return -ENOMEM;
   2.359 +    memset(rmrru, 0, sizeof(struct acpi_rmrr_unit));
   2.360 +
   2.361 +#ifdef VTD_DEBUG
   2.362 +    gdprintk(XENLOG_INFO VTDPREFIX,
   2.363 +        "acpi_parse_one_rmrr: base = %lx end = %lx\n",
   2.364 +        rmrr->base_address, rmrr->end_address);
   2.365 +#endif
   2.366 +
   2.367 +    rmrru->base_address = rmrr->base_address;
   2.368 +    rmrru->end_address = rmrr->end_address;
   2.369 +    ret = acpi_parse_dev_scope((void *)(rmrr + 1),
   2.370 +            ((void*)rmrr) + header->length,
   2.371 +            &rmrru->devices_cnt, &rmrru->devices);
   2.372 +
   2.373 +    if (ret || (rmrru->devices_cnt == 0))
   2.374 +        xfree(rmrru);
   2.375 +    else
   2.376 +        acpi_register_rmrr_unit(rmrru);
   2.377 +    return ret;
   2.378 +}
   2.379 +
   2.380 +static int __init
   2.381 +acpi_parse_one_atsr(struct acpi_dmar_entry_header *header)
   2.382 +{
   2.383 +    struct acpi_table_atsr *atsr = (struct acpi_table_atsr *)header;
   2.384 +    struct acpi_atsr_unit *atsru;
   2.385 +    int ret = 0;
   2.386 +    static int all_ports;
   2.387 +
   2.388 +    atsru = xmalloc(struct acpi_atsr_unit);
   2.389 +    if (!atsru)
   2.390 +        return -ENOMEM;
   2.391 +    memset(atsru, 0, sizeof(struct acpi_atsr_unit));
   2.392 +
   2.393 +    atsru->all_ports = atsr->flags & 1; /* BIT0: ALL_PORTS */
   2.394 +    if (!atsru->all_ports) {
   2.395 +        ret = acpi_parse_dev_scope((void *)(atsr + 1),
   2.396 +                ((void *)atsr) + header->length,
   2.397 +                &atsru->devices_cnt, &atsru->devices);
   2.398 +    }
   2.399 +    else {
   2.400 +        printk(KERN_WARNING PREFIX "found ALL_PORTS\n");
   2.401 +        /* Only allow one ALL_PORTS */
   2.402 +        if (all_ports) {
   2.403 +            printk(KERN_WARNING PREFIX "Only one ALL_PORTS "
   2.404 +                "device scope is allowed\n");
   2.405 +            ret = -EINVAL;
   2.406 +        }
   2.407 +        all_ports = 1;
   2.408 +    }
   2.409 +
   2.410 +    if (ret)
    2.411 +        xfree(atsru);
   2.412 +    else
   2.413 +        acpi_register_atsr_unit(atsru);
   2.414 +    return ret;
   2.415 +}
   2.416 +
   2.417 +static void __init
   2.418 +acpi_table_print_dmar_entry(struct acpi_dmar_entry_header *header)
   2.419 +{
   2.420 +    struct acpi_table_drhd *drhd;
   2.421 +    struct acpi_table_rmrr *rmrr;
   2.422 +
   2.423 +    switch (header->type) {
   2.424 +    case ACPI_DMAR_DRHD:
   2.425 +        drhd = (struct acpi_table_drhd *)header;
   2.426 +        break;
   2.427 +    case ACPI_DMAR_RMRR:
   2.428 +        rmrr = (struct acpi_table_rmrr *)header;
   2.429 +        break;
   2.430 +    }
   2.431 +}
   2.432 +
   2.433 +static int __init
   2.434 +acpi_parse_dmar(unsigned long phys_addr, unsigned long size)
   2.435 +{
   2.436 +    struct acpi_table_dmar *dmar = NULL;
   2.437 +    struct acpi_dmar_entry_header *entry_header;
   2.438 +    int ret = 0;
   2.439 +
   2.440 +    if (!phys_addr || !size)
   2.441 +        return -EINVAL;
   2.442 +
   2.443 +    dmar = (struct acpi_table_dmar *)__acpi_map_table(phys_addr, size);
   2.444 +    if (!dmar) {
   2.445 +        printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
   2.446 +        return -ENODEV;
   2.447 +    }
   2.448 +
   2.449 +    if (!dmar->haw) {
   2.450 +        printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n");
   2.451 +        return -EINVAL;
   2.452 +    }
   2.453 +
   2.454 +    dmar_host_address_width = dmar->haw;
   2.455 +    printk (KERN_INFO PREFIX "Host address width %d\n",
   2.456 +        dmar_host_address_width);
   2.457 +
   2.458 +    entry_header = (struct acpi_dmar_entry_header *)(dmar + 1);
   2.459 +    while (((unsigned long)entry_header) < (((unsigned long)dmar) + size)) {
   2.460 +        acpi_table_print_dmar_entry(entry_header);
   2.461 +
   2.462 +        switch (entry_header->type) {
   2.463 +        case ACPI_DMAR_DRHD:
   2.464 +            printk (KERN_INFO PREFIX "found ACPI_DMAR_DRHD\n");
   2.465 +            ret = acpi_parse_one_drhd(entry_header);
   2.466 +            break;
   2.467 +        case ACPI_DMAR_RMRR:
   2.468 +            printk (KERN_INFO PREFIX "found ACPI_DMAR_RMRR\n");
   2.469 +            ret = acpi_parse_one_rmrr(entry_header);
   2.470 +            break;
   2.471 +        case ACPI_DMAR_ATSR:
    2.472 +            printk (KERN_INFO PREFIX "found ACPI_DMAR_ATSR\n");
   2.473 +            ret = acpi_parse_one_atsr(entry_header);
   2.474 +            break;
   2.475 +        default:
   2.476 +            printk(KERN_WARNING PREFIX "Unknown DMAR structure type\n");
   2.477 +            ret = -EINVAL;
   2.478 +            break;
   2.479 +        }
   2.480 +        if (ret)
   2.481 +            break;
   2.482 +
   2.483 +        entry_header = ((void *)entry_header + entry_header->length);
   2.484 +    }
   2.485 +    return ret;
   2.486 +}
   2.487 +
   2.488 +int acpi_dmar_init(void)
   2.489 +{
   2.490 +    acpi_table_parse(ACPI_DMAR, acpi_parse_dmar);
   2.491 +    if (list_empty(&acpi_drhd_units)) {
   2.492 +        printk(KERN_ERR PREFIX "No DMAR devices found\n");
   2.493 +        return -ENODEV;
   2.494 +    } else
   2.495 +        vtd_enabled = 1;
   2.496 +    return 0;
   2.497 +}
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.h	Fri Sep 14 16:40:49 2007 +0100
     3.3 @@ -0,0 +1,90 @@
     3.4 +/*
     3.5 + * Copyright (c) 2006, Intel Corporation.
     3.6 + *
     3.7 + * This program is free software; you can redistribute it and/or modify it
     3.8 + * under the terms and conditions of the GNU General Public License,
     3.9 + * version 2, as published by the Free Software Foundation.
    3.10 + *
    3.11 + * This program is distributed in the hope it will be useful, but WITHOUT
    3.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    3.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    3.14 + * more details.
    3.15 + *
    3.16 + * You should have received a copy of the GNU General Public License along with
    3.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    3.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    3.19 + *
    3.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com>
    3.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com>
    3.22 + */
    3.23 +
    3.24 +#ifndef _DMAR_H_
    3.25 +#define _DMAR_H_
    3.26 +
    3.27 +#include <xen/list.h>
    3.28 +#include <asm/iommu.h>
    3.29 +
    3.30 +extern u8 dmar_host_address_width;
    3.31 +
    3.32 +struct acpi_drhd_unit {
    3.33 +    struct list_head list;
    3.34 +    unsigned long    address; /* register base address of the unit */
    3.35 +    struct    pci_dev *devices; /* target devices */
    3.36 +    int    devices_cnt;
    3.37 +    u8    include_all:1;
    3.38 +    struct iommu *iommu;
    3.39 +};
    3.40 +
    3.41 +struct acpi_rmrr_unit {
    3.42 +    struct list_head list;
    3.43 +    unsigned long base_address;
    3.44 +    unsigned long end_address;
    3.45 +    struct pci_dev *devices; /* target devices */
    3.46 +    int    devices_cnt;
    3.47 +    u8    allow_all:1;
    3.48 +};
    3.49 +
    3.50 +struct acpi_atsr_unit {
    3.51 +    struct list_head list;
    3.52 +    struct    pci_dev *devices; /* target devices */
    3.53 +    int    devices_cnt;
    3.54 +    u8    all_ports:1;
    3.55 +};
    3.56 +
    3.57 +#define for_each_iommu(domain, iommu) \
    3.58 +    list_for_each_entry(iommu, \
    3.59 +        &(domain->arch.hvm_domain.hvm_iommu.iommu_list), list)
    3.60 +
    3.61 +#define for_each_pdev(domain, pdev) \
    3.62 +    list_for_each_entry(pdev, \
    3.63 +         &(domain->arch.hvm_domain.hvm_iommu.pdev_list), list)
    3.64 +
    3.65 +#define for_each_drhd_unit(drhd) \
    3.66 +    list_for_each_entry(drhd, &acpi_drhd_units, list)
    3.67 +#define for_each_rmrr_device(rmrr, pdev) \
    3.68 +    list_for_each_entry(rmrr, &acpi_rmrr_units, list) { \
    3.69 +        int _i; \
    3.70 +        for (_i = 0; _i < rmrr->devices_cnt; _i++) { \
    3.71 +            pdev = &(rmrr->devices[_i]);
    3.72 +#define end_for_each_rmrr_device(rmrr, pdev) \
    3.73 +        } \
    3.74 +    }
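          +/*
          + * Usage sketch for the open/close macro pair above;
          + * setup_one_rmrr_mapping() is only a hypothetical helper name:
          + *
          + *     for_each_rmrr_device(rmrr, pdev)
          + *         setup_one_rmrr_mapping(rmrr, pdev);
          + *     end_for_each_rmrr_device(rmrr, pdev)
          + */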
    3.75 +
    3.76 +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev);
    3.77 +struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev);
    3.78 +
    3.79 +/* This one is for interrupt remapping */
    3.80 +struct acpi_ioapic_unit {
    3.81 +    struct list_head list;
    3.82 +    int apic_id;
    3.83 +    union {
    3.84 +        u16 info;
    3.85 +        struct {
    3.86 +            u16 bus: 8,
    3.87 +                dev: 5,
    3.88 +                func: 3;
    3.89 +        }bdf;
    3.90 +    }ioapic;
    3.91 +};
    3.92 +
    3.93 +#endif // _DMAR_H_
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c	Fri Sep 14 16:40:49 2007 +0100
     4.3 @@ -0,0 +1,1927 @@
     4.4 +/*
     4.5 + * Copyright (c) 2006, Intel Corporation.
     4.6 + *
     4.7 + * This program is free software; you can redistribute it and/or modify it
     4.8 + * under the terms and conditions of the GNU General Public License,
     4.9 + * version 2, as published by the Free Software Foundation.
    4.10 + *
    4.11 + * This program is distributed in the hope it will be useful, but WITHOUT
    4.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    4.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    4.14 + * more details.
    4.15 + *
    4.16 + * You should have received a copy of the GNU General Public License along with
    4.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    4.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    4.19 + *
    4.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com>
    4.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com>
    4.22 + * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
    4.23 + */
    4.24 +
    4.25 +#include <xen/init.h>
    4.26 +#include <xen/irq.h>
    4.27 +#include <xen/spinlock.h>
    4.28 +#include <xen/sched.h>
    4.29 +#include <xen/xmalloc.h>
    4.30 +#include <xen/domain_page.h>
    4.31 +#include <asm/delay.h>
    4.32 +#include <asm/string.h>
    4.33 +#include <asm/iommu.h>
    4.34 +#include <asm/hvm/vmx/intel-iommu.h>
    4.35 +#include "dmar.h"
    4.36 +#include "pci-direct.h"
    4.37 +#include "pci_regs.h"
    4.38 +#include "msi.h"
    4.39 +
    4.40 +extern void print_iommu_regs(struct acpi_drhd_unit *drhd);
    4.41 +extern void print_vtd_entries(struct domain *d, int bus, int devfn,
    4.42 +                       unsigned long gmfn);
    4.43 +extern void (*interrupt[])(void);
    4.44 +
    4.45 +#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
    4.46 +
    4.47 +#define time_after(a,b)         \
    4.48 +        (typecheck(unsigned long, a) && \
    4.49 +         typecheck(unsigned long, b) && \
    4.50 +         ((long)(b) - (long)(a) < 0))
    4.51 +
    4.52 +unsigned int x86_clflush_size;
    4.53 +void clflush_cache_range(void *adr, int size)
    4.54 +{
    4.55 +    int i;
    4.56 +    for (i = 0; i < size; i += x86_clflush_size)
    4.57 +        clflush(adr + i);
    4.58 +}
    4.59 +
    4.60 +static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
    4.61 +{
    4.62 +    if (!ecap_coherent(iommu->ecap))
    4.63 +        clflush_cache_range(addr, size);
    4.64 +}
    4.65 +
    4.66 +#define iommu_flush_cache_entry(iommu, addr) \
    4.67 +       __iommu_flush_cache(iommu, addr, 8)
    4.68 +#define iommu_flush_cache_page(iommu, addr) \
    4.69 +       __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
    4.70 +
    4.71 +int nr_iommus;
    4.72 +/* context entry handling */
    4.73 +static struct context_entry * device_to_context_entry(struct iommu *iommu,
    4.74 +        u8 bus, u8 devfn)
    4.75 +{
    4.76 +    struct root_entry *root;
    4.77 +    struct context_entry *context;
    4.78 +    unsigned long phy_addr;
    4.79 +    unsigned long flags;
    4.80 +
    4.81 +    spin_lock_irqsave(&iommu->lock, flags);
    4.82 +    root = &iommu->root_entry[bus];
    4.83 +    if (!root_present(*root)) {
    4.84 +        phy_addr = (unsigned long) alloc_xenheap_page();
    4.85 +        if (!phy_addr) {
    4.86 +            spin_unlock_irqrestore(&iommu->lock, flags);
    4.87 +            return NULL;
    4.88 +        }
    4.89 +        memset((void *) phy_addr, 0, PAGE_SIZE);
    4.90 +        iommu_flush_cache_page(iommu, (void *)phy_addr);
    4.91 +        phy_addr = virt_to_maddr((void *)phy_addr);
    4.92 +        set_root_value(*root, phy_addr);
    4.93 +        set_root_present(*root);
    4.94 +        iommu_flush_cache_entry(iommu, root);
    4.95 +    }
    4.96 +    phy_addr = (unsigned long) get_context_addr(*root);
    4.97 +    context = (struct context_entry *)maddr_to_virt(phy_addr);
    4.98 +    spin_unlock_irqrestore(&iommu->lock, flags);
    4.99 +    return &context[devfn];
   4.100 +}
   4.101 +
   4.102 +static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
   4.103 +{
   4.104 +    struct root_entry *root;
   4.105 +    struct context_entry *context;
   4.106 +    unsigned long phy_addr;
   4.107 +    int ret;
   4.108 +    unsigned long flags;
   4.109 +
   4.110 +    spin_lock_irqsave(&iommu->lock, flags);
   4.111 +    root = &iommu->root_entry[bus];
   4.112 +    if (!root_present(*root)) {
   4.113 +        ret = 0;
   4.114 +        goto out;
   4.115 +    }
   4.116 +    phy_addr = get_context_addr(*root);
   4.117 +    context = (struct context_entry *)maddr_to_virt(phy_addr);
   4.118 +    ret = context_present(context[devfn]);
   4.119 +out:
   4.120 +    spin_unlock_irqrestore(&iommu->lock, flags);
   4.121 +    return ret;
   4.122 +}
   4.123 +
   4.124 +/* page table handling */
   4.125 +#define LEVEL_STRIDE        (9)
   4.126 +#define LEVEL_MASK        ((1 << LEVEL_STRIDE) - 1)
   4.127 +#define agaw_to_level(val) ((val) + 2)
    4.128 +#define agaw_to_width(val) (30 + (val) * LEVEL_STRIDE)
    4.129 +#define width_to_agaw(w)  (((w) - 30)/LEVEL_STRIDE)
    4.130 +#define level_to_offset_bits(l) (12 + ((l) - 1) * LEVEL_STRIDE)
    4.131 +#define address_level_offset(addr, level) \
    4.132 +    (((addr) >> level_to_offset_bits(level)) & LEVEL_MASK)
    4.133 +#define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
    4.134 +#define level_size(l) (((u64)1) << level_to_offset_bits(l))
    4.135 +#define align_to_level(addr, l) (((addr) + level_size(l) - 1) & level_mask(l))
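          +/*
          + * Worked example of the macros above: an AGAW value of 2 gives
          + * agaw_to_level(2) == 4 page-table levels and agaw_to_width(2) ==
          + * 30 + 2 * 9 == 48 bits of DMA address space, with each level
          + * translating LEVEL_STRIDE == 9 bits of the address.
          + */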
   4.136 +static struct dma_pte * addr_to_dma_pte(struct domain *domain, u64 addr)
   4.137 +{
   4.138 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
   4.139 +    struct acpi_drhd_unit *drhd;
   4.140 +    struct iommu *iommu;
   4.141 +    int addr_width = agaw_to_width(hd->agaw);
   4.142 +    struct dma_pte *parent, *pte = NULL, *pgd;
   4.143 +    int level = agaw_to_level(hd->agaw);
   4.144 +    int offset;
   4.145 +    unsigned long flags;
   4.146 +
   4.147 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   4.148 +    iommu = drhd->iommu;
   4.149 +
   4.150 +    addr &= (((u64)1) << addr_width) - 1;
   4.151 +    spin_lock_irqsave(&hd->mapping_lock, flags);
   4.152 +    if (!hd->pgd) {
   4.153 +        pgd = (struct dma_pte *)alloc_xenheap_page();
   4.154 +        if (!pgd && !hd->pgd) {
   4.155 +            spin_unlock_irqrestore(&hd->mapping_lock, flags);
   4.156 +            return NULL;
   4.157 +        }
   4.158 +        memset((u8*)pgd, 0, PAGE_SIZE);
   4.159 +        if (!hd->pgd)
   4.160 +            hd->pgd = pgd;
   4.161 +        else /* somebody is fast */
   4.162 +            free_xenheap_page((void *) pgd);
   4.163 +    }
   4.164 +    parent = hd->pgd;
   4.165 +    while (level > 0) {
   4.166 +        u8 *tmp;
   4.167 +        offset = address_level_offset(addr, level);
   4.168 +        pte = &parent[offset];
   4.169 +        if (level == 1)
   4.170 +            break;
    4.171 +        if (dma_pte_addr(*pte) == 0) {
    4.172 +            tmp = alloc_xenheap_page();
    4.173 +            if (tmp == NULL) {
    4.174 +                gdprintk(XENLOG_ERR VTDPREFIX,
    4.175 +                    "addr_to_dma_pte: tmp == NULL\n");
    4.176 +                spin_unlock_irqrestore(&hd->mapping_lock, flags);
    4.177 +                return NULL;
    4.178 +            }
    4.179 +
    4.180 +            memset(tmp, 0, PAGE_SIZE);
    4.181 +            iommu_flush_cache_page(iommu, tmp);
    4.182 +
   4.184 +            if (dma_pte_addr(*pte) == 0) {
   4.185 +                dma_set_pte_addr(*pte,
   4.186 +                    virt_to_maddr(tmp));
   4.187 +                /*
   4.188 +                 * high level table always sets r/w, last level
   4.189 +                 * page table control read/write
   4.190 +                 */
   4.191 +                dma_set_pte_readable(*pte);
   4.192 +                dma_set_pte_writable(*pte);
   4.193 +                iommu_flush_cache_entry(iommu, pte);
   4.194 +            } else /* somebody is fast */
   4.195 +                free_xenheap_page(tmp);
   4.196 +        }
   4.197 +        parent = maddr_to_virt(dma_pte_addr(*pte));
   4.198 +        level--;
   4.199 +    }
   4.200 +    spin_unlock_irqrestore(&hd->mapping_lock, flags);
   4.201 +    return pte;
   4.202 +}
   4.203 +
   4.204 +/* return address's pte at specific level */
   4.205 +static struct dma_pte *dma_addr_level_pte(struct domain *domain, u64 addr,
   4.206 +        int level)
   4.207 +{
   4.208 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
   4.209 +    struct dma_pte *parent, *pte = NULL;
   4.210 +    int total = agaw_to_level(hd->agaw);
   4.211 +    int offset;
   4.212 +
   4.213 +    parent = hd->pgd;
   4.214 +    while (level <= total) {
   4.215 +        offset = address_level_offset(addr, total);
   4.216 +        pte = &parent[offset];
   4.217 +        if (level == total)
   4.218 +            return pte;
   4.219 +
   4.220 +        if (dma_pte_addr(*pte) == 0)
   4.221 +            break;
   4.222 +        parent = maddr_to_virt(dma_pte_addr(*pte));
   4.223 +        total--;
   4.224 +    }
   4.225 +    return NULL;
   4.226 +}
   4.227 +
   4.228 +static void iommu_flush_write_buffer(struct iommu *iommu)
   4.229 +{
   4.230 +	u32 val;
   4.231 +	unsigned long flag;
   4.232 +	unsigned long start_time;
   4.233 +
   4.234 +	if (!cap_rwbf(iommu->cap))
   4.235 +		return;
   4.236 +	val = iommu->gcmd | DMA_GCMD_WBF;
   4.237 +
   4.238 +	spin_lock_irqsave(&iommu->register_lock, flag);
   4.239 +	dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
   4.240 +
   4.241 +	/* Make sure hardware complete it */
   4.242 +	start_time = jiffies;
   4.243 +	while (1) {
   4.244 +		val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
   4.245 +		if (!(val & DMA_GSTS_WBFS))
   4.246 +			break;
   4.247 +		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
    4.248 +			panic("DMAR hardware is malfunctioning, please disable the IOMMU\n");
   4.249 +		cpu_relax();
   4.250 +	}
   4.251 +	spin_unlock_irqrestore(&iommu->register_lock, flag);
   4.252 +}
   4.253 +
    4.254 +/* the return value determines whether a write buffer flush is needed */
   4.255 +static int __iommu_flush_context(struct iommu *iommu,
   4.256 +	u16 did, u16 source_id, u8 function_mask, u64 type,
   4.257 +	int non_present_entry_flush)
   4.258 +{
   4.259 +	u64 val = 0;
   4.260 +	unsigned long flag;
   4.261 +	unsigned long start_time;
   4.262 +
    4.263 +	/*
    4.264 +	 * In the non-present entry flush case: if the hardware does not
    4.265 +	 * cache non-present entries, do nothing; if it does, flush the
    4.266 +	 * entries of domain 0 (the domain id used to cache any non-present
    4.267 +	 * entries).
    4.268 +	 */
   4.269 +	if (non_present_entry_flush) {
   4.270 +		if (!cap_caching_mode(iommu->cap))
   4.271 +			return 1;
   4.272 +		else
   4.273 +			did = 0;
   4.274 +	}
   4.275 +
   4.276 +        /* use register invalidation */
   4.277 +        switch (type)
   4.278 +        {
   4.279 +            case DMA_CCMD_GLOBAL_INVL:
   4.280 +                val = DMA_CCMD_GLOBAL_INVL;
   4.281 +                break;
   4.282 +            case DMA_CCMD_DOMAIN_INVL:
   4.283 +                val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
   4.284 +                break;
   4.285 +            case DMA_CCMD_DEVICE_INVL:
   4.286 +                val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
   4.287 +                  |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
   4.288 +                break;
   4.289 +            default:
   4.290 +                BUG();
   4.291 +        }
   4.292 +        val |= DMA_CCMD_ICC;
   4.293 +
   4.294 +        spin_lock_irqsave(&iommu->register_lock, flag);
   4.295 +        dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
   4.296 +
   4.297 +        /* Make sure hardware complete it */
   4.298 +        start_time = jiffies;
   4.299 +        while (1) {
   4.300 +            val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
   4.301 +            if (!(val & DMA_CCMD_ICC))
   4.302 +                break;
   4.303 +            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
    4.304 +                panic("DMAR hardware is malfunctioning, please disable the IOMMU\n");
   4.305 +            cpu_relax();
   4.306 +        }
   4.307 +        spin_unlock_irqrestore(&iommu->register_lock, flag);
    4.308 +	/* flushing the context entry will implicitly flush the write buffer */
   4.309 +	return 0;
   4.310 +}
   4.311 +
   4.312 +static int inline iommu_flush_context_global(struct iommu *iommu,
   4.313 +	int non_present_entry_flush)
   4.314 +{
   4.315 +	return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
   4.316 +		non_present_entry_flush);
   4.317 +}
   4.318 +
   4.319 +static int inline iommu_flush_context_domain(struct iommu *iommu, u16 did,
   4.320 +	int non_present_entry_flush)
   4.321 +{
   4.322 +	return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
   4.323 +		non_present_entry_flush);
   4.324 +}
   4.325 +
   4.326 +static int inline iommu_flush_context_device(struct iommu *iommu,
   4.327 +	u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
   4.328 +{
   4.329 +	return __iommu_flush_context(iommu, did, source_id, function_mask,
   4.330 +		DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
   4.331 +}
   4.332 +
    4.333 +/* the return value determines whether a write buffer flush is needed */
   4.334 +static int __iommu_flush_iotlb(struct iommu *iommu, u16 did,
   4.335 +	u64 addr, unsigned int size_order, u64 type,
   4.336 +	int non_present_entry_flush)
   4.337 +{
   4.338 +	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
   4.339 +	u64 val = 0, val_iva = 0;
   4.340 +	unsigned long flag;
   4.341 +	unsigned long start_time;
   4.342 +
    4.343 +	/*
    4.344 +	 * In the non-present entry flush case: if the hardware does not
    4.345 +	 * cache non-present entries, do nothing; if it does, flush the
    4.346 +	 * entries of domain 0 (the domain id used to cache any non-present
    4.347 +	 * entries).
    4.348 +	 */
   4.349 +	if (non_present_entry_flush) {
   4.350 +		if (!cap_caching_mode(iommu->cap))
   4.351 +			return 1;
   4.352 +		else
   4.353 +			did = 0;
   4.354 +	}
   4.355 +
   4.356 +        /* use register invalidation */
   4.357 +        switch (type) {
   4.358 +            case DMA_TLB_GLOBAL_FLUSH:
    4.359 +                /* a global flush doesn't need to set IVA_REG */
   4.360 +                val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
   4.361 +                break;
   4.362 +            case DMA_TLB_DSI_FLUSH:
   4.363 +                val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
   4.364 +                break;
   4.365 +            case DMA_TLB_PSI_FLUSH:
   4.366 +                val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
   4.367 +                /* Note: always flush non-leaf currently */
   4.368 +                val_iva = size_order | addr;
   4.369 +                break;
   4.370 +            default:
   4.371 +                BUG();
   4.372 +        }
   4.373 +        /* Note: set drain read/write */
   4.374 +#if 0
    4.375 +        /*
    4.376 +         * This is probably only needed to be extra safe; it looks like
    4.377 +         * we can ignore it without any impact.
    4.378 +         */
   4.379 +        if (cap_read_drain(iommu->cap))
   4.380 +            val |= DMA_TLB_READ_DRAIN;
   4.381 +#endif
   4.382 +        if (cap_write_drain(iommu->cap))
   4.383 +            val |= DMA_TLB_WRITE_DRAIN;
   4.384 +
   4.385 +        spin_lock_irqsave(&iommu->register_lock, flag);
   4.386 +        /* Note: Only uses first TLB reg currently */
   4.387 +        if (val_iva)
   4.388 +            dmar_writeq(iommu->reg, tlb_offset, val_iva);
   4.389 +        dmar_writeq(iommu->reg, tlb_offset + 8, val);
   4.390 +
   4.391 +        /* Make sure hardware complete it */
   4.392 +        start_time = jiffies;
   4.393 +        while (1) {
   4.394 +            val = dmar_readq(iommu->reg, tlb_offset + 8);
   4.395 +            if (!(val & DMA_TLB_IVT))
   4.396 +                break;
   4.397 +            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
    4.398 +                panic("DMAR hardware is malfunctioning, please disable the IOMMU\n");
   4.399 +            cpu_relax();
   4.400 +        }
   4.401 +        spin_unlock_irqrestore(&iommu->register_lock, flag);
   4.402 +
   4.403 +        /* check IOTLB invalidation granularity */
   4.404 +        if (DMA_TLB_IAIG(val) == 0)
   4.405 +            printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
   4.406 +        if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
   4.407 +            printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
   4.408 +              (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
    4.409 +	/* flushing the context entry will implicitly flush the write buffer */
   4.410 +	return 0;
   4.411 +}
   4.412 +
   4.413 +static int inline iommu_flush_iotlb_global(struct iommu *iommu,
   4.414 +	int non_present_entry_flush)
   4.415 +{
   4.416 +	return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
   4.417 +		non_present_entry_flush);
   4.418 +}
   4.419 +
   4.420 +static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
   4.421 +	int non_present_entry_flush)
   4.422 +{
   4.423 +	return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
   4.424 +		non_present_entry_flush);
   4.425 +}
   4.426 +
   4.427 +static int inline get_alignment(u64 base, unsigned int size)
   4.428 +{
   4.429 +	int t = 0;
   4.430 +	u64 end;
   4.431 +
   4.432 +	end = base + size - 1;
   4.433 +	while (base != end) {
   4.434 +		t++;
   4.435 +		base >>= 1;
   4.436 +		end >>= 1;
   4.437 +	}
   4.438 +	return t;
   4.439 +}
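          +/*
          + * Example: get_alignment(4, 4) compares page 4 against end page 7 and
          + * shifts both right until they are equal, returning 2, i.e. the flush
          + * must cover a naturally aligned 2^2-page region.
          + */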
   4.440 +
   4.441 +static int inline iommu_flush_iotlb_psi(struct iommu *iommu, u16 did,
   4.442 +	u64 addr, unsigned int pages, int non_present_entry_flush)
   4.443 +{
   4.444 +	unsigned int align;
   4.445 +
   4.446 +	BUG_ON(addr & (~PAGE_MASK_4K));
   4.447 +	BUG_ON(pages == 0);
   4.448 +
   4.449 +	/* Fallback to domain selective flush if no PSI support */
   4.450 +	if (!cap_pgsel_inv(iommu->cap))
   4.451 +		return iommu_flush_iotlb_dsi(iommu, did,
   4.452 +			non_present_entry_flush);
   4.453 +
    4.454 +	/*
    4.455 +	 * PSI requires the page count to be a power of two, with the base
    4.456 +	 * address naturally aligned to that size.
    4.457 +	 */
   4.458 +	align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
   4.459 +	/* Fallback to domain selective flush if size is too big */
   4.460 +	if (align > cap_max_amask_val(iommu->cap))
   4.461 +		return iommu_flush_iotlb_dsi(iommu, did,
   4.462 +			non_present_entry_flush);
   4.463 +
   4.464 +	addr >>= PAGE_SHIFT_4K + align;
   4.465 +	addr <<= PAGE_SHIFT_4K + align;
   4.466 +
   4.467 +	return __iommu_flush_iotlb(iommu, did, addr, align,
   4.468 +		DMA_TLB_PSI_FLUSH, non_present_entry_flush);
   4.469 +}
   4.470 +
   4.471 +void flush_all(void)
   4.472 +{
   4.473 +    struct acpi_drhd_unit *drhd;
   4.474 +    struct iommu *iommu;
   4.475 +    int i = 0;
   4.476 +
   4.477 +    wbinvd();
   4.478 +    for_each_drhd_unit(drhd) {
   4.479 +        iommu = drhd->iommu;
   4.480 +        iommu_flush_context_global(iommu, 0);
   4.481 +        iommu_flush_iotlb_global(iommu, 0);
   4.482 +        i++;
   4.483 +    }
   4.484 +}
   4.485 +
   4.486 +/* clear one page's page table */
   4.487 +static void dma_pte_clear_one(struct domain *domain, u64 addr)
   4.488 +{
   4.489 +    struct acpi_drhd_unit *drhd;
   4.490 +    struct iommu *iommu;
   4.491 +    struct dma_pte *pte = NULL;
   4.492 +
   4.493 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   4.494 +
   4.495 +    /* get last level pte */
   4.496 +    pte = dma_addr_level_pte(domain, addr, 1);
   4.497 +
   4.498 +    if (pte) {
   4.499 +        dma_clear_pte(*pte);
   4.500 +        iommu_flush_cache_entry(drhd->iommu, pte);
   4.501 +
   4.502 +        for_each_drhd_unit(drhd) {
   4.503 +            iommu = drhd->iommu;
   4.504 +            if (cap_caching_mode(iommu->cap))
   4.505 +            {
   4.506 +                iommu_flush_iotlb_psi(iommu, domain->domain_id, addr, 1, 0);
   4.507 +            }
   4.508 +            else if (cap_rwbf(iommu->cap))
   4.509 +                iommu_flush_write_buffer(iommu);
   4.510 +        }
   4.511 +    }
   4.512 +}
   4.513 +
   4.514 +/* clear last level pte, a tlb flush should be followed */
   4.515 +static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
   4.516 +{
   4.517 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
   4.518 +    int addr_width = agaw_to_width(hd->agaw);
   4.519 +
   4.520 +    start &= (((u64)1) << addr_width) - 1;
   4.521 +    end &= (((u64)1) << addr_width) - 1;
    4.522 +    /* in case it's a partial page */
   4.523 +    start = PAGE_ALIGN_4K(start);
   4.524 +    end &= PAGE_MASK_4K;
   4.525 +
   4.526 +    /* we don't need lock here, nobody else touches the iova range */
   4.527 +    while (start < end) {
   4.528 +        dma_pte_clear_one(domain, start);
   4.529 +        start += PAGE_SIZE_4K;
   4.530 +    }
   4.531 +}
   4.532 +
   4.533 +/* free page table pages. last level pte should already be cleared */
   4.534 +// static void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
   4.535 +void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
   4.536 +{
   4.537 +    struct acpi_drhd_unit *drhd;
   4.538 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
   4.539 +    struct iommu *iommu;
   4.540 +    int addr_width = agaw_to_width(hd->agaw);
   4.541 +    struct dma_pte *pte;
   4.542 +    int total = agaw_to_level(hd->agaw);
   4.543 +    int level;
    4.544 +    u64 tmp;
   4.545 +
   4.546 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   4.547 +    iommu = drhd->iommu;
   4.548 +
   4.549 +    start &= (((u64)1) << addr_width) - 1;
   4.550 +    end &= (((u64)1) << addr_width) - 1;
   4.551 +
   4.552 +    /* we don't need lock here, nobody else touches the iova range */
   4.553 +    level = 2;
   4.554 +    while (level <= total) {
   4.555 +        tmp = align_to_level(start, level);
   4.556 +        if (tmp >= end || (tmp + level_size(level) > end))
   4.557 +            return;
   4.558 +
   4.559 +        while (tmp < end) {
   4.560 +            pte = dma_addr_level_pte(domain, tmp, level);
   4.561 +            if (pte) {
   4.562 +                free_xenheap_page((void *) maddr_to_virt(dma_pte_addr(*pte)));
   4.563 +                dma_clear_pte(*pte);
   4.564 +                iommu_flush_cache_entry(iommu, pte);
   4.565 +            }
   4.566 +            tmp += level_size(level);
   4.567 +        }
   4.568 +        level++;
   4.569 +    }
   4.570 +    /* free pgd */
   4.571 +    if (start == 0 && end == ((((u64)1) << addr_width) - 1)) {
   4.572 +        free_xenheap_page((void *)hd->pgd);
   4.573 +        hd->pgd = NULL;
   4.574 +    }
   4.575 +}
   4.576 +
   4.577 +/* iommu handling */
   4.578 +static int iommu_set_root_entry(struct iommu *iommu)
   4.579 +{
   4.580 +    void *addr;
   4.581 +    u32 cmd, sts;
   4.582 +    struct root_entry *root;
   4.583 +    unsigned long flags;
   4.584 +
   4.585 +    if (iommu == NULL)
   4.586 +        gdprintk(XENLOG_ERR VTDPREFIX,
   4.587 +            "iommu_set_root_entry: iommu == NULL\n");
   4.588 +
   4.589 +    spin_lock_irqsave(&iommu->lock, flags);
   4.590 +    if (!iommu->root_entry) {
   4.591 +        spin_unlock_irqrestore(&iommu->lock, flags);
   4.592 +        root = (struct root_entry *)alloc_xenheap_page();
   4.593 +        memset((u8*)root, 0, PAGE_SIZE);
   4.594 +        iommu_flush_cache_page(iommu, root);
   4.595 +        spin_lock_irqsave(&iommu->lock, flags);
   4.596 +
   4.597 +        if (!root && !iommu->root_entry) {
   4.598 +            spin_unlock_irqrestore(&iommu->lock, flags);
   4.599 +            return -ENOMEM;
   4.600 +        }
   4.601 +
   4.602 +        if (!iommu->root_entry)
   4.603 +            iommu->root_entry = root;
   4.604 +        else /* somebody is fast */
   4.605 +            free_xenheap_page((void *)root);
   4.606 +    }
   4.607 +    spin_unlock_irqrestore(&iommu->lock, flags);
   4.608 +
   4.609 +    addr = iommu->root_entry;
   4.610 +    spin_lock_irqsave(&iommu->register_lock, flags);
   4.611 +    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
   4.612 +    cmd = iommu->gcmd | DMA_GCMD_SRTP;
   4.613 +    dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
   4.614 +
   4.615 +    /* Make sure hardware complete it */
   4.616 +    while (1) {
   4.617 +        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
   4.618 +        if (sts & DMA_GSTS_RTPS)
   4.619 +            break;
   4.620 +        cpu_relax();
   4.621 +    }
   4.622 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.623 +
   4.624 +    return 0;
   4.625 +}
   4.626 +
   4.627 +static int iommu_enable_translation(struct iommu *iommu)
   4.628 +{
   4.629 +    u32 sts;
   4.630 +    unsigned long flags;
   4.631 +
   4.632 +    dprintk(XENLOG_INFO VTDPREFIX,
   4.633 +        "iommu_enable_translation: enabling vt-d translation\n");
   4.634 +    spin_lock_irqsave(&iommu->register_lock, flags);
   4.635 +    iommu->gcmd |= DMA_GCMD_TE;
   4.636 +    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
   4.637 +    /* Make sure hardware complete it */
   4.638 +    while (1) {
   4.639 +        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
   4.640 +        if (sts & DMA_GSTS_TES) {
   4.641 +            break;
   4.642 +        }
   4.643 +        cpu_relax();
   4.644 +    }
   4.645 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.646 +    return 0;
   4.647 +}
   4.648 +
   4.649 +int iommu_disable_translation(struct iommu *iommu)
   4.650 +{
   4.651 +    u32 sts;
   4.652 +    unsigned long flags;
   4.653 +
   4.654 +    spin_lock_irqsave(&iommu->register_lock, flags);
   4.655 +    iommu->gcmd &= ~ DMA_GCMD_TE;
   4.656 +    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
   4.657 +
   4.658 +    /* Make sure hardware complete it */
   4.659 +    while(1) {
   4.660 +        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
   4.661 +        if (!(sts & DMA_GSTS_TES))
   4.662 +                break;
   4.663 +        cpu_relax();
   4.664 +    }
   4.665 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.666 +    return 0;
   4.667 +}
   4.668 +
   4.669 +static struct iommu *vector_to_iommu[NR_VECTORS];
   4.670 +static int iommu_page_fault_do_one(struct iommu *iommu, int type,
   4.671 +        u8 fault_reason, u16 source_id, u32 addr)
   4.672 +{
   4.673 +    dprintk(XENLOG_WARNING VTDPREFIX,
   4.674 +        "iommu_page_fault:%s: DEVICE %x:%x.%x addr %x REASON %x\n",
   4.675 +        (type ? "DMA Read" : "DMA Write"),
   4.676 +        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
   4.677 +        PCI_FUNC(source_id & 0xFF), addr, fault_reason);
   4.678 +
   4.679 +    print_vtd_entries(current->domain, (source_id >> 8),(source_id & 0xff),
   4.680 +                      (addr >> PAGE_SHIFT)); 
   4.681 +    return 0;
   4.682 +}
   4.683 +
   4.684 +#define PRIMARY_FAULT_REG_LEN (16)
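          +/*
          + * Each primary fault record is 16 bytes: the high dword (offset 12)
          + * holds the Fault bit, fault reason and request type, the dword at
          + * offset 8 the source-id, and the low qword the faulting page
          + * address; the loop below decodes them in that order.
          + */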
   4.685 +static void iommu_page_fault(int vector, void *dev_id,
   4.686 +        struct cpu_user_regs *regs)
   4.687 +{
   4.688 +    struct iommu *iommu = dev_id;
   4.689 +    int reg, fault_index;
   4.690 +    u32 fault_status;
   4.691 +    unsigned long flags;
   4.692 +
   4.693 +    dprintk(XENLOG_WARNING VTDPREFIX,
   4.694 +        "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
   4.695 +
   4.696 +    spin_lock_irqsave(&iommu->register_lock, flags);
   4.697 +    fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
   4.698 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.699 +
   4.700 +    /* FIXME: ignore advanced fault log */
   4.701 +    if (!(fault_status & DMA_FSTS_PPF))
   4.702 +        return;
   4.703 +    fault_index = dma_fsts_fault_record_index(fault_status);
   4.704 +    reg = cap_fault_reg_offset(iommu->cap);
   4.705 +    while (1) {
   4.706 +        u8 fault_reason;
   4.707 +        u16 source_id;
   4.708 +        u32 guest_addr;
   4.709 +        int type;
   4.710 +        u32 data;
   4.711 +
   4.712 +        /* highest 32 bits */
   4.713 +        spin_lock_irqsave(&iommu->register_lock, flags);
   4.714 +        data = dmar_readl(iommu->reg, reg +
   4.715 +                fault_index * PRIMARY_FAULT_REG_LEN + 12);
   4.716 +        if (!(data & DMA_FRCD_F)) {
   4.717 +            spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.718 +            break;
   4.719 +        }
   4.720 +
   4.721 +        fault_reason = dma_frcd_fault_reason(data);
   4.722 +        type = dma_frcd_type(data);
   4.723 +
   4.724 +        data = dmar_readl(iommu->reg, reg +
   4.725 +                fault_index * PRIMARY_FAULT_REG_LEN + 8);
   4.726 +        source_id = dma_frcd_source_id(data);
   4.727 +
   4.728 +        guest_addr = dmar_readq(iommu->reg, reg +
   4.729 +                fault_index * PRIMARY_FAULT_REG_LEN);
   4.730 +        guest_addr = dma_frcd_page_addr(guest_addr);
   4.731 +        /* clear the fault */
   4.732 +        dmar_writel(iommu->reg, reg +
   4.733 +            fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
   4.734 +        spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.735 +
   4.736 +        iommu_page_fault_do_one(iommu, type, fault_reason,
   4.737 +                source_id, guest_addr);
   4.738 +
   4.739 +        fault_index++;
   4.740 +        if (fault_index > cap_num_fault_regs(iommu->cap))
   4.741 +            fault_index = 0;
   4.742 +    }
   4.743 +    /* clear primary fault overflow */
   4.744 +    if (fault_status & DMA_FSTS_PFO) {
   4.745 +        spin_lock_irqsave(&iommu->register_lock, flags);
   4.746 +        dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
   4.747 +        spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.748 +    }
   4.749 +    return;
   4.750 +}
   4.751 +
   4.752 +static void dma_msi_unmask(unsigned int vector)
   4.753 +{
   4.754 +    struct iommu *iommu = vector_to_iommu[vector];
   4.755 +    unsigned long flags;
   4.756 +
   4.757 +    /* unmask it */
   4.758 +    spin_lock_irqsave(&iommu->register_lock, flags);
   4.759 +    dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
   4.760 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.761 +}
   4.762 +
   4.763 +static void dma_msi_mask(unsigned int vector)
   4.764 +{
   4.765 +    unsigned long flags;
   4.766 +    struct iommu *iommu = vector_to_iommu[vector];
   4.767 +
   4.768 +    /* mask it */
   4.769 +    spin_lock_irqsave(&iommu->register_lock, flags);
   4.770 +    dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
   4.771 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.772 +}
   4.773 +
   4.774 +static unsigned int dma_msi_startup(unsigned int vector)
   4.775 +{
   4.776 +    dma_msi_unmask(vector);
   4.777 +    return 0;
   4.778 +}
   4.779 +
   4.780 +static void dma_msi_end(unsigned int vector)
   4.781 +{
   4.782 +    dma_msi_unmask(vector);
   4.783 +    ack_APIC_irq();
   4.784 +}
   4.785 +
   4.786 +static void dma_msi_data_init(struct iommu *iommu, int vector)
   4.787 +{
   4.788 +    u32 msi_data = 0;
   4.789 +    unsigned long flags;
   4.790 +
   4.791 +    /* Fixed, edge, assert mode. Follow MSI setting */
   4.792 +    msi_data |= vector & 0xff;
   4.793 +    msi_data |= 1 << 14;
   4.794 +
   4.795 +    spin_lock_irqsave(&iommu->register_lock, flags);
   4.796 +    dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
   4.797 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.798 +}
   4.799 +
   4.800 +static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
   4.801 +{
   4.802 +    u64 msi_address;
   4.803 +    unsigned long flags;
   4.804 +
   4.805 +    /* Physical, dedicated cpu. Follow MSI setting */
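          +    /*
          +     * Compatibility-format MSI address: 0xFEE in bits 31:20, destination
          +     * mode in bit 2, redirection hint in bit 3 and the target physical
          +     * APIC ID in bits 19:12 (MSI_TARGET_CPU_SHIFT).
          +     */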
   4.806 +    msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
   4.807 +    msi_address |= MSI_PHYSICAL_MODE << 2;
   4.808 +    msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
   4.809 +    msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
   4.810 +
   4.811 +    spin_lock_irqsave(&iommu->register_lock, flags);
   4.812 +    dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
   4.813 +    dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
   4.814 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
   4.815 +}
   4.816 +
   4.817 +static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
   4.818 +{
   4.819 +    struct iommu *iommu = vector_to_iommu[vector];
   4.820 +    dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
   4.821 +}
   4.822 +
   4.823 +static struct hw_interrupt_type dma_msi_type = {
   4.824 +    .typename = "DMA_MSI",
   4.825 +    .startup = dma_msi_startup,
   4.826 +    .shutdown = dma_msi_mask,
   4.827 +    .enable = dma_msi_unmask,
   4.828 +    .disable = dma_msi_mask,
   4.829 +    .ack = dma_msi_mask,
   4.830 +    .end = dma_msi_end,
   4.831 +    .set_affinity = dma_msi_set_affinity,
   4.832 +};
   4.833 +
   4.834 +int iommu_set_interrupt(struct iommu *iommu)
   4.835 +{
   4.836 +    int vector, ret;
   4.837 +    unsigned long flags;
   4.838 +
    4.839 +    vector = assign_irq_vector(AUTO_ASSIGN);
    4.840 +    if (!vector) {
    4.841 +        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
    4.842 +        return -EINVAL;
    4.843 +    }
    4.844 +
    4.845 +    vector_to_iommu[vector] = iommu;
    4.846 +
    4.847 +    /* The VT-d fault event is delivered as an MSI; make irq == vector. */
    4.848 +    irq_vector[vector] = vector;
    4.849 +    vector_irq[vector] = vector;
    4.850 +
   4.851 +    spin_lock_irqsave(&irq_desc[vector].lock, flags);
   4.852 +    irq_desc[vector].handler = &dma_msi_type;
   4.853 +    spin_unlock_irqrestore(&irq_desc[vector].lock, flags);
   4.854 +    set_intr_gate(vector, interrupt[vector]);
   4.855 +    ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
   4.856 +    if (ret)
   4.857 +        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
   4.858 +    return vector;
   4.859 +}
   4.860 +
   4.861 +struct iommu *iommu_alloc(void *hw_data)
   4.862 +{
   4.863 +    struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
   4.864 +    struct iommu *iommu;
   4.865 +    
    4.866 +    if (nr_iommus >= MAX_IOMMUS) {
   4.867 +        gdprintk(XENLOG_ERR VTDPREFIX,
   4.868 +            "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
   4.869 +        return NULL;
   4.870 +    }
   4.871 +        
   4.872 +    iommu = xmalloc(struct iommu);
   4.873 +    if (!iommu)
   4.874 +        return NULL;
   4.875 +    memset(iommu, 0, sizeof(struct iommu));
   4.876 +
   4.877 +    set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
   4.878 +    iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
   4.879 +    dprintk(XENLOG_INFO VTDPREFIX,
   4.880 +        "iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
   4.881 +        iommu->reg, drhd->address);
   4.882 +    nr_iommus++;
   4.883 +
   4.884 +    if (!iommu->reg) {
    4.885 +        printk(KERN_ERR VTDPREFIX "IOMMU: unable to map the register region\n");
   4.886 +        goto error;
   4.887 +    }
   4.888 +
   4.889 +    iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
   4.890 +    iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
   4.891 +
   4.892 +    spin_lock_init(&iommu->lock);
   4.893 +    spin_lock_init(&iommu->register_lock);
   4.894 +
   4.895 +    drhd->iommu = iommu;
   4.896 +    return iommu;
   4.897 +error:
   4.898 +    xfree(iommu);
   4.899 +    return NULL;
   4.900 +}
   4.901 +
   4.902 +static void free_iommu(struct iommu *iommu)
   4.903 +{
   4.904 +    if (!iommu)
   4.905 +        return;
   4.906 +    if (iommu->root_entry)
   4.907 +        free_xenheap_page((void *)iommu->root_entry);
   4.908 +    if (iommu->reg)
   4.909 +        iounmap(iommu->reg);
   4.910 +    free_irq(iommu->vector);
   4.911 +    xfree(iommu);
   4.912 +}
   4.913 +
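          +/*
          + * VT-d page tables use 9 address bits per level above the 12-bit page
          + * offset, so round the guest address width up to the nearest supported
          + * width of the form 12 + 9 * n, capped at 64 bits.
          + */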
   4.914 +#define guestwidth_to_adjustwidth(gaw) ({ \
   4.915 +    int agaw; \
   4.916 +    int r = (gaw - 12) % 9; \
   4.917 +    if (r == 0) \
   4.918 +        agaw = gaw; \
   4.919 +    else \
   4.920 +        agaw = gaw + 9 - r; \
   4.921 +    if (agaw > 64) \
   4.922 +        agaw = 64; \
   4.923 +    agaw; })
   4.924 +int iommu_domain_init(struct domain *domain)
   4.925 +{
   4.926 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
   4.927 +    struct iommu *iommu = NULL;
   4.928 +    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
   4.929 +    int adjust_width, agaw;
   4.930 +    unsigned long sagaw;
   4.931 +    struct acpi_drhd_unit *drhd;
   4.932 +
   4.933 +    if (list_empty(&acpi_drhd_units))
   4.934 +        return 0;
   4.935 +    spin_lock_init(&hd->mapping_lock);
   4.936 +    spin_lock_init(&hd->iommu_list_lock);
   4.937 +    INIT_LIST_HEAD(&hd->pdev_list);
   4.938 +
   4.939 +    for_each_drhd_unit(drhd) {
   4.940 +        if (drhd->iommu)
   4.941 +            iommu = drhd->iommu;
   4.942 +        else
   4.943 +            iommu = iommu_alloc(drhd);
    4.944 +    }
          +    if (!iommu)
          +        return -ENODEV;
   4.945 +
   4.946 +    /* calculate AGAW */
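          +    /*
          +     * The adjusted guest address width (AGAW) determines how many
          +     * page-table levels the IOMMU walks for this domain; the SAGAW
          +     * capability field advertises which widths the hardware supports.
          +     */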
   4.947 +    if (guest_width > cap_mgaw(iommu->cap))
   4.948 +        guest_width = cap_mgaw(iommu->cap);
   4.949 +    adjust_width = guestwidth_to_adjustwidth(guest_width);
   4.950 +    agaw = width_to_agaw(adjust_width);
   4.951 +    /* FIXME: hardware doesn't support it, choose a bigger one? */
   4.952 +    sagaw = cap_sagaw(iommu->cap);
   4.953 +    if (!test_bit(agaw, &sagaw)) {
   4.954 +        gdprintk(XENLOG_ERR VTDPREFIX,
   4.955 +            "IOMMU: hardware doesn't support the agaw\n");
   4.956 +        agaw = find_next_bit(&sagaw, 5, agaw);
   4.957 +        if (agaw >= 5)
   4.958 +            return -ENODEV;
   4.959 +    }
   4.960 +    hd->agaw = agaw;
   4.961 +    return 0;
   4.962 +}
   4.963 +
   4.964 +static int domain_context_mapping_one(
   4.965 +    struct domain *domain,
   4.966 +    struct iommu *iommu,
   4.967 +    u8 bus, u8 devfn)
   4.968 +{
   4.969 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
   4.970 +    struct context_entry *context;
   4.971 +    unsigned long flags;
   4.972 +    int ret = 0;
   4.973 +
   4.974 +    context = device_to_context_entry(iommu, bus, devfn);
   4.975 +    if (!context) {
   4.976 +        gdprintk(XENLOG_INFO VTDPREFIX,
   4.977 +            "domain_context_mapping_one:context == NULL:bdf = %x:%x:%x \n",
   4.978 +            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   4.979 +        return -ENOMEM;
   4.980 +    }
   4.981 +    spin_lock_irqsave(&iommu->lock, flags);
   4.982 +    if (context_present(*context)) {
   4.983 +        spin_unlock_irqrestore(&iommu->lock, flags);
   4.984 +        gdprintk(XENLOG_INFO VTDPREFIX,
   4.985 +                 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
   4.986 +                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   4.987 +        return 0;
   4.988 +    }
   4.989 +
   4.990 +#ifdef VTD_DEBUG
   4.991 +    dprintk(XENLOG_INFO VTDPREFIX,
   4.992 +        "context_mapping_one_1-%x:%x:%x-*context = %lx %lx\n",
   4.993 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn), context->hi, context->lo);
   4.994 +#endif
   4.995 +
    4.996 +    /*
    4.997 +     * domain_id 0 is not valid on Intel's IOMMU: the hardware requires
    4.998 +     * the domain id programmed here to be 1-based.
    4.999 +     */
  4.1000 +    context_set_domain_id(*context, domain->domain_id);
  4.1001 +    context_set_address_width(*context, hd->agaw);
  4.1002 +
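          +    /*
          +     * If the hardware advertises pass-through capability, DMA from this
          +     * device is left untranslated; otherwise point the context entry at
          +     * this domain's I/O page table.
          +     */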
  4.1003 +    if (ecap_pass_thru(iommu->ecap))
  4.1004 +        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
  4.1005 +    else {
  4.1006 +        context_set_address_root(*context, virt_to_maddr(hd->pgd));
  4.1007 +        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
  4.1008 +    }
  4.1009 +
  4.1010 +    context_set_fault_enable(*context);
  4.1011 +    context_set_present(*context);
  4.1012 +    iommu_flush_cache_entry(iommu, context);
  4.1013 +
  4.1014 +#ifdef VTD_DEBUG
  4.1015 +    dprintk(XENLOG_INFO VTDPREFIX,
  4.1016 +        "context_mapping_one_2-%x:%x:%x-*context=%lx %lx hd->pgd = %p\n",
  4.1017 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
  4.1018 +        context->hi, context->lo, hd->pgd);
  4.1019 +#endif
  4.1020 +
  4.1021 +    if (iommu_flush_context_device(iommu, domain->domain_id,
  4.1022 +                    (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
  4.1023 +        iommu_flush_write_buffer(iommu);
  4.1024 +    else
  4.1025 +        iommu_flush_iotlb_dsi(iommu, domain->domain_id, 0);
  4.1026 +    spin_unlock_irqrestore(&iommu->lock, flags);
  4.1027 +    return ret;
  4.1028 +}
  4.1029 +
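          +/*
          + * Walk the PCI capability list starting at 'pos' looking for capability
          + * id 'cap'; the ttl counter guards against malformed or looping lists.
          + */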
  4.1030 +static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
  4.1031 +{
  4.1032 +    u8 id;
  4.1033 +    int ttl = 48;
  4.1034 +
  4.1035 +    while (ttl--) {
  4.1036 +        pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
  4.1037 +        if (pos < 0x40)
  4.1038 +            break;
  4.1039 +        pos &= ~3;
  4.1040 +        id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
  4.1041 +                 pos + PCI_CAP_LIST_ID);
  4.1042 +
  4.1043 +        if (id == 0xff)
  4.1044 +            break;
  4.1045 +        if (id == cap)
  4.1046 +            return pos;
  4.1047 +        pos += PCI_CAP_LIST_NEXT;
  4.1048 +    }
  4.1049 +    return 0;
  4.1050 +}
  4.1051 +
  4.1052 +#define PCI_BASE_CLASS_BRIDGE    0x06
  4.1053 +#define PCI_CLASS_BRIDGE_PCI     0x0604
  4.1054 +
  4.1055 +#define DEV_TYPE_PCIe_ENDPOINT   1
  4.1056 +#define DEV_TYPE_PCI_BRIDGE      2
  4.1057 +#define DEV_TYPE_PCI             3
  4.1058 +
  4.1059 +int pdev_type(struct pci_dev *dev)
  4.1060 +{
  4.1061 +    u16 class_device;
  4.1062 +    u16 status;
  4.1063 +
  4.1064 +    class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
  4.1065 +                 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
  4.1066 +    if (class_device == PCI_CLASS_BRIDGE_PCI)
  4.1067 +        return DEV_TYPE_PCI_BRIDGE;
  4.1068 +
  4.1069 +    status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
  4.1070 +                 PCI_FUNC(dev->devfn), PCI_STATUS);
  4.1071 +
  4.1072 +    if (!(status & PCI_STATUS_CAP_LIST))
  4.1073 +        return DEV_TYPE_PCI;
  4.1074 +
  4.1075 +    if (__pci_find_next_cap(dev->bus, dev->devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP))
  4.1076 +        return DEV_TYPE_PCIe_ENDPOINT;
  4.1077 +
  4.1078 +    return DEV_TYPE_PCI;
  4.1079 +}
  4.1080 +
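          +/*
          + * bus2bridge[] maps a secondary bus number to the PCI bridge that owns
          + * it, so that conventional PCI devices behind a PCIe-to-PCI bridge can
          + * be mapped using the bridge's requester id.
          + */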
  4.1081 +#define MAX_BUSES 256
  4.1082 +struct pci_dev bus2bridge[MAX_BUSES];
  4.1083 +
  4.1084 +static int domain_context_mapping(
  4.1085 +    struct domain *domain,
  4.1086 +    struct iommu *iommu,
  4.1087 +    struct pci_dev *pdev)
  4.1088 +{
  4.1089 +    int ret = 0;
  4.1090 +    int dev, func, sec_bus, sub_bus;
  4.1091 +    u32 type;
  4.1092 +
  4.1093 +    type = pdev_type(pdev);
  4.1094 +    if (type == DEV_TYPE_PCI_BRIDGE) {
  4.1095 +        sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
  4.1096 +                      PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
  4.1097 +
  4.1098 +        if (bus2bridge[sec_bus].bus == 0) {
  4.1099 +            bus2bridge[sec_bus].bus   =  pdev->bus;
  4.1100 +            bus2bridge[sec_bus].devfn =  pdev->devfn;
  4.1101 +        }
  4.1102 +
  4.1103 +        sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
  4.1104 +                      PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
  4.1105 +
  4.1106 +        if (sec_bus != sub_bus) {
  4.1107 +            dprintk(XENLOG_INFO VTDPREFIX,
  4.1108 +                "context_mapping: nested PCI bridge not supported\n");
  4.1109 +            dprintk(XENLOG_INFO VTDPREFIX,
  4.1110 +                "    bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
  4.1111 +                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
  4.1112 +                sec_bus, sub_bus);
  4.1113 +        }
  4.1114 +    }
  4.1115 +
  4.1116 +    if (type == DEV_TYPE_PCIe_ENDPOINT) {
  4.1117 +        gdprintk(XENLOG_INFO VTDPREFIX,
  4.1118 +            "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
  4.1119 +            pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  4.1120 +        ret = domain_context_mapping_one(domain, iommu,
  4.1121 +                  (u8)(pdev->bus), (u8) (pdev->devfn));
  4.1122 +    }
  4.1123 +
  4.1124 +    /* PCI devices */
  4.1125 +    if (type == DEV_TYPE_PCI) {
  4.1126 +        gdprintk(XENLOG_INFO VTDPREFIX,
  4.1127 +            "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
  4.1128 +            pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  4.1129 +
  4.1130 +        if (pdev->bus == 0)
  4.1131 +            ret = domain_context_mapping_one(domain, iommu,
  4.1132 +                      (u8)(pdev->bus), (u8) (pdev->devfn));
  4.1133 +        else {
   4.1134 +            if (bus2bridge[pdev->bus].bus == 0)
   4.1135 +                gdprintk(XENLOG_ERR VTDPREFIX,
   4.1136 +                    "domain_context_mapping: bus2bridge[pdev->bus].bus == 0\n");
  4.1137 +
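          +            /*
          +             * DMA from a conventional PCI device behind a bridge carries
          +             * the bridge's requester id, so set up the context entry for
          +             * the bridge and for every function on the secondary bus.
          +             */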
  4.1138 +            ret = domain_context_mapping_one(domain, iommu,
  4.1139 +                      (u8)(bus2bridge[pdev->bus].bus),
  4.1140 +                      (u8)(bus2bridge[pdev->bus].devfn));
  4.1141 +
  4.1142 +            /* now map everything behind the PCI bridge */
  4.1143 +            for (dev = 0; dev < 32; dev++) {
  4.1144 +                for (func = 0; func < 8; func++) {
  4.1145 +                    ret = domain_context_mapping_one(domain, iommu,
  4.1146 +                              pdev->bus, (u8)PCI_DEVFN(dev, func));
  4.1147 +                    if (ret)
  4.1148 +                        return ret;
  4.1149 +                }
  4.1150 +            }
  4.1151 +        }
  4.1152 +    }
  4.1153 +    return ret;
  4.1154 +}
  4.1155 +
  4.1156 +static int domain_context_unmap_one(
  4.1157 +    struct domain *domain,
  4.1158 +    struct iommu *iommu,
  4.1159 +    u8 bus, u8 devfn)
  4.1160 +{
  4.1161 +    struct context_entry *context;
  4.1162 +    unsigned long flags;
  4.1163 +
  4.1164 +    context = device_to_context_entry(iommu, bus, devfn);
  4.1165 +    if (!context) {
  4.1166 +        gdprintk(XENLOG_INFO VTDPREFIX,
  4.1167 +            "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
  4.1168 +            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
  4.1169 +        return -ENOMEM;
  4.1170 +    }
  4.1171 +    spin_lock_irqsave(&iommu->lock, flags);
  4.1172 +    if (!context_present(*context)) {
  4.1173 +        spin_unlock_irqrestore(&iommu->lock, flags);
  4.1174 +        gdprintk(XENLOG_INFO VTDPREFIX,
  4.1175 +            "domain_context_unmap_one-%x:%x:%x- context NOT present:return\n",
  4.1176 +            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
  4.1177 +        return 0;
  4.1178 +    }
  4.1179 +    gdprintk(XENLOG_INFO VTDPREFIX,
  4.1180 +        "domain_context_unmap_one_1:bdf = %x:%x:%x\n",
  4.1181 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
  4.1182 +
  4.1183 +    context_clear_present(*context);
  4.1184 +    context_clear_entry(*context);
  4.1185 +    iommu_flush_cache_entry(iommu, context);
  4.1186 +    iommu_flush_context_global(iommu, 0);
  4.1187 +    iommu_flush_iotlb_global(iommu, 0);
  4.1188 +    spin_unlock_irqrestore(&iommu->lock, flags);
  4.1189 +
  4.1190 +    gdprintk(XENLOG_INFO VTDPREFIX,
  4.1191 +        "domain_context_unmap_one_2:bdf = %x:%x:%x\n",
  4.1192 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
  4.1193 +
  4.1194 +    return 0;
  4.1195 +}
  4.1196 +
  4.1197 +static int domain_context_unmap(
  4.1198 +    struct domain *domain,
  4.1199 +    struct iommu *iommu,
  4.1200 +    struct pci_dev *pdev)
  4.1201 +{
  4.1202 +    int ret = 0;
  4.1203 +    int dev, func, sec_bus, sub_bus;
  4.1204 +    u32 type;
  4.1205 +
  4.1206 +    type = pdev_type(pdev);
  4.1207 +    if (type == DEV_TYPE_PCI_BRIDGE) {
  4.1208 +        sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
  4.1209 +                      PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
  4.1210 +        sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
  4.1211 +                      PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
  4.1212 +
  4.1213 +        gdprintk(XENLOG_INFO VTDPREFIX,
  4.1214 +            "domain_context_unmap:BRIDGE:%x:%x:%x sec_bus=%x sub_bus=%x\n",
  4.1215 +            pdev->bus, PCI_SLOT(pdev->devfn),
  4.1216 +            PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
  4.1217 +    }
  4.1218 +
  4.1219 +    if (type == DEV_TYPE_PCIe_ENDPOINT) {
  4.1220 +        gdprintk(XENLOG_INFO VTDPREFIX,
  4.1221 +                 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
  4.1222 +                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  4.1223 +        ret = domain_context_unmap_one(domain, iommu,
  4.1224 +                  (u8)(pdev->bus), (u8) (pdev->devfn));
  4.1225 +    }
  4.1226 +
  4.1227 +    /* PCI devices */
  4.1228 +    if (type == DEV_TYPE_PCI) {
  4.1229 +        gdprintk(XENLOG_INFO VTDPREFIX,
  4.1230 +                 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
  4.1231 +                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  4.1232 +        if (pdev->bus == 0)
  4.1233 +            ret = domain_context_unmap_one(domain, iommu,
  4.1234 +                      (u8)(pdev->bus), (u8) (pdev->devfn));
  4.1235 +        else {
   4.1236 +            if (bus2bridge[pdev->bus].bus == 0)
   4.1237 +                gdprintk(XENLOG_INFO VTDPREFIX,
   4.1238 +                         "domain_context_unmap: bus2bridge[pdev->bus].bus == 0\n");
  4.1239 +
  4.1240 +            ret = domain_context_unmap_one(domain, iommu,
  4.1241 +                      (u8)(bus2bridge[pdev->bus].bus),
  4.1242 +                      (u8)(bus2bridge[pdev->bus].devfn));
  4.1243 +
   4.1244 +            /* now unmap everything behind the PCI bridge */
  4.1245 +            for (dev = 0; dev < 32; dev++) {
  4.1246 +                for (func = 0; func < 8; func++) {
  4.1247 +                    ret = domain_context_unmap_one(domain, iommu,
  4.1248 +                              pdev->bus, (u8)PCI_DEVFN(dev, func));
  4.1249 +                    if (ret)
  4.1250 +                        return ret;
  4.1251 +                }
  4.1252 +            }
  4.1253 +        }
  4.1254 +    }
  4.1255 +    return ret;
  4.1256 +}
  4.1257 +
  4.1258 +void reassign_device_ownership(
  4.1259 +    struct domain *source,
  4.1260 +    struct domain *target,
  4.1261 +    u8 bus, u8 devfn)
  4.1262 +{
  4.1263 +    struct hvm_iommu *source_hd = domain_hvm_iommu(source);
  4.1264 +    struct hvm_iommu *target_hd = domain_hvm_iommu(target);
  4.1265 +    struct pci_dev *pdev;
  4.1266 +    struct acpi_drhd_unit *drhd;
  4.1267 +    struct iommu *iommu;
  4.1268 +    int status;
  4.1269 +    unsigned long flags;
  4.1270 +
  4.1271 +    gdprintk(XENLOG_ERR VTDPREFIX,
  4.1272 +        "reassign_device-%x:%x:%x- source = %d target = %d\n",
  4.1273 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
  4.1274 +        source->domain_id, target->domain_id);
  4.1275 +
  4.1276 +    for_each_pdev(source, pdev) {
  4.1277 +        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
  4.1278 +            continue;
  4.1279 +
  4.1280 +        pdev->bus = bus;
  4.1281 +        pdev->devfn = devfn;
  4.1282 +        drhd = acpi_find_matched_drhd_unit(pdev);
  4.1283 +        iommu = drhd->iommu;
  4.1284 +        domain_context_unmap(source, iommu, pdev);
  4.1285 +
  4.1286 +        /*
  4.1287 +         * move pci device from the source domain to target domain.
  4.1288 +         */
  4.1289 +        spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
  4.1290 +        spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
  4.1291 +        list_move(&pdev->list, &target_hd->pdev_list);
  4.1292 +        spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
  4.1293 +        spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
  4.1294 +
  4.1295 +        status = domain_context_mapping(target, iommu, pdev);
  4.1296 +        if (status != 0)
  4.1297 +            gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
  4.1298 +
  4.1299 +        /*
  4.1300 +         * We are done.
  4.1301 +         */
  4.1302 +        break;
  4.1303 +    }
  4.1304 +}
  4.1305 +
  4.1306 +void return_devices_to_dom0(struct domain *d)
  4.1307 +{
  4.1308 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
  4.1309 +    struct pci_dev *pdev;
  4.1310 +
  4.1311 +    while (!list_empty(&hd->pdev_list)) {
  4.1312 +        pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
  4.1313 +        dprintk(XENLOG_INFO VTDPREFIX,
  4.1314 +            "return_devices_to_dom0: bdf = %x:%x:%x\n",
  4.1315 +            pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  4.1316 +        reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
  4.1317 +    }
  4.1318 +
  4.1319 +#ifdef VTD_DEBUG
  4.1320 +    for_each_pdev(dom0, pdev) {
  4.1321 +        dprintk(XENLOG_INFO VTDPREFIX,
  4.1322 +            "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
  4.1323 +            dom0->domain_id, pdev->bus,
  4.1324 +            PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  4.1325 +    }
  4.1326 +#endif
  4.1327 +}
  4.1328 +
  4.1329 +void iommu_domain_teardown(struct domain *d)
  4.1330 +{
  4.1331 +  if (list_empty(&acpi_drhd_units))
  4.1332 +      return;
  4.1333 +
  4.1334 +#if CONFIG_PAGING_LEVELS == 3
  4.1335 +  {
  4.1336 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
  4.1337 +    int level = agaw_to_level(hd->agaw);
  4.1338 +    struct dma_pte *pgd = NULL;
  4.1339 +
  4.1340 +    switch (level)
  4.1341 +    {
  4.1342 +        case VTD_PAGE_TABLE_LEVEL_3:
  4.1343 +            if ( hd->pgd )
  4.1344 +                free_xenheap_page((void *)hd->pgd);
  4.1345 +            break;
  4.1346 +        case VTD_PAGE_TABLE_LEVEL_4:
  4.1347 +            if ( hd->pgd )
  4.1348 +            {
  4.1349 +                pgd = hd->pgd;
  4.1350 +                if ( pgd[0].val != 0 )
  4.1351 +                    free_xenheap_page((void*)maddr_to_virt(
  4.1352 +                        dma_pte_addr(pgd[0])));
  4.1353 +            }
  4.1354 +            break;
  4.1355 +        default:
  4.1356 +            gdprintk(XENLOG_ERR VTDPREFIX,
  4.1357 +                "Unsupported p2m table sharing level!\n");
  4.1358 +            break;
  4.1359 +    }
  4.1360 +  }
  4.1361 +#endif
  4.1362 +    return_devices_to_dom0(d);
  4.1363 +}
  4.1364 +
  4.1365 +static int domain_context_mapped(struct domain *domain, struct pci_dev *pdev)
  4.1366 +{
  4.1367 +    struct acpi_drhd_unit *drhd;
  4.1368 +    struct iommu *iommu;
  4.1369 +    int ret;
  4.1370 +
  4.1371 +    for_each_drhd_unit(drhd) {
  4.1372 +        iommu = drhd->iommu;
  4.1373 +        ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
  4.1374 +        if (ret)
  4.1375 +            return ret;
  4.1376 +    }
  4.1377 +    return 0;
  4.1378 +}
  4.1379 +
  4.1380 +int iommu_map_page(struct domain *d, paddr_t gfn, paddr_t mfn)
  4.1381 +{
  4.1382 +    struct acpi_drhd_unit *drhd;
  4.1383 +    struct iommu *iommu;
  4.1384 +    struct dma_pte *pte = NULL;
  4.1385 +
  4.1386 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  4.1387 +    iommu = drhd->iommu;
  4.1388 +
  4.1389 +    /* do nothing if dom0 and iommu supports pass thru */
  4.1390 +    if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0))
  4.1391 +        return 0;
  4.1392 +
  4.1393 +    pte = addr_to_dma_pte(d, gfn << PAGE_SHIFT_4K);
  4.1394 +    if (!pte)
  4.1395 +        return -ENOMEM;
  4.1396 +    dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
  4.1397 +    dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
  4.1398 +    iommu_flush_cache_entry(iommu, pte);
  4.1399 +
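          +    /*
          +     * With caching mode the IOMMU may cache not-present entries, so the
          +     * IOTLB must be flushed after installing the mapping; otherwise a
          +     * write-buffer flush suffices when the hardware requires one.
          +     */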
  4.1400 +    for_each_drhd_unit(drhd) {
  4.1401 +        iommu = drhd->iommu;
  4.1402 +        if (cap_caching_mode(iommu->cap))
  4.1403 +            iommu_flush_iotlb_psi(iommu, d->domain_id,
  4.1404 +                                  gfn << PAGE_SHIFT_4K, 1, 0);
  4.1405 +        else if (cap_rwbf(iommu->cap))
  4.1406 +            iommu_flush_write_buffer(iommu);
  4.1407 +    }
  4.1408 +    return 0;
  4.1409 +}
  4.1410 +
  4.1411 +int iommu_unmap_page(struct domain *d, dma_addr_t gfn)
  4.1412 +{
  4.1413 +    struct acpi_drhd_unit *drhd;
  4.1414 +    struct iommu *iommu;
  4.1415 +    struct dma_pte *pte = NULL;
  4.1416 +
  4.1417 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  4.1418 +    iommu = drhd->iommu;
  4.1419 +
  4.1420 +    /* do nothing if dom0 and iommu supports pass thru */
  4.1421 +    if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0))
  4.1422 +        return 0;
  4.1423 +
  4.1424 +    /* get last level pte */
  4.1425 +    pte = dma_addr_level_pte(d, gfn << PAGE_SHIFT_4K, 1);
  4.1426 +    dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
  4.1427 +    
  4.1428 +    return 0;
  4.1429 +}
  4.1430 +
  4.1431 +int iommu_page_mapping(struct domain *domain, dma_addr_t iova,
  4.1432 +            void *hpa, size_t size, int prot)
  4.1433 +{
  4.1434 +    struct acpi_drhd_unit *drhd;
  4.1435 +    struct iommu *iommu;
  4.1436 +    unsigned long start_pfn, end_pfn;
  4.1437 +    struct dma_pte *pte = NULL;
  4.1438 +    int index;
  4.1439 +
  4.1440 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  4.1441 +    iommu = drhd->iommu;
  4.1442 +    if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
  4.1443 +        return -EINVAL;
  4.1444 +    iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
  4.1445 +    start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
  4.1446 +    end_pfn = (unsigned long)
  4.1447 +              ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
  4.1448 +    index = 0;
  4.1449 +    while (start_pfn < end_pfn) {
  4.1450 +        pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
  4.1451 +        if (!pte)
  4.1452 +            return -ENOMEM;
  4.1453 +        dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
  4.1454 +        dma_set_pte_prot(*pte, prot);
  4.1455 +        iommu_flush_cache_entry(iommu, pte);
  4.1456 +        start_pfn++;
  4.1457 +        index++;
  4.1458 +    }
  4.1459 +
  4.1460 +    for_each_drhd_unit(drhd) {
  4.1461 +        iommu = drhd->iommu;
  4.1462 +        if (cap_caching_mode(iommu->cap))
  4.1463 +            iommu_flush_iotlb_psi(iommu, domain->domain_id, iova, size, 0);
  4.1464 +        else if (cap_rwbf(iommu->cap))
  4.1465 +            iommu_flush_write_buffer(iommu);
  4.1466 +    }
  4.1467 +    return 0;
  4.1468 +}
  4.1469 +
  4.1470 +int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
  4.1471 +{
  4.1472 +    struct dma_pte *pte = NULL;
  4.1473 +
  4.1474 +    /* get last level pte */
  4.1475 +    pte = dma_addr_level_pte(domain, addr, 1);
  4.1476 +    dma_pte_clear_range(domain, addr, addr + size);
  4.1477 +    
  4.1478 +    return 0;
  4.1479 +}
  4.1480 +
  4.1481 +void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry)
  4.1482 +{
  4.1483 +    struct acpi_drhd_unit *drhd;
  4.1484 +    struct iommu *iommu = NULL;
  4.1485 +    struct dma_pte *pte = (struct dma_pte *) p2m_entry;
  4.1486 +
  4.1487 +    for_each_drhd_unit(drhd) {
  4.1488 +        iommu = drhd->iommu;
  4.1489 +        if (cap_caching_mode(iommu->cap))
  4.1490 +            iommu_flush_iotlb_psi(iommu, d->domain_id,
  4.1491 +                gfn << PAGE_SHIFT_4K, 1, 0);
  4.1492 +        else if (cap_rwbf(iommu->cap))
  4.1493 +            iommu_flush_write_buffer(iommu);
  4.1494 +    }
  4.1495 +    iommu_flush_cache_entry(iommu, pte);
  4.1496 +}
  4.1497 +
  4.1498 +int
  4.1499 +prepare_device(struct domain *domain, struct pci_dev dev)
  4.1500 +{
  4.1501 +    return 0;
  4.1502 +}
  4.1503 +
  4.1504 +static int iommu_prepare_rmrr_dev(
  4.1505 +    struct domain *d,
  4.1506 +    struct acpi_rmrr_unit *rmrr,
  4.1507 +    struct pci_dev *pdev)
  4.1508 +{
  4.1509 +    struct acpi_drhd_unit *drhd;
  4.1510 +    unsigned long size;
  4.1511 +    int ret;
  4.1512 +
   4.1513 +    /* map the RMRR region 1:1 into the domain's VT-d page table */
  4.1514 +    size = rmrr->end_address - rmrr->base_address + 1;
  4.1515 +    ret = iommu_page_mapping(d, rmrr->base_address,
  4.1516 +        (void *)rmrr->base_address, size,
  4.1517 +        DMA_PTE_READ|DMA_PTE_WRITE);
  4.1518 +    if (ret)
  4.1519 +        return ret;
  4.1520 +
  4.1521 +    if (domain_context_mapped(d, pdev) == 0) {
  4.1522 +        drhd = acpi_find_matched_drhd_unit(pdev);
  4.1523 +        ret = domain_context_mapping(d, drhd->iommu, pdev);
  4.1524 +        if (!ret)
  4.1525 +            return 0;
  4.1526 +    }
  4.1527 +    return ret;
  4.1528 +}
  4.1529 +
  4.1530 +void __init setup_dom0_devices(void)
  4.1531 +{
  4.1532 +    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
  4.1533 +    struct acpi_drhd_unit *drhd;
  4.1534 +    struct pci_dev *pdev;
  4.1535 +    int bus, dev, func;
  4.1536 +    u32 l;
  4.1537 +    u8 hdr_type;
  4.1538 +    int ret;
  4.1539 +
  4.1540 +#ifdef DEBUG_VTD_CONTEXT_ENTRY
  4.1541 +    for (bus = 0; bus < 256; bus++) {
  4.1542 +        for (dev = 0; dev < 32; dev++) { 
  4.1543 +            for (func = 0; func < 8; func++) { 
  4.1544 +                struct context_entry *context;
  4.1545 +                struct pci_dev device;
  4.1546 +
  4.1547 +                device.bus = bus; 
  4.1548 +                device.devfn = PCI_DEVFN(dev, func); 
  4.1549 +                drhd = acpi_find_matched_drhd_unit(&device);
  4.1550 +                context = device_to_context_entry(drhd->iommu,
  4.1551 +                    bus, PCI_DEVFN(dev, func));
  4.1552 +                if ((context->lo != 0) || (context->hi != 0))
  4.1553 +                    dprintk(XENLOG_INFO VTDPREFIX,
  4.1554 +                        "setup_dom0_devices-%x:%x:%x- context not 0\n",
  4.1555 +                        bus, dev, func);
  4.1556 +            }
  4.1557 +        }    
  4.1558 +    }        
  4.1559 +#endif
  4.1560 +
  4.1561 +    for (bus = 0; bus < 256; bus++) {
  4.1562 +        for (dev = 0; dev < 32; dev++) { 
  4.1563 +            for (func = 0; func < 8; func++) { 
  4.1564 +                l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
  4.1565 +                /* some broken boards return 0 or ~0 if a slot is empty: */
  4.1566 +                if (l == 0xffffffff || l == 0x00000000 ||
  4.1567 +                    l == 0x0000ffff || l == 0xffff0000)
  4.1568 +                    continue;
  4.1569 +                pdev = xmalloc(struct pci_dev);
  4.1570 +                pdev->bus = bus;
  4.1571 +                pdev->devfn = PCI_DEVFN(dev, func);
  4.1572 +                list_add_tail(&pdev->list, &hd->pdev_list);
  4.1573 +
  4.1574 +                drhd = acpi_find_matched_drhd_unit(pdev);
  4.1575 +                ret = domain_context_mapping(dom0, drhd->iommu, pdev);
  4.1576 +                if (ret != 0)
  4.1577 +                    gdprintk(XENLOG_ERR VTDPREFIX,
  4.1578 +                        "domain_context_mapping failed\n");
  4.1579 +
  4.1580 +                hdr_type = read_pci_config(bus, dev, func, PCI_HEADER_TYPE);
  4.1581 +                // if ((hdr_type & 0x8) == 0)
  4.1582 +                //      break;
  4.1583 +            }
  4.1584 +        }
  4.1585 +    }
  4.1586 +    for_each_pdev(dom0, pdev) {
  4.1587 +        dprintk(XENLOG_INFO VTDPREFIX,
  4.1588 +            "setup_dom0_devices: bdf = %x:%x:%x\n",
  4.1589 +            pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  4.1590 +    }
  4.1591 +}
  4.1592 +
  4.1593 +void clear_fault_bit(struct iommu *iommu)
  4.1594 +{
  4.1595 +    u64 val;
  4.1596 +
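          +    /*
          +     * Write the logged value back to the first fault recording register
          +     * to clear its RW1C fault bit, then clear the primary fault overflow
          +     * bit in the fault status register.
          +     */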
  4.1597 +    val = dmar_readq(
  4.1598 +            iommu->reg,
  4.1599 +            cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
  4.1600 +    dmar_writeq(
  4.1601 +            iommu->reg,
  4.1602 +            cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
  4.1603 +            val);
  4.1604 +    dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
  4.1605 +}
  4.1606 +
   4.1607 +/*
   4.1608 + * Called from the ACPI discovery code once all DMARs and RMRRs have been
   4.1609 + * scanned; run through them and initialise as much as necessary.
   4.1610 + */
  4.1611 +int vtd_enable = 1;
  4.1612 +static void setup_vtd_enable(char *s)
  4.1613 +{
  4.1614 +    if ( !strcmp(s, "0") )
  4.1615 +        vtd_enable = 0;
  4.1616 +    else if ( !strcmp(s, "1") )
  4.1617 +        vtd_enable = 1;
  4.1618 +    else
  4.1619 +        dprintk(XENLOG_INFO VTDPREFIX,
  4.1620 +            "Unknown vtd_enable value specified: '%s'\n", s);
  4.1621 +    dprintk(XENLOG_INFO VTDPREFIX, "vtd_enable = %x\n", vtd_enable);
  4.1622 +}
  4.1623 +custom_param("vtd", setup_vtd_enable);
  4.1624 +
  4.1625 +static int init_vtd_hw(void)
  4.1626 +{
  4.1627 +    struct acpi_drhd_unit *drhd;
  4.1628 +    struct iommu *iommu;
  4.1629 +    int ret;
  4.1630 +
  4.1631 +    for_each_drhd_unit(drhd) {
  4.1632 +        iommu = drhd->iommu;
  4.1633 +        ret = iommu_set_root_entry(iommu);
  4.1634 +        if (ret) {
  4.1635 +            gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
  4.1636 +            return -EIO;
  4.1637 +        }
  4.1638 +    }
  4.1639 +    return 0;
  4.1640 +}
  4.1641 +
  4.1642 +static int enable_vtd_translation(void)
  4.1643 +{
  4.1644 +    struct acpi_drhd_unit *drhd;
  4.1645 +    struct iommu *iommu;
  4.1646 +    int vector = 0;
  4.1647 +
  4.1648 +    for_each_drhd_unit(drhd) {
  4.1649 +        iommu = drhd->iommu;
  4.1650 +        vector = iommu_set_interrupt(iommu);
  4.1651 +        dma_msi_data_init(iommu, vector);
  4.1652 +        dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
  4.1653 +        iommu->vector = vector;
  4.1654 +        clear_fault_bit(iommu);
  4.1655 +        if (vtd_enable && iommu_enable_translation(iommu))
  4.1656 +            return -EIO;
  4.1657 +    }
  4.1658 +    return 0;
  4.1659 +}
  4.1660 +
  4.1661 +static void setup_dom0_rmrr(void)
  4.1662 +{
  4.1663 +    struct acpi_rmrr_unit *rmrr;
  4.1664 +    struct pci_dev *pdev;
  4.1665 +    int ret;
  4.1666 +
  4.1667 +    for_each_rmrr_device(rmrr, pdev)
  4.1668 +        ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
  4.1669 +        if (ret)
  4.1670 +            gdprintk(XENLOG_ERR VTDPREFIX,
  4.1671 +                "IOMMU: mapping reserved region failed\n");
  4.1672 +    end_for_each_rmrr_device(rmrr, pdev)
  4.1673 +}
  4.1674 +
  4.1675 +int iommu_setup(void)
  4.1676 +{
  4.1677 +    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
  4.1678 +    struct acpi_drhd_unit *drhd;
  4.1679 +    struct iommu *iommu;
  4.1680 +
  4.1681 +    if (list_empty(&acpi_drhd_units))
  4.1682 +        return 0;
  4.1683 +
  4.1684 +    INIT_LIST_HEAD(&hd->pdev_list);
  4.1685 +
  4.1686 +    /* start from scratch */
  4.1687 +    flush_all();
  4.1688 +
  4.1689 +    /* setup clflush size */
  4.1690 +    x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
  4.1691 +
  4.1692 +    /*
  4.1693 +     * allocate IO page directory page for the domain.
  4.1694 +     */
  4.1695 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  4.1696 +    iommu = drhd->iommu;
  4.1697 +
   4.1698 +    hd->pgd = (struct dma_pte *)alloc_xenheap_page();
          +    if (!hd->pgd)
          +        goto error;
  4.1699 +    memset((u8*)hd->pgd, 0, PAGE_SIZE);
  4.1700 +
  4.1701 +    if (init_vtd_hw())
  4.1702 +        goto error;
  4.1703 +    setup_dom0_devices();
  4.1704 +    setup_dom0_rmrr();
  4.1705 +    if (enable_vtd_translation())
  4.1706 +        goto error;
  4.1707 +
  4.1708 +    return 0;
  4.1709 +
  4.1710 +error:
  4.1711 +    printk("iommu_setup() failed\n");
  4.1712 +    for_each_drhd_unit(drhd) {
  4.1713 +        iommu = drhd->iommu;
  4.1714 +        free_iommu(iommu);
  4.1715 +    }
  4.1716 +    return -EIO;
  4.1717 +}
  4.1718 +
  4.1719 +int assign_device(struct domain *d, u8 bus, u8 devfn)
  4.1720 +{
  4.1721 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
  4.1722 +    struct acpi_rmrr_unit *rmrr;
  4.1723 +    struct pci_dev *pdev;
  4.1724 +    int ret = 0;
  4.1725 +
  4.1726 +    if (list_empty(&acpi_drhd_units))
  4.1727 +        return ret;
  4.1728 +
  4.1729 +    dprintk(XENLOG_INFO VTDPREFIX,
  4.1730 +        "assign_device: bus = %x dev = %x func = %x\n",
  4.1731 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
  4.1732 +
  4.1733 +    reassign_device_ownership(dom0, d, bus, devfn);
  4.1734 +
   4.1735 +    /* set up RMRR identity mappings just once per domain */
  4.1736 +    if (list_empty(&hd->pdev_list))
  4.1737 +        for_each_rmrr_device(rmrr, pdev)
  4.1738 +            ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
  4.1739 +            if (ret)
  4.1740 +                gdprintk(XENLOG_ERR VTDPREFIX,
  4.1741 +                    "IOMMU: mapping reserved region failed\n");
  4.1742 +        end_for_each_rmrr_device(rmrr, pdev)
  4.1743 +    return ret;
  4.1744 +}
  4.1745 +
  4.1746 +void iommu_set_pgd(struct domain *d)
  4.1747 +{
  4.1748 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
  4.1749 +    unsigned long p2m_table;
  4.1750 +
  4.1751 +    if (hd->pgd) {
  4.1752 +        gdprintk(XENLOG_INFO VTDPREFIX,
  4.1753 +            "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
  4.1754 +        hd->pgd = NULL;
  4.1755 +    }
  4.1756 +    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
  4.1757 +
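          +    /*
          +     * Share the domain's p2m tables with the IOMMU: lower levels are
          +     * reused directly, and only the top level(s) are replicated where
          +     * the VT-d and p2m paging level counts differ.
          +     */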
  4.1758 +#if CONFIG_PAGING_LEVELS == 3
  4.1759 +    if ( !hd->pgd )
  4.1760 +    {
  4.1761 +        int level = agaw_to_level(hd->agaw);
  4.1762 +        struct dma_pte *pmd = NULL;
  4.1763 +        struct dma_pte *pgd = NULL;
  4.1764 +        struct dma_pte *pte = NULL;
  4.1765 +        l3_pgentry_t *l3e;
  4.1766 +        unsigned long flags;
  4.1767 +        int i;
  4.1768 +
  4.1769 +        spin_lock_irqsave(&hd->mapping_lock, flags);
  4.1770 +        if (!hd->pgd) {
  4.1771 +            pgd = (struct dma_pte *)alloc_xenheap_page();
  4.1772 +            memset((u8*)pgd, 0, PAGE_SIZE);
  4.1773 +            if (!hd->pgd)
  4.1774 +                hd->pgd = pgd;
  4.1775 +            else /* somebody is fast */
  4.1776 +                free_xenheap_page((void *) pgd);
  4.1777 +        }
  4.1778 +
  4.1779 +        l3e = map_domain_page(p2m_table);
  4.1780 +        switch(level)
  4.1781 +        {
  4.1782 +            case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
  4.1783 +                /* We only support 8 entries for the PAE L3 p2m table */
  4.1784 +                for ( i = 0; i < 8 ; i++ )
  4.1785 +                {
  4.1786 +                    /* Don't create new L2 entry, use ones from p2m table */
  4.1787 +                    pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
  4.1788 +                }
  4.1789 +                break;
  4.1790 +
  4.1791 +            case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
  4.1792 +                /* We allocate one more page for the top vtd page table. */
  4.1793 +                pmd = (struct dma_pte *)alloc_xenheap_page();
  4.1794 +                memset((u8*)pmd, 0, PAGE_SIZE);
  4.1795 +                pte = &pgd[0];
  4.1796 +                dma_set_pte_addr(*pte, virt_to_maddr(pmd));
  4.1797 +                dma_set_pte_readable(*pte);
  4.1798 +                dma_set_pte_writable(*pte);
  4.1799 +
  4.1800 +                for ( i = 0; i < 8; i++ )
  4.1801 +                {
  4.1802 +                    /* Don't create new L2 entry, use ones from p2m table */
  4.1803 +                    pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
  4.1804 +                }
  4.1805 +                break;
  4.1806 +            default:
  4.1807 +                gdprintk(XENLOG_ERR VTDPREFIX,
  4.1808 +                    "iommu_set_pgd:Unsupported p2m table sharing level!\n");
  4.1809 +                break;
  4.1810 +        }
  4.1811 +        unmap_domain_page(l3e);
  4.1812 +        spin_unlock_irqrestore(&hd->mapping_lock, flags);
  4.1813 +    }
  4.1814 +#elif CONFIG_PAGING_LEVELS == 4
  4.1815 +    if ( !hd->pgd )
  4.1816 +    {
  4.1817 +        int level = agaw_to_level(hd->agaw);
  4.1818 +        l3_pgentry_t *l3e;
  4.1819 +        mfn_t pgd_mfn;
  4.1820 +
  4.1821 +        switch (level)
  4.1822 +        {
  4.1823 +            case VTD_PAGE_TABLE_LEVEL_3:
  4.1824 +                l3e = map_domain_page(p2m_table);
  4.1825 +                if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
  4.1826 +                {
  4.1827 +                    gdprintk(XENLOG_ERR VTDPREFIX,
  4.1828 +                        "iommu_set_pgd: second level wasn't there\n");
  4.1829 +                    unmap_domain_page(l3e);
  4.1830 +                    return;
  4.1831 +                }
  4.1832 +                pgd_mfn = _mfn(l3e_get_pfn(*l3e));
  4.1833 +                unmap_domain_page(l3e);
  4.1834 +                hd->pgd = maddr_to_virt(pagetable_get_paddr(
  4.1835 +                      pagetable_from_mfn(pgd_mfn)));
  4.1836 +                break;
  4.1837 +
  4.1838 +            case VTD_PAGE_TABLE_LEVEL_4:
  4.1839 +                pgd_mfn = _mfn(p2m_table);
  4.1840 +                hd->pgd = maddr_to_virt(pagetable_get_paddr(
  4.1841 +                      pagetable_from_mfn(pgd_mfn)));
  4.1842 +                break;
  4.1843 +            default:
  4.1844 +                gdprintk(XENLOG_ERR VTDPREFIX,
  4.1845 +                    "iommu_set_pgd:Unsupported p2m table sharing level!\n");
  4.1846 +                break;
  4.1847 +        }
  4.1848 +    }
  4.1849 +#endif
  4.1850 +    gdprintk(XENLOG_INFO VTDPREFIX,
  4.1851 +        "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
  4.1852 +}
  4.1853 +
  4.1854 +
  4.1855 +u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
  4.1856 +int iommu_suspend(void)
  4.1857 +{
  4.1858 +    struct acpi_drhd_unit *drhd;
  4.1859 +    struct iommu *iommu;
  4.1860 +    int i = 0;
  4.1861 +
  4.1862 +    if (!vtd_enable)
  4.1863 +        return 0;
  4.1864 +
  4.1865 +    flush_all();
  4.1866 +    for_each_drhd_unit(drhd) {
  4.1867 +        iommu = drhd->iommu;
  4.1868 +        iommu_state[DMAR_RTADDR_REG * i] =
  4.1869 +            (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
  4.1870 +        iommu_state[DMAR_FECTL_REG * i] =
  4.1871 +            (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
  4.1872 +        iommu_state[DMAR_FEDATA_REG * i] =
  4.1873 +            (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
  4.1874 +        iommu_state[DMAR_FEADDR_REG * i] =
  4.1875 +            (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
  4.1876 +        iommu_state[DMAR_FEUADDR_REG * i] =
  4.1877 +            (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
  4.1878 +        iommu_state[DMAR_PLMBASE_REG * i] =
  4.1879 +            (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
  4.1880 +        iommu_state[DMAR_PLMLIMIT_REG * i] =
  4.1881 +            (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
  4.1882 +        iommu_state[DMAR_PHMBASE_REG * i] =
  4.1883 +            (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
  4.1884 +        iommu_state[DMAR_PHMLIMIT_REG * i] =
  4.1885 +            (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
  4.1886 +        i++;
  4.1887 +    }
  4.1888 +
  4.1889 +    return 0;
  4.1890 +}
  4.1891 +
  4.1892 +int iommu_resume(void)
  4.1893 +{
  4.1894 +    struct acpi_drhd_unit *drhd;
  4.1895 +    struct iommu *iommu;
  4.1896 +    int i = 0;
  4.1897 +
  4.1898 +    if (!vtd_enable)
  4.1899 +        return 0;
  4.1900 +
  4.1901 +    flush_all();
  4.1902 +
  4.1903 +    init_vtd_hw();
  4.1904 +    for_each_drhd_unit(drhd) {
  4.1905 +        iommu = drhd->iommu;
  4.1906 +        dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
  4.1907 +            (u64) iommu_state[DMAR_RTADDR_REG * i]);
  4.1908 +        dmar_writel(iommu->reg, DMAR_FECTL_REG,
  4.1909 +            (u32) iommu_state[DMAR_FECTL_REG * i]);
  4.1910 +        dmar_writel(iommu->reg, DMAR_FEDATA_REG,
  4.1911 +            (u32) iommu_state[DMAR_FEDATA_REG * i]);
  4.1912 +        dmar_writel(iommu->reg, DMAR_FEADDR_REG,
  4.1913 +            (u32) iommu_state[DMAR_FEADDR_REG * i]);
  4.1914 +        dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
  4.1915 +            (u32) iommu_state[DMAR_FEUADDR_REG * i]);
  4.1916 +        dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
  4.1917 +            (u32) iommu_state[DMAR_PLMBASE_REG * i]);
  4.1918 +        dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
  4.1919 +            (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
  4.1920 +        dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
  4.1921 +            (u64) iommu_state[DMAR_PHMBASE_REG * i]);
  4.1922 +        dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
  4.1923 +            (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
  4.1924 +
  4.1925 +        if (iommu_enable_translation(iommu))
  4.1926 +            return -EIO;
  4.1927 +        i++;
  4.1928 +    }
  4.1929 +    return 0;
  4.1930 +}
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/xen/arch/x86/hvm/vmx/vtd/io.c	Fri Sep 14 16:40:49 2007 +0100
     5.3 @@ -0,0 +1,120 @@
     5.4 +/*
     5.5 + * Copyright (c) 2006, Intel Corporation.
     5.6 + *
     5.7 + * This program is free software; you can redistribute it and/or modify it
     5.8 + * under the terms and conditions of the GNU General Public License,
     5.9 + * version 2, as published by the Free Software Foundation.
    5.10 + *
    5.11 + * This program is distributed in the hope it will be useful, but WITHOUT
    5.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    5.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    5.14 + * more details.
    5.15 + *
    5.16 + * You should have received a copy of the GNU General Public License along with
    5.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    5.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    5.19 + *
    5.20 + * Copyright (C) Allen Kay <allen.m.kay@intel.com>
    5.21 + * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
    5.22 + */
    5.23 +
     5.24 +#include <xen/config.h>
     5.25 +#include <xen/init.h>
    5.27 +#include <xen/mm.h>
    5.28 +#include <xen/lib.h>
    5.29 +#include <xen/errno.h>
    5.30 +#include <xen/trace.h>
    5.31 +#include <xen/event.h>
    5.32 +#include <xen/hypercall.h>
    5.33 +#include <asm/current.h>
    5.34 +#include <asm/cpufeature.h>
    5.35 +#include <asm/processor.h>
    5.36 +#include <asm/msr.h>
    5.37 +#include <asm/apic.h>
    5.38 +#include <asm/paging.h>
    5.39 +#include <asm/shadow.h>
    5.40 +#include <asm/p2m.h>
    5.41 +#include <asm/hvm/hvm.h>
    5.42 +#include <asm/hvm/support.h>
    5.43 +#include <asm/hvm/vpt.h>
    5.44 +#include <asm/hvm/vpic.h>
    5.45 +#include <asm/hvm/vlapic.h>
    5.46 +#include <public/sched.h>
    5.47 +#include <xen/iocap.h>
    5.48 +#include <public/hvm/ioreq.h>
    5.49 +
    5.50 +int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
    5.51 +{
    5.52 +    uint32_t device, intx;
    5.53 +    uint32_t link, isa_irq;
    5.54 +    struct hvm_irq *hvm_irq;
    5.55 +
    5.56 +    if (!vtd_enabled || (d == dom0))
    5.57 +        return 0;
    5.58 +
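          +    /*
          +     * Translate the machine irq to the guest device/INTx it was bound to,
          +     * follow the PCI link to the guest ISA irq, record the mapping and
          +     * mark the direct-passthrough irq pending before kicking VCPU 0.
          +     */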
    5.59 +    if (d->arch.hvm_domain.irq.mirq[mirq].valid)
    5.60 +    {
    5.61 +        device = d->arch.hvm_domain.irq.mirq[mirq].device;
    5.62 +        intx = d->arch.hvm_domain.irq.mirq[mirq].intx;
    5.63 +        link = hvm_pci_intx_link(device, intx);
    5.64 +        hvm_irq = &d->arch.hvm_domain.irq;
    5.65 +        isa_irq = hvm_irq->pci_link.route[link];
    5.66 +
    5.67 +        if ( !d->arch.hvm_domain.irq.girq[isa_irq].valid )
    5.68 +        {
    5.69 +            d->arch.hvm_domain.irq.girq[isa_irq].valid = 1;
    5.70 +            d->arch.hvm_domain.irq.girq[isa_irq].device = device;
    5.71 +            d->arch.hvm_domain.irq.girq[isa_irq].intx = intx;
    5.72 +            d->arch.hvm_domain.irq.girq[isa_irq].machine_gsi = mirq;
    5.73 +        }
    5.74 +
    5.75 +        if ( !test_and_set_bit(mirq, d->arch.hvm_domain.irq.dirq_mask) )
    5.76 +        {
    5.77 +            vcpu_kick(d->vcpu[0]);
    5.78 +            return 1;
    5.79 +        }
    5.80 +        else
     5.81 +            dprintk(XENLOG_INFO, "mirq %u already pending\n", mirq);
    5.82 +    }
    5.83 +    return 0;
    5.84 +}
    5.85 +
    5.86 +void hvm_dpci_eoi(unsigned int guest_gsi, union vioapic_redir_entry *ent)
    5.87 +{
    5.88 +    struct domain *d = current->domain;
    5.89 +    uint32_t device, intx, machine_gsi;
    5.90 +    irq_desc_t *desc;
    5.91 +
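          +    /*
          +     * On guest EOI, deassert the virtual INTx and, unless the vIOAPIC
          +     * entry is masked, end the physical interrupt so the machine irq
          +     * can fire again.
          +     */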
    5.92 +    if (d->arch.hvm_domain.irq.girq[guest_gsi].valid)
    5.93 +    {
    5.94 +        device = d->arch.hvm_domain.irq.girq[guest_gsi].device;
    5.95 +        intx = d->arch.hvm_domain.irq.girq[guest_gsi].intx;
    5.96 +        machine_gsi = d->arch.hvm_domain.irq.girq[guest_gsi].machine_gsi;
    5.97 +        gdprintk(XENLOG_INFO, "hvm_dpci_eoi:: device %x intx %x\n",
    5.98 +            device, intx);
    5.99 +        hvm_pci_intx_deassert(d, device, intx);
    5.100 +        if ( (ent == NULL) || (ent->fields.mask == 0) ) {
   5.101 +            desc = &irq_desc[irq_to_vector(machine_gsi)];
   5.102 +            desc->handler->end(irq_to_vector(machine_gsi));
   5.103 +        }
   5.104 +    }
   5.105 +}
   5.106 +
   5.107 +int release_devices(struct domain *d)
   5.108 +{
   5.109 +    struct hvm_domain *hd = &d->arch.hvm_domain;
   5.110 +    uint32_t i;
   5.111 +    int ret = 0;
   5.112 +
   5.113 +    if (!vtd_enabled)
   5.114 +        return ret;
   5.115 +
   5.116 +    /* unbind irq */
   5.117 +    for (i = 0; i < NR_IRQS; i++) {
   5.118 +        if (hd->irq.mirq[i].valid)
   5.119 +            ret = pirq_guest_unbind(d, i);
   5.120 +    }
   5.121 +    iommu_domain_teardown(d);
   5.122 +    return ret;
   5.123 +}
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/xen/arch/x86/hvm/vmx/vtd/msi.h	Fri Sep 14 16:40:49 2007 +0100
     6.3 @@ -0,0 +1,128 @@
     6.4 +/*
     6.5 + * Copyright (C) 2003-2004 Intel
     6.6 + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
     6.7 + */
     6.8 +
     6.9 +#ifndef MSI_H
    6.10 +#define MSI_H
    6.11 +
    6.12 +/*
    6.13 + * Assume the maximum number of hot plug slots supported by the system is about
     6.14 + * ten. The worst case is that each of these slots is hot-added with a device
     6.15 + * that has two MSI/MSI-X capable functions. To prevent an MSI-X driver that
     6.16 + * requests all available vectors from exhausting them, NR_HP_RESERVED_VECTORS is defined
    6.17 + * as below to ensure at least one message is assigned to each detected MSI/
    6.18 + * MSI-X device function.
    6.19 + */
    6.20 +#define NR_HP_RESERVED_VECTORS 	20
    6.21 +
    6.22 +extern int vector_irq[NR_VECTORS];
    6.23 +extern void (*interrupt[NR_IRQS])(void);
    6.24 +extern int pci_vector_resources(int last, int nr_released);
    6.25 +
    6.26 +/*
    6.27 + * MSI-X Address Register
    6.28 + */
    6.29 +#define PCI_MSIX_FLAGS_QSIZE		0x7FF
    6.30 +#define PCI_MSIX_FLAGS_ENABLE		(1 << 15)
    6.31 +#define PCI_MSIX_FLAGS_BIRMASK		(7 << 0)
    6.32 +#define PCI_MSIX_FLAGS_BITMASK		(1 << 0)
    6.33 +
    6.34 +#define PCI_MSIX_ENTRY_SIZE			16
    6.35 +#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET	0
    6.36 +#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET	4
    6.37 +#define  PCI_MSIX_ENTRY_DATA_OFFSET		8
    6.38 +#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET	12
    6.39 +
    6.40 +#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
    6.41 +#define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
    6.42 +#define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
    6.43 +#define msi_data_reg(base, is64bit)	\
    6.44 +	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
    6.45 +#define msi_mask_bits_reg(base, is64bit) \
    6.46 +	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
    6.47 +#define msi_disable(control)		control &= ~PCI_MSI_FLAGS_ENABLE
    6.48 +#define multi_msi_capable(control) \
    6.49 +	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
    6.50 +#define multi_msi_enable(control, num) \
    6.51 +	control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
    6.52 +#define is_64bit_address(control)	(control & PCI_MSI_FLAGS_64BIT)
    6.53 +#define is_mask_bit_support(control)	(control & PCI_MSI_FLAGS_MASKBIT)
    6.54 +#define msi_enable(control, num) multi_msi_enable(control, num); \
    6.55 +	control |= PCI_MSI_FLAGS_ENABLE
    6.56 +
    6.57 +#define msix_table_offset_reg(base)	(base + 0x04)
    6.58 +#define msix_pba_offset_reg(base)	(base + 0x08)
    6.59 +#define msix_enable(control)	 	control |= PCI_MSIX_FLAGS_ENABLE
    6.60 +#define msix_disable(control)	 	control &= ~PCI_MSIX_FLAGS_ENABLE
    6.61 +#define msix_table_size(control) 	((control & PCI_MSIX_FLAGS_QSIZE)+1)
    6.62 +#define multi_msix_capable		msix_table_size
    6.63 +#define msix_unmask(address)	 	(address & ~PCI_MSIX_FLAGS_BITMASK)
    6.64 +#define msix_mask(address)		(address | PCI_MSIX_FLAGS_BITMASK)
    6.65 +#define msix_is_pending(address) 	(address & PCI_MSIX_FLAGS_PENDMASK)
    6.66 +
    6.67 +/*
    6.68 + * MSI Defined Data Structures
    6.69 + */
    6.70 +#define MSI_ADDRESS_HEADER		0xfee
    6.71 +#define MSI_ADDRESS_HEADER_SHIFT	12
    6.72 +#define MSI_ADDRESS_HEADER_MASK		0xfff000
    6.73 +#define MSI_ADDRESS_DEST_ID_MASK	0xfff0000f
    6.74 +#define MSI_TARGET_CPU_MASK		0xff
    6.75 +#define MSI_TARGET_CPU_SHIFT		12
    6.76 +#define MSI_DELIVERY_MODE		0
    6.77 +#define MSI_LEVEL_MODE			1	/* Edge always assert */
    6.78 +#define MSI_TRIGGER_MODE		0	/* MSI is edge sensitive */
    6.79 +#define MSI_PHYSICAL_MODE		0
    6.80 +#define MSI_LOGICAL_MODE		1
    6.81 +#define MSI_REDIRECTION_HINT_MODE	0
    6.82 +
    6.83 +#define __LITTLE_ENDIAN_BITFIELD	1
    6.84 +
    6.85 +struct msg_data {
    6.86 +#if defined(__LITTLE_ENDIAN_BITFIELD)
    6.87 +	__u32	vector		:  8;
    6.88 +	__u32	delivery_mode	:  3;	/* 000b: FIXED | 001b: lowest prior */
    6.89 +	__u32	reserved_1	:  3;
    6.90 +	__u32	level		:  1;	/* 0: deassert | 1: assert */
    6.91 +	__u32	trigger		:  1;	/* 0: edge | 1: level */
    6.92 +	__u32	reserved_2	: 16;
    6.93 +#elif defined(__BIG_ENDIAN_BITFIELD)
    6.94 +	__u32	reserved_2	: 16;
    6.95 +	__u32	trigger		:  1;	/* 0: edge | 1: level */
    6.96 +	__u32	level		:  1;	/* 0: deassert | 1: assert */
    6.97 +	__u32	reserved_1	:  3;
    6.98 +	__u32	delivery_mode	:  3;	/* 000b: FIXED | 001b: lowest prior */
    6.99 +	__u32	vector		:  8;
   6.100 +#else
   6.101 +#error "Bitfield endianness not defined! Check your byteorder.h"
   6.102 +#endif
   6.103 +} __attribute__ ((packed));
   6.104 +
   6.105 +struct msg_address {
   6.106 +	union {
   6.107 +		struct {
   6.108 +#if defined(__LITTLE_ENDIAN_BITFIELD)
   6.109 +			__u32	reserved_1	:  2;
   6.110 +			__u32	dest_mode	:  1;	/*0:physic | 1:logic */
   6.111 +			__u32	redirection_hint:  1;  	/*0: dedicated CPU
   6.112 +							  1: lowest priority */
   6.113 +			__u32	reserved_2	:  4;
   6.114 + 			__u32	dest_id		: 24;	/* Destination ID */
   6.115 +#elif defined(__BIG_ENDIAN_BITFIELD)
   6.116 + 			__u32	dest_id		: 24;	/* Destination ID */
   6.117 +			__u32	reserved_2	:  4;
   6.118 +			__u32	redirection_hint:  1;  	/*0: dedicated CPU
   6.119 +							  1: lowest priority */
    6.120 +			__u32	dest_mode	:  1;	/* 0: physical | 1: logical */
   6.121 +			__u32	reserved_1	:  2;
   6.122 +#else
   6.123 +#error "Bitfield endianness not defined! Check your byteorder.h"
   6.124 +#endif
   6.125 +      		}u;
   6.126 +       		__u32  value;
   6.127 +	}lo_address;
   6.128 +	__u32 	hi_address;
   6.129 +} __attribute__ ((packed));
   6.130 +
   6.131 +#endif /* MSI_H */
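The msg_address/msg_data bitfields above mirror the architectural MSI address (0xFEExxxxx) and data register layouts. As an illustration only, not part of this changeset, the sketch below composes a fixed-delivery, edge-triggered message aimed at a single local APIC using the msi.h constants; the helper name compose_msi_msg_sketch is hypothetical, and it assumes memset() and the Xen integer types are available.

    /*
     * Hypothetical sketch: fill in an MSI address/data pair for a
     * fixed-delivery, edge-triggered interrupt targeting one local APIC,
     * using the definitions from msi.h above.
     */
    static void compose_msi_msg_sketch(struct msg_address *addr,
                                       struct msg_data *data,
                                       u8 vector, u8 apic_id)
    {
        memset(addr, 0, sizeof(*addr));
        memset(data, 0, sizeof(*data));

        /* 0xFEE in bits 31:20, destination APIC ID in bits 19:12.  The
         * cleared dest_mode/redirection_hint bits select physical
         * destination mode with no redirection hint. */
        addr->lo_address.value =
            (MSI_ADDRESS_HEADER << MSI_ADDRESS_HEADER_SHIFT) |
            ((apic_id & MSI_TARGET_CPU_MASK) << MSI_TARGET_CPU_SHIFT);
        addr->hi_address = 0;

        data->vector        = vector;
        data->delivery_mode = MSI_DELIVERY_MODE;  /* 000b: fixed */
        data->trigger       = MSI_TRIGGER_MODE;   /* edge */
        data->level         = MSI_LEVEL_MODE;     /* assert */
    }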
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xen/arch/x86/hvm/vmx/vtd/pci-direct.h	Fri Sep 14 16:40:49 2007 +0100
     7.3 @@ -0,0 +1,48 @@
     7.4 +#ifndef ASM_PCI_DIRECT_H
     7.5 +#define ASM_PCI_DIRECT_H 1
     7.6 +
     7.7 +#include <xen/types.h>
     7.8 +#include <asm/io.h>
     7.9 +
    7.10 +/* Direct PCI access. This is used for PCI accesses in early boot before
    7.11 +   the PCI subsystem works. */ 
    7.12 +
    7.13 +#define PDprintk(x...)
    7.14 +
    7.15 +static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
    7.16 +{
    7.17 +    u32 v; 
    7.18 +    outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
    7.19 +    v = inl(0xcfc); 
    7.20 +    if (v != 0xffffffff)
    7.21 +        PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
    7.22 +    return v;
    7.23 +}
    7.24 +
    7.25 +static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
    7.26 +{
    7.27 +    u8 v; 
    7.28 +    outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
    7.29 +    v = inb(0xcfc + (offset&3)); 
    7.30 +    PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
    7.31 +    return v;
    7.32 +}
    7.33 +
    7.34 +static inline u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
    7.35 +{
    7.36 +    u16 v; 
    7.37 +    outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
    7.38 +    v = inw(0xcfc + (offset&2)); 
    7.39 +    PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
    7.40 +    return v;
    7.41 +}
    7.42 +
    7.43 +static inline void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
    7.44 +                    u32 val)
    7.45 +{
    7.46 +    PDprintk("%x writing to %x: %x\n", slot, offset, val); 
    7.47 +    outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
    7.48 +    outl(val, 0xcfc); 
    7.49 +}
    7.50 +
    7.51 +#endif
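The accessors above drive the legacy 0xCF8/0xCFC configuration mechanism directly, which is why the VT-d code can use them before the PCI subsystem is initialised. As a hypothetical sketch, not a function from this patch, and assuming the register names from pci_regs.h below are also available, a caller could walk a device's capability list like this:

    /*
     * Hypothetical sketch: return the config-space offset of capability
     * `cap` (e.g. PCI_CAP_ID_MSI) on bus:slot.func, or 0 if absent, using
     * only the raw accessors from pci-direct.h.
     */
    static u8 find_cap_sketch(u8 bus, u8 slot, u8 func, u8 cap)
    {
        u16 status = read_pci_config_16(bus, slot, func, PCI_STATUS);
        u8 pos, id;
        int ttl = 48;   /* bound the walk in case the list is malformed */

        if (!(status & PCI_STATUS_CAP_LIST))
            return 0;

        pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST);
        while (ttl-- && pos >= 0x40)
        {
            pos &= ~3;
            id = read_pci_config_byte(bus, slot, func, pos + PCI_CAP_LIST_ID);
            if (id == cap)
                return pos;
            pos = read_pci_config_byte(bus, slot, func,
                                       pos + PCI_CAP_LIST_NEXT);
        }
        return 0;
    }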
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xen/arch/x86/hvm/vmx/vtd/pci_regs.h	Fri Sep 14 16:40:49 2007 +0100
     8.3 @@ -0,0 +1,449 @@
     8.4 +/*
     8.5 + *	pci_regs.h
     8.6 + *
     8.7 + *	PCI standard defines
     8.8 + *	Copyright 1994, Drew Eckhardt
     8.9 + *	Copyright 1997--1999 Martin Mares <mj@ucw.cz>
    8.10 + *
    8.11 + *	For more information, please consult the following manuals (look at
    8.12 + *	http://www.pcisig.com/ for how to get them):
    8.13 + *
    8.14 + *	PCI BIOS Specification
    8.15 + *	PCI Local Bus Specification
    8.16 + *	PCI to PCI Bridge Specification
    8.17 + *	PCI System Design Guide
    8.18 + */
    8.19 +
    8.20 +#ifndef LINUX_PCI_REGS_H
    8.21 +#define LINUX_PCI_REGS_H
    8.22 +
    8.23 +/*
    8.24 + * Under PCI, each device has 256 bytes of configuration address space,
    8.25 + * of which the first 64 bytes are standardized as follows:
    8.26 + */
    8.27 +#define PCI_VENDOR_ID		0x00	/* 16 bits */
    8.28 +#define PCI_DEVICE_ID		0x02	/* 16 bits */
    8.29 +#define PCI_COMMAND		0x04	/* 16 bits */
    8.30 +#define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
    8.31 +#define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
    8.32 +#define  PCI_COMMAND_MASTER	0x4	/* Enable bus mastering */
    8.33 +#define  PCI_COMMAND_SPECIAL	0x8	/* Enable response to special cycles */
    8.34 +#define  PCI_COMMAND_INVALIDATE	0x10	/* Use memory write and invalidate */
    8.35 +#define  PCI_COMMAND_VGA_PALETTE 0x20	/* Enable palette snooping */
    8.36 +#define  PCI_COMMAND_PARITY	0x40	/* Enable parity checking */
    8.37 +#define  PCI_COMMAND_WAIT 	0x80	/* Enable address/data stepping */
    8.38 +#define  PCI_COMMAND_SERR	0x100	/* Enable SERR */
    8.39 +#define  PCI_COMMAND_FAST_BACK	0x200	/* Enable back-to-back writes */
    8.40 +#define  PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */
    8.41 +
    8.42 +#define PCI_STATUS		0x06	/* 16 bits */
    8.43 +#define  PCI_STATUS_CAP_LIST	0x10	/* Support Capability List */
     8.44 +#define  PCI_STATUS_66MHZ	0x20	/* Support 66 MHz PCI 2.1 bus */
    8.45 +#define  PCI_STATUS_UDF		0x40	/* Support User Definable Features [obsolete] */
     8.46 +#define  PCI_STATUS_FAST_BACK	0x80	/* Accept fast back-to-back */
    8.47 +#define  PCI_STATUS_PARITY	0x100	/* Detected parity error */
    8.48 +#define  PCI_STATUS_DEVSEL_MASK	0x600	/* DEVSEL timing */
    8.49 +#define  PCI_STATUS_DEVSEL_FAST		0x000
    8.50 +#define  PCI_STATUS_DEVSEL_MEDIUM	0x200
    8.51 +#define  PCI_STATUS_DEVSEL_SLOW		0x400
    8.52 +#define  PCI_STATUS_SIG_TARGET_ABORT	0x800 /* Set on target abort */
    8.53 +#define  PCI_STATUS_REC_TARGET_ABORT	0x1000 /* Master ack of " */
    8.54 +#define  PCI_STATUS_REC_MASTER_ABORT	0x2000 /* Set on master abort */
    8.55 +#define  PCI_STATUS_SIG_SYSTEM_ERROR	0x4000 /* Set when we drive SERR */
    8.56 +#define  PCI_STATUS_DETECTED_PARITY	0x8000 /* Set on parity error */
    8.57 +
    8.58 +#define PCI_CLASS_REVISION	0x08	/* High 24 bits are class, low 8 revision */
    8.59 +#define PCI_REVISION_ID		0x08	/* Revision ID */
    8.60 +#define PCI_CLASS_PROG		0x09	/* Reg. Level Programming Interface */
    8.61 +#define PCI_CLASS_DEVICE	0x0a	/* Device class */
    8.62 +
    8.63 +#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
    8.64 +#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
    8.65 +#define PCI_HEADER_TYPE		0x0e	/* 8 bits */
    8.66 +#define  PCI_HEADER_TYPE_NORMAL		0
    8.67 +#define  PCI_HEADER_TYPE_BRIDGE		1
    8.68 +#define  PCI_HEADER_TYPE_CARDBUS	2
    8.69 +
    8.70 +#define PCI_BIST		0x0f	/* 8 bits */
    8.71 +#define  PCI_BIST_CODE_MASK	0x0f	/* Return result */
    8.72 +#define  PCI_BIST_START		0x40	/* 1 to start BIST, 2 secs or less */
    8.73 +#define  PCI_BIST_CAPABLE	0x80	/* 1 if BIST capable */
    8.74 +
    8.75 +/*
    8.76 + * Base addresses specify locations in memory or I/O space.
    8.77 + * Decoded size can be determined by writing a value of
    8.78 + * 0xffffffff to the register, and reading it back.  Only
    8.79 + * 1 bits are decoded.
    8.80 + */
    8.81 +#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
    8.82 +#define PCI_BASE_ADDRESS_1	0x14	/* 32 bits [htype 0,1 only] */
    8.83 +#define PCI_BASE_ADDRESS_2	0x18	/* 32 bits [htype 0 only] */
    8.84 +#define PCI_BASE_ADDRESS_3	0x1c	/* 32 bits */
    8.85 +#define PCI_BASE_ADDRESS_4	0x20	/* 32 bits */
    8.86 +#define PCI_BASE_ADDRESS_5	0x24	/* 32 bits */
    8.87 +#define  PCI_BASE_ADDRESS_SPACE		0x01	/* 0 = memory, 1 = I/O */
    8.88 +#define  PCI_BASE_ADDRESS_SPACE_IO	0x01
    8.89 +#define  PCI_BASE_ADDRESS_SPACE_MEMORY	0x00
    8.90 +#define  PCI_BASE_ADDRESS_MEM_TYPE_MASK	0x06
    8.91 +#define  PCI_BASE_ADDRESS_MEM_TYPE_32	0x00	/* 32 bit address */
    8.92 +#define  PCI_BASE_ADDRESS_MEM_TYPE_1M	0x02	/* Below 1M [obsolete] */
    8.93 +#define  PCI_BASE_ADDRESS_MEM_TYPE_64	0x04	/* 64 bit address */
    8.94 +#define  PCI_BASE_ADDRESS_MEM_PREFETCH	0x08	/* prefetchable? */
    8.95 +#define  PCI_BASE_ADDRESS_MEM_MASK	(~0x0fUL)
    8.96 +#define  PCI_BASE_ADDRESS_IO_MASK	(~0x03UL)
    8.97 +/* bit 1 is reserved if address_space = 1 */
    8.98 +
    8.99 +/* Header type 0 (normal devices) */
   8.100 +#define PCI_CARDBUS_CIS		0x28
   8.101 +#define PCI_SUBSYSTEM_VENDOR_ID	0x2c
   8.102 +#define PCI_SUBSYSTEM_ID	0x2e
   8.103 +#define PCI_ROM_ADDRESS		0x30	/* Bits 31..11 are address, 10..1 reserved */
   8.104 +#define  PCI_ROM_ADDRESS_ENABLE	0x01
   8.105 +#define PCI_ROM_ADDRESS_MASK	(~0x7ffUL)
   8.106 +
   8.107 +#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
   8.108 +
   8.109 +/* 0x35-0x3b are reserved */
   8.110 +#define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
   8.111 +#define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
   8.112 +#define PCI_MIN_GNT		0x3e	/* 8 bits */
   8.113 +#define PCI_MAX_LAT		0x3f	/* 8 bits */
   8.114 +
   8.115 +/* Header type 1 (PCI-to-PCI bridges) */
   8.116 +#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
   8.117 +#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
   8.118 +#define PCI_SUBORDINATE_BUS	0x1a	/* Highest bus number behind the bridge */
   8.119 +#define PCI_SEC_LATENCY_TIMER	0x1b	/* Latency timer for secondary interface */
   8.120 +#define PCI_IO_BASE		0x1c	/* I/O range behind the bridge */
   8.121 +#define PCI_IO_LIMIT		0x1d
   8.122 +#define  PCI_IO_RANGE_TYPE_MASK	0x0fUL	/* I/O bridging type */
   8.123 +#define  PCI_IO_RANGE_TYPE_16	0x00
   8.124 +#define  PCI_IO_RANGE_TYPE_32	0x01
   8.125 +#define  PCI_IO_RANGE_MASK	(~0x0fUL)
   8.126 +#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
   8.127 +#define PCI_MEMORY_BASE		0x20	/* Memory range behind */
   8.128 +#define PCI_MEMORY_LIMIT	0x22
   8.129 +#define  PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
   8.130 +#define  PCI_MEMORY_RANGE_MASK	(~0x0fUL)
   8.131 +#define PCI_PREF_MEMORY_BASE	0x24	/* Prefetchable memory range behind */
   8.132 +#define PCI_PREF_MEMORY_LIMIT	0x26
   8.133 +#define  PCI_PREF_RANGE_TYPE_MASK 0x0fUL
   8.134 +#define  PCI_PREF_RANGE_TYPE_32	0x00
   8.135 +#define  PCI_PREF_RANGE_TYPE_64	0x01
   8.136 +#define  PCI_PREF_RANGE_MASK	(~0x0fUL)
   8.137 +#define PCI_PREF_BASE_UPPER32	0x28	/* Upper half of prefetchable memory range */
   8.138 +#define PCI_PREF_LIMIT_UPPER32	0x2c
   8.139 +#define PCI_IO_BASE_UPPER16	0x30	/* Upper half of I/O addresses */
   8.140 +#define PCI_IO_LIMIT_UPPER16	0x32
   8.141 +/* 0x34 same as for htype 0 */
   8.142 +/* 0x35-0x3b is reserved */
   8.143 +#define PCI_ROM_ADDRESS1	0x38	/* Same as PCI_ROM_ADDRESS, but for htype 1 */
   8.144 +/* 0x3c-0x3d are same as for htype 0 */
   8.145 +#define PCI_BRIDGE_CONTROL	0x3e
   8.146 +#define  PCI_BRIDGE_CTL_PARITY	0x01	/* Enable parity detection on secondary interface */
   8.147 +#define  PCI_BRIDGE_CTL_SERR	0x02	/* The same for SERR forwarding */
   8.148 +#define  PCI_BRIDGE_CTL_NO_ISA	0x04	/* Disable bridging of ISA ports */
   8.149 +#define  PCI_BRIDGE_CTL_VGA	0x08	/* Forward VGA addresses */
   8.150 +#define  PCI_BRIDGE_CTL_MASTER_ABORT	0x20  /* Report master aborts */
   8.151 +#define  PCI_BRIDGE_CTL_BUS_RESET	0x40	/* Secondary bus reset */
   8.152 +#define  PCI_BRIDGE_CTL_FAST_BACK	0x80	/* Fast Back2Back enabled on secondary interface */
   8.153 +
   8.154 +/* Header type 2 (CardBus bridges) */
   8.155 +#define PCI_CB_CAPABILITY_LIST	0x14
   8.156 +/* 0x15 reserved */
   8.157 +#define PCI_CB_SEC_STATUS	0x16	/* Secondary status */
   8.158 +#define PCI_CB_PRIMARY_BUS	0x18	/* PCI bus number */
   8.159 +#define PCI_CB_CARD_BUS		0x19	/* CardBus bus number */
   8.160 +#define PCI_CB_SUBORDINATE_BUS	0x1a	/* Subordinate bus number */
   8.161 +#define PCI_CB_LATENCY_TIMER	0x1b	/* CardBus latency timer */
   8.162 +#define PCI_CB_MEMORY_BASE_0	0x1c
   8.163 +#define PCI_CB_MEMORY_LIMIT_0	0x20
   8.164 +#define PCI_CB_MEMORY_BASE_1	0x24
   8.165 +#define PCI_CB_MEMORY_LIMIT_1	0x28
   8.166 +#define PCI_CB_IO_BASE_0	0x2c
   8.167 +#define PCI_CB_IO_BASE_0_HI	0x2e
   8.168 +#define PCI_CB_IO_LIMIT_0	0x30
   8.169 +#define PCI_CB_IO_LIMIT_0_HI	0x32
   8.170 +#define PCI_CB_IO_BASE_1	0x34
   8.171 +#define PCI_CB_IO_BASE_1_HI	0x36
   8.172 +#define PCI_CB_IO_LIMIT_1	0x38
   8.173 +#define PCI_CB_IO_LIMIT_1_HI	0x3a
   8.174 +#define  PCI_CB_IO_RANGE_MASK	(~0x03UL)
   8.175 +/* 0x3c-0x3d are same as for htype 0 */
   8.176 +#define PCI_CB_BRIDGE_CONTROL	0x3e
   8.177 +#define  PCI_CB_BRIDGE_CTL_PARITY	0x01	/* Similar to standard bridge control register */
   8.178 +#define  PCI_CB_BRIDGE_CTL_SERR		0x02
   8.179 +#define  PCI_CB_BRIDGE_CTL_ISA		0x04
   8.180 +#define  PCI_CB_BRIDGE_CTL_VGA		0x08
   8.181 +#define  PCI_CB_BRIDGE_CTL_MASTER_ABORT	0x20
   8.182 +#define  PCI_CB_BRIDGE_CTL_CB_RESET	0x40	/* CardBus reset */
   8.183 +#define  PCI_CB_BRIDGE_CTL_16BIT_INT	0x80	/* Enable interrupt for 16-bit cards */
   8.184 +#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100	/* Prefetch enable for both memory regions */
   8.185 +#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
   8.186 +#define  PCI_CB_BRIDGE_CTL_POST_WRITES	0x400
   8.187 +#define PCI_CB_SUBSYSTEM_VENDOR_ID	0x40
   8.188 +#define PCI_CB_SUBSYSTEM_ID		0x42
   8.189 +#define PCI_CB_LEGACY_MODE_BASE		0x44	/* 16-bit PC Card legacy mode base address (ExCa) */
   8.190 +/* 0x48-0x7f reserved */
   8.191 +
   8.192 +/* Capability lists */
   8.193 +
   8.194 +#define PCI_CAP_LIST_ID		0	/* Capability ID */
   8.195 +#define  PCI_CAP_ID_PM		0x01	/* Power Management */
   8.196 +#define  PCI_CAP_ID_AGP		0x02	/* Accelerated Graphics Port */
   8.197 +#define  PCI_CAP_ID_VPD		0x03	/* Vital Product Data */
   8.198 +#define  PCI_CAP_ID_SLOTID	0x04	/* Slot Identification */
   8.199 +#define  PCI_CAP_ID_MSI		0x05	/* Message Signalled Interrupts */
   8.200 +#define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
   8.201 +#define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
   8.202 +#define  PCI_CAP_ID_HT_IRQCONF	0x08	/* HyperTransport IRQ Configuration */
   8.203 +#define  PCI_CAP_ID_SHPC 	0x0C	/* PCI Standard Hot-Plug Controller */
   8.204 +#define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
   8.205 +#define  PCI_CAP_ID_MSIX	0x11	/* MSI-X */
   8.206 +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
   8.207 +#define PCI_CAP_FLAGS		2	/* Capability defined flags (16 bits) */
   8.208 +#define PCI_CAP_SIZEOF		4
   8.209 +
   8.210 +/* Power Management Registers */
   8.211 +
   8.212 +#define PCI_PM_PMC		2	/* PM Capabilities Register */
   8.213 +#define  PCI_PM_CAP_VER_MASK	0x0007	/* Version */
   8.214 +#define  PCI_PM_CAP_PME_CLOCK	0x0008	/* PME clock required */
   8.215 +#define  PCI_PM_CAP_RESERVED    0x0010  /* Reserved field */
   8.216 +#define  PCI_PM_CAP_DSI		0x0020	/* Device specific initialization */
    8.217 +#define  PCI_PM_CAP_AUX_POWER	0x01C0	/* Auxiliary power support mask */
   8.218 +#define  PCI_PM_CAP_D1		0x0200	/* D1 power state support */
   8.219 +#define  PCI_PM_CAP_D2		0x0400	/* D2 power state support */
   8.220 +#define  PCI_PM_CAP_PME		0x0800	/* PME pin supported */
   8.221 +#define  PCI_PM_CAP_PME_MASK	0xF800	/* PME Mask of all supported states */
   8.222 +#define  PCI_PM_CAP_PME_D0	0x0800	/* PME# from D0 */
   8.223 +#define  PCI_PM_CAP_PME_D1	0x1000	/* PME# from D1 */
   8.224 +#define  PCI_PM_CAP_PME_D2	0x2000	/* PME# from D2 */
   8.225 +#define  PCI_PM_CAP_PME_D3	0x4000	/* PME# from D3 (hot) */
   8.226 +#define  PCI_PM_CAP_PME_D3cold	0x8000	/* PME# from D3 (cold) */
   8.227 +#define PCI_PM_CTRL		4	/* PM control and status register */
   8.228 +#define  PCI_PM_CTRL_STATE_MASK	0x0003	/* Current power state (D0 to D3) */
   8.229 +#define  PCI_PM_CTRL_NO_SOFT_RESET	0x0004	/* No reset for D3hot->D0 */
   8.230 +#define  PCI_PM_CTRL_PME_ENABLE	0x0100	/* PME pin enable */
   8.231 +#define  PCI_PM_CTRL_DATA_SEL_MASK	0x1e00	/* Data select (??) */
   8.232 +#define  PCI_PM_CTRL_DATA_SCALE_MASK	0x6000	/* Data scale (??) */
   8.233 +#define  PCI_PM_CTRL_PME_STATUS	0x8000	/* PME pin status */
   8.234 +#define PCI_PM_PPB_EXTENSIONS	6	/* PPB support extensions (??) */
   8.235 +#define  PCI_PM_PPB_B2_B3	0x40	/* Stop clock when in D3hot (??) */
   8.236 +#define  PCI_PM_BPCC_ENABLE	0x80	/* Bus power/clock control enable (??) */
   8.237 +#define PCI_PM_DATA_REGISTER	7	/* (??) */
   8.238 +#define PCI_PM_SIZEOF		8
   8.239 +
   8.240 +/* AGP registers */
   8.241 +
   8.242 +#define PCI_AGP_VERSION		2	/* BCD version number */
   8.243 +#define PCI_AGP_RFU		3	/* Rest of capability flags */
   8.244 +#define PCI_AGP_STATUS		4	/* Status register */
   8.245 +#define  PCI_AGP_STATUS_RQ_MASK	0xff000000	/* Maximum number of requests - 1 */
   8.246 +#define  PCI_AGP_STATUS_SBA	0x0200	/* Sideband addressing supported */
   8.247 +#define  PCI_AGP_STATUS_64BIT	0x0020	/* 64-bit addressing supported */
   8.248 +#define  PCI_AGP_STATUS_FW	0x0010	/* FW transfers supported */
   8.249 +#define  PCI_AGP_STATUS_RATE4	0x0004	/* 4x transfer rate supported */
   8.250 +#define  PCI_AGP_STATUS_RATE2	0x0002	/* 2x transfer rate supported */
   8.251 +#define  PCI_AGP_STATUS_RATE1	0x0001	/* 1x transfer rate supported */
   8.252 +#define PCI_AGP_COMMAND		8	/* Control register */
   8.253 +#define  PCI_AGP_COMMAND_RQ_MASK 0xff000000  /* Master: Maximum number of requests */
   8.254 +#define  PCI_AGP_COMMAND_SBA	0x0200	/* Sideband addressing enabled */
   8.255 +#define  PCI_AGP_COMMAND_AGP	0x0100	/* Allow processing of AGP transactions */
   8.256 +#define  PCI_AGP_COMMAND_64BIT	0x0020 	/* Allow processing of 64-bit addresses */
   8.257 +#define  PCI_AGP_COMMAND_FW	0x0010 	/* Force FW transfers */
   8.258 +#define  PCI_AGP_COMMAND_RATE4	0x0004	/* Use 4x rate */
   8.259 +#define  PCI_AGP_COMMAND_RATE2	0x0002	/* Use 2x rate */
   8.260 +#define  PCI_AGP_COMMAND_RATE1	0x0001	/* Use 1x rate */
   8.261 +#define PCI_AGP_SIZEOF		12
   8.262 +
   8.263 +/* Vital Product Data */
   8.264 +
   8.265 +#define PCI_VPD_ADDR		2	/* Address to access (15 bits!) */
   8.266 +#define  PCI_VPD_ADDR_MASK	0x7fff	/* Address mask */
   8.267 +#define  PCI_VPD_ADDR_F		0x8000	/* Write 0, 1 indicates completion */
   8.268 +#define PCI_VPD_DATA		4	/* 32-bits of data returned here */
   8.269 +
   8.270 +/* Slot Identification */
   8.271 +
   8.272 +#define PCI_SID_ESR		2	/* Expansion Slot Register */
   8.273 +#define  PCI_SID_ESR_NSLOTS	0x1f	/* Number of expansion slots available */
   8.274 +#define  PCI_SID_ESR_FIC	0x20	/* First In Chassis Flag */
   8.275 +#define PCI_SID_CHASSIS_NR	3	/* Chassis Number */
   8.276 +
   8.277 +/* Message Signalled Interrupts registers */
   8.278 +
   8.279 +#define PCI_MSI_FLAGS		2	/* Various flags */
   8.280 +#define  PCI_MSI_FLAGS_64BIT	0x80	/* 64-bit addresses allowed */
   8.281 +#define  PCI_MSI_FLAGS_QSIZE	0x70	/* Message queue size configured */
   8.282 +#define  PCI_MSI_FLAGS_QMASK	0x0e	/* Maximum queue size available */
   8.283 +#define  PCI_MSI_FLAGS_ENABLE	0x01	/* MSI feature enabled */
   8.284 +#define  PCI_MSI_FLAGS_MASKBIT	0x100	/* 64-bit mask bits allowed */
   8.285 +#define PCI_MSI_RFU		3	/* Rest of capability flags */
   8.286 +#define PCI_MSI_ADDRESS_LO	4	/* Lower 32 bits */
   8.287 +#define PCI_MSI_ADDRESS_HI	8	/* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
   8.288 +#define PCI_MSI_DATA_32		8	/* 16 bits of data for 32-bit devices */
   8.289 +#define PCI_MSI_DATA_64		12	/* 16 bits of data for 64-bit devices */
   8.290 +#define PCI_MSI_MASK_BIT	16	/* Mask bits register */
   8.291 +
   8.292 +/* CompactPCI Hotswap Register */
   8.293 +
   8.294 +#define PCI_CHSWP_CSR		2	/* Control and Status Register */
   8.295 +#define  PCI_CHSWP_DHA		0x01	/* Device Hiding Arm */
   8.296 +#define  PCI_CHSWP_EIM		0x02	/* ENUM# Signal Mask */
   8.297 +#define  PCI_CHSWP_PIE		0x04	/* Pending Insert or Extract */
   8.298 +#define  PCI_CHSWP_LOO		0x08	/* LED On / Off */
   8.299 +#define  PCI_CHSWP_PI		0x30	/* Programming Interface */
   8.300 +#define  PCI_CHSWP_EXT		0x40	/* ENUM# status - extraction */
   8.301 +#define  PCI_CHSWP_INS		0x80	/* ENUM# status - insertion */
   8.302 +
   8.303 +/* PCI-X registers */
   8.304 +
   8.305 +#define PCI_X_CMD		2	/* Modes & Features */
   8.306 +#define  PCI_X_CMD_DPERR_E	0x0001	/* Data Parity Error Recovery Enable */
   8.307 +#define  PCI_X_CMD_ERO		0x0002	/* Enable Relaxed Ordering */
   8.308 +#define  PCI_X_CMD_MAX_READ	0x000c	/* Max Memory Read Byte Count */
   8.309 +#define  PCI_X_CMD_MAX_SPLIT	0x0070	/* Max Outstanding Split Transactions */
   8.310 +#define  PCI_X_CMD_VERSION(x) 	(((x) >> 12) & 3) /* Version */
   8.311 +#define PCI_X_STATUS		4	/* PCI-X capabilities */
   8.312 +#define  PCI_X_STATUS_DEVFN	0x000000ff	/* A copy of devfn */
   8.313 +#define  PCI_X_STATUS_BUS	0x0000ff00	/* A copy of bus nr */
   8.314 +#define  PCI_X_STATUS_64BIT	0x00010000	/* 64-bit device */
   8.315 +#define  PCI_X_STATUS_133MHZ	0x00020000	/* 133 MHz capable */
   8.316 +#define  PCI_X_STATUS_SPL_DISC	0x00040000	/* Split Completion Discarded */
   8.317 +#define  PCI_X_STATUS_UNX_SPL	0x00080000	/* Unexpected Split Completion */
   8.318 +#define  PCI_X_STATUS_COMPLEX	0x00100000	/* Device Complexity */
   8.319 +#define  PCI_X_STATUS_MAX_READ	0x00600000	/* Designed Max Memory Read Count */
   8.320 +#define  PCI_X_STATUS_MAX_SPLIT	0x03800000	/* Designed Max Outstanding Split Transactions */
   8.321 +#define  PCI_X_STATUS_MAX_CUM	0x1c000000	/* Designed Max Cumulative Read Size */
   8.322 +#define  PCI_X_STATUS_SPL_ERR	0x20000000	/* Rcvd Split Completion Error Msg */
   8.323 +#define  PCI_X_STATUS_266MHZ	0x40000000	/* 266 MHz capable */
   8.324 +#define  PCI_X_STATUS_533MHZ	0x80000000	/* 533 MHz capable */
   8.325 +
   8.326 +/* PCI Express capability registers */
   8.327 +
   8.328 +#define PCI_EXP_FLAGS		2	/* Capabilities register */
   8.329 +#define PCI_EXP_FLAGS_VERS	0x000f	/* Capability version */
   8.330 +#define PCI_EXP_FLAGS_TYPE	0x00f0	/* Device/Port type */
   8.331 +#define  PCI_EXP_TYPE_ENDPOINT	0x0	/* Express Endpoint */
   8.332 +#define  PCI_EXP_TYPE_LEG_END	0x1	/* Legacy Endpoint */
   8.333 +#define  PCI_EXP_TYPE_ROOT_PORT 0x4	/* Root Port */
   8.334 +#define  PCI_EXP_TYPE_UPSTREAM	0x5	/* Upstream Port */
   8.335 +#define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
   8.336 +#define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
   8.337 +#define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
   8.338 +#define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
   8.339 +#define PCI_EXP_DEVCAP		4	/* Device capabilities */
   8.340 +#define  PCI_EXP_DEVCAP_PAYLOAD	0x07	/* Max_Payload_Size */
   8.341 +#define  PCI_EXP_DEVCAP_PHANTOM	0x18	/* Phantom functions */
   8.342 +#define  PCI_EXP_DEVCAP_EXT_TAG	0x20	/* Extended tags */
   8.343 +#define  PCI_EXP_DEVCAP_L0S	0x1c0	/* L0s Acceptable Latency */
   8.344 +#define  PCI_EXP_DEVCAP_L1	0xe00	/* L1 Acceptable Latency */
   8.345 +#define  PCI_EXP_DEVCAP_ATN_BUT	0x1000	/* Attention Button Present */
   8.346 +#define  PCI_EXP_DEVCAP_ATN_IND	0x2000	/* Attention Indicator Present */
   8.347 +#define  PCI_EXP_DEVCAP_PWR_IND	0x4000	/* Power Indicator Present */
   8.348 +#define  PCI_EXP_DEVCAP_PWR_VAL	0x3fc0000 /* Slot Power Limit Value */
   8.349 +#define  PCI_EXP_DEVCAP_PWR_SCL	0xc000000 /* Slot Power Limit Scale */
   8.350 +#define PCI_EXP_DEVCTL		8	/* Device Control */
   8.351 +#define  PCI_EXP_DEVCTL_CERE	0x0001	/* Correctable Error Reporting En. */
   8.352 +#define  PCI_EXP_DEVCTL_NFERE	0x0002	/* Non-Fatal Error Reporting Enable */
   8.353 +#define  PCI_EXP_DEVCTL_FERE	0x0004	/* Fatal Error Reporting Enable */
   8.354 +#define  PCI_EXP_DEVCTL_URRE	0x0008	/* Unsupported Request Reporting En. */
   8.355 +#define  PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
   8.356 +#define  PCI_EXP_DEVCTL_PAYLOAD	0x00e0	/* Max_Payload_Size */
   8.357 +#define  PCI_EXP_DEVCTL_EXT_TAG	0x0100	/* Extended Tag Field Enable */
   8.358 +#define  PCI_EXP_DEVCTL_PHANTOM	0x0200	/* Phantom Functions Enable */
   8.359 +#define  PCI_EXP_DEVCTL_AUX_PME	0x0400	/* Auxiliary Power PM Enable */
   8.360 +#define  PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800  /* Enable No Snoop */
   8.361 +#define  PCI_EXP_DEVCTL_READRQ	0x7000	/* Max_Read_Request_Size */
   8.362 +#define PCI_EXP_DEVSTA		10	/* Device Status */
   8.363 +#define  PCI_EXP_DEVSTA_CED	0x01	/* Correctable Error Detected */
   8.364 +#define  PCI_EXP_DEVSTA_NFED	0x02	/* Non-Fatal Error Detected */
   8.365 +#define  PCI_EXP_DEVSTA_FED	0x04	/* Fatal Error Detected */
   8.366 +#define  PCI_EXP_DEVSTA_URD	0x08	/* Unsupported Request Detected */
   8.367 +#define  PCI_EXP_DEVSTA_AUXPD	0x10	/* AUX Power Detected */
   8.368 +#define  PCI_EXP_DEVSTA_TRPND	0x20	/* Transactions Pending */
   8.369 +#define PCI_EXP_LNKCAP		12	/* Link Capabilities */
   8.370 +#define PCI_EXP_LNKCTL		16	/* Link Control */
   8.371 +#define PCI_EXP_LNKSTA		18	/* Link Status */
   8.372 +#define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
   8.373 +#define PCI_EXP_SLTCTL		24	/* Slot Control */
   8.374 +#define PCI_EXP_SLTSTA		26	/* Slot Status */
   8.375 +#define PCI_EXP_RTCTL		28	/* Root Control */
   8.376 +#define  PCI_EXP_RTCTL_SECEE	0x01	/* System Error on Correctable Error */
   8.377 +#define  PCI_EXP_RTCTL_SENFEE	0x02	/* System Error on Non-Fatal Error */
   8.378 +#define  PCI_EXP_RTCTL_SEFEE	0x04	/* System Error on Fatal Error */
   8.379 +#define  PCI_EXP_RTCTL_PMEIE	0x08	/* PME Interrupt Enable */
   8.380 +#define  PCI_EXP_RTCTL_CRSSVE	0x10	/* CRS Software Visibility Enable */
   8.381 +#define PCI_EXP_RTCAP		30	/* Root Capabilities */
   8.382 +#define PCI_EXP_RTSTA		32	/* Root Status */
   8.383 +
   8.384 +/* Extended Capabilities (PCI-X 2.0 and Express) */
   8.385 +#define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
   8.386 +#define PCI_EXT_CAP_VER(header)		((header >> 16) & 0xf)
   8.387 +#define PCI_EXT_CAP_NEXT(header)	((header >> 20) & 0xffc)
   8.388 +
   8.389 +#define PCI_EXT_CAP_ID_ERR	1
   8.390 +#define PCI_EXT_CAP_ID_VC	2
   8.391 +#define PCI_EXT_CAP_ID_DSN	3
   8.392 +#define PCI_EXT_CAP_ID_PWR	4
   8.393 +
   8.394 +/* Advanced Error Reporting */
   8.395 +#define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
   8.396 +#define  PCI_ERR_UNC_TRAIN	0x00000001	/* Training */
   8.397 +#define  PCI_ERR_UNC_DLP	0x00000010	/* Data Link Protocol */
   8.398 +#define  PCI_ERR_UNC_POISON_TLP	0x00001000	/* Poisoned TLP */
   8.399 +#define  PCI_ERR_UNC_FCP	0x00002000	/* Flow Control Protocol */
   8.400 +#define  PCI_ERR_UNC_COMP_TIME	0x00004000	/* Completion Timeout */
   8.401 +#define  PCI_ERR_UNC_COMP_ABORT	0x00008000	/* Completer Abort */
   8.402 +#define  PCI_ERR_UNC_UNX_COMP	0x00010000	/* Unexpected Completion */
   8.403 +#define  PCI_ERR_UNC_RX_OVER	0x00020000	/* Receiver Overflow */
   8.404 +#define  PCI_ERR_UNC_MALF_TLP	0x00040000	/* Malformed TLP */
   8.405 +#define  PCI_ERR_UNC_ECRC	0x00080000	/* ECRC Error Status */
   8.406 +#define  PCI_ERR_UNC_UNSUP	0x00100000	/* Unsupported Request */
   8.407 +#define PCI_ERR_UNCOR_MASK	8	/* Uncorrectable Error Mask */
   8.408 +	/* Same bits as above */
   8.409 +#define PCI_ERR_UNCOR_SEVER	12	/* Uncorrectable Error Severity */
   8.410 +	/* Same bits as above */
   8.411 +#define PCI_ERR_COR_STATUS	16	/* Correctable Error Status */
   8.412 +#define  PCI_ERR_COR_RCVR	0x00000001	/* Receiver Error Status */
   8.413 +#define  PCI_ERR_COR_BAD_TLP	0x00000040	/* Bad TLP Status */
   8.414 +#define  PCI_ERR_COR_BAD_DLLP	0x00000080	/* Bad DLLP Status */
   8.415 +#define  PCI_ERR_COR_REP_ROLL	0x00000100	/* REPLAY_NUM Rollover */
   8.416 +#define  PCI_ERR_COR_REP_TIMER	0x00001000	/* Replay Timer Timeout */
   8.417 +#define PCI_ERR_COR_MASK	20	/* Correctable Error Mask */
   8.418 +	/* Same bits as above */
   8.419 +#define PCI_ERR_CAP		24	/* Advanced Error Capabilities */
   8.420 +#define  PCI_ERR_CAP_FEP(x)	((x) & 31)	/* First Error Pointer */
   8.421 +#define  PCI_ERR_CAP_ECRC_GENC	0x00000020	/* ECRC Generation Capable */
   8.422 +#define  PCI_ERR_CAP_ECRC_GENE	0x00000040	/* ECRC Generation Enable */
   8.423 +#define  PCI_ERR_CAP_ECRC_CHKC	0x00000080	/* ECRC Check Capable */
   8.424 +#define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
   8.425 +#define PCI_ERR_HEADER_LOG	28	/* Header Log Register (16 bytes) */
   8.426 +#define PCI_ERR_ROOT_COMMAND	44	/* Root Error Command */
   8.427 +#define PCI_ERR_ROOT_STATUS	48
   8.428 +#define PCI_ERR_ROOT_COR_SRC	52
   8.429 +#define PCI_ERR_ROOT_SRC	54
   8.430 +
   8.431 +/* Virtual Channel */
   8.432 +#define PCI_VC_PORT_REG1	4
   8.433 +#define PCI_VC_PORT_REG2	8
   8.434 +#define PCI_VC_PORT_CTRL	12
   8.435 +#define PCI_VC_PORT_STATUS	14
   8.436 +#define PCI_VC_RES_CAP		16
   8.437 +#define PCI_VC_RES_CTRL		20
   8.438 +#define PCI_VC_RES_STATUS	26
   8.439 +
   8.440 +/* Power Budgeting */
   8.441 +#define PCI_PWR_DSR		4	/* Data Select Register */
   8.442 +#define PCI_PWR_DATA		8	/* Data Register */
   8.443 +#define  PCI_PWR_DATA_BASE(x)	((x) & 0xff)	    /* Base Power */
   8.444 +#define  PCI_PWR_DATA_SCALE(x)	(((x) >> 8) & 3)    /* Data Scale */
   8.445 +#define  PCI_PWR_DATA_PM_SUB(x)	(((x) >> 10) & 7)   /* PM Sub State */
   8.446 +#define  PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
   8.447 +#define  PCI_PWR_DATA_TYPE(x)	(((x) >> 15) & 7)   /* Type */
   8.448 +#define  PCI_PWR_DATA_RAIL(x)	(((x) >> 18) & 7)   /* Power Rail */
   8.449 +#define PCI_PWR_CAP		12	/* Capability */
   8.450 +#define  PCI_PWR_CAP_BUDGET(x)	((x) & 1)	/* Included in system budget */
   8.451 +
   8.452 +#endif /* LINUX_PCI_REGS_H */
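The base-address comment above PCI_BASE_ADDRESS_0 describes the standard BAR sizing probe: write all ones, read back, and only the writable address bits come back set. Below is a minimal sketch of that probe for BAR0, assuming the raw accessors from pci-direct.h; bar0_size_sketch is hypothetical and deliberately ignores 64-bit BAR handling.

    /*
     * Hypothetical sketch: size BAR0 of bus:slot.func with the all-ones
     * probe.  Returns 0 if the BAR is not implemented; 64-bit BARs
     * (PCI_BASE_ADDRESS_MEM_TYPE_64) are not handled here.
     */
    static u32 bar0_size_sketch(u8 bus, u8 slot, u8 func)
    {
        u32 orig, probe, mask;

        orig = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
        write_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0, 0xffffffff);
        probe = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
        write_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0, orig);

        if (probe == 0)
            return 0;

        if (orig & PCI_BASE_ADDRESS_SPACE_IO)
            mask = probe & PCI_BASE_ADDRESS_IO_MASK;
        else
            mask = probe & PCI_BASE_ADDRESS_MEM_MASK;

        return ~mask + 1;   /* the lowest writable address bit gives the size */
    }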
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/xen/arch/x86/hvm/vmx/vtd/utils.c	Fri Sep 14 16:40:49 2007 +0100
     9.3 @@ -0,0 +1,302 @@
     9.4 +/*
     9.5 + * Copyright (c) 2006, Intel Corporation.
     9.6 + *
     9.7 + * This program is free software; you can redistribute it and/or modify it
     9.8 + * under the terms and conditions of the GNU General Public License,
     9.9 + * version 2, as published by the Free Software Foundation.
    9.10 + *
    9.11 + * This program is distributed in the hope it will be useful, but WITHOUT
    9.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    9.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    9.14 + * more details.
    9.15 + *
    9.16 + * You should have received a copy of the GNU General Public License along with
    9.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    9.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    9.19 + *
    9.20 + * Copyright (C) Allen Kay <allen.m.kay@intel.com>
    9.21 + */
    9.22 +
    9.23 +#include <xen/init.h>
    9.24 +#include <xen/bitmap.h>
    9.25 +#include <xen/irq.h>
    9.26 +#include <xen/spinlock.h>
    9.27 +#include <xen/sched.h>
    9.28 +#include <asm/delay.h>
    9.29 +#include <asm/iommu.h>
    9.30 +#include <asm/hvm/vmx/intel-iommu.h>
    9.31 +#include "dmar.h"
    9.32 +#include "pci-direct.h"
    9.33 +#include "pci_regs.h"
    9.34 +#include "msi.h"
    9.35 +
    9.36 +#include <xen/mm.h>
    9.37 +#include <xen/xmalloc.h>
    9.38 +
    9.39 +#if defined(__x86_64__)
    9.40 +void print_iommu_regs(struct acpi_drhd_unit *drhd)
    9.41 +{
    9.42 +    struct iommu *iommu = drhd->iommu;
    9.43 + 
    9.44 +    printk("---- print_iommu_regs ----\n"); 
    9.45 +    printk("print_iommu_regs: drhd->address = %lx\n", drhd->address);
    9.46 +    printk("print_iommu_regs: DMAR_VER_REG = %x\n",
    9.47 +                   dmar_readl(iommu->reg,DMAR_VER_REG));
    9.48 +    printk("print_iommu_regs: DMAR_CAP_REG = %lx\n",
    9.49 +                   dmar_readq(iommu->reg,DMAR_CAP_REG));
    9.50 +    printk("print_iommu_regs: n_fault_reg = %lx\n",
    9.51 +                   cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG)));
    9.52 +    printk("print_iommu_regs: fault_recording_offset_l = %lx\n",
    9.53 +                   cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)));
    9.54 +    printk("print_iommu_regs: fault_recording_offset_h = %lx\n",
    9.55 +                   cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8);
    9.56 +    printk("print_iommu_regs: fault_recording_reg_l = %lx\n",
    9.57 +        dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))));
    9.58 +    printk("print_iommu_regs: fault_recording_reg_h = %lx\n",
    9.59 +        dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8));
    9.60 +    printk("print_iommu_regs: DMAR_ECAP_REG = %lx\n",
    9.61 +                   dmar_readq(iommu->reg,DMAR_ECAP_REG));
    9.62 +    printk("print_iommu_regs: DMAR_GCMD_REG = %x\n",
    9.63 +                   dmar_readl(iommu->reg,DMAR_GCMD_REG));
    9.64 +    printk("print_iommu_regs: DMAR_GSTS_REG = %x\n",
    9.65 +                   dmar_readl(iommu->reg,DMAR_GSTS_REG));
    9.66 +    printk("print_iommu_regs: DMAR_RTADDR_REG = %lx\n",
    9.67 +                   dmar_readq(iommu->reg,DMAR_RTADDR_REG));
    9.68 +    printk("print_iommu_regs: DMAR_CCMD_REG = %lx\n",
    9.69 +                   dmar_readq(iommu->reg,DMAR_CCMD_REG));
    9.70 +    printk("print_iommu_regs: DMAR_FSTS_REG = %x\n",
    9.71 +                   dmar_readl(iommu->reg,DMAR_FSTS_REG));
    9.72 +    printk("print_iommu_regs: DMAR_FECTL_REG = %x\n",
    9.73 +                   dmar_readl(iommu->reg,DMAR_FECTL_REG));
    9.74 +    printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n",
    9.75 +                   dmar_readl(iommu->reg,DMAR_FEDATA_REG));
    9.76 +    printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n",
    9.77 +                   dmar_readl(iommu->reg,DMAR_FEADDR_REG));
    9.78 +    printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n",
    9.79 +                   dmar_readl(iommu->reg,DMAR_FEUADDR_REG));
    9.80 +}
    9.81 +
    9.82 +void print_vtd_entries(struct domain *d, int bus, int devfn,
    9.83 +                       unsigned long gmfn)
    9.84 +{
    9.85 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
    9.86 +    struct acpi_drhd_unit *drhd;
    9.87 +    struct iommu *iommu;
    9.88 +    struct context_entry *ctxt_entry;
    9.89 +    struct root_entry *root_entry;
    9.90 +    u64 *l3, *l2, *l1;
    9.91 +    u32 l3_index, l2_index, l1_index;
    9.92 +    u32 i = 0;
    9.93 +
    9.94 +    printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn = %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn);
    9.95 +
    9.96 +    for_each_drhd_unit(drhd) {
    9.97 +        printk("---- print_vtd_entries %d ----\n", i++);
    9.98 +
    9.99 +        if (hd->pgd == NULL) {
    9.100 +            printk("    hd->pgd == NULL\n");
   9.101 +            return;
   9.102 +        }
   9.103 +
   9.104 +        iommu = drhd->iommu;
   9.105 +        root_entry = iommu->root_entry;
   9.106 +        printk("    hd->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
   9.107 +               hd->pgd, virt_to_maddr(hd->pgd));
   9.108 +
   9.109 +        printk("    root_entry = %p\n", root_entry);
   9.110 +        if (root_entry == NULL) {
   9.111 +            printk("    root_entry == NULL\n");
   9.112 +            return;
   9.113 +        }
   9.114 +
   9.115 +        printk("    root_entry[%x] = %lx\n", bus, root_entry[bus].val);
   9.116 +        printk("    maddr_to_virt(root_entry[%x]) = %p\n",
   9.117 +            bus, maddr_to_virt(root_entry[bus].val));
   9.118 +
   9.119 +        if (root_entry[bus].val == 0) {
   9.120 +            printk("    root_entry[%x].lo == 0\n", bus);
   9.121 +            return;
   9.122 +        }
   9.123 + 
   9.124 +        ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
   9.125 +        if (ctxt_entry == NULL) {
   9.126 +            printk("    ctxt_entry == NULL\n");
   9.127 +            return;
   9.128 +        }
   9.129 +
   9.130 +        if (ctxt_entry[devfn].lo == 0) {
   9.131 +            printk("    ctxt_entry[%x].lo == 0\n", devfn);
   9.132 +            return;
   9.133 +        }
   9.134 +
   9.135 +        printk("    context = %p\n", ctxt_entry);
   9.136 +        printk("    context[%x] = %lx %lx\n",
   9.137 +               devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
   9.138 +        printk("    maddr_to_virt(context[%x].lo) = %p\n",
   9.139 +               devfn, maddr_to_virt(ctxt_entry[devfn].lo));
   9.140 +        printk("    context[%x] = %lx\n", devfn, ctxt_entry[devfn].lo); 
   9.141 +
   9.142 +        l3 = maddr_to_virt(ctxt_entry[devfn].lo);
   9.143 +        l3 = (u64*)(((u64) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
   9.144 +        printk("    l3 = %p\n", l3); 
   9.145 +        if (l3 == NULL) return;
   9.146 +
   9.147 +        l3_index = (gmfn >> 9 >> 9) & 0x1ff;
   9.148 +        printk("    l3_index = %x\n", l3_index);
   9.149 +        printk("    l3[%x] = %lx\n", l3_index, l3[l3_index]);
   9.150 +
   9.151 +        l2 = maddr_to_virt(l3[l3_index]);
   9.152 +        l2 = (u64*)(((u64) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
   9.153 +        printk("    l2 = %p\n", l2); 
   9.154 +        if (l2 == NULL) return;
   9.155 +
   9.156 +        l2_index = (gmfn >> 9) & 0x1ff;
   9.157 +        printk("    gmfn = %lx\n", gmfn);
   9.158 +        printk("    gmfn >> 9= %lx\n", gmfn >> 9);
   9.159 +        printk("    l2_index = %x\n", l2_index);
   9.160 +        printk("    l2[%x] = %lx\n", l2_index, l2[l2_index]);
   9.161 +
   9.162 +        l1 = maddr_to_virt(l2[l2_index]);
   9.163 +        l1 = (u64*)(((u64) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
   9.164 +        if (l1 == NULL) return;
   9.165 +        l1_index = gmfn & 0x1ff;
   9.166 +        printk("    l1 = %p\n", l1); 
   9.167 +        printk("    l1_index = %x\n", l1_index);
   9.168 +        printk("    l1[%x] = %lx\n", l1_index, l1[l1_index]); 
   9.169 +    }
   9.170 +}
   9.171 +
    9.172 +#else    // !__x86_64__
   9.173 +
   9.174 +void print_iommu_regs(struct acpi_drhd_unit *drhd)
   9.175 +{
   9.176 +    struct iommu *iommu = drhd->iommu;
   9.177 + 
   9.178 +    printk("---- print_iommu_regs ----\n"); 
   9.179 +    printk("print_iommu_regs: drhd->address = %lx\n", drhd->address);
   9.180 +    printk("print_iommu_regs: DMAR_VER_REG = %x\n",
   9.181 +                   dmar_readl(iommu->reg,DMAR_VER_REG));
   9.182 +    printk("print_iommu_regs: DMAR_CAP_REG = %llx\n",
   9.183 +                   dmar_readq(iommu->reg,DMAR_CAP_REG));
   9.184 +    printk("print_iommu_regs: n_fault_reg = %llx\n",
   9.185 +                   cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG)));
   9.186 +    printk("print_iommu_regs: fault_recording_offset_l = %llx\n",
   9.187 +                   cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)));
   9.188 +    printk("print_iommu_regs: fault_recording_offset_h = %llx\n",
   9.189 +                   cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8);
   9.190 +    printk("print_iommu_regs: fault_recording_reg_l = %llx\n",
   9.191 +        dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))));
   9.192 +    printk("print_iommu_regs: fault_recording_reg_h = %llx\n",
   9.193 +        dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8));
   9.194 +    printk("print_iommu_regs: DMAR_ECAP_REG = %llx\n",
   9.195 +                   dmar_readq(iommu->reg,DMAR_ECAP_REG));
   9.196 +    printk("print_iommu_regs: DMAR_GCMD_REG = %x\n",
   9.197 +                   dmar_readl(iommu->reg,DMAR_GCMD_REG));
   9.198 +    printk("print_iommu_regs: DMAR_GSTS_REG = %x\n",
   9.199 +                   dmar_readl(iommu->reg,DMAR_GSTS_REG));
   9.200 +    printk("print_iommu_regs: DMAR_RTADDR_REG = %llx\n",
   9.201 +                   dmar_readq(iommu->reg,DMAR_RTADDR_REG));
   9.202 +    printk("print_iommu_regs: DMAR_CCMD_REG = %llx\n",
   9.203 +                   dmar_readq(iommu->reg,DMAR_CCMD_REG));
   9.204 +    printk("print_iommu_regs: DMAR_FSTS_REG = %x\n",
   9.205 +                   dmar_readl(iommu->reg,DMAR_FSTS_REG));
   9.206 +    printk("print_iommu_regs: DMAR_FECTL_REG = %x\n",
   9.207 +                   dmar_readl(iommu->reg,DMAR_FECTL_REG));
   9.208 +    printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n",
   9.209 +                   dmar_readl(iommu->reg,DMAR_FEDATA_REG));
   9.210 +    printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n",
   9.211 +                   dmar_readl(iommu->reg,DMAR_FEADDR_REG));
   9.212 +    printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n",
   9.213 +                   dmar_readl(iommu->reg,DMAR_FEUADDR_REG));
   9.214 +}
   9.215 +
   9.216 +void print_vtd_entries(struct domain *d, int bus, int devfn,
   9.217 +                       unsigned long gmfn)
   9.218 +{
   9.219 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
   9.220 +    struct acpi_drhd_unit *drhd;
   9.221 +    struct iommu *iommu;
   9.222 +    struct context_entry *ctxt_entry;
   9.223 +    struct root_entry *root_entry;
   9.224 +    u64 *l3, *l2, *l1;
   9.225 +    u32 l3_index, l2_index, l1_index;
   9.226 +    u32 i = 0;
   9.227 +
   9.228 +    printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn = %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn);
   9.229 +
   9.230 +    for_each_drhd_unit(drhd) {
   9.231 +        printk("---- print_vtd_entries %d ----\n", i++);
   9.232 +
   9.233 +        if (hd->pgd == NULL) {
    9.234 +            printk("    hd->pgd == NULL\n");
   9.235 +            return;
   9.236 +        }
   9.237 +
   9.238 +        iommu = drhd->iommu;
   9.239 +        root_entry = iommu->root_entry;
    9.240 +        printk("    hd->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
   9.241 +               hd->pgd, virt_to_maddr(hd->pgd));
   9.242 +
   9.243 +        printk("    root_entry = %p\n", root_entry);
   9.244 +        if (root_entry == NULL) {
   9.245 +            printk("    root_entry == NULL\n");
   9.246 +            return;
   9.247 +        }
   9.248 +
   9.249 +        printk("    root_entry[%x] = %llx\n", bus, root_entry[bus].val);
   9.250 +        printk("    maddr_to_virt(root_entry[%x]) = %p\n",
   9.251 +            bus, maddr_to_virt(root_entry[bus].val));
   9.252 +
   9.253 +        if (root_entry[bus].val == 0) {
   9.254 +            printk("    root_entry[%x].lo == 0\n", bus);
   9.255 +            return;
   9.256 +        }
   9.257 + 
   9.258 +        ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
   9.259 +        if (ctxt_entry == NULL) {
   9.260 +            printk("    ctxt_entry == NULL\n");
   9.261 +            return;
   9.262 +        }
   9.263 +
   9.264 +        if (ctxt_entry[devfn].lo == 0) {
   9.265 +            printk("    ctxt_entry[%x].lo == 0\n", devfn);
   9.266 +            return;
   9.267 +        }
   9.268 +
   9.269 +        printk("    context = %p\n", ctxt_entry);
   9.270 +        printk("    context[%x] = %llx %llx\n",
   9.271 +               devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
   9.272 +        printk("    maddr_to_virt(context[%x].lo) = %p\n",
   9.273 +               devfn, maddr_to_virt(ctxt_entry[devfn].lo));
   9.274 +        printk("    context[%x] = %llx\n", devfn, ctxt_entry[devfn].lo); 
   9.275 +
   9.276 +        l3 = maddr_to_virt(ctxt_entry[devfn].lo);
   9.277 +        l3 = (u64*)(((u32) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
   9.278 +        printk("    l3 = %p\n", l3); 
   9.279 +        if (l3 == NULL) return;
   9.280 +
   9.281 +        l3_index = (gmfn >> 9 >> 9) & 0x1ff;
   9.282 +        printk("    l3_index = %x\n", l3_index);
   9.283 +        printk("    l3[%x] = %llx\n", l3_index, l3[l3_index]);
   9.284 +
   9.285 +        l2 = maddr_to_virt(l3[l3_index]);
   9.286 +        l2 = (u64*)(((u32) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
   9.287 +        printk("    l2 = %p\n", l2); 
   9.288 +        if (l2 == NULL) return;
   9.289 +
   9.290 +        l2_index = (gmfn >> 9) & 0x1ff;
   9.291 +        printk("    gmfn = %lx\n", gmfn);
   9.292 +        printk("    gmfn >> 9= %lx\n", gmfn >> 9);
   9.293 +        printk("    l2_index = %x\n", l2_index);
   9.294 +        printk("    l2[%x] = %llx\n", l2_index, l2[l2_index]);
   9.295 +
   9.296 +        l1 = maddr_to_virt(l2[l2_index]);
   9.297 +        l1 = (u64*)(((u32) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
   9.298 +        if (l1 == NULL) return;
   9.299 +        l1_index = gmfn & 0x1ff;
   9.300 +        printk("    l1 = %p\n", l1); 
   9.301 +        printk("    l1_index = %x\n", l1_index);
   9.302 +        printk("    l1[%x] = %llx\n", l1_index, l1[l1_index]); 
   9.303 +    }
   9.304 +}
    9.305 +#endif    // __x86_64__
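print_vtd_entries() above walks a three-level VT-d page table, peeling 9 bits of the guest frame number off per level (512-entry tables, 4KB pages). The same index arithmetic, restated as a hypothetical standalone helper for clarity:

    /*
     * Hypothetical restatement of the index arithmetic used by
     * print_vtd_entries(): each level decodes 9 bits of gmfn.
     */
    static void vtd_walk_indices_sketch(unsigned long gmfn,
                                        u32 *l3_index, u32 *l2_index,
                                        u32 *l1_index)
    {
        *l3_index = (gmfn >> 18) & 0x1ff;   /* == (gmfn >> 9 >> 9) & 0x1ff */
        *l2_index = (gmfn >> 9) & 0x1ff;
        *l1_index = gmfn & 0x1ff;
    }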