direct-io.hg

changeset 15523:d7e3224b661a

vtd patch
author root@lweybridge0-64-fc6.sc.intel.com
date Fri Jul 27 12:59:37 2007 -0700 (2007-07-27)
parents 3de536b5e5fe
children 87707c785bba
files tools/ioemu/Makefile.target tools/ioemu/hw/dpci.c tools/ioemu/hw/pc.c tools/ioemu/hw/pci.c tools/ioemu/vl.c tools/ioemu/vl.h tools/libxc/xc_domain.c tools/libxc/xenctrl.h tools/python/xen/xend/XendConfig.py tools/python/xen/xend/image.py tools/python/xen/xm/create.py xen/arch/x86/acpi/boot.c xen/arch/x86/domain.c xen/arch/x86/domctl.c xen/arch/x86/hvm/intercept.c xen/arch/x86/hvm/io.c xen/arch/x86/hvm/vioapic.c xen/arch/x86/hvm/vmx/Makefile xen/arch/x86/hvm/vmx/intr.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/hvm/vmx/vtd/Makefile xen/arch/x86/hvm/vmx/vtd/dmar.c xen/arch/x86/hvm/vmx/vtd/dmar.h xen/arch/x86/hvm/vmx/vtd/intel-iommu.c xen/arch/x86/hvm/vmx/vtd/msi.h xen/arch/x86/hvm/vmx/vtd/pci-direct.h xen/arch/x86/hvm/vmx/vtd/pci_regs.h xen/arch/x86/hvm/vmx/vtd/utils.c xen/arch/x86/hvm/vpic.c xen/arch/x86/i8259.c xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/mm.c xen/arch/x86/mm/p2m.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/arch/x86/mpparse.c xen/arch/x86/setup.c xen/common/grant_table.c xen/common/page_alloc.c xen/drivers/acpi/tables.c xen/include/asm-x86/acpi.h xen/include/asm-x86/fixmap.h xen/include/asm-x86/hvm/domain.h xen/include/asm-x86/hvm/io.h xen/include/asm-x86/hvm/iommu.h xen/include/asm-x86/hvm/irq.h xen/include/asm-x86/hvm/vmx/intel-iommu.h xen/include/asm-x86/iommu.h xen/include/asm-x86/mpspec_def.h xen/include/asm-x86/p2m.h xen/include/asm-x86/system.h xen/include/public/domctl.h xen/include/xen/acpi.h xen/include/xen/config.h xen/include/xen/irq.h
line diff
     1.1 --- a/tools/ioemu/Makefile.target	Fri Jul 27 13:47:03 2007 +0100
     1.2 +++ b/tools/ioemu/Makefile.target	Fri Jul 27 12:59:37 2007 -0700
     1.3 @@ -198,6 +198,7 @@ LIBS+=-lm
     1.4  LIBS+=-L../../libxc -lxenctrl -lxenguest
     1.5  LIBS+=-L../../xenstore -lxenstore
     1.6  LIBS+=-lpthread
     1.7 +LIBS+=-lpci
     1.8  ifndef CONFIG_USER_ONLY
     1.9  LIBS+=-lz
    1.10  endif
    1.11 @@ -401,6 +402,7 @@ VL_OBJS+= piix4acpi.o
    1.12  VL_OBJS+= xenstore.o
    1.13  VL_OBJS+= xen_platform.o
    1.14  VL_OBJS+= tpm_tis.o
    1.15 +VL_OBJS+= dpci.o
    1.16  CPPFLAGS += -DHAS_AUDIO
    1.17  endif
    1.18  ifeq ($(TARGET_BASE_ARCH), ppc)
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/tools/ioemu/hw/dpci.c	Fri Jul 27 12:59:37 2007 -0700
     2.3 @@ -0,0 +1,565 @@
     2.4 +/*
     2.5 + * Allen Kay <allen.m.kay@intel.com>
     2.6 + *
     2.7 + * This file implements direct PCI assignment to an HVM guest domain.
     2.8 + *
     2.9 + * Detailed Description:
    2.10 + *
    2.11 + * In general, we do not allow the guest OS to write directly to the
    2.12 + * assigned device's PCI config space.  The reason is that we do not
    2.13 + * want the guest OS to change the physical BAR addresses, either while
    2.14 + * sizing the BARs or when actually reassigning them.  At one time we
    2.15 + * allowed such direct access, and found that it caused system hangs.
    2.16 + *
    2.17 + * The solution we have implemented is to first construct a virtual PCI
    2.18 + * device using the real PCI config data of the assigned device.  Once
    2.19 + * constructed, it acts like any other existing QEMU virtual PCI device.
    2.20 + * The difference is in the dpci_write_config() function: writes to the
    2.21 + * command register are passed on to the hardware, while other fields are
    2.22 + * emulated - just like other virtual PCI devices.  In addition, we handle
    2.23 + * guest reassignment of BAR addresses via the xc_domain_memory_mapping
    2.24 + * and xc_domain_ioport_mapping hypercall wrappers.
    2.25 + */
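A minimal, self-contained sketch of the write policy described above (an
illustration, not part of the patch; hw_write_word() is a hypothetical
stand-in for libpci's pci_write_word()):

    #include <stdint.h>

    #define CMD_REG 0x04                      /* PCI command register */

    extern void hw_write_word(uint32_t addr, uint16_t val); /* hypothetical */

    static uint8_t shadow_config[256];        /* emulated config space */

    static void config_write_w(uint32_t addr, uint16_t val)
    {
        if (addr == CMD_REG)
            hw_write_word(addr, val);  /* only the command register reaches
                                          real hardware */
        shadow_config[addr]     = val & 0xff;  /* every write also updates */
        shadow_config[addr + 1] = val >> 8;    /* the emulated copy */
    }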
    2.26 +
    2.27 +#include "vl.h"
    2.28 +#include "pci/header.h"
    2.29 +#include "pci/pci.h"
    2.30 +
    2.31 +#define DEBUG_DIRECT_PCI 1
    2.32 +
    2.33 +#define PCI_VENDOR_ID         0x00    /* 16 bits */
    2.34 +#define PCI_DEVICE_ID         0x02    /* 16 bits */
    2.35 +#define PCI_COMMAND           0x04    /* 16 bits */
    2.36 +#define PCI_COMMAND_IO        0x1     /* Enable response in I/O space */
    2.37 +#define PCI_COMMAND_MEMORY    0x2     /* Enable response in Memory space */
    2.38 +#define PCI_CLASS_DEVICE      0x0a    /* Device class */
    2.39 +#define PCI_INTERRUPT_LINE    0x3c    /* 8 bits */
    2.40 +#define PCI_INTERRUPT_PIN     0x3d    /* 8 bits */
    2.41 +#define PCI_MIN_GNT           0x3e    /* 8 bits */
    2.42 +#define PCI_MAX_LAT           0x3f    /* 8 bits */
    2.43 +
    2.44 +/* just used for simpler irq handling. */
    2.45 +#define PCI_DEVICES_MAX 64
    2.46 +#define PCI_IRQ_WORDS   ((PCI_DEVICES_MAX + 31) / 32)
    2.47 +
    2.48 +extern FILE *logfile;
    2.49 +
    2.50 +static void dpci_update_mappings(PCIDevice *d)
    2.51 +{
    2.52 +    PCIIORegion *r;
    2.53 +    int cmd, i;
    2.54 +    uint32_t last_addr, new_addr, config_ofs;
    2.55 +    
    2.56 +    cmd = le16_to_cpu(*(uint16_t *)(d->config + PCI_COMMAND));
    2.57 +    for(i = 0; i < PCI_NUM_REGIONS; i++) {
    2.58 +        r = &d->io_regions[i];
    2.59 +        if (i == PCI_ROM_SLOT)
    2.60 +            config_ofs = 0x30;
    2.61 +        else
    2.62 +            config_ofs = 0x10 + i * 4;
    2.63 +        if (r->size != 0) {
    2.64 +            if (r->type & PCI_ADDRESS_SPACE_IO) {
    2.65 +                if (cmd & PCI_COMMAND_IO) {
    2.66 +
    2.67 +                    new_addr = le32_to_cpu(
    2.68 +                        *(uint32_t *)(d->config + config_ofs));
    2.69 +
    2.70 +                    new_addr = new_addr & ~(r->size - 1);
    2.71 +                    last_addr = new_addr + r->size - 1;
    2.72 +
    2.73 +                    /* NOTE: we have only 64K ioports on PC */
    2.74 +                    if (last_addr <= new_addr || new_addr == 0 ||
    2.75 +                        last_addr >= 0x10000) {
    2.76 +                        new_addr = -1;
    2.77 +                    }
    2.78 +                } else {
    2.79 +                    new_addr = -1;
    2.80 +                }
    2.81 +            } else {
    2.82 +                if (cmd & PCI_COMMAND_MEMORY) {
    2.83 +                    new_addr = le32_to_cpu(*(uint32_t *)(d->config + 
    2.84 +                                                         config_ofs));
    2.85 +                    /* the ROM slot has a specific enable bit */
    2.86 +                    if (i == PCI_ROM_SLOT && !(new_addr & 1))
    2.87 +                        goto no_mem_map;
    2.88 +                    new_addr = new_addr & ~(r->size - 1);
    2.89 +                    last_addr = new_addr + r->size - 1;
    2.90 +
    2.91 +                    /* NOTE: we do not support wrapping */
    2.92 +                    /* XXX: as we cannot support really dynamic
    2.93 +                       mappings, we handle specific values as invalid
    2.94 +                       mappings. */
    2.95 +                    if (last_addr <= new_addr || new_addr == 0 ||
    2.96 +                        last_addr == -1) {
    2.97 +                        new_addr = -1;
    2.98 +                    }
    2.99 +                } else {
   2.100 +no_mem_map:         new_addr = -1;
   2.101 +                }
   2.102 +            }
   2.103 +            /* now do the real mapping */
   2.104 +            if (new_addr != r->addr) {
   2.105 +                if (r->addr != -1) {
   2.106 +                    if (r->type & PCI_ADDRESS_SPACE_IO) {
   2.107 +                        int class;
   2.108 +                        /* NOTE: specific hack for IDE in PC case:
   2.109 +                           only one byte must be mapped. */
   2.110 +                        class = d->config[0x0a] | (d->config[0x0b] << 8);
   2.111 +                        if (class == 0x0101 && r->size == 4)
   2.112 +                            isa_unassign_ioport(r->addr + 2, 1);
   2.113 +                        else
   2.114 +                            isa_unassign_ioport(r->addr, r->size);
   2.115 +                    } else
   2.116 +                        cpu_register_physical_memory(r->addr + pci_mem_base, 
   2.117 +                                                     r->size, 
   2.118 +                                                     IO_MEM_UNASSIGNED);
   2.119 +                }
   2.120 +                r->addr = new_addr;
   2.121 +                if (r->addr != -1) {
   2.122 +                    r->map_func(d, i, r->addr, r->size, r->type);
   2.123 +                }
   2.124 +            }
   2.125 +        }
   2.126 +    }
   2.127 +}
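A worked example of the masking arithmetic above (not part of the patch).
BAR sizes are powers of two, so ~(r->size - 1) clears the offset bits:

    /* 4 KiB memory BAR: r->size = 0x1000, guest writes 0xf0001234 */
    new_addr  = 0xf0001234 & ~(0x1000 - 1);   /* = 0xf0001000 */
    last_addr = 0xf0001000 + 0x1000 - 1;      /* = 0xf0001fff */
    /* last_addr > new_addr, new_addr != 0, last_addr != -1: mapping valid */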
   2.128 +
   2.129 +uint32_t dpci_read_config(
   2.130 +    PCIDevice *d, 
   2.131 +    uint32_t address, int len)
   2.132 +{
   2.133 +    uint32_t val;
   2.134 +
   2.135 +    switch(len) {
   2.136 +    case 1:
   2.137 +        val = d->config[address];
   2.138 +        break;
   2.139 +    case 2:
   2.140 +        val = le16_to_cpu(*(uint16_t *)(d->config + address));
   2.141 +        break;
   2.142 +    default:
   2.143 +    case 4:
   2.144 +        val = le32_to_cpu(*(uint32_t *)(d->config + address));
   2.145 +        break;
   2.146 +    }
   2.147 +#ifdef DEBUG_DIRECT_PCI
   2.148 +    fprintf(logfile,
   2.149 +        "dpci_read_config: address = %x len = %x val = %x\n",
   2.150 +        address, len, val);
   2.151 +#endif
   2.152 +    return val;
   2.153 +}
   2.154 +
   2.155 +
   2.156 +void dpci_write_config(
   2.157 +    PCIDevice *d, uint32_t address,
   2.158 +    uint32_t val, int len)
   2.159 +{
   2.160 +    int can_write, i, ret = 0;
   2.161 +    uint32_t end, addr;
   2.162 +    uint32_t request_val = val;
   2.163 +    uint32_t old_bar;
   2.164 +
   2.165 +    if ((address == 0x4) && (len == 2))
   2.166 +        pci_write_word(d->pci_dev, address, val);
   2.167 +
   2.168 +    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) || 
   2.169 +                     (address >= 0x30 && address < 0x34)))
   2.170 +    {
   2.171 +        PCIIORegion *r;
   2.172 +        int reg;
   2.173 +
   2.174 +        if ( address >= 0x30 )
   2.175 +            reg = PCI_ROM_SLOT;
   2.176 +        else
   2.177 +            reg = (address - 0x10) >> 2;
   2.178 +
   2.179 +        r = &d->io_regions[reg];
   2.180 +        if (r->size == 0)
   2.181 +            goto default_config;
   2.182 +
   2.183 +        /* compute the stored value */
   2.184 +        if (reg == PCI_ROM_SLOT) {
   2.185 +            /* keep ROM enable bit */
   2.186 +            val &= (~(r->size - 1)) | 1;
   2.187 +        } else {
   2.188 +            val &= ~(r->size - 1);
   2.189 +            val |= r->type;
   2.190 +        }
   2.191 +        old_bar = *(uint32_t *)(d->config + address);
   2.192 +        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
   2.193 +
   2.194 +        /* dynamic pci bar */
   2.195 +        if ( (request_val == -1) && (d->old_bars[reg] == 0))
    2.196 +            d->old_bars[reg] = old_bar;
   2.197 +        else if ((~val + 1) != r->size) {
   2.198 +            switch (r->type) {
   2.199 +                case PCI_ADDRESS_SPACE_MEM:
   2.200 +                case PCI_ADDRESS_SPACE_MEM_PREFETCH:
   2.201 +                    fprintf(logfile,
   2.202 +                        "xc_domain_memory_mapping: new = %x old = %x size = %x\n",
   2.203 +                        cpu_to_le32(val) >> 12,
   2.204 +                        d->old_bars[reg] >> 12,
   2.205 +                        r->size >> 12);
   2.206 +
   2.207 +                    ret = xc_domain_memory_mapping(xc_handle,
   2.208 +                        domid,
   2.209 +                        cpu_to_le32(val) >> 12,
   2.210 +                        d->old_bars[reg] >> 12,
   2.211 +                        r->size >> 12);
   2.212 +
   2.213 +                    if ( ret < 0 )
   2.214 +                        fprintf(logfile,
   2.215 +                            " xc_domain_memory_mapping error %d\n", ret);
   2.216 +                    break;
   2.217 +                case PCI_ADDRESS_SPACE_IO:
   2.218 +                    fprintf(logfile,
    2.219 +                        "xc_domain_ioport_mapping: val = %x r->size = %x "
    2.220 +                        "d->old_bars[%x] = %x\n",
   2.221 +                        val, r->size, reg, d->old_bars[reg]);
   2.222 +
   2.223 +                    ret = xc_domain_ioport_mapping(xc_handle,
   2.224 +                        domid,
   2.225 +                        cpu_to_le32(val),
   2.226 +                        d->old_bars[reg],
   2.227 +                        r->size);
   2.228 +
   2.229 +                    if ( ret < 0 )
   2.230 +                        fprintf(logfile,
   2.231 +                            " xc_domain_ioport_mapping error %d\n", ret);
   2.232 +                    break;
   2.233 +                default:
   2.234 +                    fprintf(logfile,
   2.235 +                        "dpci_write_config: invalid r->type = %x\n", r->type);
   2.236 +            }
   2.237 +        }
   2.238 +        dpci_update_mappings(d);
   2.239 +        return;
   2.240 +    }
   2.241 + default_config:
   2.242 +    /* not efficient, but simple */
   2.243 +    addr = address;
   2.244 +    for(i = 0; i < len; i++) {
   2.245 +        /* default read/write accesses */
   2.246 +        switch(d->config[0x0e]) {
   2.247 +        case 0x00:
   2.248 +        case 0x80:
   2.249 +            switch(addr) {
   2.250 +            case 0x00:
   2.251 +            case 0x01:
   2.252 +            case 0x02:
   2.253 +            case 0x03:
   2.254 +            case 0x08:
   2.255 +            case 0x09:
   2.256 +            case 0x0a:
   2.257 +            case 0x0b:
   2.258 +            case 0x0e:
   2.259 +            case 0x10 ... 0x27: /* base */
   2.260 +            case 0x30 ... 0x33: /* rom */
   2.261 +            case 0x3d:
   2.262 +                can_write = 0;
   2.263 +                break;
   2.264 +            default:
   2.265 +                can_write = 1;
   2.266 +                break;
   2.267 +            }
   2.268 +            break;
   2.269 +        default:
   2.270 +        case 0x01:
   2.271 +            switch(addr) {
   2.272 +            case 0x00:
   2.273 +            case 0x01:
   2.274 +            case 0x02:
   2.275 +            case 0x03:
   2.276 +            case 0x08:
   2.277 +            case 0x09:
   2.278 +            case 0x0a:
   2.279 +            case 0x0b:
   2.280 +            case 0x0e:
   2.281 +            case 0x38 ... 0x3b: /* rom */
   2.282 +            case 0x3d:
   2.283 +                can_write = 0;
   2.284 +                break;
   2.285 +            default:
   2.286 +                can_write = 1;
   2.287 +                break;
   2.288 +            }
   2.289 +            break;
   2.290 +        }
   2.291 +        if (can_write) {
   2.292 +            d->config[addr] = val;
   2.293 +        }
   2.294 +        addr++;
   2.295 +        val >>= 8;
   2.296 +    }
   2.297 +    end = address + len;
   2.298 +
   2.299 +    /* if command register is modified, we must modify mappings */
   2.300 +    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
   2.301 +        dpci_update_mappings(d);
   2.302 +    }
   2.303 +}
   2.304 +
   2.305 +void dpci_ioport_map(
   2.306 +    PCIDevice *pci_dev, int region_num, 
   2.307 +    uint32_t addr, uint32_t size, int type)
   2.308 +{
   2.309 +    fprintf(logfile, "dpci_ioport_map: entered\n");
   2.310 +}
   2.311 +
   2.312 +void dpci_mmio_map(
   2.313 +    PCIDevice *pci_dev, int region_num, 
   2.314 +    uint32_t addr, uint32_t size, int type)
   2.315 +{
   2.316 +    fprintf(logfile, "dpci_mmio_map: entered\n");
   2.317 +}
   2.318 +
   2.319 +static int pci_devs(const char *direct_pci)
   2.320 +{
   2.321 +    int count = 0;
   2.322 +    const char *c;
   2.323 +
   2.324 +    /* skip first "[" character */
   2.325 +    c = direct_pci + 1;
   2.326 +    while ((c = strchr(c, '[')) != NULL) {
   2.327 +        c++;
   2.328 +        count++;
   2.329 +    }
   2.330 +    return (count);
   2.331 +}
   2.332 +
   2.333 +static char *token;
   2.334 +int next_token(char *direct_pci)
   2.335 +{
   2.336 +    if (token == NULL)
   2.337 +        token = strtok(direct_pci, ",");
   2.338 +    else 
   2.339 +        token = strtok(NULL, ",");
   2.340 +    token = strchr(token, 'x');
   2.341 +    token = token + 1;
   2.342 +    return ((int) strtol(token, NULL, 16));
   2.343 +}
   2.344 +
   2.345 +static void next_bdf(
   2.346 +    char *direct_pci, int *seg,
   2.347 +    int *bus, int *dev, int *func)
   2.348 +{
   2.349 +    *seg  = next_token(direct_pci);
   2.350 +    *bus  = next_token(direct_pci);
   2.351 +    *dev  = next_token(direct_pci);
   2.352 +    *func = next_token(direct_pci);
   2.353 +}
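A standalone harness sketching the option format the tokenizers above expect:
one inner bracket pair per device, comma-separated hex fields for segment,
bus, device and function (the device string below is a made-up example):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int next_field(char **s)
    {
        char *t = strtok(*s, ",");  /* pass the buffer once, NULL after */
        *s = NULL;
        t = strchr(t, 'x') + 1;     /* skip to the digits after "0x" */
        return (int)strtol(t, NULL, 16);
    }

    int main(void)
    {
        char arg[] = "[[0x0,0x3,0x0,0x1]]";  /* one device */
        char *p = arg;
        int seg, bus, dev, func;

        seg  = next_field(&p);
        bus  = next_field(&p);
        dev  = next_field(&p);
        func = next_field(&p);
        printf("%04x:%02x:%02x.%x\n", seg, bus, dev, func); /* 0000:03:00.1 */
        return 0;
    }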
   2.354 +
   2.355 +#define PCI_CONFIG_SIZE 0x3c
   2.356 +#define PCI_BAR_ENTRIES 0x6
   2.357 +#define DIRECT_PCI 1
   2.358 +struct pci_config_cf8 {
   2.359 +    union {
   2.360 +        unsigned int value;
   2.361 +        struct {
   2.362 +            unsigned int reserved1:2;
   2.363 +            unsigned int reg:6;
   2.364 +            unsigned int func:3;
   2.365 +            unsigned int dev:5;
   2.366 +            unsigned int bus:8;
   2.367 +            unsigned int reserved2:7;
   2.368 +            unsigned int enable:1;
   2.369 +        };
   2.370 +    };
   2.371 +};
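A worked example of this packing (values made up; the bitfields fill from the
low bit, so func occupies bits 8-10, dev bits 11-15 and bus bits 16-23):

    /* bus 3, dev 0, func 1:
     *   value = (bus << 16) | (dev << 11) | (func << 8) = 0x00030100
     * The hypervisor unpacks it the same way in XEN_DOMCTL_assign_device:
     *   bus   = (machine_bdf >> 16) & 0xff;    // 0x03
     *   devfn = (machine_bdf >> 8) & 0xff;     // (dev << 3) | func = 0x01
     */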
   2.372 + 
   2.373 +static int find_cap_offset(
   2.374 +    struct pci_dev *pdev, int cap)
   2.375 +{
   2.376 +    int id;
   2.377 +    int max_cap = 48;
   2.378 +    int pos = PCI_CAPABILITY_LIST;
   2.379 +    int status;
   2.380 +
   2.381 +    status = pci_read_byte(pdev, PCI_STATUS);
   2.382 +    if ((status & PCI_STATUS_CAP_LIST) == 0)
   2.383 +        return 0;
   2.384 +
   2.385 +    while (max_cap--) {
   2.386 +        pos = pci_read_byte(pdev, pos);
   2.387 +        if (pos < 0x40)
   2.388 +            break;
   2.389 +        fprintf(logfile, "find_cap_offset: pos = %x\n", pos);
   2.390 +
   2.391 +        pos &= ~3;
   2.392 +        id = pci_read_byte(pdev, pos + PCI_CAP_LIST_ID);
   2.393 +        fprintf(logfile,
   2.394 +            "find_cap_offset: id = %x PCI_CAP_ID_EXP = %x\n",
   2.395 +            id, PCI_CAP_ID_EXP);
   2.396 +
   2.397 +        if (id == 0xff)
   2.398 +            break;
   2.399 +        if (id == cap)
   2.400 +            return pos;
   2.401 +
   2.402 +        pos += PCI_CAP_LIST_NEXT;
   2.403 +        fprintf(logfile,
   2.404 +            "find_cap_offset: pos = %x PCI_CAP_LIST_NEXT = %x\n",
   2.405 +            pos, PCI_CAP_LIST_NEXT);
   2.406 +    }
   2.407 +    return 0;
   2.408 +}
   2.409 +
   2.410 +#define PCI_EXP_DEVCAP_FLR    (1 << 28)
   2.411 +#define PCI_EXP_DEVCTL_FLR     0x1b
   2.412 +
   2.413 +void pdev_flr(struct pci_dev *pdev)
   2.414 +{
   2.415 +    int pos;
   2.416 +    int dev_cap;
   2.417 +    int dev_status;
   2.418 +
   2.419 +    pos = find_cap_offset(pdev, PCI_CAP_ID_EXP);
   2.420 +    fprintf(logfile, "pdev_flr: pos = %x\n", pos);
   2.421 +    if (pos) {
   2.422 +        dev_cap = pci_read_long(pdev, pos + PCI_EXP_DEVCAP);
   2.423 +        fprintf(logfile, "pdev_flr: dev_cap = %x\n", dev_cap);
   2.424 +        if (dev_cap & PCI_EXP_DEVCAP_FLR) {
   2.425 +            fprintf(logfile, "pdev_flr: writing %x to %x\n",
   2.426 +                    pos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_FLR);
   2.427 +            pci_write_word(pdev, pos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_FLR);
   2.428 +            do {
   2.429 +                dev_status = pci_read_long(pdev, pos + PCI_EXP_DEVSTA);
   2.430 +            } while (dev_status & PCI_EXP_DEVSTA_TRPND);
   2.431 +        }
   2.432 +    }
   2.433 +    fprintf(logfile, "pdev_flr: done\n");
   2.434 +}
   2.435 +
   2.436 +void pdev_read_pmcap(struct pci_dev *pdev, PCIDevice *d)
   2.437 +{
   2.438 +    int pos;
   2.439 +
   2.440 +    /* read power management capability */
   2.441 +    pos = find_cap_offset(pdev, PCI_CAP_ID_PM);
   2.442 +
   2.443 +    if (pos > 0) {
   2.444 +        d->config[PCI_CAPABILITY_LIST] = pos;
   2.445 +        d->config[pos] = pci_read_byte(pdev, pos);
   2.446 +        pos++;
   2.447 +
    2.448 +        /* set next cap pointer to 0 to terminate the linked list */
   2.449 +        d->config[pos] = 0;
   2.450 +        pos++;
   2.451 +        d->config[pos] = pci_read_byte(pdev, pos);
   2.452 +        pos++;
   2.453 +        d->config[pos] = pci_read_byte(pdev, pos);
   2.454 +        pos++;
   2.455 +        d->config[pos] = pci_read_byte(pdev, pos);
   2.456 +        pos++;
   2.457 +        d->config[pos] = pci_read_byte(pdev, pos);
   2.458 +        pos++;
   2.459 +        d->config[pos] = pci_read_byte(pdev, pos);
   2.460 +        pos++;
   2.461 +        d->config[pos] = pci_read_byte(pdev, pos);
   2.462 +        pos++;
   2.463 +    }
   2.464 +}
   2.465 +
   2.466 +/*
    2.467 + * This function reads the PCI config info of the assigned device
    2.468 + * and constructs a virtual PCI device on the virtual PCI
   2.469 + * bus - similar to other virtual PCI devices such as
   2.470 + * PCnet.
   2.471 + */
   2.472 +void dpci_init(PCIBus *pci_bus, char *direct_pci)
   2.473 +{
   2.474 +    PCIDevice *d;
   2.475 +    int dev_count;
   2.476 +    int seg, bus, dev, func;
   2.477 +    struct pci_access *pci_access;
   2.478 +    struct pci_dev *pci_dev;
   2.479 +    struct pci_config_cf8 machine_bdf;
   2.480 +    int i, ret = 0;
   2.481 +    
   2.482 +    dev_count = pci_devs(direct_pci);
   2.483 +    pci_access = pci_alloc();
    2.484 +    if (pci_access == NULL)
    2.485 +        { fprintf(logfile, "pci_access is NULL\n"); return; }
   2.486 +    pci_init(pci_access);
   2.487 +    pci_scan_bus(pci_access);
   2.488 +
   2.489 +    while (dev_count--) {
   2.490 +        next_bdf(direct_pci, &seg, &bus, &dev, &func);
   2.491 +        fprintf(logfile,
   2.492 +            "dpci_init:%d: seg = %x bus = %x dev = %x func = %x\n",
   2.493 +            dev_count, seg, bus, dev, func);
   2.494 +
   2.495 +        d = (PCIDevice *)pci_register_device(pci_bus, "DIRECT PCI",
   2.496 +            sizeof(PCIDevice), -1, dpci_read_config, dpci_write_config);
   2.497 +
   2.498 +        for (pci_dev = pci_access->devices; pci_dev != NULL;
   2.499 +            pci_dev = pci_dev->next)
   2.500 +        {
   2.501 +            if ((bus == pci_dev->bus) && (dev == pci_dev->dev)
   2.502 +                && (func == pci_dev->func))
   2.503 +                break;
   2.504 +        }
   2.505 +        d->pci_dev = pci_dev;
   2.506 +
    2.507 +        /* fill in PCI config info from the physical device */
   2.508 +        for (i = 0; i < PCI_CONFIG_SIZE; i++)
   2.509 +            d->config[i] = pci_read_byte(pci_dev, i);
    2.510 +        d->config[0x3d] = 1;   // pin register 1 = INTA (intx 0)
   2.511 +
   2.512 +        /* read PM capability */
   2.513 +        pdev_read_pmcap(pci_dev, d);
   2.514 +
   2.515 +        /* issue PCIe FLR */
   2.516 +        pdev_flr(pci_dev);
   2.517 +
    2.518 +        /* tell the Xen VMM to change IOMMU settings */
   2.519 +        machine_bdf.reg = 0;
   2.520 +        machine_bdf.bus = pci_dev->bus;
   2.521 +        machine_bdf.dev = pci_dev->dev;
   2.522 +        machine_bdf.func = pci_dev->func;
   2.523 +        ret = xc_assign_device(xc_handle, domid, machine_bdf.value);
   2.524 +        if ( ret < 0 )
    2.525 +            fprintf(logfile, " xc_assign_device error %d\n", ret);
   2.526 +
    2.527 +        /* Should be fixed: we need to distinguish between legacy and
    2.528 +         * MSI-capable devices.  For now we use pciback to hide the device,
    2.529 +         * and the guest should not enable the MSI capability.
    2.530 +         */
   2.531 +        ret = xc_irq_mapping(xc_handle, domid, pci_dev->irq,
   2.532 +                       d->devfn >> 3, d->config[0x3d]-1);
   2.533 +        if ( ret < 0 )
    2.534 +            fprintf(logfile, " xc_irq_mapping error %d\n", ret);
   2.535 +
   2.536 +#ifdef DEBUG_DIRECT_PCI
   2.537 +        for (i = 0x10; i < 0x34; i += 8) {
   2.538 +            fprintf(logfile, "%x %x %x %x %x %x %x %x\n", 
   2.539 +                    d->config[i+0], d->config[i+1], d->config[i+2], 
   2.540 +                    d->config[i+3], d->config[i+4], d->config[i+5], 
   2.541 +                    d->config[i+6], d->config[i+7]); 
   2.542 +        }
   2.543 +#endif
   2.544 +
   2.545 +        /*
   2.546 +         * Call pci_register_io_region() as it will initialize io_regions
   2.547 +         * field in PCIDevice structure.  These fields are later used in
   2.548 +         * dpci_write_config() for getting BAR sizes etc.
   2.549 +         */
   2.550 +        for (i = 0; i < PCI_BAR_ENTRIES; i++) {
   2.551 +            if (pci_dev->base_addr[i]) {
   2.552 +                int type = *((uint32_t*)(d->config + PCI_BASE_ADDRESS_0) + i);
   2.553 +                if (type & PCI_ADDRESS_SPACE_IO)
   2.554 +                    pci_register_io_region(
   2.555 +                        (PCIDevice *)d, i, pci_dev->size[i],
   2.556 +                        PCI_ADDRESS_SPACE_IO, dpci_ioport_map);
   2.557 +                else if (type & PCI_ADDRESS_SPACE_MEM_PREFETCH)
   2.558 +                    pci_register_io_region(
   2.559 +                        (PCIDevice *)d, i, pci_dev->size[i], 
   2.560 +                        PCI_ADDRESS_SPACE_MEM_PREFETCH, dpci_mmio_map);
   2.561 +                else
   2.562 +                    pci_register_io_region(
   2.563 +                        (PCIDevice *)d, i, pci_dev->size[i], 
   2.564 +                        PCI_ADDRESS_SPACE_MEM, dpci_mmio_map);
   2.565 +            }
   2.566 +        }
   2.567 +    }
   2.568 +}
     3.1 --- a/tools/ioemu/hw/pc.c	Fri Jul 27 13:47:03 2007 +0100
     3.2 +++ b/tools/ioemu/hw/pc.c	Fri Jul 27 12:59:37 2007 -0700
     3.3 @@ -465,7 +465,7 @@ static void pc_init1(uint64_t ram_size, 
     3.4                       DisplayState *ds, const char **fd_filename, int snapshot,
     3.5                       const char *kernel_filename, const char *kernel_cmdline,
     3.6                       const char *initrd_filename,
     3.7 -                     int pci_enabled)
     3.8 +                     int pci_enabled, const char *direct_pci)
     3.9  {
    3.10  #ifndef NOBIOS
    3.11      char buf[1024];
    3.12 @@ -751,6 +751,9 @@ static void pc_init1(uint64_t ram_size, 
    3.13          usb_uhci_init(pci_bus, piix3_devfn + (acpi_enabled ? 3 : 2));
    3.14      }
    3.15  
    3.16 +    if (pci_enabled && direct_pci)
    3.17 +        dpci_init(pci_bus, direct_pci);
    3.18 +    
    3.19  #ifndef CONFIG_DM
    3.20      if (pci_enabled && acpi_enabled) {
    3.21          uint8_t *eeprom_buf = qemu_mallocz(8 * 256); /* XXX: make this persistent */
    3.22 @@ -801,12 +804,14 @@ static void pc_init_pci(uint64_t ram_siz
    3.23                          int snapshot, 
    3.24                          const char *kernel_filename, 
    3.25                          const char *kernel_cmdline,
    3.26 -                        const char *initrd_filename)
    3.27 +                        const char *initrd_filename,
    3.28 +                        const char *direct_pci)
    3.29  {
    3.30      pc_init1(ram_size, vga_ram_size, boot_device,
    3.31               ds, fd_filename, snapshot,
    3.32               kernel_filename, kernel_cmdline,
    3.33 -             initrd_filename, 1);
    3.34 +             initrd_filename, 1,
    3.35 +             direct_pci);
    3.36  }
    3.37  
    3.38  static void pc_init_isa(uint64_t ram_size, int vga_ram_size, char *boot_device,
    3.39 @@ -814,12 +819,13 @@ static void pc_init_isa(uint64_t ram_siz
    3.40                          int snapshot, 
    3.41                          const char *kernel_filename, 
    3.42                          const char *kernel_cmdline,
    3.43 -                        const char *initrd_filename)
    3.44 +                        const char *initrd_filename,
    3.45 +                        const char *unused)
    3.46  {
    3.47      pc_init1(ram_size, vga_ram_size, boot_device,
    3.48               ds, fd_filename, snapshot,
    3.49               kernel_filename, kernel_cmdline,
    3.50 -             initrd_filename, 0);
    3.51 +             initrd_filename, 0, NULL);
    3.52  }
    3.53  
    3.54  QEMUMachine pc_machine = {
     4.1 --- a/tools/ioemu/hw/pci.c	Fri Jul 27 13:47:03 2007 +0100
     4.2 +++ b/tools/ioemu/hw/pci.c	Fri Jul 27 12:59:37 2007 -0700
     4.3 @@ -151,7 +151,9 @@ void pci_register_io_region(PCIDevice *p
     4.4      } else {
     4.5          addr = 0x10 + region_num * 4;
     4.6      }
     4.7 -    *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
     4.8 +
     4.9 +    if ((map_func != dpci_ioport_map) && (map_func != dpci_mmio_map))
    4.10 +        *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
    4.11  }
    4.12  
    4.13  target_phys_addr_t pci_to_cpu_addr(target_phys_addr_t addr)
     5.1 --- a/tools/ioemu/vl.c	Fri Jul 27 13:47:03 2007 +0100
     5.2 +++ b/tools/ioemu/vl.c	Fri Jul 27 12:59:37 2007 -0700
     5.3 @@ -6496,6 +6496,7 @@ enum {
     5.4      QEMU_OPTION_vcpus,
     5.5      QEMU_OPTION_acpi,
     5.6      QEMU_OPTION_vncviewer,
     5.7 +    QEMU_OPTION_pci,
     5.8      QEMU_OPTION_vncunused,
     5.9  };
    5.10  
    5.11 @@ -6594,6 +6595,7 @@ const QEMUOption qemu_options[] = {
    5.12      { "d", HAS_ARG, QEMU_OPTION_d },
    5.13      { "vcpus", 1, QEMU_OPTION_vcpus },
    5.14      { "acpi", 0, QEMU_OPTION_acpi },
    5.15 +    { "pci", HAS_ARG, QEMU_OPTION_pci },
    5.16      { NULL },
    5.17  };
    5.18  
    5.19 @@ -7059,6 +7061,7 @@ int main(int argc, char **argv)
    5.20  #endif
    5.21  
    5.22      char qemu_dm_logfilename[128];
    5.23 +    const char *direct_pci = NULL;
    5.24      
    5.25      /* Ensure that SIGUSR2 is blocked by default when a new thread is created,
    5.26         then only the threads that use the signal unblock it -- this fixes a
    5.27 @@ -7557,6 +7560,9 @@ int main(int argc, char **argv)
    5.28              case QEMU_OPTION_vncunused:
    5.29                  vncunused++;
    5.30                  break;
    5.31 +            case QEMU_OPTION_pci:
    5.32 +                direct_pci = optarg;
    5.33 +                break;
    5.34              }
    5.35          }
    5.36      }
    5.37 @@ -7923,7 +7929,8 @@ int main(int argc, char **argv)
    5.38  
    5.39      machine->init(ram_size, vga_ram_size, boot_device,
    5.40                    ds, fd_filename, snapshot,
    5.41 -                  kernel_filename, kernel_cmdline, initrd_filename);
    5.42 +                  kernel_filename, kernel_cmdline, initrd_filename,
    5.43 +                  direct_pci);
    5.44      free(boot_device);
    5.45  
    5.46      /* init USB devices */
     6.1 --- a/tools/ioemu/vl.h	Fri Jul 27 13:47:03 2007 +0100
     6.2 +++ b/tools/ioemu/vl.h	Fri Jul 27 12:59:37 2007 -0700
     6.3 @@ -727,7 +727,7 @@ typedef void QEMUMachineInitFunc(uint64_
     6.4                                   char *boot_device,
     6.5               DisplayState *ds, const char **fd_filename, int snapshot,
     6.6               const char *kernel_filename, const char *kernel_cmdline,
     6.7 -             const char *initrd_filename);
     6.8 +             const char *initrd_filename, const char *direct_pci);
     6.9  
    6.10  typedef struct QEMUMachine {
    6.11      const char *name;
    6.12 @@ -818,6 +818,10 @@ struct PCIDevice {
    6.13  
    6.14      /* Current IRQ levels.  Used internally by the generic PCI code.  */
    6.15      int irq_state[4];
    6.16 +
    6.17 +    /* fields used for direct pci device access */
    6.18 +    uint32_t old_bars[PCI_NUM_REGIONS];
    6.19 +    struct pci_dev *pci_dev;
    6.20  };
    6.21  
    6.22  PCIDevice *pci_register_device(PCIBus *bus, const char *name,
    6.23 @@ -1498,4 +1502,11 @@ void destroy_hvm_domain(void);
    6.24  /* VNC Authentication */
    6.25  #define AUTHCHALLENGESIZE 16
    6.26  
    6.27 +/* HVM guest PCI pass-through support */
    6.28 +void dpci_init(PCIBus *pci_bus, char *direct_pci);
    6.29 +void dpci_ioport_map(PCIDevice *pci_dev, int region_num, 
    6.30 +                     uint32_t addr, uint32_t size, int type);
    6.31 +void dpci_mmio_map(PCIDevice *pci_dev, int region_num, 
    6.32 +                   uint32_t addr, uint32_t size, int type);
    6.33 +
    6.34  #endif /* VL_H */
     7.1 --- a/tools/libxc/xc_domain.c	Fri Jul 27 13:47:03 2007 +0100
     7.2 +++ b/tools/libxc/xc_domain.c	Fri Jul 27 12:59:37 2007 -0700
     7.3 @@ -659,6 +659,70 @@ int xc_domain_send_trigger(int xc_handle
     7.4      return do_domctl(xc_handle, &domctl);
     7.5  }
     7.6  
     7.7 +int xc_assign_device(int xc_handle,
     7.8 +                     uint32_t domid,
     7.9 +                     uint32_t machine_bdf)
    7.10 +{
    7.11 +    DECLARE_DOMCTL;
    7.12 +
    7.13 +    domctl.cmd = XEN_DOMCTL_assign_device;
    7.14 +    domctl.domain = domid;
    7.15 +    domctl.u.assign_device.machine_bdf = machine_bdf;
    7.16 + 
    7.17 +    return do_domctl(xc_handle, &domctl);
    7.18 +}
    7.19 +
    7.20 +int xc_irq_mapping(int xc_handle,
    7.21 +                  uint32_t domid,
    7.22 +                  uint32_t machine_irq,
    7.23 +                  uint32_t device,
    7.24 +                  uint32_t intx)
    7.25 +{
    7.26 +    DECLARE_DOMCTL;
    7.27 +
    7.28 +    domctl.cmd = XEN_DOMCTL_irq_mapping;
    7.29 +    domctl.domain = domid;
    7.30 +    domctl.u.irq_mapping.machine_irq = machine_irq;
    7.31 +    domctl.u.irq_mapping.device = device;
    7.32 +    domctl.u.irq_mapping.intx = intx;
    7.33 + 
    7.34 +    return do_domctl(xc_handle, &domctl);
    7.35 +}
    7.36 +
    7.37 +int xc_domain_memory_mapping(int xc_handle,
    7.38 +                             uint32_t domid,
    7.39 +                             unsigned long first_gfn,
    7.40 +                             unsigned long first_mfn,
    7.41 +                             unsigned long nr_mfns)
    7.42 +{
    7.43 +    DECLARE_DOMCTL;
    7.44 +
    7.45 +    domctl.cmd = XEN_DOMCTL_memory_mapping;
    7.46 +    domctl.domain = domid;
    7.47 +    domctl.u.memory_mapping.first_gfn = first_gfn;
    7.48 +    domctl.u.memory_mapping.first_mfn = first_mfn;
    7.49 +    domctl.u.memory_mapping.nr_mfns = nr_mfns;
    7.50 +
    7.51 +    return do_domctl(xc_handle, &domctl);
    7.52 +}
    7.53 +
    7.54 +int xc_domain_ioport_mapping(int xc_handle,
    7.55 +                             uint32_t domid,
    7.56 +                             uint32_t first_gport,
    7.57 +                             uint32_t first_mport,
    7.58 +                             uint32_t nr_ports)
    7.59 +{
    7.60 +    DECLARE_DOMCTL;
    7.61 +
    7.62 +    domctl.cmd = XEN_DOMCTL_ioport_mapping;
    7.63 +    domctl.domain = domid;
    7.64 +    domctl.u.ioport_mapping.first_gport = first_gport;
    7.65 +    domctl.u.ioport_mapping.first_mport = first_mport;
    7.66 +    domctl.u.ioport_mapping.nr_ports = nr_ports;
    7.67 +
    7.68 +    return do_domctl(xc_handle, &domctl);
    7.69 +}
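A sketch of how a caller might drive these two wrappers once a guest has
repositioned a BAR (illustration only; the domain ID, frame numbers and port
numbers are made-up examples):

    #include <xenctrl.h>

    static int map_example_bars(int xc_handle, uint32_t domid)
    {
        int rc;

        /* back guest frames 0xf2000... with machine frames 0xfebc0... */
        rc = xc_domain_memory_mapping(xc_handle, domid,
                                      0xf2000 /* first_gfn */,
                                      0xfebc0 /* first_mfn */,
                                      16      /* nr_mfns */);
        if (rc < 0)
            return rc;

        /* alias guest I/O ports 0xc100..0xc107 onto machine 0xe000.. */
        return xc_domain_ioport_mapping(xc_handle, domid,
                                        0xc100 /* first_gport */,
                                        0xe000 /* first_mport */,
                                        8      /* nr_ports */);
    }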
    7.70 +
    7.71  int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value)
    7.72  {
    7.73      DECLARE_HYPERCALL;
     8.1 --- a/tools/libxc/xenctrl.h	Fri Jul 27 13:47:03 2007 +0100
     8.2 +++ b/tools/libxc/xenctrl.h	Fri Jul 27 12:59:37 2007 -0700
     8.3 @@ -859,4 +859,24 @@ int xc_ia64_save_to_nvram(int xc_handle,
     8.4  /* IA64 specific, nvram init */
     8.5  int xc_ia64_nvram_init(int xc_handle, char *dom_name, uint32_t dom);
     8.6  
     8.7 +/* HVM guest PCI pass-through */
     8.8 +int xc_assign_device(int xc_handle,
     8.9 +                     uint32_t domid,
    8.10 +                     uint32_t machine_bdf);
    8.11 +int xc_irq_mapping(int xc_handle,
    8.12 +                   uint32_t domid,
    8.13 +                   uint32_t machine_irq,
    8.14 +                   uint32_t device,
    8.15 +                   uint32_t intx);
    8.16 +int xc_domain_memory_mapping(int xc_handle,
    8.17 +                             uint32_t domid,
    8.18 +                             unsigned long first_gfn,
    8.19 +                             unsigned long first_mfn,
    8.20 +                             unsigned long nr_mfns);
    8.21 +int xc_domain_ioport_mapping(int xc_handle,
    8.22 +                             uint32_t domid,
    8.23 +                             uint32_t first_gport,
    8.24 +                             uint32_t first_mport,
    8.25 +                             uint32_t nr_ports);
    8.26 +
    8.27  #endif /* XENCTRL_H */
     9.1 --- a/tools/python/xen/xend/XendConfig.py	Fri Jul 27 13:47:03 2007 +0100
     9.2 +++ b/tools/python/xen/xend/XendConfig.py	Fri Jul 27 12:59:37 2007 -0700
     9.3 @@ -127,7 +127,7 @@ XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 
     9.4                          'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
     9.5                          'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
     9.6                          'vncconsole', 'vncdisplay', 'vnclisten',
     9.7 -                        'vncpasswd', 'vncunused', 'xauthority']
     9.8 +                        'vncpasswd', 'vncunused', 'xauthority', 'pci']
     9.9  
    9.10  # List of XendConfig configuration keys that have no direct equivalent
    9.11  # in the old world.
    9.12 @@ -163,6 +163,7 @@ XENAPI_CFG_TYPES = {
    9.13      'tools_version': dict,
    9.14      'other_config': dict,
    9.15      'security_label': str,
    9.16 +    'pci': str,
    9.17  }
    9.18  
    9.19  # List of legacy configuration keys that have no equivalent in the
    10.1 --- a/tools/python/xen/xend/image.py	Fri Jul 27 13:47:03 2007 +0100
    10.2 +++ b/tools/python/xen/xend/image.py	Fri Jul 27 12:59:37 2007 -0700
    10.3 @@ -299,7 +299,7 @@ class HVMImageHandler(ImageHandler):
    10.4      def parseDeviceModelArgs(self, vmConfig):
    10.5          dmargs = [ 'boot', 'fda', 'fdb', 'soundhw',
    10.6                     'localtime', 'serial', 'stdvga', 'isa',
    10.7 -                   'acpi', 'usb', 'usbdevice', 'keymap' ]
    10.8 +                   'acpi', 'usb', 'usbdevice', 'keymap', 'pci' ]
    10.9          
   10.10          ret = ['-vcpus', str(self.vm.getVCpuCount())]
   10.11  
    11.1 --- a/tools/python/xen/xm/create.py	Fri Jul 27 13:47:03 2007 +0100
    11.2 +++ b/tools/python/xen/xm/create.py	Fri Jul 27 12:59:37 2007 -0700
    11.3 @@ -730,7 +730,7 @@ def configure_hvm(config_image, vals):
    11.4               'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
    11.5               'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
    11.6               'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
    11.7 -             'acpi', 'apic', 'usb', 'usbdevice', 'keymap' ]
    11.8 +             'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci' ]
    11.9      for a in args:
   11.10          if a in vals.__dict__ and vals.__dict__[a] is not None:
   11.11              config_image.append([a, vals.__dict__[a]])
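With these toolstack hooks in place, the device list travels from the guest
config through image.py to qemu-dm's new -pci option. A hypothetical config
line, assuming the bracketed hex format dpci.c's tokenizer expects:

    # HVM guest config file; 0000:03:00.1 is a made-up device
    pci = '[[0x0,0x3,0x0,0x1]]'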
    12.1 --- a/xen/arch/x86/acpi/boot.c	Fri Jul 27 13:47:03 2007 +0100
    12.2 +++ b/xen/arch/x86/acpi/boot.c	Fri Jul 27 12:59:37 2007 -0700
    12.3 @@ -918,5 +918,7 @@ int __init acpi_boot_init(void)
    12.4  
    12.5  	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
    12.6  
    12.7 +	acpi_dmar_init();
    12.8 +
    12.9  	return 0;
   12.10  }
    13.1 --- a/xen/arch/x86/domain.c	Fri Jul 27 13:47:03 2007 +0100
    13.2 +++ b/xen/arch/x86/domain.c	Fri Jul 27 12:59:37 2007 -0700
    13.3 @@ -43,6 +43,7 @@
    13.4  #include <asm/hvm/hvm.h>
    13.5  #include <asm/hvm/support.h>
    13.6  #include <asm/msr.h>
    13.7 +#include <asm/iommu.h>
    13.8  #ifdef CONFIG_COMPAT
    13.9  #include <compat/vcpu.h>
   13.10  #endif
   13.11 @@ -482,6 +483,9 @@ int arch_domain_create(struct domain *d)
   13.12              virt_to_page(d->shared_info), d, XENSHARE_writable);
   13.13      }
   13.14  
   13.15 +    if (iommu_found())
   13.16 +        iommu_domain_init(d);
   13.17 +
   13.18      if ( is_hvm_domain(d) )
   13.19      {
   13.20          if ( (rc = hvm_domain_initialise(d)) != 0 )
    14.1 --- a/xen/arch/x86/domctl.c	Fri Jul 27 13:47:03 2007 +0100
    14.2 +++ b/xen/arch/x86/domctl.c	Fri Jul 27 12:59:37 2007 -0700
    14.3 @@ -24,6 +24,8 @@
    14.4  #include <asm/hvm/hvm.h>
    14.5  #include <asm/hvm/support.h>
    14.6  #include <asm/processor.h>
    14.7 +#include <xen/list.h>
    14.8 +#include <asm/iommu.h>
    14.9  
   14.10  long arch_do_domctl(
   14.11      struct xen_domctl *domctl,
   14.12 @@ -427,6 +429,156 @@ long arch_do_domctl(
   14.13      }
   14.14      break;
   14.15  
   14.16 +    case XEN_DOMCTL_assign_device:
   14.17 +    {
   14.18 +        struct domain *d;
   14.19 +        struct hvm_iommu *hd;
   14.20 +        u8 bus, devfn;
   14.21 +        struct pci_dev *pdev;
   14.22 +
   14.23 +        ret = -EINVAL;
   14.24 +        if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) ) {
   14.25 +            gdprintk(XENLOG_ERR,
   14.26 +                "XEN_DOMCTL_assign_device: get_domain_by_id() failed\n"); 
   14.27 +            break;
   14.28 +        }
   14.29 +        hd = domain_hvm_iommu(d);
   14.30 +        bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   14.31 +        devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   14.32 +
   14.33 +        if (iommu_found())
   14.34 +            ret = assign_device(d, bus, devfn);
   14.35 +        else {
   14.36 +            pdev = xmalloc(struct pci_dev);
   14.37 +            pdev->bus = bus;
   14.38 +            pdev->devfn = devfn;
   14.39 +            list_add(&pdev->list, &hd->pdev_list);
   14.40 +        }
   14.41 +
   14.42 +        gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n",
   14.43 +            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   14.44 +        put_domain(d);
   14.45 +    }
   14.46 +    break;
   14.47 +
   14.48 +    case XEN_DOMCTL_irq_mapping:
   14.49 +    {
   14.50 +        struct domain *d;
   14.51 +        uint32_t machine_gsi, guest_gsi;
   14.52 +        uint32_t device, intx;
   14.53 +
   14.54 +        ret = -EINVAL;
   14.55 +        if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) ) {
   14.56 +            gdprintk(XENLOG_ERR,
   14.57 +                "XEN_DOMCTL_irq_mapping: get_domain_by_id() failed\n"); 
   14.58 +            break;
   14.59 +        }
   14.60 +        machine_gsi = domctl->u.irq_mapping.machine_irq;
   14.61 +        device = domctl->u.irq_mapping.device;
   14.62 +        intx = domctl->u.irq_mapping.intx;
   14.63 +        guest_gsi = hvm_pci_intx_gsi(device, intx);
   14.64 +
   14.65 +        d->arch.hvm_domain.irq.mirq[machine_gsi].valid = 1;
   14.66 +        d->arch.hvm_domain.irq.mirq[machine_gsi].device = device;
   14.67 +        d->arch.hvm_domain.irq.mirq[machine_gsi].intx = intx;
   14.68 +        d->arch.hvm_domain.irq.mirq[machine_gsi].guest_gsi = guest_gsi;
   14.69 +
   14.70 +        d->arch.hvm_domain.irq.girq[guest_gsi].valid = 1;
   14.71 +        d->arch.hvm_domain.irq.girq[guest_gsi].device = device;
   14.72 +        d->arch.hvm_domain.irq.girq[guest_gsi].intx = intx;
   14.73 +        d->arch.hvm_domain.irq.girq[guest_gsi].machine_gsi = machine_gsi;
   14.74 +
   14.75 +        /* Deal with gsi for legacy devices */
   14.76 +        pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
   14.77 +        gdprintk(XENLOG_INFO,
   14.78 +            "XEN_DOMCTL_irq_mapping: m_irq = %x device = %x intx = %x\n",
   14.79 +            machine_gsi, domctl->u.irq_mapping.device,
   14.80 +            domctl->u.irq_mapping.intx);
   14.81 +        ret = 0;
   14.82 +        put_domain(d);
   14.83 +    }
   14.84 +    break;
   14.85 +
   14.86 +    case XEN_DOMCTL_memory_mapping:
   14.87 +    {
   14.88 +        struct domain *d;
   14.89 +        unsigned long gfn = domctl->u.memory_mapping.first_gfn;
   14.90 +        unsigned long mfn = domctl->u.memory_mapping.first_mfn;
   14.91 +        unsigned long nr_mfns = domctl->u.memory_mapping.nr_mfns;
   14.92 +        int i;
   14.93 +
   14.94 +        ret = -EINVAL;
   14.95 +        if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
   14.96 +            break;
   14.97 +        ret = -ESRCH;
   14.98 +        if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) ) {
   14.99 +            gdprintk(XENLOG_ERR,
  14.100 +                "XEN_DOMCTL_memory_mapping: get_domain_by_id() failed\n"); 
  14.101 +            break;
  14.102 +        }
  14.103 +        if ( !is_hvm_domain(d) )
  14.104 +            goto memory_mapping_out;
  14.105 +        if ( !iomem_access_permitted(d, mfn, mfn + nr_mfns - 1) )
  14.106 +            goto memory_mapping_out;
  14.107 +        gdprintk(XENLOG_INFO,
   14.108 +            "XEN_DOMCTL_memory_mapping: gfn = %lx mfn = %lx nr_mfns = %lx\n",
  14.109 +            gfn, mfn, nr_mfns);
  14.110 +        for ( i = 0; i < nr_mfns ; i++ )
  14.111 +            set_p2m_entry(d, gfn+i, _mfn(mfn+i),
  14.112 +                __PAGE_HYPERVISOR|_PAGE_USER|_PAGE_PCD|_PAGE_PWT);
  14.113 +        ret = iomem_permit_access(d, gfn, gfn + nr_mfns - 1);
  14.114 +memory_mapping_out:
  14.115 +        put_domain(d);
  14.116 +    }
  14.117 +    break;
  14.118 +
  14.119 +    case XEN_DOMCTL_ioport_mapping:
  14.120 +    {
  14.121 +#define MAX_IOPORTS    0x10000
  14.122 +        struct domain *d;
  14.123 +        struct hvm_iommu *hd;
  14.124 +        unsigned int fgp = domctl->u.ioport_mapping.first_gport;
  14.125 +        unsigned int fmp = domctl->u.ioport_mapping.first_mport;
  14.126 +        unsigned int np = domctl->u.ioport_mapping.nr_ports;
  14.127 +        struct g2m_ioport *g2m_ioport;
  14.128 +        int found = 0;
  14.129 +
  14.130 +        ret = -EINVAL;
  14.131 +        if ((fgp > MAX_IOPORTS) || (fmp > MAX_IOPORTS) ||
  14.132 +            ((fgp + np) > MAX_IOPORTS) || ((fmp + np) > MAX_IOPORTS))
  14.133 +        {
  14.134 +            gdprintk(XENLOG_ERR,
  14.135 +                "XEN_DOMCTL_ioport_map:invalid:gport=%x mport=%x nr_ports=%x\n",
  14.136 +                fgp, fmp, np);
  14.137 +            break;
  14.138 +        }
   14.139 +        if ( np == 0 )
   14.140 +            { ret = 0; break; }
   14.141 +        ret = -ESRCH;
  14.142 +
  14.143 +        if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) ) {
  14.144 +            gdprintk(XENLOG_ERR,
  14.145 +                "XEN_DOMCTL_ioport_mapping: get_domain_by_id() failed\n"); 
  14.146 +            break;
  14.147 +        }
  14.148 +        hd = domain_hvm_iommu(d);
   14.149 +        list_for_each_entry(g2m_ioport, &hd->g2m_ioport_list, list)
   14.150 +            if ( g2m_ioport->gport == fgp ) {
   14.151 +                g2m_ioport->mport = fmp;
   14.152 +                found = 1;
   14.153 +                break;
   14.154 +            }
  14.155 +        if ( !found ) {
  14.156 +            g2m_ioport = xmalloc(struct g2m_ioport);
  14.157 +            g2m_ioport->gport = fgp;
  14.158 +            g2m_ioport->mport = fmp;
  14.159 +            list_add_tail(&g2m_ioport->list, &hd->g2m_ioport_list);
  14.160 +        }
  14.161 +        ret = ioports_permit_access(d, fgp, fgp + np - 1);
  14.162 +        put_domain(d);
  14.163 +    }
  14.164 +    break;
  14.165 +
  14.166      default:
  14.167          ret = -ENOSYS;
  14.168          break;
    15.1 --- a/xen/arch/x86/hvm/intercept.c	Fri Jul 27 13:47:03 2007 +0100
    15.2 +++ b/xen/arch/x86/hvm/intercept.c	Fri Jul 27 12:59:37 2007 -0700
    15.3 @@ -243,6 +243,9 @@ int hvm_io_intercept(ioreq_t *p, int typ
    15.4      int i;
    15.5      unsigned long addr, size;
    15.6  
    15.7 +    if (dpci_ioport_intercept(p, type))
    15.8 +        return 1;
    15.9 +
   15.10      for (i = 0; i < handler->num_slot; i++) {
   15.11          if( type != handler->hdl_list[i].type)
   15.12              continue;
    16.1 --- a/xen/arch/x86/hvm/io.c	Fri Jul 27 13:47:03 2007 +0100
    16.2 +++ b/xen/arch/x86/hvm/io.c	Fri Jul 27 12:59:37 2007 -0700
    16.3 @@ -42,6 +42,7 @@
    16.4  #include <asm/hvm/vlapic.h>
    16.5  
    16.6  #include <public/sched.h>
    16.7 +#include <xen/iocap.h>
    16.8  #include <public/hvm/ioreq.h>
    16.9  
   16.10  #if defined (__i386__)
   16.11 @@ -873,6 +874,138 @@ void hvm_io_assist(void)
   16.12      vcpu_end_shutdown_deferral(v);
   16.13  }
   16.14  
   16.15 +int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
   16.16 +{
   16.17 +    uint32_t device, intx;
   16.18 +    uint32_t link, isa_irq;
   16.19 +    struct hvm_irq *hvm_irq;
   16.20 +
   16.21 +    if ((d == dom0) || !dev_assigned(d))
   16.22 +        return 0;
   16.23 +
   16.24 +    if (d->arch.hvm_domain.irq.mirq[mirq].valid)
   16.25 +    {
   16.26 +        device = d->arch.hvm_domain.irq.mirq[mirq].device;
   16.27 +        intx = d->arch.hvm_domain.irq.mirq[mirq].intx;
   16.28 +        link = hvm_pci_intx_link(device, intx);
   16.29 +        hvm_irq = &d->arch.hvm_domain.irq;
   16.30 +        isa_irq = hvm_irq->pci_link.route[link];
   16.31 +
   16.32 +        if ( !d->arch.hvm_domain.irq.girq[isa_irq].valid )
   16.33 +        {
   16.34 +            d->arch.hvm_domain.irq.girq[isa_irq].valid = 1;
   16.35 +            d->arch.hvm_domain.irq.girq[isa_irq].device = device;
   16.36 +            d->arch.hvm_domain.irq.girq[isa_irq].intx = intx;
   16.37 +            d->arch.hvm_domain.irq.girq[isa_irq].machine_gsi = mirq;
   16.38 +        }
   16.39 +
   16.40 +        if ( !test_and_set_bit(mirq, d->arch.hvm_domain.irq.dirq_mask) )
   16.41 +        {
   16.42 +            vcpu_kick(d->vcpu[0]);
   16.43 +            return 1;
   16.44 +        }
   16.45 +        else
   16.46 +            dprintk(XENLOG_INFO, "mirq was already pending; vcpu not kicked\n");
   16.47 +    }
   16.48 +    return 0;
   16.49 +}
   16.50 +
   16.51 +void hvm_dpci_eoi(unsigned int guest_gsi)
   16.52 +{
   16.53 +    struct domain *d = current->domain;
   16.54 +    uint32_t device, intx, machine_gsi;
   16.55 +    irq_desc_t *desc;
   16.56 +
   16.57 +    if (d->arch.hvm_domain.irq.girq[guest_gsi].valid)
   16.58 +    {
   16.59 +        device = d->arch.hvm_domain.irq.girq[guest_gsi].device;
   16.60 +        intx = d->arch.hvm_domain.irq.girq[guest_gsi].intx;
   16.61 +        machine_gsi = d->arch.hvm_domain.irq.girq[guest_gsi].machine_gsi;
   16.62 +        gdprintk(XENLOG_INFO, "hvm_dpci_eoi: device %x intx %x\n", device, intx);
   16.63 +        hvm_pci_intx_deassert(d, device, intx);
   16.64 +        desc = &irq_desc[irq_to_vector(machine_gsi)];
   16.65 +        desc->handler->end(irq_to_vector(machine_gsi));
   16.66 +    }
   16.67 +}
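Together with vmx_dirq_assist() (added to vmx/intr.c further down), these two
functions implement the following interrupt round trip (a summary sketch of
the code in this patch):

    /*
     * physical IRQ fires
     *   -> hvm_do_IRQ_dpci():   set the bit in dirq_mask, kick vcpu0
     *   -> vmx_intr_assist() -> vmx_dirq_assist(): hvm_pci_intx_assert()
     *   -> guest services the device, then EOIs its virtual IOAPIC
     *   -> vioapic_update_EOI() -> hvm_dpci_eoi(): hvm_pci_intx_deassert(),
     *      and desc->handler->end() re-enables the physical line
     */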
   16.68 +
   16.69 +int dpci_ioport_intercept(ioreq_t *p, int type)
   16.70 +{
   16.71 +    struct domain *d = current->domain;
   16.72 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
   16.73 +    struct g2m_ioport *g2m_ioport;
   16.74 +    unsigned int mport;
   16.75 +    int ret = 0;
   16.76 +
   16.77 +    if (!dev_assigned(d))
   16.78 +        return 0;
   16.79 +
   16.80 +    if (ioports_access_permitted(d, p->addr, p->addr + p->size - 1)) {
   16.81 +        list_for_each_entry(g2m_ioport, &hd->g2m_ioport_list, list)
   16.82 +            if ( g2m_ioport->gport == p->addr )
   16.83 +                break;
   16.84 +
   16.85 +        gdprintk(XENLOG_INFO, "dpci_ioport_intercept: gport = %x mport = %x\n",
   16.86 +                 g2m_ioport->gport, g2m_ioport->mport);
   16.87 +
   16.88 +        mport = g2m_ioport->mport;
   16.89 +        if (p->dir == 1) {
   16.90 +            for (int i = 0; i < p->count; i++)
   16.91 +            {
   16.92 +                switch (p->size)
   16.93 +                {
   16.94 +                case 1: p->data = inb(mport); break;
   16.95 +                case 2: p->data = inw(mport); break;
   16.96 +                case 4: p->data = inl(mport); break;
   16.97 +                default: gdprintk(XENLOG_ERR, "invalid IO port read size\n"); 
   16.98 +                }
   16.99 +            }
  16.100 +        }
  16.101 +        else {
  16.102 +            for (int i = 0; i < p->count; i++)
  16.103 +            { 
  16.104 +                switch (p->size)
  16.105 +                {
  16.106 +                case 1: outb(mport, p->data); break;
  16.107 +                case 2: outw(mport, p->data); break;
  16.108 +                case 4: outl(mport, p->data); break;
  16.109 +                default: gdprintk(XENLOG_ERR, "invalid io port write size\n"); 
  16.110 +                }
  16.111 +            }
  16.112 +        }
  16.113 +        ret = 1;
  16.114 +    }
  16.115 +    return ret;
  16.116 +}
  16.117 +
  16.118 +int release_devices(struct vcpu *v)
  16.119 +{
  16.120 +    int ret = 0;
  16.121 +    struct domain *d = v->domain;
  16.122 +    struct hvm_domain *hd = &d->arch.hvm_domain;
  16.123 +    uint32_t i;
  16.124 +
  16.125 +    if (!dev_assigned(d))
  16.126 +        return ret;
  16.127 +
  16.128 +    /* unbind irq */
  16.129 +    for (i = 0; i < NR_IRQS; i++) {
  16.130 +        if (hd->irq.mirq[i].valid)
  16.131 +            ret = pirq_guest_unbind(d, i);
  16.132 +    }
  16.133 +    if (iommu_found())
  16.134 +        iommu_domain_teardown(d);
  16.135 +    else {
  16.136 +        struct pci_dev *pdev;
  16.137 +        struct hvm_iommu *iommu = &d->arch.hvm_domain.hvm_iommu;
  16.138 +
  16.139 +        list_for_each_entry(pdev, &(iommu->pdev_list), list) {
  16.140 +            list_del(&(pdev->list));
  16.141 +            xfree(pdev);
  16.142 +        }
  16.143 +    }
  16.144 +    return ret;
  16.145 +}
  16.146 +
  16.147  /*
  16.148   * Local variables:
  16.149   * mode: C
    17.1 --- a/xen/arch/x86/hvm/vioapic.c	Fri Jul 27 13:47:03 2007 +0100
    17.2 +++ b/xen/arch/x86/hvm/vioapic.c	Fri Jul 27 12:59:37 2007 -0700
    17.3 @@ -462,6 +462,13 @@ void vioapic_update_EOI(struct domain *d
    17.4      ent = &vioapic->redirtbl[gsi];
    17.5  
    17.6      ent->fields.remote_irr = 0;
    17.7 +
    17.8 +    if (dev_assigned(d)) {
    17.9 +        spin_unlock(&d->arch.hvm_domain.irq_lock);
   17.10 +        hvm_dpci_eoi(gsi);
   17.11 +        return;
   17.12 +    }
   17.13 +
   17.14      if ( (ent->fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
   17.15           !ent->fields.mask &&
   17.16           hvm_irq->gsi_assert_count[gsi] )
    18.1 --- a/xen/arch/x86/hvm/vmx/Makefile	Fri Jul 27 13:47:03 2007 +0100
    18.2 +++ b/xen/arch/x86/hvm/vmx/Makefile	Fri Jul 27 12:59:37 2007 -0700
    18.3 @@ -1,5 +1,6 @@
    18.4  subdir-$(x86_32) += x86_32
    18.5  subdir-$(x86_64) += x86_64
    18.6 +subdir-y += vtd
    18.7  
    18.8  obj-y += intr.o
    18.9  obj-y += vmcs.o
    19.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Fri Jul 27 13:47:03 2007 +0100
    19.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Fri Jul 27 12:59:37 2007 -0700
    19.3 @@ -126,6 +126,27 @@ static void update_tpr_threshold(struct 
    19.4      __vmwrite(TPR_THRESHOLD, (max_irr > tpr) ? (tpr >> 4) : (max_irr >> 4));
    19.5  }
    19.6  
    19.7 +static void vmx_dirq_assist(struct domain *d)
    19.8 +{
    19.9 +    unsigned int irq;
   19.10 +    uint32_t device;
   19.11 +    uint32_t intx;
   19.12 +    struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
   19.13 +
   19.14 +    for (irq = find_first_bit(hvm_irq->dirq_mask, NR_IRQS);
   19.15 +         irq < NR_IRQS;
   19.16 +         irq = find_next_bit(hvm_irq->dirq_mask, NR_IRQS, irq + 1))
   19.17 +    {
   19.18 +        test_and_clear_bit(irq, &hvm_irq->dirq_mask);
   19.19 +
   19.20 +        device = hvm_irq->mirq[irq].device;
   19.21 +        intx = hvm_irq->mirq[irq].intx;
   19.22 +        gdprintk(XENLOG_INFO VTDPREFIX, "vmx_dirq_assist: injecting "
   19.23 +                 "intr: device %x intx %x irq %x\n", device, intx, irq);
   19.24 +        hvm_pci_intx_assert(d, device, intx);
   19.25 +    }
   19.26 +}
   19.27 +
   19.28  asmlinkage void vmx_intr_assist(void)
   19.29  {
   19.30      int intr_vector;
   19.31 @@ -136,6 +157,9 @@ asmlinkage void vmx_intr_assist(void)
   19.32  
   19.33      pt_update_irq(v);
   19.34  
   19.35 +    if ( v->vcpu_id == 0 )
   19.36 +        vmx_dirq_assist(v->domain);
   19.37 +
   19.38      hvm_set_callback_irq_level();
   19.39  
   19.40      update_tpr_threshold(vcpu_vlapic(v));
    20.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Fri Jul 27 13:47:03 2007 +0100
    20.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri Jul 27 12:59:37 2007 -0700
    20.3 @@ -27,6 +27,7 @@
    20.4  #include <xen/domain_page.h>
    20.5  #include <xen/hypercall.h>
    20.6  #include <xen/perfc.h>
    20.7 +#include <xen/iocap.h>
    20.8  #include <asm/current.h>
    20.9  #include <asm/io.h>
   20.10  #include <asm/regs.h>
   20.11 @@ -50,6 +51,7 @@
   20.12  #include <asm/hvm/vpt.h>
   20.13  #include <public/hvm/save.h>
   20.14  #include <asm/hvm/trace.h>
   20.15 +#include <asm/iommu.h>
   20.16  
   20.17  enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
   20.18  
   20.19 @@ -98,6 +100,7 @@ static int vmx_vcpu_initialise(struct vc
   20.20  static void vmx_vcpu_destroy(struct vcpu *v)
   20.21  {
   20.22      vmx_destroy_vmcs(v);
   20.23 +    release_devices(v);
   20.24  }
   20.25  
   20.26  static int vmx_paging_enabled(struct vcpu *v)
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/xen/arch/x86/hvm/vmx/vtd/Makefile	Fri Jul 27 12:59:37 2007 -0700
    21.3 @@ -0,0 +1,3 @@
    21.4 +obj-y += intel-iommu.o
    21.5 +obj-y += dmar.o
    21.6 +obj-y += utils.o
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.c	Fri Jul 27 12:59:37 2007 -0700
    22.3 @@ -0,0 +1,493 @@
    22.4 +/*
    22.5 + * Copyright (c) 2006, Intel Corporation.
    22.6 + *
    22.7 + * This program is free software; you can redistribute it and/or modify it
    22.8 + * under the terms and conditions of the GNU General Public License,
    22.9 + * version 2, as published by the Free Software Foundation.
   22.10 + *
   22.11 + * This program is distributed in the hope it will be useful, but WITHOUT
   22.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   22.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   22.14 + * more details.
   22.15 + *
   22.16 + * You should have received a copy of the GNU General Public License along with
   22.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   22.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   22.19 + *
   22.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com>
   22.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com>
   22.22 + * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
   22.23 + */
   22.24 +
   22.25 +#include <xen/init.h>
   22.26 +#include <xen/bitmap.h>
   22.27 +#include <xen/kernel.h>
   22.28 +#include <xen/acpi.h>
   22.29 +#include <xen/mm.h>
   22.30 +#include <xen/xmalloc.h>
   22.31 +#include <asm/string.h>
   22.32 +#include "dmar.h"
   22.33 +#include "pci-direct.h"
   22.34 +#include "pci_regs.h"
   22.35 +
   22.36 +#undef PREFIX
   22.37 +#define PREFIX VTDPREFIX "ACPI DMAR:"
   22.38 +#define DEBUG
   22.39 +
   22.40 +#define MIN_SCOPE_LEN (sizeof(struct acpi_pci_path) + sizeof(struct acpi_dev_scope))
   22.41 +
   22.42 +LIST_HEAD(acpi_drhd_units);
   22.43 +LIST_HEAD(acpi_rmrr_units);
   22.44 +LIST_HEAD(acpi_atsr_units);
   22.45 +LIST_HEAD(acpi_ioapic_units);
   22.46 +
   22.47 +u8 dmar_host_address_width;
   22.48 +
   22.49 +static int __init acpi_register_drhd_unit(struct acpi_drhd_unit *drhd)
   22.50 +{
   22.51 +    /*
   22.52 +     * Add INCLUDE_ALL at the tail, so a scan of the list will find
   22.53 +     * it at the very end.
   22.54 +     */
   22.55 +    if (drhd->include_all)
   22.56 +        list_add_tail(&drhd->list, &acpi_drhd_units);
   22.57 +    else
   22.58 +        list_add(&drhd->list, &acpi_drhd_units);
   22.59 +    return 0;
   22.60 +}
   22.61 +
   22.62 +static int __init acpi_register_rmrr_unit(struct acpi_rmrr_unit *rmrr)
   22.63 +{
   22.64 +    list_add(&rmrr->list, &acpi_rmrr_units);
   22.65 +    return 0;
   22.66 +}
   22.67 +
   22.68 +static int acpi_pci_device_match(struct pci_dev *devices, int cnt,
   22.69 +                 struct pci_dev *dev)
   22.70 +{
   22.71 +    int i;
   22.72 +
   22.73 +    for (i = 0; i < cnt; i++) {
   22.74 +        if ((dev->bus == devices->bus) &&
   22.75 +            (dev->devfn == devices->devfn))
   22.76 +            return 1;
   22.77 +        devices++;
   22.78 +    }
   22.79 +    return 0;
   22.80 +}
   22.81 +
   22.82 +static int __init acpi_register_atsr_unit(struct acpi_atsr_unit *atsr)
   22.83 +{
   22.84 +    /*
   22.85 +     * Add ALL_PORTS at the tail, so a scan of the list will find
   22.86 +     * it at the very end.
   22.87 +     */
   22.88 +    if (atsr->all_ports)
   22.89 +        list_add_tail(&atsr->list, &acpi_atsr_units);
   22.90 +    else
   22.91 +        list_add(&atsr->list, &acpi_atsr_units);
   22.92 +    return 0;
   22.93 +}
   22.94 +
   22.95 +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev)
   22.96 +{
   22.97 +    struct acpi_drhd_unit *drhd;
   22.98 +    struct acpi_drhd_unit *include_all_drhd;
   22.99 +
  22.100 +    include_all_drhd = NULL;
  22.101 +    list_for_each_entry(drhd, &acpi_drhd_units, list) {
  22.102 +        if (drhd->include_all)
  22.103 +            include_all_drhd = drhd;
  22.104 +        if (acpi_pci_device_match(drhd->devices,
  22.105 +                        drhd->devices_cnt, dev))
  22.106 +        {
  22.107 +            gdprintk(XENLOG_INFO VTDPREFIX, 
  22.108 +                     "acpi_find_matched_drhd_unit: drhd->address = %lx\n",
  22.109 +                     drhd->address);
  22.110 +            return drhd;
  22.111 +        }
  22.112 +    }
  22.113 +
  22.114 +    if (include_all_drhd) {
  22.115 +        gdprintk(XENLOG_INFO VTDPREFIX, 
  22.116 +                 "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n",
  22.117 +                 include_all_drhd->address);
  22.118 +        return include_all_drhd;
  22.119 +    }
  22.120 +
  22.121 +    return NULL;
  22.122 +}
  22.123 +
  22.124 +struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev)
  22.125 +{
  22.126 +    struct acpi_rmrr_unit *rmrr;
  22.127 +
  22.128 +    list_for_each_entry(rmrr, &acpi_rmrr_units, list) {
  22.129 +        if (acpi_pci_device_match(rmrr->devices,
  22.130 +                        rmrr->devices_cnt, dev))
  22.131 +            goto out;
  22.132 +    }
  22.133 +    rmrr = NULL;
  22.134 +out:
  22.135 +    return rmrr;
  22.136 +}
  22.137 +
  22.138 +struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev)
  22.139 +{
  22.140 +    struct acpi_atsr_unit *atsru;
  22.141 +    struct acpi_atsr_unit *all_ports_atsru;
  22.142 +
  22.143 +    all_ports_atsru = NULL;
  22.144 +    list_for_each_entry(atsru, &acpi_atsr_units, list) {
  22.145 +        if (atsru->all_ports)
  22.146 +            all_ports_atsru = atsru;
  22.147 +        if (acpi_pci_device_match(atsru->devices, atsru->devices_cnt, dev))
  22.148 +            return atsru;
  22.149 +    }
  22.150 +    if (all_ports_atsru) {
  22.151 +        gdprintk(XENLOG_INFO VTDPREFIX, 
  22.152 +                 "acpi_find_matched_atsr_unit: all_ports_atsru\n");
  22.153 +        return all_ports_atsru;
  22.154 +    }
  22.155 +    return NULL;
  22.156 +}
  22.157 +
  22.158 +static int __init acpi_parse_dev_scope(void *start, void *end, int *cnt,
  22.159 +                       struct pci_dev **devices)
  22.160 +{
  22.161 +    struct acpi_dev_scope *scope;
  22.162 +    u8 bus, sub_bus, sec_bus;
  22.163 +    struct acpi_pci_path *path;
  22.164 +    struct acpi_ioapic_unit *acpi_ioapic_unit = NULL;
  22.165 +    int count, dev_count=0;
  22.166 +    struct pci_dev *pdev;
  22.167 +    u8 dev, func;
  22.168 +    u32 l;
  22.169 +    void *tmp;
  22.170 +
  22.171 +    *cnt = 0;
  22.172 +    tmp = start;
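         +    /* Pass 1: validate each device scope entry and count them. */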
  22.173 +    while (start < end) {
  22.174 +        scope = start;
  22.175 +        if (scope->length < MIN_SCOPE_LEN ||
  22.176 +            (scope->dev_type != ACPI_DEV_ENDPOINT &&
  22.177 +            scope->dev_type != ACPI_DEV_P2PBRIDGE)) {
  22.178 +            printk(KERN_WARNING PREFIX "Invalid device scope\n");
  22.179 +            return -EINVAL;
  22.180 +        }
  22.181 +        (*cnt)++;
  22.182 +        start += scope->length;
  22.183 +    }
  22.184 +
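         +    /*
         +     * Pass 2: count the devices each scope expands to; a P2P bridge
         +     * contributes every populated slot behind its secondary bus range.
         +     */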
  22.185 +    start = tmp;
  22.186 +    while (start < end) {
  22.187 +        scope = start;
  22.188 +        path = (struct acpi_pci_path *)(scope + 1);
  22.189 +        count = (scope->length - sizeof(struct acpi_dev_scope))
  22.190 +		    /sizeof(struct acpi_pci_path);
  22.191 +        bus = scope->start_bus;
  22.192 +
  22.193 +        while (--count) {
  22.194 +            bus = read_pci_config_byte(bus, path->dev,
  22.195 +                                       path->fn, PCI_SECONDARY_BUS);
  22.196 +            path++;
  22.197 +        }
  22.198 +
  22.199 +        if (scope->dev_type == ACPI_DEV_ENDPOINT) {
  22.200 +            printk(KERN_WARNING PREFIX
  22.201 +                "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
  22.202 +            dev_count++;
  22.203 +        } else if (scope->dev_type == ACPI_DEV_P2PBRIDGE) {
  22.204 +            printk(KERN_WARNING PREFIX
  22.205 +                "found bridge: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
  22.206 +
  22.207 +            sec_bus = read_pci_config_byte(bus, path->dev,
  22.208 +                                       path->fn, PCI_SECONDARY_BUS);
  22.209 +            sub_bus = read_pci_config_byte(bus, path->dev,
  22.210 +                                       path->fn, PCI_SUBORDINATE_BUS);
  22.211 +            while (sec_bus <= sub_bus) {
  22.212 +                for (dev = 0; dev < 32; dev++) {
  22.213 +                    for (func = 0; func < 8; func++) {
  22.214 +                        l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID);
  22.215 +
  22.216 +                        /* some broken boards return 0 or ~0 if a slot is empty: */
  22.217 +                        if (l == 0xffffffff || l == 0x00000000 ||
  22.218 +                            l == 0x0000ffff || l == 0xffff0000)
  22.219 +                            break;
  22.220 +                        dev_count++;
  22.221 +                    }
  22.222 +                }
  22.223 +                sec_bus++;
  22.224 +            }
  22.225 +        } else if (scope->dev_type == ACPI_DEV_IOAPIC) {
  22.226 +            printk(KERN_WARNING PREFIX
  22.227 +                "found IOAPIC: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
  22.228 +            dev_count++;
  22.229 +        } else {
  22.230 +            printk(KERN_WARNING PREFIX
  22.231 +                "found MSI HPET: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
  22.232 +            dev_count++;
  22.233 +        }
  22.234 +
  22.235 +        start += scope->length;
  22.236 +    }
  22.237 +
  22.238 +    *cnt = dev_count;
  22.239 +    *devices = xmalloc_array(struct pci_dev,  *cnt);
  22.240 +    if (!*devices)
  22.241 +        return -ENOMEM;
  22.242 +    memset(*devices, 0, sizeof(struct pci_dev) * (*cnt));
  22.243 +
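         +    /* Pass 3: walk the scopes again, filling in the pci_dev array. */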
  22.244 +    pdev = *devices;
  22.245 +    start = tmp;
  22.246 +    while (start < end) {
  22.247 +        scope = start;
  22.248 +        path = (struct acpi_pci_path *)(scope + 1);
  22.249 +        count = (scope->length - sizeof(struct acpi_dev_scope))
  22.250 +		    /sizeof(struct acpi_pci_path);
  22.251 +        bus = scope->start_bus;
  22.252 +
  22.253 +        while (--count) {
  22.254 +            bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS);
  22.255 +            path++;
  22.256 +        }
  22.257 +
  22.258 +        if (scope->dev_type == ACPI_DEV_ENDPOINT) {
  22.259 +            printk(KERN_WARNING PREFIX
  22.260 +                "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
  22.261 +
  22.262 +            pdev->bus = bus;
  22.263 +            pdev->devfn = PCI_DEVFN(path->dev, path->fn);
  22.264 +            pdev++;
  22.265 +        } else if (scope->dev_type == ACPI_DEV_P2PBRIDGE) {
  22.266 +            printk(KERN_WARNING PREFIX
  22.267 +                "found bridge: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
  22.268 +
  22.269 +            sec_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS);
  22.270 +            sub_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SUBORDINATE_BUS);
  22.271 +
  22.272 +            while (sec_bus <= sub_bus) {
  22.273 +                for (dev = 0; dev < 32; dev++) {
  22.274 +                    for (func = 0; func < 8; func++) {
  22.275 +                        l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID);
  22.276 +
  22.277 +                        /* some broken boards return 0 or ~0 if a slot is empty: */
  22.278 +                        if (l == 0xffffffff || l == 0x00000000 ||
  22.279 +                            l == 0x0000ffff || l == 0xffff0000)
  22.280 +                            break;
  22.281 +
  22.282 +                        pdev->bus = sec_bus;
  22.283 +                        pdev->devfn = PCI_DEVFN(dev, func);
  22.284 +                        pdev++;
  22.285 +                    }
  22.286 +                }
  22.287 +                sec_bus++;
  22.288 +            }
  22.289 +        } else if (scope->dev_type == ACPI_DEV_IOAPIC) {
  22.290 +            acpi_ioapic_unit = xmalloc(struct acpi_ioapic_unit);
         +            if (!acpi_ioapic_unit) {
         +                xfree(*devices);
         +                return -ENOMEM;
         +            }
  22.291 +            acpi_ioapic_unit->apic_id = scope->enum_id;
  22.292 +            acpi_ioapic_unit->ioapic.bdf.bus = bus;
  22.293 +            acpi_ioapic_unit->ioapic.bdf.dev = path->dev;
  22.294 +            acpi_ioapic_unit->ioapic.bdf.func = path->fn;
  22.295 +            list_add(&acpi_ioapic_unit->list, &acpi_ioapic_units);
  22.296 +            printk(KERN_WARNING PREFIX
  22.297 +                "found IOAPIC: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
  22.298 +        } else {
  22.299 +            printk(KERN_WARNING PREFIX
  22.300 +                "found MSI HPET: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
  22.301 +        }
  22.302 +        
  22.303 +        start += scope->length;
  22.304 +    }
  22.305 +
  22.306 +    return 0;
  22.307 +}
  22.308 +
  22.309 +static int __init
  22.310 +acpi_parse_one_drhd(struct acpi_dmar_entry_header *header)
  22.311 +{
  22.312 +    struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header;
  22.313 +    struct acpi_drhd_unit *dmaru;
  22.314 +    int ret = 0;
  22.315 +    static int include_all;
  22.316 +
  22.317 +    dmaru = xmalloc(struct acpi_drhd_unit);
  22.318 +    if (!dmaru)
  22.319 +        return -ENOMEM;
  22.320 +    memset(dmaru, 0, sizeof(struct acpi_drhd_unit));
  22.321 +
  22.322 +    dmaru->address = drhd->address;
  22.323 +    dmaru->include_all = drhd->flags & 1; /* BIT0: INCLUDE_ALL */
  22.324 +    printk(KERN_WARNING PREFIX "dmaru->address = %lx\n", dmaru->address);
  22.325 +
  22.326 +    if (!dmaru->include_all) {
  22.327 +        ret = acpi_parse_dev_scope((void *)(drhd + 1),
  22.328 +                ((void *)drhd) + header->length,
  22.329 +                &dmaru->devices_cnt, &dmaru->devices);
  22.330 +    }
  22.331 +    else {
  22.332 +        printk(KERN_WARNING PREFIX "found INCLUDE_ALL\n");
  22.333 +        /* Only allow one INCLUDE_ALL */
  22.334 +        if (include_all) {
  22.335 +            printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
  22.336 +                "device scope is allowed\n");
  22.337 +            ret = -EINVAL;
  22.338 +        }
  22.339 +        include_all = 1;
  22.340 +    }
  22.341 +
  22.342 +    if (ret)
  22.343 +        xfree(dmaru);
  22.344 +    else
  22.345 +        acpi_register_drhd_unit(dmaru);
  22.346 +    return ret;
  22.347 +}
  22.348 +
  22.349 +static int __init
  22.350 +acpi_parse_one_rmrr(struct acpi_dmar_entry_header *header)
  22.351 +{
  22.352 +    struct acpi_table_rmrr *rmrr = (struct acpi_table_rmrr *)header;
  22.353 +    struct acpi_rmrr_unit *rmrru;
  22.354 +    int ret = 0;
  22.355 +
  22.356 +    rmrru = xmalloc(struct acpi_rmrr_unit);
  22.357 +    if (!rmrru)
  22.358 +        return -ENOMEM;
  22.359 +    memset(rmrru, 0, sizeof(struct acpi_rmrr_unit));
  22.360 +
  22.361 +#ifdef VTD_DEBUG
  22.362 +    gdprintk(XENLOG_INFO VTDPREFIX,
  22.363 +        "acpi_parse_one_rmrr: base = %lx end = %lx\n",
  22.364 +        rmrr->base_address, rmrr->end_address);
  22.365 +#endif
  22.366 +
  22.367 +    rmrru->base_address = rmrr->base_address;
  22.368 +    rmrru->end_address = rmrr->end_address;
  22.369 +    ret = acpi_parse_dev_scope((void *)(rmrr + 1),
  22.370 +            ((void*)rmrr) + header->length,
  22.371 +            &rmrru->devices_cnt, &rmrru->devices);
  22.372 +
  22.373 +    if (ret || (rmrru->devices_cnt == 0))
  22.374 +        xfree(rmrru);
  22.375 +    else
  22.376 +        acpi_register_rmrr_unit(rmrru);
  22.377 +    return ret;
  22.378 +}
  22.379 +
  22.380 +static int __init
  22.381 +acpi_parse_one_atsr(struct acpi_dmar_entry_header *header)
  22.382 +{
  22.383 +    struct acpi_table_atsr *atsr = (struct acpi_table_atsr *)header;
  22.384 +    struct acpi_atsr_unit *atsru;
  22.385 +    int ret = 0;
  22.386 +    static int all_ports;
  22.387 +
  22.388 +    atsru = xmalloc(struct acpi_atsr_unit);
  22.389 +    if (!atsru)
  22.390 +        return -ENOMEM;
  22.391 +    memset(atsru, 0, sizeof(struct acpi_atsr_unit));
  22.392 +
  22.393 +    atsru->all_ports = atsr->flags & 1; /* BIT0: ALL_PORTS */
  22.394 +    if (!atsru->all_ports) {
  22.395 +        ret = acpi_parse_dev_scope((void *)(atsr + 1),
  22.396 +                ((void *)atsr) + header->length,
  22.397 +                &atsru->devices_cnt, &atsru->devices);
  22.398 +    }
  22.399 +    else {
  22.400 +        printk(KERN_WARNING PREFIX "found ALL_PORTS\n");
  22.401 +        /* Only allow one ALL_PORTS */
  22.402 +        if (all_ports) {
  22.403 +            printk(KERN_WARNING PREFIX "Only one ALL_PORTS "
  22.404 +                "device scope is allowed\n");
  22.405 +            ret = -EINVAL;
  22.406 +        }
  22.407 +        all_ports = 1;
  22.408 +    }
  22.409 +
  22.410 +    if (ret)
  22.411 +        xfree(atsr);
  22.412 +    else
  22.413 +        acpi_register_atsr_unit(atsru);
  22.414 +    return ret;
  22.415 +}
  22.416 +
  22.417 +static void __init
  22.418 +acpi_table_print_dmar_entry(struct acpi_dmar_entry_header *header)
  22.419 +{
  22.420 +    struct acpi_table_drhd *drhd;
  22.421 +    struct acpi_table_rmrr *rmrr;
  22.422 +
  22.423 +    switch (header->type) {
  22.424 +    case ACPI_DMAR_DRHD:
  22.425 +        drhd = (struct acpi_table_drhd *)header;
  22.426 +        break;
  22.427 +    case ACPI_DMAR_RMRR:
  22.428 +        rmrr = (struct acpi_table_rmrr *)header;
  22.429 +        break;
  22.430 +    }
  22.431 +}
  22.432 +
  22.433 +static int __init
  22.434 +acpi_parse_dmar(unsigned long phys_addr, unsigned long size)
  22.435 +{
  22.436 +    struct acpi_table_dmar *dmar = NULL;
  22.437 +    struct acpi_dmar_entry_header *entry_header;
  22.438 +    int ret = 0;
  22.439 +
  22.440 +    if (!phys_addr || !size)
  22.441 +        return -EINVAL;
  22.442 +
  22.443 +    dmar = (struct acpi_table_dmar *)__acpi_map_table(phys_addr, size);
  22.444 +    if (!dmar) {
  22.445 +        printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
  22.446 +        return -ENODEV;
  22.447 +    }
  22.448 +
  22.449 +    if (!dmar->haw) {
  22.450 +        printk (KERN_WARNING PREFIX "Invalid DMAR haw (zero)\n");
  22.451 +        return -EINVAL;
  22.452 +    }
  22.453 +
  22.454 +    dmar_host_address_width = dmar->haw;
  22.455 +    printk (KERN_INFO PREFIX "Host address width %d\n",
  22.456 +        dmar_host_address_width);
  22.457 +
  22.458 +    entry_header = (struct acpi_dmar_entry_header *)(dmar + 1);
  22.459 +    while (((unsigned long)entry_header) < (((unsigned long)dmar) + size)) {
  22.460 +        acpi_table_print_dmar_entry(entry_header);
  22.461 +
  22.462 +        switch (entry_header->type) {
  22.463 +        case ACPI_DMAR_DRHD:
  22.464 +            printk (KERN_INFO PREFIX "found ACPI_DMAR_DRHD\n");
  22.465 +            ret = acpi_parse_one_drhd(entry_header);
  22.466 +            break;
  22.467 +        case ACPI_DMAR_RMRR:
  22.468 +            printk (KERN_INFO PREFIX "found ACPI_DMAR_RMRR\n");
  22.469 +            ret = acpi_parse_one_rmrr(entry_header);
  22.470 +            break;
  22.471 +        case ACPI_DMAR_ATSR:
  22.472 +            printk (KERN_INFO PREFIX "found ACPI_DMAR_ATSR\n");
  22.473 +            ret = acpi_parse_one_atsr(entry_header);
  22.474 +            break;
  22.475 +        default:
  22.476 +            printk(KERN_WARNING PREFIX "Unknown DMAR structure type\n");
  22.477 +            ret = -EINVAL;
  22.478 +            break;
  22.479 +        }
  22.480 +        if (ret)
  22.481 +            break;
  22.482 +
  22.483 +        entry_header = ((void *)entry_header + entry_header->length);
  22.484 +    }
  22.485 +    return ret;
  22.486 +}
  22.487 +
  22.488 +int acpi_dmar_init(void)
  22.489 +{
  22.490 +    acpi_table_parse(ACPI_DMAR, acpi_parse_dmar);
  22.491 +    if (list_empty(&acpi_drhd_units)) {
  22.492 +        printk(KERN_ERR PREFIX "No DMAR devices found\n");
  22.493 +        return -ENODEV;
  22.494 +    }
  22.495 +    return 0;
  22.496 +}
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.h	Fri Jul 27 12:59:37 2007 -0700
    23.3 @@ -0,0 +1,90 @@
    23.4 +/*
    23.5 + * Copyright (c) 2006, Intel Corporation.
    23.6 + *
    23.7 + * This program is free software; you can redistribute it and/or modify it
    23.8 + * under the terms and conditions of the GNU General Public License,
    23.9 + * version 2, as published by the Free Software Foundation.
   23.10 + *
   23.11 + * This program is distributed in the hope it will be useful, but WITHOUT
   23.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   23.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   23.14 + * more details.
   23.15 + *
   23.16 + * You should have received a copy of the GNU General Public License along with
   23.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   23.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   23.19 + *
   23.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com>
   23.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com>
   23.22 + */
   23.23 +
   23.24 +#ifndef _DMAR_H_
   23.25 +#define _DMAR_H_
   23.26 +
   23.27 +#include <xen/list.h>
   23.28 +#include <asm/iommu.h>
   23.29 +
   23.30 +extern u8 dmar_host_address_width;
   23.31 +
   23.32 +struct acpi_drhd_unit {
   23.33 +    struct list_head list;
   23.34 +    unsigned long    address; /* register base address of the unit */
   23.35 +    struct    pci_dev *devices; /* target devices */
   23.36 +    int    devices_cnt;
   23.37 +    u8    include_all:1;
   23.38 +    struct iommu *iommu;
   23.39 +};
   23.40 +
   23.41 +struct acpi_rmrr_unit {
   23.42 +    struct list_head list;
   23.43 +    unsigned long base_address;
   23.44 +    unsigned long end_address;
   23.45 +    struct pci_dev *devices; /* target devices */
   23.46 +    int    devices_cnt;
   23.47 +    u8    allow_all:1;
   23.48 +};
   23.49 +
   23.50 +struct acpi_atsr_unit {
   23.51 +    struct list_head list;
   23.52 +    struct    pci_dev *devices; /* target devices */
   23.53 +    int    devices_cnt;
   23.54 +    u8    all_ports:1;
   23.55 +};
   23.56 +
   23.57 +#define for_each_iommu(domain, iommu) \
   23.58 +    list_for_each_entry(iommu, \
   23.59 +        &(domain->arch.hvm_domain.hvm_iommu.iommu_list), list)
   23.60 +
   23.61 +#define for_each_pdev(domain, pdev) \
   23.62 +    list_for_each_entry(pdev, \
   23.63 +         &(domain->arch.hvm_domain.hvm_iommu.pdev_list), list)
   23.64 +
   23.65 +#define for_each_drhd_unit(drhd) \
   23.66 +    list_for_each_entry(drhd, &acpi_drhd_units, list)
   23.67 +#define for_each_rmrr_device(rmrr, pdev) \
   23.68 +    list_for_each_entry(rmrr, &acpi_rmrr_units, list) { \
   23.69 +        int _i; \
   23.70 +        for (_i = 0; _i < rmrr->devices_cnt; _i++) { \
   23.71 +            pdev = &(rmrr->devices[_i]);
   23.72 +#define end_for_each_rmrr_device(rmrr, pdev) \
   23.73 +        } \
   23.74 +    }
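         +/*
         + * Usage sketch (hypothetical caller -- setup_rmrr_map() is not a real
         + * helper, it only illustrates how the open/close pair nests):
         + *
         + *   for_each_rmrr_device(rmrr, pdev)
         + *       setup_rmrr_map(d, rmrr->base_address, rmrr->end_address, pdev);
         + *   end_for_each_rmrr_device(rmrr, pdev)
         + */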
   23.75 +
   23.76 +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev);
   23.77 +struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev);
   23.78 +
   23.79 +/* This one is for interrupt remapping */
   23.80 +struct acpi_ioapic_unit {
   23.81 +    struct list_head list;
   23.82 +    int apic_id;
   23.83 +    union {
   23.84 +        u16 info;
   23.85 +        struct {
   23.86 +            u16 bus: 8,
   23.87 +                dev: 5,
   23.88 +                func: 3;
   23.89 +        }bdf;
   23.90 +    }ioapic;
   23.91 +};
   23.92 +
   23.93 +#endif // _DMAR_H_
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c	Fri Jul 27 12:59:37 2007 -0700
    24.3 @@ -0,0 +1,1928 @@
    24.4 +/*
    24.5 + * Copyright (c) 2006, Intel Corporation.
    24.6 + *
    24.7 + * This program is free software; you can redistribute it and/or modify it
    24.8 + * under the terms and conditions of the GNU General Public License,
    24.9 + * version 2, as published by the Free Software Foundation.
   24.10 + *
   24.11 + * This program is distributed in the hope it will be useful, but WITHOUT
   24.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   24.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   24.14 + * more details.
   24.15 + *
   24.16 + * You should have received a copy of the GNU General Public License along with
   24.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   24.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   24.19 + *
   24.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com>
   24.21 + * Copyright (C) Shaohua Li <shaohua.li@intel.com>
   24.22 + * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
   24.23 + */
   24.24 +
   24.25 +#include <xen/init.h>
   24.26 +#include <xen/irq.h>
   24.27 +#include <xen/spinlock.h>
   24.28 +#include <xen/sched.h>
   24.29 +#include <xen/xmalloc.h>
   24.30 +#include <xen/domain_page.h>
   24.31 +#include <asm/delay.h>
   24.32 +#include <asm/string.h>
   24.33 +#include <asm/iommu.h>
   24.34 +#include <asm/hvm/vmx/intel-iommu.h>
   24.35 +#include "dmar.h"
   24.36 +#include "pci-direct.h"
   24.37 +#include "pci_regs.h"
   24.38 +#include "msi.h"
   24.39 +
   24.40 +extern void print_iommu_regs(struct acpi_drhd_unit *drhd);
   24.41 +extern void print_vtd_entries(struct domain *d, int bus, int devfn,
   24.42 +                       unsigned long gmfn);
   24.43 +
   24.44 +#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
   24.45 +
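         +/* Wrap-safe comparison of jiffies values, as in Linux's time_after(). */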
   24.46 +#define time_after(a,b)         \
   24.47 +        (typecheck(unsigned long, a) && \
   24.48 +         typecheck(unsigned long, b) && \
   24.49 +         ((long)(b) - (long)(a) < 0))
   24.50 +
   24.51 +unsigned int x86_clflush_size;
   24.52 +void clflush_cache_range(void *adr, int size)
   24.53 +{
   24.54 +    int i;
   24.55 +    for (i = 0; i < size; i += x86_clflush_size)
   24.56 +        clflush(adr + i);
   24.57 +}
   24.58 +
   24.59 +static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
   24.60 +{
   24.61 +    if (!ecap_coherent(iommu->ecap))
   24.62 +        clflush_cache_range(addr, size);
   24.63 +}
   24.64 +
   24.65 +#define iommu_flush_cache_entry(iommu, addr) \
   24.66 +       __iommu_flush_cache(iommu, addr, 8)
   24.67 +#define iommu_flush_cache_page(iommu, addr) \
   24.68 +       __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
   24.69 +
   24.70 +int nr_iommus;
   24.71 +/* context entry handling */
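         +/*
         + * The root table has one entry per PCI bus; a present entry points at
         + * a 256-entry context table indexed by devfn.  Context tables are
         + * allocated lazily, the first time a device on that bus is mapped.
         + */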
   24.72 +static struct context_entry * device_to_context_entry(struct iommu *iommu,
   24.73 +        u8 bus, u8 devfn)
   24.74 +{
   24.75 +    struct root_entry *root;
   24.76 +    struct context_entry *context;
   24.77 +    unsigned long phy_addr;
   24.78 +    unsigned long flags;
   24.79 +
   24.80 +    spin_lock_irqsave(&iommu->lock, flags);
   24.81 +    root = &iommu->root_entry[bus];
   24.82 +    if (!root_present(*root)) {
   24.83 +        phy_addr = (unsigned long) alloc_xenheap_page();
   24.84 +        if (!phy_addr) {
   24.85 +            spin_unlock_irqrestore(&iommu->lock, flags);
   24.86 +            return NULL;
   24.87 +        }
   24.88 +        memset((void *) phy_addr, 0, PAGE_SIZE);
   24.89 +        iommu_flush_cache_page(iommu, (void *)phy_addr);
   24.90 +        phy_addr = virt_to_maddr((void *)phy_addr);
   24.91 +        set_root_value(*root, phy_addr);
   24.92 +        set_root_present(*root);
   24.93 +        iommu_flush_cache_entry(iommu, root);
   24.94 +    }
   24.95 +    phy_addr = (unsigned long) get_context_addr(*root);
   24.96 +    context = (struct context_entry *)maddr_to_virt(phy_addr);
   24.97 +    spin_unlock_irqrestore(&iommu->lock, flags);
   24.98 +    return &context[devfn];
   24.99 +}
  24.100 +
  24.101 +static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
  24.102 +{
  24.103 +    struct root_entry *root;
  24.104 +    struct context_entry *context;
  24.105 +    unsigned long phy_addr;
  24.106 +    int ret;
  24.107 +    unsigned long flags;
  24.108 +
  24.109 +    spin_lock_irqsave(&iommu->lock, flags);
  24.110 +    root = &iommu->root_entry[bus];
  24.111 +    if (!root_present(*root)) {
  24.112 +        ret = 0;
  24.113 +        goto out;
  24.114 +    }
  24.115 +    phy_addr = get_context_addr(*root);
  24.116 +    context = (struct context_entry *)maddr_to_virt(phy_addr);
  24.117 +    ret = context_present(context[devfn]);
  24.118 +out:
  24.119 +    spin_unlock_irqrestore(&iommu->lock, flags);
  24.120 +    return ret;
  24.121 +}
  24.122 +
  24.123 +/* page table handling */
  24.124 +#define LEVEL_STRIDE        (9)
  24.125 +#define LEVEL_MASK        ((1 << LEVEL_STRIDE) - 1)
  24.126 +#define agaw_to_level(val) ((val) + 2)
  24.127 +#define agaw_to_width(val) (30 + (val) * LEVEL_STRIDE)
  24.128 +#define width_to_agaw(w)  (((w) - 30)/LEVEL_STRIDE)
  24.129 +#define level_to_offset_bits(l) (12 + ((l) - 1) * LEVEL_STRIDE)
  24.130 +#define address_level_offset(addr, level) \
  24.131 +    (((addr) >> level_to_offset_bits(level)) & LEVEL_MASK)
  24.132 +#define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
  24.133 +#define level_size(l) (((u64)1) << level_to_offset_bits(l))
  24.134 +#define align_to_level(addr, l) (((addr) + level_size(l) - 1) & level_mask(l))
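         +/*
         + * Worked example: agaw 1 means agaw_to_level(1) == 3 page-table levels
         + * and agaw_to_width(1) == 39 address bits; each level decodes
         + * LEVEL_STRIDE (9) bits, i.e. 512 entries per table page.
         + */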
  24.135 +static struct dma_pte * addr_to_dma_pte(struct domain *domain, u64 addr)
  24.136 +{
  24.137 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  24.138 +    struct acpi_drhd_unit *drhd;
  24.139 +    struct iommu *iommu;
  24.140 +    int addr_width = agaw_to_width(hd->agaw);
  24.141 +    struct dma_pte *parent, *pte = NULL, *pgd;
  24.142 +    int level = agaw_to_level(hd->agaw);
  24.143 +    int offset;
  24.144 +    unsigned long flags;
  24.145 +
  24.146 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  24.147 +    iommu = drhd->iommu;
  24.148 +
  24.149 +    addr &= (((u64)1) << addr_width) - 1;
  24.150 +    spin_lock_irqsave(&hd->mapping_lock, flags);
  24.151 +    if (!hd->pgd) {
  24.152 +        pgd = (struct dma_pte *)alloc_xenheap_page();
  24.153 +        if (!pgd) {
  24.154 +            spin_unlock_irqrestore(&hd->mapping_lock, flags);
  24.155 +            return NULL;
  24.156 +        }
  24.157 +        memset((u8*)pgd, 0, PAGE_SIZE);
  24.158 +        hd->pgd = pgd; /* mapping_lock is held: no one can race us here */
  24.162 +    }
  24.163 +    parent = hd->pgd;
  24.164 +    while (level > 0) {
  24.165 +        u8 *tmp;
  24.166 +        offset = address_level_offset(addr, level);
  24.167 +        pte = &parent[offset];
  24.168 +        if (level == 1)
  24.169 +            break;
  24.170 +        if (dma_pte_addr(*pte) == 0) {
  24.171 +            tmp = alloc_xenheap_page();
  24.172 +            if (tmp == NULL) {
  24.173 +                gdprintk(XENLOG_ERR VTDPREFIX,
  24.174 +                    "addr_to_dma_pte: tmp == NULL\n");
  24.175 +                spin_unlock_irqrestore(&hd->mapping_lock, flags);
  24.176 +                return NULL;
  24.177 +            }
  24.178 +            memset(tmp, 0, PAGE_SIZE);
  24.179 +            iommu_flush_cache_page(iommu, tmp);
  24.183 +            if (dma_pte_addr(*pte) == 0) {
  24.184 +                dma_set_pte_addr(*pte,
  24.185 +                    virt_to_maddr(tmp));
  24.186 +                /*
  24.187 +                 * high level table always sets r/w, last level
  24.188 +                 * page table control read/write
  24.189 +                 */
  24.190 +                dma_set_pte_readable(*pte);
  24.191 +                dma_set_pte_writable(*pte);
  24.192 +                iommu_flush_cache_entry(iommu, pte);
  24.193 +            } else /* somebody is fast */
  24.194 +                free_xenheap_page(tmp);
  24.195 +        }
  24.196 +        parent = maddr_to_virt(dma_pte_addr(*pte));
  24.197 +        level--;
  24.198 +    }
  24.199 +    spin_unlock_irqrestore(&hd->mapping_lock, flags);
  24.200 +    return pte;
  24.201 +}
  24.202 +
  24.203 +/* return address's pte at specific level */
  24.204 +static struct dma_pte *dma_addr_level_pte(struct domain *domain, u64 addr,
  24.205 +        int level)
  24.206 +{
  24.207 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  24.208 +    struct dma_pte *parent, *pte = NULL;
  24.209 +    int total = agaw_to_level(hd->agaw);
  24.210 +    int offset;
  24.211 +
  24.212 +    parent = hd->pgd;
  24.213 +    while (level <= total) {
  24.214 +        offset = address_level_offset(addr, total);
  24.215 +        pte = &parent[offset];
  24.216 +        if (level == total)
  24.217 +            return pte;
  24.218 +
  24.219 +        if (dma_pte_addr(*pte) == 0)
  24.220 +            break;
  24.221 +        parent = maddr_to_virt(dma_pte_addr(*pte));
  24.222 +        total--;
  24.223 +    }
  24.224 +    return NULL;
  24.225 +}
  24.226 +
  24.227 +static void iommu_flush_write_buffer(struct iommu *iommu)
  24.228 +{
  24.229 +	u32 val;
  24.230 +	unsigned long flag;
  24.231 +	unsigned long start_time;
  24.232 +
  24.233 +	if (!cap_rwbf(iommu->cap))
  24.234 +		return;
  24.235 +	val = iommu->gcmd | DMA_GCMD_WBF;
  24.236 +
  24.237 +	spin_lock_irqsave(&iommu->register_lock, flag);
  24.238 +	dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
  24.239 +
  24.240 +	/* Make sure hardware complete it */
  24.241 +	start_time = jiffies;
  24.242 +	while (1) {
  24.243 +		val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
  24.244 +		if (!(val & DMA_GSTS_WBFS))
  24.245 +			break;
  24.246 +		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
  24.247 +			panic("DMAR hardware is malfunctioning, please disable IOMMU\n");
  24.248 +		cpu_relax();
  24.249 +	}
  24.250 +	spin_unlock_irqrestore(&iommu->register_lock, flag);
  24.251 +}
  24.252 +
  24.253 +/* return value determine if we need a write buffer flush */
  24.254 +static int __iommu_flush_context(struct iommu *iommu,
  24.255 +	u16 did, u16 source_id, u8 function_mask, u64 type,
  24.256 +	int non_present_entry_flush)
  24.257 +{
  24.258 +	u64 val = 0;
  24.259 +	unsigned long flag;
  24.260 +	unsigned long start_time;
  24.261 +
  24.262 +	/*
  24.263 +	 * In the non-present entry flush case: if the hardware does not
  24.264 +	 * cache non-present entries, there is nothing to do; if it does,
  24.265 +	 * we flush the entries of domain 0 (that domain id is used to
  24.266 +	 * cache any non-present entries).
  24.267 +	 */
  24.268 +	if (non_present_entry_flush) {
  24.269 +		if (!cap_caching_mode(iommu->cap))
  24.270 +			return 1;
  24.271 +		else
  24.272 +			did = 0;
  24.273 +	}
  24.274 +
  24.275 +        /* use register invalidation */
  24.276 +        switch (type)
  24.277 +        {
  24.278 +            case DMA_CCMD_GLOBAL_INVL:
  24.279 +                val = DMA_CCMD_GLOBAL_INVL;
  24.280 +                break;
  24.281 +            case DMA_CCMD_DOMAIN_INVL:
  24.282 +                val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
  24.283 +                break;
  24.284 +            case DMA_CCMD_DEVICE_INVL:
  24.285 +                val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
  24.286 +                  |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
  24.287 +                break;
  24.288 +            default:
  24.289 +                BUG();
  24.290 +        }
  24.291 +        val |= DMA_CCMD_ICC;
  24.292 +
  24.293 +        spin_lock_irqsave(&iommu->register_lock, flag);
  24.294 +        dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
  24.295 +
  24.296 +        /* Make sure hardware complete it */
  24.297 +        start_time = jiffies;
  24.298 +        while (1) {
  24.299 +            val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
  24.300 +            if (!(val & DMA_CCMD_ICC))
  24.301 +                break;
  24.302 +            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
  24.303 +                panic("DMAR hardware is malfunctioning, please disable IOMMU\n");
  24.304 +            cpu_relax();
  24.305 +        }
  24.306 +        spin_unlock_irqrestore(&iommu->register_lock, flag);
  24.307 +	/* flushing context entries implicitly flushes the write buffer */
  24.308 +	return 0;
  24.309 +}
  24.310 +
  24.311 +static int inline iommu_flush_context_global(struct iommu *iommu,
  24.312 +	int non_present_entry_flush)
  24.313 +{
  24.314 +	return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
  24.315 +		non_present_entry_flush);
  24.316 +}
  24.317 +
  24.318 +static int inline iommu_flush_context_domain(struct iommu *iommu, u16 did,
  24.319 +	int non_present_entry_flush)
  24.320 +{
  24.321 +	return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
  24.322 +		non_present_entry_flush);
  24.323 +}
  24.324 +
  24.325 +static int inline iommu_flush_context_device(struct iommu *iommu,
  24.326 +	u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
  24.327 +{
  24.328 +	return __iommu_flush_context(iommu, did, source_id, function_mask,
  24.329 +		DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
  24.330 +}
  24.331 +
  24.332 +/* return value determine if we need a write buffer flush */
  24.333 +static int __iommu_flush_iotlb(struct iommu *iommu, u16 did,
  24.334 +	u64 addr, unsigned int size_order, u64 type,
  24.335 +	int non_present_entry_flush)
  24.336 +{
  24.337 +	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
  24.338 +	u64 val = 0, val_iva = 0;
  24.339 +	unsigned long flag;
  24.340 +	unsigned long start_time;
  24.341 +
  24.342 +	/*
  24.343 +	 * In the non-present entry flush case: if the hardware does not
  24.344 +	 * cache non-present entries, there is nothing to do; if it does,
  24.345 +	 * we flush the entries of domain 0 (that domain id is used to
  24.346 +	 * cache any non-present entries).
  24.347 +	 */
  24.348 +	if (non_present_entry_flush) {
  24.349 +		if (!cap_caching_mode(iommu->cap))
  24.350 +			return 1;
  24.351 +		else
  24.352 +			did = 0;
  24.353 +	}
  24.354 +
  24.355 +        /* use register invalidation */
  24.356 +        switch (type) {
  24.357 +            case DMA_TLB_GLOBAL_FLUSH:
  24.358 +                /* global flush doesn't need set IVA_REG */
  24.359 +                val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
  24.360 +                break;
  24.361 +            case DMA_TLB_DSI_FLUSH:
  24.362 +                val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
  24.363 +                break;
  24.364 +            case DMA_TLB_PSI_FLUSH:
  24.365 +                val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
  24.366 +                /* Note: always flush non-leaf currently */
  24.367 +                val_iva = size_order | addr;
  24.368 +                break;
  24.369 +            default:
  24.370 +                BUG();
  24.371 +        }
  24.372 +        /* Note: set drain read/write */
  24.373 +#if 0
  24.374 +        /*
  24.375 +         * This is probably to be super secure.. Looks like we can
  24.376 +         * ignore it without any impact.
  24.377 +         */
  24.378 +        if (cap_read_drain(iommu->cap))
  24.379 +            val |= DMA_TLB_READ_DRAIN;
  24.380 +#endif
  24.381 +        if (cap_write_drain(iommu->cap))
  24.382 +            val |= DMA_TLB_WRITE_DRAIN;
  24.383 +
  24.384 +        spin_lock_irqsave(&iommu->register_lock, flag);
  24.385 +        /* Note: Only uses first TLB reg currently */
  24.386 +        if (val_iva)
  24.387 +            dmar_writeq(iommu->reg, tlb_offset, val_iva);
  24.388 +        dmar_writeq(iommu->reg, tlb_offset + 8, val);
  24.389 +
  24.390 +        /* Make sure hardware complete it */
  24.391 +        start_time = jiffies;
  24.392 +        while (1) {
  24.393 +            val = dmar_readq(iommu->reg, tlb_offset + 8);
  24.394 +            if (!(val & DMA_TLB_IVT))
  24.395 +                break;
  24.396 +            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
  24.397 +                panic("DMAR hardware is malfunctioning, please disable IOMMU\n");
  24.398 +            cpu_relax();
  24.399 +        }
  24.400 +        spin_unlock_irqrestore(&iommu->register_lock, flag);
  24.401 +
  24.402 +        /* check IOTLB invalidation granularity */
  24.403 +        if (DMA_TLB_IAIG(val) == 0)
  24.404 +            printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
  24.405 +        if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
  24.406 +            printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
  24.407 +              (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
  24.408 +	/* flushing the IOTLB implicitly flushes the write buffer */
  24.409 +	return 0;
  24.410 +}
  24.411 +
  24.412 +static int inline iommu_flush_iotlb_global(struct iommu *iommu,
  24.413 +	int non_present_entry_flush)
  24.414 +{
  24.415 +	return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
  24.416 +		non_present_entry_flush);
  24.417 +}
  24.418 +
  24.419 +static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
  24.420 +	int non_present_entry_flush)
  24.421 +{
  24.422 +	return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
  24.423 +		non_present_entry_flush);
  24.424 +}
  24.425 +
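         +/*
         + * get_alignment() returns how many low-order bits the range
         + * [base, base + size) spans: e.g. base pfn 0x1000 with size 4
         + * yields 2, i.e. a naturally aligned 4-page region.
         + */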
  24.426 +static int inline get_alignment(u64 base, unsigned int size)
  24.427 +{
  24.428 +	int t = 0;
  24.429 +	u64 end;
  24.430 +
  24.431 +	end = base + size - 1;
  24.432 +	while (base != end) {
  24.433 +		t++;
  24.434 +		base >>= 1;
  24.435 +		end >>= 1;
  24.436 +	}
  24.437 +	return t;
  24.438 +}
  24.439 +
  24.440 +static int inline iommu_flush_iotlb_psi(struct iommu *iommu, u16 did,
  24.441 +	u64 addr, unsigned int pages, int non_present_entry_flush)
  24.442 +{
  24.443 +	unsigned int align;
  24.444 +
  24.445 +	BUG_ON(addr & (~PAGE_MASK_4K));
  24.446 +	BUG_ON(pages == 0);
  24.447 +
  24.448 +	/* Fallback to domain selective flush if no PSI support */
  24.449 +	if (!cap_pgsel_inv(iommu->cap))
  24.450 +		return iommu_flush_iotlb_dsi(iommu, did,
  24.451 +			non_present_entry_flush);
  24.452 +
  24.453 +	/*
  24.454 +	 * PSI requires page size is 2 ^ x, and the base address is naturally
  24.455 +	 * aligned to the size
  24.456 +	 */
  24.457 +	align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
  24.458 +	/* Fallback to domain selective flush if size is too big */
  24.459 +	if (align > cap_max_amask_val(iommu->cap))
  24.460 +		return iommu_flush_iotlb_dsi(iommu, did,
  24.461 +			non_present_entry_flush);
  24.462 +
  24.463 +	addr >>= PAGE_SHIFT_4K + align;
  24.464 +	addr <<= PAGE_SHIFT_4K + align;
  24.465 +
  24.466 +	return __iommu_flush_iotlb(iommu, did, addr, align,
  24.467 +		DMA_TLB_PSI_FLUSH, non_present_entry_flush);
  24.468 +}
  24.469 +
  24.470 +void flush_all(void)
  24.471 +{
  24.472 +    struct acpi_drhd_unit *drhd;
  24.473 +    struct iommu *iommu;
  24.474 +    int i = 0;
  24.475 +
  24.476 +    wbinvd();
  24.477 +    for_each_drhd_unit(drhd) {
  24.478 +        iommu = drhd->iommu;
  24.479 +        iommu_flush_context_global(iommu, 0);
  24.480 +        iommu_flush_iotlb_global(iommu, 0);
  24.481 +        i++;
  24.482 +    }
  24.483 +}
  24.484 +
  24.485 +/* clear one page's page table */
  24.486 +static void dma_pte_clear_one(struct domain *domain, u64 addr)
  24.487 +{
  24.488 +    struct acpi_drhd_unit *drhd;
  24.489 +    struct iommu *iommu;
  24.490 +    struct dma_pte *pte = NULL;
  24.491 +
  24.492 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  24.493 +
  24.494 +    /* get last level pte */
  24.495 +    pte = dma_addr_level_pte(domain, addr, 1);
  24.496 +
  24.497 +    if (pte) {
  24.498 +        dma_clear_pte(*pte);
  24.499 +        iommu_flush_cache_entry(drhd->iommu, pte);
  24.500 +
  24.501 +        for_each_drhd_unit(drhd) {
  24.502 +            iommu = drhd->iommu;
  24.503 +            if (cap_caching_mode(iommu->cap))
  24.504 +            {
  24.505 +                iommu_flush_iotlb_psi(iommu, domain->domain_id, addr, 1, 0);
  24.506 +            }
  24.507 +            else if (cap_rwbf(iommu->cap))
  24.508 +                iommu_flush_write_buffer(iommu);
  24.509 +        }
  24.510 +    }
  24.511 +}
  24.512 +
  24.513 +/* clear last level pte, a tlb flush should be followed */
  24.514 +static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
  24.515 +{
  24.516 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  24.517 +    int addr_width = agaw_to_width(hd->agaw);
  24.518 +
  24.519 +    start &= (((u64)1) << addr_width) - 1;
  24.520 +    end &= (((u64)1) << addr_width) - 1;
  24.521 +    /* in case it's partial page */
  24.522 +    start = PAGE_ALIGN_4K(start);
  24.523 +    end &= PAGE_MASK_4K;
  24.524 +
  24.525 +    /* we don't need lock here, nobody else touches the iova range */
  24.526 +    while (start < end) {
  24.527 +        dma_pte_clear_one(domain, start);
  24.528 +        start += PAGE_SIZE_4K;
  24.529 +    }
  24.530 +}
  24.531 +
  24.532 +/* free page table pages. last level pte should already be cleared */
  24.533 +// static void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
  24.534 +void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
  24.535 +{
  24.536 +    struct acpi_drhd_unit *drhd;
  24.537 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  24.538 +    struct iommu *iommu;
  24.539 +    int addr_width = agaw_to_width(hd->agaw);
  24.540 +    struct dma_pte *pte;
  24.541 +    int total = agaw_to_level(hd->agaw);
  24.542 +    int level;
  24.543 +    u32 tmp;
  24.544 +
  24.545 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  24.546 +    iommu = drhd->iommu;
  24.547 +
  24.548 +    start &= (((u64)1) << addr_width) - 1;
  24.549 +    end &= (((u64)1) << addr_width) - 1;
  24.550 +
  24.551 +    /* we don't need lock here, nobody else touches the iova range */
  24.552 +    level = 2;
  24.553 +    while (level <= total) {
  24.554 +        tmp = align_to_level(start, level);
  24.555 +        if (tmp >= end || (tmp + level_size(level) > end))
  24.556 +            return;
  24.557 +
  24.558 +        while (tmp < end) {
  24.559 +            pte = dma_addr_level_pte(domain, tmp, level);
  24.560 +            if (pte) {
  24.561 +                free_xenheap_page((void *) maddr_to_virt(dma_pte_addr(*pte)));
  24.562 +                dma_clear_pte(*pte);
  24.563 +                iommu_flush_cache_entry(iommu, pte);
  24.564 +            }
  24.565 +            tmp += level_size(level);
  24.566 +        }
  24.567 +        level++;
  24.568 +    }
  24.569 +    /* free pgd */
  24.570 +    if (start == 0 && end == ((((u64)1) << addr_width) - 1)) {
  24.571 +        free_xenheap_page((void *)hd->pgd);
  24.572 +        hd->pgd = NULL;
  24.573 +    }
  24.574 +}
  24.575 +
  24.576 +/* iommu handling */
  24.577 +static int iommu_set_root_entry(struct iommu *iommu)
  24.578 +{
  24.579 +    void *addr;
  24.580 +    u32 cmd, sts;
  24.581 +    struct root_entry *root;
  24.582 +    unsigned long flags;
  24.583 +
  24.584 +    if (iommu == NULL) {
  24.585 +        gdprintk(XENLOG_ERR VTDPREFIX,
  24.586 +            "iommu_set_root_entry: iommu == NULL\n");
         +        return -EINVAL;
         +    }
  24.587 +
  24.588 +    spin_lock_irqsave(&iommu->lock, flags);
  24.589 +    if (!iommu->root_entry) {
  24.590 +        spin_unlock_irqrestore(&iommu->lock, flags);
  24.591 +        root = (struct root_entry *)alloc_xenheap_page();
  24.592 +        if (!root)
  24.593 +            return -ENOMEM;
  24.594 +        memset((u8*)root, 0, PAGE_SIZE);
  24.595 +        iommu_flush_cache_page(iommu, root);
  24.596 +        spin_lock_irqsave(&iommu->lock, flags);
  24.600 +
  24.601 +        if (!iommu->root_entry)
  24.602 +            iommu->root_entry = root;
  24.603 +        else /* somebody is fast */
  24.604 +            free_xenheap_page((void *)root);
  24.605 +    }
  24.606 +    spin_unlock_irqrestore(&iommu->lock, flags);
  24.607 +
  24.608 +    addr = iommu->root_entry;
  24.609 +    spin_lock_irqsave(&iommu->register_lock, flags);
  24.610 +    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
  24.611 +    cmd = iommu->gcmd | DMA_GCMD_SRTP;
  24.612 +    dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
  24.613 +
  24.614 +    /* Make sure hardware complete it */
  24.615 +    while (1) {
  24.616 +        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
  24.617 +        if (sts & DMA_GSTS_RTPS)
  24.618 +            break;
  24.619 +        cpu_relax();
  24.620 +    }
  24.621 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.622 +
  24.623 +    return 0;
  24.624 +}
  24.625 +
  24.626 +static int iommu_enable_translation(struct iommu *iommu)
  24.627 +{
  24.628 +    u32 sts;
  24.629 +    unsigned long flags;
  24.630 +
  24.631 +    dprintk(XENLOG_INFO VTDPREFIX,
  24.632 +        "iommu_enable_translation: enabling vt-d translation\n");
  24.633 +    spin_lock_irqsave(&iommu->register_lock, flags);
  24.634 +    iommu->gcmd |= DMA_GCMD_TE;
  24.635 +    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
  24.636 +    /* Make sure hardware complete it */
  24.637 +    while (1) {
  24.638 +        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
  24.639 +        if (sts & DMA_GSTS_TES) {
  24.640 +            break;
  24.641 +        }
  24.642 +        cpu_relax();
  24.643 +    }
  24.644 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.645 +    return 0;
  24.646 +}
  24.647 +
  24.648 +int iommu_disable_translation(struct iommu *iommu)
  24.649 +{
  24.650 +    u32 sts;
  24.651 +    unsigned long flags;
  24.652 +
  24.653 +    spin_lock_irqsave(&iommu->register_lock, flags);
  24.654 +    iommu->gcmd &= ~ DMA_GCMD_TE;
  24.655 +    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
  24.656 +
  24.657 +    /* Make sure hardware complete it */
  24.658 +    while(1) {
  24.659 +        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
  24.660 +        if (!(sts & DMA_GSTS_TES))
  24.661 +                break;
  24.662 +        cpu_relax();
  24.663 +    }
  24.664 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.665 +    return 0;
  24.666 +}
  24.667 +
  24.668 +static struct iommu *vector_to_iommu[NR_VECTORS];
  24.669 +static int iommu_page_fault_do_one(struct iommu *iommu, int type,
  24.670 +        u8 fault_reason, u16 source_id, u32 addr)
  24.671 +{
  24.672 +    dprintk(XENLOG_WARNING VTDPREFIX,
  24.673 +        "iommu_page_fault:%s: DEVICE %x:%x.%x addr %x REASON %x\n",
  24.674 +        (type ? "DMA Read" : "DMA Write"),
  24.675 +        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
  24.676 +        PCI_FUNC(source_id & 0xFF), addr, fault_reason);
  24.677 +
  24.678 +    print_vtd_entries(current->domain, (source_id >> 8),(source_id & 0xff),
  24.679 +                      (addr >> PAGE_SHIFT)); 
  24.680 +    return 0;
  24.681 +}
  24.682 +
  24.683 +#define PRIMARY_FAULT_REG_LEN (16)
  24.684 +static void iommu_page_fault(int vector, void *dev_id,
  24.685 +        struct cpu_user_regs *regs)
  24.686 +{
  24.687 +    struct iommu *iommu = dev_id;
  24.688 +    int reg, fault_index;
  24.689 +    u32 fault_status;
  24.690 +    unsigned long flags;
  24.691 +
  24.692 +    dprintk(XENLOG_WARNING VTDPREFIX,
  24.693 +        "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
  24.694 +
  24.695 +    spin_lock_irqsave(&iommu->register_lock, flags);
  24.696 +    fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
  24.697 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.698 +
  24.699 +    /* FIXME: ignore advanced fault log */
  24.700 +    if (!(fault_status & DMA_FSTS_PPF))
  24.701 +        return;
  24.702 +    fault_index = dma_fsts_fault_record_index(fault_status);
  24.703 +    reg = cap_fault_reg_offset(iommu->cap);
  24.704 +    while (1) {
  24.705 +        u8 fault_reason;
  24.706 +        u16 source_id;
  24.707 +        u32 guest_addr;
  24.708 +        int type;
  24.709 +        u32 data;
  24.710 +
  24.711 +        /* highest 32 bits */
  24.712 +        spin_lock_irqsave(&iommu->register_lock, flags);
  24.713 +        data = dmar_readl(iommu->reg, reg +
  24.714 +                fault_index * PRIMARY_FAULT_REG_LEN + 12);
  24.715 +        if (!(data & DMA_FRCD_F)) {
  24.716 +            spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.717 +            break;
  24.718 +        }
  24.719 +
  24.720 +        fault_reason = dma_frcd_fault_reason(data);
  24.721 +        type = dma_frcd_type(data);
  24.722 +
  24.723 +        data = dmar_readl(iommu->reg, reg +
  24.724 +                fault_index * PRIMARY_FAULT_REG_LEN + 8);
  24.725 +        source_id = dma_frcd_source_id(data);
  24.726 +
  24.727 +        guest_addr = dmar_readq(iommu->reg, reg +
  24.728 +                fault_index * PRIMARY_FAULT_REG_LEN);
  24.729 +        guest_addr = dma_frcd_page_addr(guest_addr);
  24.730 +        /* clear the fault */
  24.731 +        dmar_writel(iommu->reg, reg +
  24.732 +            fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
  24.733 +        spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.734 +
  24.735 +        iommu_page_fault_do_one(iommu, type, fault_reason,
  24.736 +                source_id, guest_addr);
  24.737 +
  24.738 +        fault_index++;
  24.739 +        if (fault_index >= cap_num_fault_regs(iommu->cap))
  24.740 +            fault_index = 0;
  24.741 +    }
  24.742 +    /* clear primary fault overflow */
  24.743 +    if (fault_status & DMA_FSTS_PFO) {
  24.744 +        spin_lock_irqsave(&iommu->register_lock, flags);
  24.745 +        dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
  24.746 +        spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.747 +    }
  24.748 +    return;
  24.749 +}
  24.750 +
  24.751 +static void dma_msi_unmask(unsigned int vector)
  24.752 +{
  24.753 +    struct iommu *iommu = vector_to_iommu[vector];
  24.754 +    unsigned long flags;
  24.755 +
  24.756 +    /* unmask it */
  24.757 +    spin_lock_irqsave(&iommu->register_lock, flags);
  24.758 +    dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
  24.759 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.760 +}
  24.761 +
  24.762 +static void dma_msi_mask(unsigned int vector)
  24.763 +{
  24.764 +    unsigned long flags;
  24.765 +    struct iommu *iommu = vector_to_iommu[vector];
  24.766 +
  24.767 +    /* mask it */
  24.768 +    spin_lock_irqsave(&iommu->register_lock, flags);
  24.769 +    dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
  24.770 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.771 +}
  24.772 +
  24.773 +static unsigned int dma_msi_startup(unsigned int vector)
  24.774 +{
  24.775 +    dma_msi_unmask(vector);
  24.776 +    return 0;
  24.777 +}
  24.778 +
  24.779 +static void dma_msi_end(unsigned int vector)
  24.780 +{
  24.781 +    dma_msi_unmask(vector);
  24.782 +    ack_APIC_irq();
  24.783 +}
  24.784 +
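          +/*
          + * Program the fault-event MSI data register: bits 0-7 carry the
          + * vector, bit 14 is the level-assert bit; delivery-mode and
          + * trigger bits stay zero for fixed, edge-triggered delivery.
          + */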
  24.785 +static void dma_msi_data_init(struct iommu *iommu, int vector)
  24.786 +{
  24.787 +    u32 msi_data = 0;
  24.788 +    unsigned long flags;
  24.789 +
  24.790 +    /* Fixed, edge, assert mode. Follow MSI setting */
  24.791 +    msi_data |= vector & 0xff;
  24.792 +    msi_data |= 1 << 14;
  24.793 +
  24.794 +    spin_lock_irqsave(&iommu->register_lock, flags);
  24.795 +    dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
  24.796 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.797 +}
  24.798 +
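          +/*
          + * Program the fault-event MSI address registers: 0xFEE in bits
          + * 31:20 selects the interrupt address window, and the target
          + * CPU's physical APIC ID goes into bits 19:12
          + * (MSI_TARGET_CPU_SHIFT).
          + */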
  24.799 +static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
  24.800 +{
  24.801 +    u64 msi_address;
  24.802 +    unsigned long flags;
  24.803 +
  24.804 +    /* Physical, dedicated cpu. Follow MSI setting */
  24.805 +    msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
  24.806 +    msi_address |= MSI_PHYSICAL_MODE << 2;
  24.807 +    msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
  24.808 +    msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
  24.809 +
  24.810 +    spin_lock_irqsave(&iommu->register_lock, flags);
  24.811 +    dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
  24.812 +    dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
  24.813 +    spin_unlock_irqrestore(&iommu->register_lock, flags);
  24.814 +}
  24.815 +
  24.816 +static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
  24.817 +{
  24.818 +    struct iommu *iommu = vector_to_iommu[vector];
  24.819 +    dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
  24.820 +}
  24.821 +
  24.822 +static struct hw_interrupt_type dma_msi_type = {
  24.823 +    .typename = "DMA_MSI",
  24.824 +    .startup = dma_msi_startup,
  24.825 +    .shutdown = dma_msi_mask,
  24.826 +    .enable = dma_msi_unmask,
  24.827 +    .disable = dma_msi_mask,
  24.828 +    .ack = dma_msi_mask,
  24.829 +    .end = dma_msi_end,
  24.830 +    .set_affinity = dma_msi_set_affinity,
  24.831 +};
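          +
          +/*
          + * The VT-d fault event is an MSI and never passes through the
          + * IO-APIC; the mask/unmask hooks above merely toggle DMA_FECTL_IM
          + * in the fault event control register.
          + */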
  24.832 +
  24.833 +int iommu_set_interrupt(struct iommu *iommu)
  24.834 +{
  24.835 +    int vector, ret;
  24.836 +    unsigned long flags;
  24.837 +
   24.838 +    vector = assign_irq_vector(AUTO_ASSIGN);
   24.839 +    if (!vector) {
   24.840 +        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
   24.841 +        return -EINVAL;
   24.842 +    }
   24.843 +    vector_to_iommu[vector] = iommu;
   24.844 +
   24.845 +    /* VT-d fault is a MSI, make irq == vector */
   24.846 +    irq_vector[vector] = vector;
   24.847 +    vector_irq[vector] = vector;
  24.849 +
  24.850 +    spin_lock_irqsave(&irq_desc[vector].lock, flags);
  24.851 +    irq_desc[vector].handler = &dma_msi_type;
  24.852 +    spin_unlock_irqrestore(&irq_desc[vector].lock, flags);
  24.853 +    set_intr_gate(vector, interrupt[vector]);
  24.854 +    ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
  24.855 +    if (ret)
  24.856 +        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
  24.857 +    return vector;
  24.858 +}
  24.859 +
  24.860 +struct iommu *iommu_alloc(void *hw_data)
  24.861 +{
  24.862 +    struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
  24.863 +    struct iommu *iommu;
  24.864 +    
   24.865 +    if (nr_iommus >= MAX_IOMMUS) {
   24.866 +        gdprintk(XENLOG_ERR VTDPREFIX,
   24.867 +            "IOMMU: nr_iommus %d >= MAX_IOMMUS\n", nr_iommus);
  24.868 +        return NULL;
  24.869 +    }
  24.870 +        
  24.871 +    iommu = xmalloc(struct iommu);
  24.872 +    if (!iommu)
  24.873 +        return NULL;
  24.874 +    memset(iommu, 0, sizeof(struct iommu));
  24.875 +
  24.876 +    set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
  24.877 +    iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
  24.878 +    dprintk(XENLOG_INFO VTDPREFIX,
  24.879 +        "iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
  24.880 +        iommu->reg, drhd->address);
  24.881 +    nr_iommus++;
  24.882 +
  24.883 +    if (!iommu->reg) {
   24.884 +        printk(KERN_ERR VTDPREFIX "IOMMU: can't map the register region\n");
  24.885 +        goto error;
  24.886 +    }
  24.887 +
  24.888 +    iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
  24.889 +    iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
  24.890 +
  24.891 +    spin_lock_init(&iommu->lock);
  24.892 +    spin_lock_init(&iommu->register_lock);
  24.893 +
  24.894 +    drhd->iommu = iommu;
  24.895 +    return iommu;
  24.896 +error:
  24.897 +    xfree(iommu);
  24.898 +    return NULL;
  24.899 +}
  24.900 +
  24.901 +static void free_iommu(struct iommu *iommu)
  24.902 +{
  24.903 +    if (!iommu)
  24.904 +        return;
  24.905 +    if (iommu->root_entry)
  24.906 +        free_xenheap_page((void *)iommu->root_entry);
  24.907 +    if (iommu->reg)
  24.908 +        iounmap(iommu->reg);
  24.909 +    free_irq(iommu->vector);
  24.910 +    xfree(iommu);
  24.911 +}
  24.912 +
  24.913 +#define guestwidth_to_adjustwidth(gaw) ({ \
  24.914 +    int agaw; \
  24.915 +    int r = (gaw - 12) % 9; \
  24.916 +    if (r == 0) \
  24.917 +        agaw = gaw; \
  24.918 +    else \
  24.919 +        agaw = gaw + 9 - r; \
  24.920 +    if (agaw > 64) \
  24.921 +        agaw = 64; \
  24.922 +    agaw; })
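          +/*
          + * Each VT-d page-table level resolves 9 address bits above the
          + * 12-bit page offset, so e.g. a 48-bit guest width maps directly
          + * (48 = 12 + 4*9) while a 40-bit width rounds up to 48.
          + */
          +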
  24.923 +int iommu_domain_init(struct domain *domain)
  24.924 +{
  24.925 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  24.926 +    struct iommu *iommu = NULL;
  24.927 +    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
  24.928 +    int adjust_width, agaw;
  24.929 +    unsigned long sagaw;
  24.930 +    struct acpi_drhd_unit *drhd;
  24.931 +
  24.932 +    if (list_empty(&acpi_drhd_units))
  24.933 +        return 0;
  24.934 +    spin_lock_init(&hd->mapping_lock);
  24.935 +    spin_lock_init(&hd->iommu_list_lock);
  24.936 +    INIT_LIST_HEAD(&hd->pdev_list);
  24.937 +    INIT_LIST_HEAD(&hd->g2m_ioport_list);
  24.938 +
   24.939 +    for_each_drhd_unit(drhd) {
   24.940 +        if (drhd->iommu)
   24.941 +            iommu = drhd->iommu;
   24.942 +        else
   24.943 +            iommu = iommu_alloc(drhd);
          +        if (!iommu)
          +            return -ENOMEM;
   24.944 +    }
  24.945 +
  24.946 +    /* calculate AGAW */
  24.947 +    if (guest_width > cap_mgaw(iommu->cap))
  24.948 +        guest_width = cap_mgaw(iommu->cap);
  24.949 +    adjust_width = guestwidth_to_adjustwidth(guest_width);
  24.950 +    agaw = width_to_agaw(adjust_width);
  24.951 +    /* FIXME: hardware doesn't support it, choose a bigger one? */
  24.952 +    sagaw = cap_sagaw(iommu->cap);
  24.953 +    if (!test_bit(agaw, &sagaw)) {
  24.954 +        gdprintk(XENLOG_ERR VTDPREFIX,
  24.955 +            "IOMMU: hardware doesn't support the agaw\n");
  24.956 +        agaw = find_next_bit(&sagaw, 5, agaw);
  24.957 +        if (agaw >= 5)
  24.958 +            return -ENODEV;
  24.959 +    }
  24.960 +    hd->agaw = agaw;
  24.961 +    return 0;
  24.962 +}
  24.963 +
  24.964 +static int domain_context_mapping_one(
  24.965 +    struct domain *domain,
  24.966 +    struct iommu *iommu,
  24.967 +    u8 bus, u8 devfn)
  24.968 +{
  24.969 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  24.970 +    struct context_entry *context;
  24.971 +    unsigned long flags;
  24.972 +    int ret = 0;
  24.973 +
  24.974 +    context = device_to_context_entry(iommu, bus, devfn);
  24.975 +    if (!context) {
  24.976 +        gdprintk(XENLOG_INFO VTDPREFIX,
  24.977 +            "domain_context_mapping_one:context == NULL:bdf = %x:%x:%x \n",
  24.978 +            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
  24.979 +        return -ENOMEM;
  24.980 +    }
  24.981 +    spin_lock_irqsave(&iommu->lock, flags);
  24.982 +    if (context_present(*context)) {
  24.983 +        spin_unlock_irqrestore(&iommu->lock, flags);
  24.984 +        gdprintk(XENLOG_INFO VTDPREFIX,
  24.985 +                 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
  24.986 +                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
  24.987 +        return 0;
  24.988 +    }
  24.989 +
  24.990 +#ifdef VTD_DEBUG
  24.991 +    dprintk(XENLOG_INFO VTDPREFIX,
  24.992 +        "context_mapping_one_1-%x:%x:%x-*context = %lx %lx\n",
  24.993 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn), context->hi, context->lo);
  24.994 +#endif
  24.995 +
   24.996 +    /*
   24.997 +     * domain_id 0 is not valid on Intel's IOMMU; domain ids are
   24.998 +     * therefore treated as 1-based, as required by the hardware.
   24.999 +     */
 24.1000 +    context_set_domain_id(*context, domain->domain_id);
 24.1001 +    context_set_address_width(*context, hd->agaw);
 24.1002 +
 24.1003 +    if (ecap_pass_thru(iommu->ecap))
 24.1004 +        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
 24.1005 +    else {
 24.1006 +        context_set_address_root(*context, virt_to_maddr(hd->pgd));
 24.1007 +        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
 24.1008 +    }
 24.1009 +
 24.1010 +    context_set_fault_enable(*context);
 24.1011 +    context_set_present(*context);
 24.1012 +    iommu_flush_cache_entry(iommu, context);
 24.1013 +
 24.1014 +#ifdef VTD_DEBUG
 24.1015 +    dprintk(XENLOG_INFO VTDPREFIX,
 24.1016 +        "context_mapping_one_2-%x:%x:%x-*context=%lx %lx hd->pgd = %p\n",
 24.1017 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
 24.1018 +        context->hi, context->lo, hd->pgd);
 24.1019 +#endif
 24.1020 +
 24.1021 +    if (iommu_flush_context_device(iommu, domain->domain_id,
 24.1022 +                    (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
 24.1023 +        iommu_flush_write_buffer(iommu);
 24.1024 +    else
 24.1025 +        iommu_flush_iotlb_dsi(iommu, domain->domain_id, 0);
 24.1026 +    spin_unlock_irqrestore(&iommu->lock, flags);
 24.1027 +    return ret;
 24.1028 +}
 24.1029 +
 24.1030 +static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
 24.1031 +{
 24.1032 +    u8 id;
 24.1033 +    int ttl = 48;
 24.1034 +
 24.1035 +    while (ttl--) {
 24.1036 +        pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
 24.1037 +        if (pos < 0x40)
 24.1038 +            break;
 24.1039 +        pos &= ~3;
 24.1040 +        id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
 24.1041 +                 pos + PCI_CAP_LIST_ID);
 24.1042 +
 24.1043 +        if (id == 0xff)
 24.1044 +            break;
 24.1045 +        if (id == cap)
 24.1046 +            return pos;
 24.1047 +        pos += PCI_CAP_LIST_NEXT;
 24.1048 +    }
 24.1049 +    return 0;
 24.1050 +}
 24.1051 +
 24.1052 +#define PCI_BASE_CLASS_BRIDGE    0x06
 24.1053 +#define PCI_CLASS_BRIDGE_PCI     0x0604
 24.1054 +
 24.1055 +#define DEV_TYPE_PCIe_ENDPOINT   1
 24.1056 +#define DEV_TYPE_PCI_BRIDGE      2
 24.1057 +#define DEV_TYPE_PCI             3
 24.1058 +
 24.1059 +int pdev_type(struct pci_dev *dev)
 24.1060 +{
 24.1061 +    u16 class_device;
 24.1062 +    u16 status;
 24.1063 +
 24.1064 +    class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
 24.1065 +                 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
 24.1066 +    if (class_device == PCI_CLASS_BRIDGE_PCI)
 24.1067 +        return DEV_TYPE_PCI_BRIDGE;
 24.1068 +
 24.1069 +    status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
 24.1070 +                 PCI_FUNC(dev->devfn), PCI_STATUS);
 24.1071 +
 24.1072 +    if (!(status & PCI_STATUS_CAP_LIST))
 24.1073 +        return DEV_TYPE_PCI;
 24.1074 +
 24.1075 +    if (__pci_find_next_cap(dev->bus, dev->devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP))
 24.1076 +        return DEV_TYPE_PCIe_ENDPOINT;
 24.1077 +
 24.1078 +    return DEV_TYPE_PCI;
 24.1079 +}
 24.1080 +
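          +/*
          + * bus2bridge[] records, per secondary bus number, the bridge
          + * (bus/devfn) that produces that bus.  Requests from devices
          + * behind a conventional PCI bridge carry the bridge's source-id,
          + * so context mapping for such devices goes through this table.
          + */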
 24.1081 +#define MAX_BUSES 256
 24.1082 +struct pci_dev bus2bridge[MAX_BUSES];
 24.1083 +
 24.1084 +static int domain_context_mapping(
 24.1085 +    struct domain *domain,
 24.1086 +    struct iommu *iommu,
 24.1087 +    struct pci_dev *pdev)
 24.1088 +{
 24.1089 +    int ret = 0;
 24.1090 +    int dev, func, sec_bus, sub_bus;
 24.1091 +    u32 type;
 24.1092 +
 24.1093 +    type = pdev_type(pdev);
 24.1094 +    if (type == DEV_TYPE_PCI_BRIDGE) {
 24.1095 +        sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
 24.1096 +                      PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
 24.1097 +
 24.1098 +        if (bus2bridge[sec_bus].bus == 0) {
 24.1099 +            bus2bridge[sec_bus].bus   =  pdev->bus;
 24.1100 +            bus2bridge[sec_bus].devfn =  pdev->devfn;
 24.1101 +        }
 24.1102 +
 24.1103 +        sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
 24.1104 +                      PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
 24.1105 +
 24.1106 +        if (sec_bus != sub_bus) {
 24.1107 +            dprintk(XENLOG_INFO VTDPREFIX,
 24.1108 +                "context_mapping: nested PCI bridge not supported\n");
 24.1109 +            dprintk(XENLOG_INFO VTDPREFIX,
 24.1110 +                "    bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
 24.1111 +                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
 24.1112 +                sec_bus, sub_bus);
 24.1113 +        }
 24.1114 +    }
 24.1115 +
 24.1116 +    if (type == DEV_TYPE_PCIe_ENDPOINT) {
 24.1117 +        gdprintk(XENLOG_INFO VTDPREFIX,
 24.1118 +            "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
 24.1119 +            pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 24.1120 +        ret = domain_context_mapping_one(domain, iommu,
 24.1121 +                  (u8)(pdev->bus), (u8) (pdev->devfn));
 24.1122 +    }
 24.1123 +
 24.1124 +    /* PCI devices */
 24.1125 +    if (type == DEV_TYPE_PCI) {
 24.1126 +        gdprintk(XENLOG_INFO VTDPREFIX,
 24.1127 +            "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
 24.1128 +            pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 24.1129 +
 24.1130 +        if (pdev->bus == 0)
 24.1131 +            ret = domain_context_mapping_one(domain, iommu,
 24.1132 +                      (u8)(pdev->bus), (u8) (pdev->devfn));
 24.1133 +        else {
  24.1134 +            if (bus2bridge[pdev->bus].bus == 0)
  24.1135 +                gdprintk(XENLOG_ERR VTDPREFIX,
  24.1136 +                    "domain_context_mapping:bus2bridge[pdev->bus].bus == 0\n");
 24.1137 +
 24.1138 +            ret = domain_context_mapping_one(domain, iommu,
 24.1139 +                      (u8)(bus2bridge[pdev->bus].bus),
 24.1140 +                      (u8)(bus2bridge[pdev->bus].devfn));
 24.1141 +
 24.1142 +            /* now map everything behind the PCI bridge */
 24.1143 +            for (dev = 0; dev < 32; dev++) {
 24.1144 +                for (func = 0; func < 8; func++) {
 24.1145 +                    ret = domain_context_mapping_one(domain, iommu,
 24.1146 +                              pdev->bus, (u8)PCI_DEVFN(dev, func));
 24.1147 +                    if (ret)
 24.1148 +                        return ret;
 24.1149 +                }
 24.1150 +            }
 24.1151 +        }
 24.1152 +    }
 24.1153 +    return ret;
 24.1154 +}
 24.1155 +
 24.1156 +static int domain_context_unmap_one(
 24.1157 +    struct domain *domain,
 24.1158 +    struct iommu *iommu,
 24.1159 +    u8 bus, u8 devfn)
 24.1160 +{
 24.1161 +    struct context_entry *context;
 24.1162 +    unsigned long flags;
 24.1163 +
 24.1164 +    context = device_to_context_entry(iommu, bus, devfn);
 24.1165 +    if (!context) {
 24.1166 +        gdprintk(XENLOG_INFO VTDPREFIX,
 24.1167 +            "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
 24.1168 +            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 24.1169 +        return -ENOMEM;
 24.1170 +    }
 24.1171 +    spin_lock_irqsave(&iommu->lock, flags);
 24.1172 +    if (!context_present(*context)) {
 24.1173 +        spin_unlock_irqrestore(&iommu->lock, flags);
 24.1174 +        gdprintk(XENLOG_INFO VTDPREFIX,
 24.1175 +            "domain_context_unmap_one-%x:%x:%x- context NOT present:return\n",
 24.1176 +            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 24.1177 +        return 0;
 24.1178 +    }
 24.1179 +    gdprintk(XENLOG_INFO VTDPREFIX,
 24.1180 +        "domain_context_unmap_one_1:bdf = %x:%x:%x\n",
 24.1181 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 24.1182 +
 24.1183 +    context_clear_present(*context);
 24.1184 +    context_clear_entry(*context);
 24.1185 +    iommu_flush_cache_entry(iommu, context);
 24.1186 +    iommu_flush_context_global(iommu, 0);
 24.1187 +    iommu_flush_iotlb_global(iommu, 0);
 24.1188 +    spin_unlock_irqrestore(&iommu->lock, flags);
 24.1189 +
 24.1190 +    gdprintk(XENLOG_INFO VTDPREFIX,
 24.1191 +        "domain_context_unmap_one_2:bdf = %x:%x:%x\n",
 24.1192 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 24.1193 +
 24.1194 +    return 0;
 24.1195 +}
 24.1196 +
 24.1197 +static int domain_context_unmap(
 24.1198 +    struct domain *domain,
 24.1199 +    struct iommu *iommu,
 24.1200 +    struct pci_dev *pdev)
 24.1201 +{
 24.1202 +    int ret = 0;
 24.1203 +    int dev, func, sec_bus, sub_bus;
 24.1204 +    u32 type;
 24.1205 +
 24.1206 +    type = pdev_type(pdev);
 24.1207 +    if (type == DEV_TYPE_PCI_BRIDGE) {
 24.1208 +        sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
 24.1209 +                      PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
 24.1210 +        sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
 24.1211 +                      PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
 24.1212 +
 24.1213 +        gdprintk(XENLOG_INFO VTDPREFIX,
 24.1214 +            "domain_context_unmap:BRIDGE:%x:%x:%x sec_bus=%x sub_bus=%x\n",
 24.1215 +            pdev->bus, PCI_SLOT(pdev->devfn),
 24.1216 +            PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
 24.1217 +    }
 24.1218 +
 24.1219 +    if (type == DEV_TYPE_PCIe_ENDPOINT) {
 24.1220 +        gdprintk(XENLOG_INFO VTDPREFIX,
 24.1221 +                 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
 24.1222 +                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 24.1223 +        ret = domain_context_unmap_one(domain, iommu,
 24.1224 +                  (u8)(pdev->bus), (u8) (pdev->devfn));
 24.1225 +    }
 24.1226 +
 24.1227 +    /* PCI devices */
 24.1228 +    if (type == DEV_TYPE_PCI) {
 24.1229 +        gdprintk(XENLOG_INFO VTDPREFIX,
 24.1230 +                 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
 24.1231 +                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 24.1232 +        if (pdev->bus == 0)
 24.1233 +            ret = domain_context_unmap_one(domain, iommu,
 24.1234 +                      (u8)(pdev->bus), (u8) (pdev->devfn));
 24.1235 +        else {
  24.1236 +            if (bus2bridge[pdev->bus].bus == 0)
  24.1237 +                gdprintk(XENLOG_INFO VTDPREFIX,
  24.1238 +                         "domain_context_unmap:bus2bridge[pdev->bus].bus == 0\n");
 24.1239 +
 24.1240 +            ret = domain_context_unmap_one(domain, iommu,
 24.1241 +                      (u8)(bus2bridge[pdev->bus].bus),
 24.1242 +                      (u8)(bus2bridge[pdev->bus].devfn));
 24.1243 +
  24.1244 +            /* now unmap everything behind the PCI bridge */
 24.1245 +            for (dev = 0; dev < 32; dev++) {
 24.1246 +                for (func = 0; func < 8; func++) {
 24.1247 +                    ret = domain_context_unmap_one(domain, iommu,
 24.1248 +                              pdev->bus, (u8)PCI_DEVFN(dev, func));
 24.1249 +                    if (ret)
 24.1250 +                        return ret;
 24.1251 +                }
 24.1252 +            }
 24.1253 +        }
 24.1254 +    }
 24.1255 +    return ret;
 24.1256 +}
 24.1257 +
 24.1258 +void reassign_device_ownership(
 24.1259 +    struct domain *source,
 24.1260 +    struct domain *target,
 24.1261 +    u8 bus, u8 devfn)
 24.1262 +{
 24.1263 +    struct hvm_iommu *source_hd = domain_hvm_iommu(source);
 24.1264 +    struct hvm_iommu *target_hd = domain_hvm_iommu(target);
 24.1265 +    struct pci_dev *pdev;
 24.1266 +    struct acpi_drhd_unit *drhd;
 24.1267 +    struct iommu *iommu;
 24.1268 +    int status;
 24.1269 +    unsigned long flags;
 24.1270 +
 24.1271 +    gdprintk(XENLOG_ERR VTDPREFIX,
 24.1272 +        "reassign_device-%x:%x:%x- source = %d target = %d\n",
 24.1273 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
 24.1274 +        source->domain_id, target->domain_id);
 24.1275 +
 24.1276 +    for_each_pdev(source, pdev) {
 24.1277 +        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
 24.1278 +            continue;
 24.1279 +
 24.1280 +        pdev->bus = bus;
 24.1281 +        pdev->devfn = devfn;
 24.1282 +        drhd = acpi_find_matched_drhd_unit(pdev);
 24.1283 +        iommu = drhd->iommu;
 24.1284 +        domain_context_unmap(source, iommu, pdev);
 24.1285 +
  24.1286 +        /*
  24.1287 +         * Move the pci device from the source domain to the target
          +         * domain.  Take the inner lock without irqsave so that
          +         * `flags' (saved by the outer lock) is not clobbered and
          +         * interrupts are re-enabled on the final unlock.
  24.1288 +         */
  24.1289 +        spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
  24.1290 +        spin_lock(&target_hd->iommu_list_lock);
  24.1291 +        list_move(&pdev->list, &target_hd->pdev_list);
  24.1292 +        spin_unlock(&target_hd->iommu_list_lock);
  24.1293 +        spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
 24.1294 +
 24.1295 +        status = domain_context_mapping(target, iommu, pdev);
 24.1296 +        if (status != 0)
 24.1297 +            gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
 24.1298 +
 24.1299 +        /*
 24.1300 +         * We are done.
 24.1301 +         */
 24.1302 +        break;
 24.1303 +    }
 24.1304 +}
 24.1305 +
 24.1306 +void return_devices_to_dom0(struct domain *d)
 24.1307 +{
 24.1308 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
 24.1309 +    struct pci_dev *pdev;
 24.1310 +
 24.1311 +    while (!list_empty(&hd->pdev_list)) {
 24.1312 +        pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
 24.1313 +        dprintk(XENLOG_INFO VTDPREFIX,
 24.1314 +            "return_devices_to_dom0: bdf = %x:%x:%x\n",
 24.1315 +            pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 24.1316 +        reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
 24.1317 +    }
 24.1318 +
 24.1319 +#ifdef VTD_DEBUG
 24.1320 +    for_each_pdev(dom0, pdev) {
 24.1321 +        dprintk(XENLOG_INFO VTDPREFIX,
 24.1322 +            "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
 24.1323 +            dom0->domain_id, pdev->bus,
 24.1324 +            PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 24.1325 +    }
 24.1326 +#endif
 24.1327 +}
 24.1328 +
  24.1329 +void iommu_domain_teardown(struct domain *d)
  24.1330 +{
  24.1331 +    if (list_empty(&acpi_drhd_units))
  24.1332 +        return;
  24.1333 +
  24.1334 +#if CONFIG_PAGING_LEVELS == 3
  24.1335 +    {
  24.1336 +        struct hvm_iommu *hd  = domain_hvm_iommu(d);
  24.1337 +        int level = agaw_to_level(hd->agaw);
  24.1338 +        struct dma_pte *pgd = NULL;
  24.1339 +
  24.1340 +        switch (level)
  24.1341 +        {
  24.1342 +            case VTD_PAGE_TABLE_LEVEL_3:
  24.1343 +                if ( hd->pgd )
  24.1344 +                    free_xenheap_page((void *)hd->pgd);
  24.1345 +                break;
  24.1346 +            case VTD_PAGE_TABLE_LEVEL_4:
  24.1347 +                if ( hd->pgd )
  24.1348 +                {
  24.1349 +                    pgd = hd->pgd;
  24.1350 +                    if ( pgd[0].val != 0 )
  24.1351 +                        free_xenheap_page((void*)maddr_to_virt(
  24.1352 +                            dma_pte_addr(pgd[0])));
  24.1353 +                }
  24.1354 +                break;
  24.1355 +            default:
  24.1356 +                gdprintk(XENLOG_ERR VTDPREFIX,
  24.1357 +                    "Unsupported p2m table sharing level!\n");
  24.1358 +                break;
  24.1359 +        }
  24.1360 +    }
  24.1361 +#endif
  24.1362 +    return_devices_to_dom0(d);
  24.1363 +}
 24.1364 +
 24.1365 +static int domain_context_mapped(struct domain *domain, struct pci_dev *pdev)
 24.1366 +{
 24.1367 +    struct acpi_drhd_unit *drhd;
 24.1368 +    struct iommu *iommu;
 24.1369 +    int ret;
 24.1370 +
 24.1371 +    for_each_drhd_unit(drhd) {
 24.1372 +        iommu = drhd->iommu;
 24.1373 +        ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
 24.1374 +        if (ret)
 24.1375 +            return ret;
 24.1376 +    }
 24.1377 +    return 0;
 24.1378 +}
 24.1379 +
 24.1380 +int iommu_map_page(struct domain *d, paddr_t gfn, paddr_t mfn)
 24.1381 +{
 24.1382 +    struct acpi_drhd_unit *drhd;
 24.1383 +    struct iommu *iommu;
 24.1384 +    struct dma_pte *pte = NULL;
 24.1385 +
 24.1386 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 24.1387 +    iommu = drhd->iommu;
 24.1388 +
  24.1389 +    /* do nothing for dom0 if the IOMMU supports pass-through */
 24.1390 +    if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0))
 24.1391 +        return 0;
 24.1392 +
 24.1393 +    pte = addr_to_dma_pte(d, gfn << PAGE_SHIFT_4K);
 24.1394 +    if (!pte)
 24.1395 +        return -ENOMEM;
 24.1396 +    dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
 24.1397 +    dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
 24.1398 +    iommu_flush_cache_entry(iommu, pte);
 24.1399 +
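          +    /*
          +     * Caching-mode hardware may cache not-present entries, so a
          +     * newly created mapping needs an explicit page-selective IOTLB
          +     * flush; otherwise flushing the write buffer (when cap_rwbf is
          +     * set) is sufficient.
          +     */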
 24.1400 +    for_each_drhd_unit(drhd) {
 24.1401 +        iommu = drhd->iommu;
 24.1402 +        if (cap_caching_mode(iommu->cap))
 24.1403 +            iommu_flush_iotlb_psi(iommu, d->domain_id,
 24.1404 +                                  gfn << PAGE_SHIFT_4K, 1, 0);
 24.1405 +        else if (cap_rwbf(iommu->cap))
 24.1406 +            iommu_flush_write_buffer(iommu);
 24.1407 +    }
 24.1408 +    return 0;
 24.1409 +}
 24.1410 +
 24.1411 +int iommu_unmap_page(struct domain *d, dma_addr_t gfn)
 24.1412 +{
 24.1413 +    struct acpi_drhd_unit *drhd;
 24.1414 +    struct iommu *iommu;
 24.1415 +    struct dma_pte *pte = NULL;
 24.1416 +
 24.1417 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 24.1418 +    iommu = drhd->iommu;
 24.1419 +
  24.1420 +    /* do nothing for dom0 if the IOMMU supports pass-through */
 24.1421 +    if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0))
 24.1422 +        return 0;
 24.1423 +
 24.1424 +    /* get last level pte */
 24.1425 +    pte = dma_addr_level_pte(d, gfn << PAGE_SHIFT_4K, 1);
 24.1426 +    dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
 24.1427 +    
 24.1428 +    return 0;
 24.1429 +}
 24.1430 +
 24.1431 +int iommu_page_mapping(struct domain *domain, dma_addr_t iova,
 24.1432 +            void *hpa, size_t size, int prot)
 24.1433 +{
 24.1434 +    struct acpi_drhd_unit *drhd;
 24.1435 +    struct iommu *iommu;
 24.1436 +    unsigned long start_pfn, end_pfn;
 24.1437 +    struct dma_pte *pte = NULL;
 24.1438 +    int index;
 24.1439 +
 24.1440 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 24.1441 +    iommu = drhd->iommu;
 24.1442 +    if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
 24.1443 +        return -EINVAL;
 24.1444 +    iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
 24.1445 +    start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
 24.1446 +    end_pfn = (unsigned long)
 24.1447 +              ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
 24.1448 +    index = 0;
 24.1449 +    while (start_pfn < end_pfn) {
 24.1450 +        pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
 24.1451 +        if (!pte)
 24.1452 +            return -ENOMEM;
 24.1453 +        dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
 24.1454 +        dma_set_pte_prot(*pte, prot);
 24.1455 +        iommu_flush_cache_entry(iommu, pte);
 24.1456 +        start_pfn++;
 24.1457 +        index++;
 24.1458 +    }
 24.1459 +
 24.1460 +    for_each_drhd_unit(drhd) {
 24.1461 +        iommu = drhd->iommu;
 24.1462 +        if (cap_caching_mode(iommu->cap))
 24.1463 +            iommu_flush_iotlb_psi(iommu, domain->domain_id, iova, size, 0);
 24.1464 +        else if (cap_rwbf(iommu->cap))
 24.1465 +            iommu_flush_write_buffer(iommu);
 24.1466 +    }
 24.1467 +    return 0;
 24.1468 +}
 24.1469 +
 24.1470 +int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
 24.1471 +{
 24.1472 +    struct dma_pte *pte = NULL;
 24.1473 +
 24.1474 +    /* get last level pte */
 24.1475 +    pte = dma_addr_level_pte(domain, addr, 1);
 24.1476 +    dma_pte_clear_range(domain, addr, addr + size);
 24.1477 +    
 24.1478 +    return 0;
 24.1479 +}
 24.1480 +
 24.1481 +void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry)
 24.1482 +{
 24.1483 +    struct acpi_drhd_unit *drhd;
 24.1484 +    struct iommu *iommu = NULL;
 24.1485 +    struct dma_pte *pte = (struct dma_pte *) p2m_entry;
 24.1486 +
 24.1487 +    for_each_drhd_unit(drhd) {
 24.1488 +        iommu = drhd->iommu;
 24.1489 +        if (cap_caching_mode(iommu->cap))
 24.1490 +            iommu_flush_iotlb_psi(iommu, d->domain_id,
 24.1491 +                gfn << PAGE_SHIFT_4K, 1, 0);
 24.1492 +        else if (cap_rwbf(iommu->cap))
 24.1493 +            iommu_flush_write_buffer(iommu);
 24.1494 +    }
 24.1495 +    iommu_flush_cache_entry(iommu, pte);
 24.1496 +}
 24.1497 +
 24.1498 +int
 24.1499 +prepare_device(struct domain *domain, struct pci_dev dev)
 24.1500 +{
 24.1501 +    return 0;
 24.1502 +}
 24.1503 +
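          +/*
          + * RMRRs describe memory ranges a device may DMA into independently
          + * of any driver (e.g. USB legacy keyboard emulation), so these
          + * ranges are identity-mapped (guest address == machine address)
          + * before the device can fault.
          + */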
 24.1504 +static int iommu_prepare_rmrr_dev(
 24.1505 +    struct domain *d,
 24.1506 +    struct acpi_rmrr_unit *rmrr,
 24.1507 +    struct pci_dev *pdev)
 24.1508 +{
 24.1509 +    struct acpi_drhd_unit *drhd;
 24.1510 +    unsigned long size;
 24.1511 +    int ret;
 24.1512 +
 24.1513 +    /* page table init */
 24.1514 +    size = rmrr->end_address - rmrr->base_address + 1;
 24.1515 +    ret = iommu_page_mapping(d, rmrr->base_address,
 24.1516 +        (void *)rmrr->base_address, size,
 24.1517 +        DMA_PTE_READ|DMA_PTE_WRITE);
 24.1518 +    if (ret)
 24.1519 +        return ret;
 24.1520 +
 24.1521 +    if (domain_context_mapped(d, pdev) == 0) {
 24.1522 +        drhd = acpi_find_matched_drhd_unit(pdev);
 24.1523 +        ret = domain_context_mapping(d, drhd->iommu, pdev);
 24.1524 +        if (!ret)
 24.1525 +            return 0;
 24.1526 +    }
 24.1527 +    return ret;
 24.1528 +}
 24.1529 +
 24.1530 +void __init setup_dom0_devices(void)
 24.1531 +{
 24.1532 +    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
 24.1533 +    struct acpi_drhd_unit *drhd;
 24.1534 +    struct pci_dev *pdev;
 24.1535 +    int bus, dev, func;
 24.1536 +    u32 l;
 24.1537 +    u8 hdr_type;
 24.1538 +    int ret;
 24.1539 +
 24.1540 +#ifdef DEBUG_VTD_CONTEXT_ENTRY
 24.1541 +    for (bus = 0; bus < 256; bus++) {
 24.1542 +        for (dev = 0; dev < 32; dev++) { 
 24.1543 +            for (func = 0; func < 8; func++) { 
 24.1544 +                struct context_entry *context;
 24.1545 +                struct pci_dev device;
 24.1546 +
 24.1547 +                device.bus = bus; 
 24.1548 +                device.devfn = PCI_DEVFN(dev, func); 
 24.1549 +                drhd = acpi_find_matched_drhd_unit(&device);
 24.1550 +                context = device_to_context_entry(drhd->iommu,
 24.1551 +                    bus, PCI_DEVFN(dev, func));
 24.1552 +                if ((context->lo != 0) || (context->hi != 0))
 24.1553 +                    dprintk(XENLOG_INFO VTDPREFIX,
 24.1554 +                        "setup_dom0_devices-%x:%x:%x- context not 0\n",
 24.1555 +                        bus, dev, func);
 24.1556 +            }
 24.1557 +        }    
 24.1558 +    }        
 24.1559 +#endif
 24.1560 +
 24.1561 +    for (bus = 0; bus < 256; bus++) {
 24.1562 +        for (dev = 0; dev < 32; dev++) { 
 24.1563 +            for (func = 0; func < 8; func++) { 
 24.1564 +                l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
 24.1565 +                /* some broken boards return 0 or ~0 if a slot is empty: */
 24.1566 +                if (l == 0xffffffff || l == 0x00000000 ||
 24.1567 +                    l == 0x0000ffff || l == 0xffff0000)
 24.1568 +                    continue;
  24.1569 +                pdev = xmalloc(struct pci_dev);
          +                if (!pdev)
          +                    continue;
 24.1570 +                pdev->bus = bus;
 24.1571 +                pdev->devfn = PCI_DEVFN(dev, func);
 24.1572 +                list_add_tail(&pdev->list, &hd->pdev_list);
 24.1573 +
 24.1574 +                drhd = acpi_find_matched_drhd_unit(pdev);
 24.1575 +                ret = domain_context_mapping(dom0, drhd->iommu, pdev);
 24.1576 +                if (ret != 0)
 24.1577 +                    gdprintk(XENLOG_ERR VTDPREFIX,
 24.1578 +                        "domain_context_mapping failed\n");
 24.1579 +
 24.1580 +                hdr_type = read_pci_config(bus, dev, func, PCI_HEADER_TYPE);
  24.1581 +                /* TODO: skip functions 1-7 when function 0's header
  24.1582 +                 * type has the multi-function bit (0x80) clear. */
 24.1583 +            }
 24.1584 +        }
 24.1585 +    }
 24.1586 +    for_each_pdev(dom0, pdev) {
 24.1587 +        dprintk(XENLOG_INFO VTDPREFIX,
 24.1588 +            "setup_dom0_devices: bdf = %x:%x:%x\n",
 24.1589 +            pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 24.1590 +    }
 24.1591 +}
 24.1592 +
  24.1593 +void clear_fault_bit(struct iommu *iommu)
  24.1594 +{
  24.1595 +    u64 val;
  24.1596 +
  24.1597 +    /* Write back the first fault recording register to clear its
          +     * write-1-to-clear fault bit, then clear the primary fault
          +     * overflow bit. */
  24.1598 +    val = dmar_readq(iommu->reg, cap_fault_reg_offset(iommu->cap) + 8);
  24.1599 +    dmar_writeq(iommu->reg, cap_fault_reg_offset(iommu->cap) + 8, val);
  24.1600 +    dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
  24.1601 +}
 24.1606 +
  24.1607 +/*
  24.1608 + * Called from the ACPI discovery code: once all DMARs and RMRRs have been
  24.1609 + * scanned, run through and initialize as much of the hardware as necessary.
  24.1610 + */
 24.1611 +int vtd_enable = 1;
 24.1612 +static void setup_vtd_enable(char *s)
 24.1613 +{
 24.1614 +    if ( !strcmp(s, "0") )
 24.1615 +        vtd_enable = 0;
 24.1616 +    else if ( !strcmp(s, "1") )
 24.1617 +        vtd_enable = 1;
 24.1618 +    else
 24.1619 +        dprintk(XENLOG_INFO VTDPREFIX,
 24.1620 +            "Unknown vtd_enable value specified: '%s'\n", s);
 24.1621 +    dprintk(XENLOG_INFO VTDPREFIX, "vtd_enable = %x\n", vtd_enable);
 24.1622 +}
 24.1623 +custom_param("vtd", setup_vtd_enable);
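          +/* Example: booting Xen with "vtd=0" still discovers the DMAR units
          + * but leaves DMA remapping disabled (see enable_vtd_translation). */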
 24.1624 +
 24.1625 +static int init_vtd_hw(void)
 24.1626 +{
 24.1627 +    struct acpi_drhd_unit *drhd;
 24.1628 +    struct iommu *iommu;
 24.1629 +    int ret;
 24.1630 +
 24.1631 +    for_each_drhd_unit(drhd) {
 24.1632 +        iommu = drhd->iommu;
 24.1633 +        ret = iommu_set_root_entry(iommu);
 24.1634 +        if (ret) {
 24.1635 +            gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
 24.1636 +            return -EIO;
 24.1637 +        }
 24.1638 +    }
 24.1639 +    return 0;
 24.1640 +}
 24.1641 +
 24.1642 +static int enable_vtd_translation(void)
 24.1643 +{
 24.1644 +    struct acpi_drhd_unit *drhd;
 24.1645 +    struct iommu *iommu;
 24.1646 +    int vector = 0;
 24.1647 +
 24.1648 +    for_each_drhd_unit(drhd) {
 24.1649 +        iommu = drhd->iommu;
  24.1650 +        vector = iommu_set_interrupt(iommu);
          +        if (vector < 0)
          +            return -EIO;
 24.1651 +        dma_msi_data_init(iommu, vector);
 24.1652 +        dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
 24.1653 +        iommu->vector = vector;
 24.1654 +        clear_fault_bit(iommu);
 24.1655 +        if (vtd_enable && iommu_enable_translation(iommu))
 24.1656 +            return -EIO;
 24.1657 +    }
 24.1658 +    return 0;
 24.1659 +}
 24.1660 +
 24.1661 +static void setup_dom0_rmrr(void)
 24.1662 +{
 24.1663 +    struct acpi_rmrr_unit *rmrr;
 24.1664 +    struct pci_dev *pdev;
 24.1665 +    int ret;
 24.1666 +
 24.1667 +    for_each_rmrr_device(rmrr, pdev)
 24.1668 +        ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
 24.1669 +        if (ret)
 24.1670 +            gdprintk(XENLOG_ERR VTDPREFIX,
 24.1671 +                "IOMMU: mapping reserved region failed\n");
 24.1672 +    end_for_each_rmrr_device(rmrr, pdev)
 24.1673 +}
 24.1674 +
 24.1675 +int iommu_setup(void)
 24.1676 +{
 24.1677 +    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
 24.1678 +    struct acpi_drhd_unit *drhd;
 24.1679 +    struct iommu *iommu;
 24.1680 +
 24.1681 +    if (list_empty(&acpi_drhd_units))
 24.1682 +        return 0;
 24.1683 +
 24.1684 +    INIT_LIST_HEAD(&hd->pdev_list);
 24.1685 +    INIT_LIST_HEAD(&hd->g2m_ioport_list);
 24.1686 +
 24.1687 +    /* start from scratch */
 24.1688 +    flush_all();
 24.1689 +
 24.1690 +    /* setup clflush size */
 24.1691 +    x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
 24.1692 +
 24.1693 +    /*
 24.1694 +     * allocate IO page directory page for the domain.
 24.1695 +     */
 24.1696 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 24.1697 +    iommu = drhd->iommu;
 24.1698 +
  24.1699 +    hd->pgd = (struct dma_pte *)alloc_xenheap_page();
          +    if (!hd->pgd)
          +        goto error;
  24.1700 +    memset((u8*)hd->pgd, 0, PAGE_SIZE);
 24.1701 +
 24.1702 +    if (init_vtd_hw())
 24.1703 +        goto error;
 24.1704 +    setup_dom0_devices();
 24.1705 +    setup_dom0_rmrr();
 24.1706 +    if (enable_vtd_translation())
 24.1707 +        goto error;
 24.1708 +
 24.1709 +    return 0;
 24.1710 +
 24.1711 +error:
 24.1712 +    printk("iommu_setup() failed\n");
 24.1713 +    for_each_drhd_unit(drhd) {
 24.1714 +        iommu = drhd->iommu;
 24.1715 +        free_iommu(iommu);
 24.1716 +    }
 24.1717 +    return -EIO;
 24.1718 +}
 24.1719 +
 24.1720 +int assign_device(struct domain *d, u8 bus, u8 devfn)
 24.1721 +{
 24.1722 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
 24.1723 +    struct acpi_rmrr_unit *rmrr;
 24.1724 +    struct pci_dev *pdev;
 24.1725 +    int ret = 0;
 24.1726 +
 24.1727 +    if (list_empty(&acpi_drhd_units))
 24.1728 +        return ret;
 24.1729 +
 24.1730 +    dprintk(XENLOG_INFO VTDPREFIX,
 24.1731 +        "assign_device: bus = %x dev = %x func = %x\n",
 24.1732 +        bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 24.1733 +
 24.1734 +    reassign_device_ownership(dom0, d, bus, devfn);
 24.1735 +
  24.1736 +    /* setup rmrr identity mapping just once per domain */
 24.1737 +    if (list_empty(&hd->pdev_list))
 24.1738 +        for_each_rmrr_device(rmrr, pdev)
 24.1739 +            ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
 24.1740 +            if (ret)
 24.1741 +                gdprintk(XENLOG_ERR VTDPREFIX,
 24.1742 +                    "IOMMU: mapping reserved region failed\n");
 24.1743 +        end_for_each_rmrr_device(rmrr, pdev)
 24.1744 +    return ret;
 24.1745 +}
 24.1746 +
 24.1747 +void iommu_set_pgd(struct domain *d)
 24.1748 +{
 24.1749 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
 24.1750 +    unsigned long p2m_table;
 24.1751 +
 24.1752 +    if (hd->pgd) {
 24.1753 +        gdprintk(XENLOG_INFO VTDPREFIX,
 24.1754 +            "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
 24.1755 +        hd->pgd = NULL;
 24.1756 +    }
 24.1757 +    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
 24.1758 +
 24.1759 +#if CONFIG_PAGING_LEVELS == 3
 24.1760 +    if ( !hd->pgd )
 24.1761 +    {
 24.1762 +        int level = agaw_to_level(hd->agaw);
 24.1763 +        struct dma_pte *pmd = NULL;
 24.1764 +        struct dma_pte *pgd = NULL;
 24.1765 +        struct dma_pte *pte = NULL;
 24.1766 +        l3_pgentry_t *l3e;
 24.1767 +        unsigned long flags;
 24.1768 +        int i;
 24.1769 +
 24.1770 +        spin_lock_irqsave(&hd->mapping_lock, flags);
 24.1771 +        if (!hd->pgd) {
  24.1772 +            pgd = (struct dma_pte *)alloc_xenheap_page();
          +            if (!pgd) {
          +                spin_unlock_irqrestore(&hd->mapping_lock, flags);
          +                gdprintk(XENLOG_ERR VTDPREFIX,
          +                    "iommu_set_pgd: page allocation failed\n");
          +                return;
          +            }
  24.1773 +            memset((u8*)pgd, 0, PAGE_SIZE);
  24.1774 +            if (!hd->pgd)
  24.1775 +                hd->pgd = pgd;
  24.1776 +            else /* somebody else allocated it first */
  24.1777 +                free_xenheap_page((void *) pgd);
 24.1778 +        }
 24.1779 +
 24.1780 +        l3e = map_domain_page(p2m_table);
 24.1781 +        switch(level)
 24.1782 +        {
 24.1783 +            case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
 24.1784 +                /* We only support 8 entries for the PAE L3 p2m table */
 24.1785 +                for ( i = 0; i < 8 ; i++ )
 24.1786 +                {
 24.1787 +                    /* Don't create new L2 entry, use ones from p2m table */
 24.1788 +                    pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
 24.1789 +                }
 24.1790 +                break;
 24.1791 +
 24.1792 +            case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
 24.1793 +                /* We allocate one more page for the top vtd page table. */
 24.1794 +                pmd = (struct dma_pte *)alloc_xenheap_page();
 24.1795 +                memset((u8*)pmd, 0, PAGE_SIZE);
 24.1796 +                pte = &pgd[0];
 24.1797 +                dma_set_pte_addr(*pte, virt_to_maddr(pmd));
 24.1798 +                dma_set_pte_readable(*pte);
 24.1799 +                dma_set_pte_writable(*pte);
 24.1800 +
 24.1801 +                for ( i = 0; i < 8; i++ )
 24.1802 +                {
 24.1803 +                    /* Don't create new L2 entry, use ones from p2m table */
 24.1804 +                    pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
 24.1805 +                }
 24.1806 +                break;
 24.1807 +            default:
 24.1808 +                gdprintk(XENLOG_ERR VTDPREFIX,
 24.1809 +                    "iommu_set_pgd:Unsupported p2m table sharing level!\n");
 24.1810 +                break;
 24.1811 +        }
 24.1812 +        unmap_domain_page(l3e);
 24.1813 +        spin_unlock_irqrestore(&hd->mapping_lock, flags);
 24.1814 +    }
 24.1815 +#elif CONFIG_PAGING_LEVELS == 4
 24.1816 +    if ( !hd->pgd )
 24.1817 +    {
 24.1818 +        int level = agaw_to_level(hd->agaw);
 24.1819 +        l3_pgentry_t *l3e;
 24.1820 +        mfn_t pgd_mfn;
 24.1821 +
 24.1822 +        switch (level)
 24.1823 +        {
 24.1824 +            case VTD_PAGE_TABLE_LEVEL_3:
 24.1825 +                l3e = map_domain_page(p2m_table);
 24.1826 +                if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
 24.1827 +                {
 24.1828 +                    gdprintk(XENLOG_ERR VTDPREFIX,
 24.1829 +                        "iommu_set_pgd: second level wasn't there\n");
 24.1830 +                    unmap_domain_page(l3e);
 24.1831 +                    return;
 24.1832 +                }
 24.1833 +                pgd_mfn = _mfn(l3e_get_pfn(*l3e));
 24.1834 +                unmap_domain_page(l3e);
 24.1835 +                hd->pgd = maddr_to_virt(pagetable_get_paddr(
 24.1836 +                      pagetable_from_mfn(pgd_mfn)));
 24.1837 +                break;
 24.1838 +
 24.1839 +            case VTD_PAGE_TABLE_LEVEL_4:
 24.1840 +                pgd_mfn = _mfn(p2m_table);
 24.1841 +                hd->pgd = maddr_to_virt(pagetable_get_paddr(
 24.1842 +                      pagetable_from_mfn(pgd_mfn)));
 24.1843 +                break;
 24.1844 +            default:
 24.1845 +                gdprintk(XENLOG_ERR VTDPREFIX,
 24.1846 +                    "iommu_set_pgd:Unsupported p2m table sharing level!\n");
 24.1847 +                break;
 24.1848 +        }
 24.1849 +    }
 24.1850 +#endif
 24.1851 +    gdprintk(XENLOG_INFO VTDPREFIX,
 24.1852 +        "iommu_set_pgd_2: hd->pgd = %p\n", hd->pgd);
 24.1853 +}
 24.1854 +
 24.1855 +
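          +/*
          + * Per-IOMMU register save area for S3 suspend/resume, indexed by
          + * DMAR unit and register offset.  Entries are 64 bits wide so the
          + * quadword registers (root table address, protected high-memory
          + * base/limit) are preserved untruncated.
          + */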
  24.1856 +u64 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
 24.1857 +int iommu_suspend(void)
 24.1858 +{
 24.1859 +    struct acpi_drhd_unit *drhd;
 24.1860 +    struct iommu *iommu;
 24.1861 +    int i = 0;
 24.1862 +
 24.1863 +    if (!vtd_enable)
 24.1864 +        return 0;
 24.1865 +
 24.1866 +    flush_all();
 24.1867 +    for_each_drhd_unit(drhd) {
 24.1868 +        iommu = drhd->iommu;
  24.1869 +        iommu_state[i][DMAR_RTADDR_REG] =
  24.1870 +            (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
  24.1871 +        iommu_state[i][DMAR_FECTL_REG] =
  24.1872 +            (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
  24.1873 +        iommu_state[i][DMAR_FEDATA_REG] =
  24.1874 +            (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
  24.1875 +        iommu_state[i][DMAR_FEADDR_REG] =
  24.1876 +            (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
  24.1877 +        iommu_state[i][DMAR_FEUADDR_REG] =
  24.1878 +            (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
  24.1879 +        iommu_state[i][DMAR_PLMBASE_REG] =
  24.1880 +            (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
  24.1881 +        iommu_state[i][DMAR_PLMLIMIT_REG] =
  24.1882 +            (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
  24.1883 +        iommu_state[i][DMAR_PHMBASE_REG] =
  24.1884 +            (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
  24.1885 +        iommu_state[i][DMAR_PHMLIMIT_REG] =
  24.1886 +            (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
 24.1887 +        i++;
 24.1888 +    }
 24.1889 +
 24.1890 +    return 0;
 24.1891 +}
 24.1892 +
 24.1893 +int iommu_resume(void)
 24.1894 +{
 24.1895 +    struct acpi_drhd_unit *drhd;
 24.1896 +    struct iommu *iommu;
 24.1897 +    int i = 0;
 24.1898 +
 24.1899 +    if (!vtd_enable)
 24.1900 +        return 0;
 24.1901 +
 24.1902 +    flush_all();
 24.1903 +
 24.1904 +    init_vtd_hw();
 24.1905 +    for_each_drhd_unit(drhd) {
 24.1906 +        iommu = drhd->iommu;
  24.1907 +        dmar_writeq(iommu->reg, DMAR_RTADDR_REG,
  24.1908 +            (u64) iommu_state[i][DMAR_RTADDR_REG]);
  24.1909 +        dmar_writel(iommu->reg, DMAR_FECTL_REG,
  24.1910 +            (u32) iommu_state[i][DMAR_FECTL_REG]);
  24.1911 +        dmar_writel(iommu->reg, DMAR_FEDATA_REG,
  24.1912 +            (u32) iommu_state[i][DMAR_FEDATA_REG]);
  24.1913 +        dmar_writel(iommu->reg, DMAR_FEADDR_REG,
  24.1914 +            (u32) iommu_state[i][DMAR_FEADDR_REG]);
  24.1915 +        dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
  24.1916 +            (u32) iommu_state[i][DMAR_FEUADDR_REG]);
  24.1917 +        dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
  24.1918 +            (u32) iommu_state[i][DMAR_PLMBASE_REG]);
  24.1919 +        dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
  24.1920 +            (u32) iommu_state[i][DMAR_PLMLIMIT_REG]);
  24.1921 +        dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
  24.1922 +            (u64) iommu_state[i][DMAR_PHMBASE_REG]);
  24.1923 +        dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
  24.1924 +            (u64) iommu_state[i][DMAR_PHMLIMIT_REG]);
 24.1925 +
 24.1926 +        if (iommu_enable_translation(iommu))
 24.1927 +            return -EIO;
 24.1928 +        i++;
 24.1929 +    }
 24.1930 +    return 0;
 24.1931 +}
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/xen/arch/x86/hvm/vmx/vtd/msi.h	Fri Jul 27 12:59:37 2007 -0700
    25.3 @@ -0,0 +1,128 @@
    25.4 +/*
    25.5 + * Copyright (C) 2003-2004 Intel
    25.6 + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
    25.7 + */
    25.8 +
    25.9 +#ifndef MSI_H
   25.10 +#define MSI_H
   25.11 +
   25.12 +/*
   25.13 + * Assume the maximum number of hot plug slots supported by the system is about
    25.14 + * ten. The worst case is that each of these slots is hot-added with a device,
   25.15 + * which has two MSI/MSI-X capable functions. To avoid any MSI-X driver, which
   25.16 + * attempts to request all available vectors, NR_HP_RESERVED_VECTORS is defined
   25.17 + * as below to ensure at least one message is assigned to each detected MSI/
   25.18 + * MSI-X device function.
   25.19 + */
   25.20 +#define NR_HP_RESERVED_VECTORS 	20
   25.21 +
   25.22 +extern int vector_irq[NR_VECTORS];
   25.23 +extern void (*interrupt[NR_IRQS])(void);
   25.24 +extern int pci_vector_resources(int last, int nr_released);
   25.25 +
   25.26 +/*
   25.27 + * MSI-X Address Register
   25.28 + */
   25.29 +#define PCI_MSIX_FLAGS_QSIZE		0x7FF
   25.30 +#define PCI_MSIX_FLAGS_ENABLE		(1 << 15)
   25.31 +#define PCI_MSIX_FLAGS_BIRMASK		(7 << 0)
   25.32 +#define PCI_MSIX_FLAGS_BITMASK		(1 << 0)
   25.33 +
   25.34 +#define PCI_MSIX_ENTRY_SIZE			16
   25.35 +#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET	0
   25.36 +#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET	4
   25.37 +#define  PCI_MSIX_ENTRY_DATA_OFFSET		8
   25.38 +#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET	12
   25.39 +
   25.40 +#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
   25.41 +#define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
   25.42 +#define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
   25.43 +#define msi_data_reg(base, is64bit)	\
   25.44 +	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
   25.45 +#define msi_mask_bits_reg(base, is64bit) \
   25.46 +	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
   25.47 +#define msi_disable(control)		control &= ~PCI_MSI_FLAGS_ENABLE
   25.48 +#define multi_msi_capable(control) \
   25.49 +	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
   25.50 +#define multi_msi_enable(control, num) \
   25.51 +	control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
   25.52 +#define is_64bit_address(control)	(control & PCI_MSI_FLAGS_64BIT)
   25.53 +#define is_mask_bit_support(control)	(control & PCI_MSI_FLAGS_MASKBIT)
   25.54 +#define msi_enable(control, num) multi_msi_enable(control, num); \
   25.55 +	control |= PCI_MSI_FLAGS_ENABLE
   25.56 +
   25.57 +#define msix_table_offset_reg(base)	(base + 0x04)
   25.58 +#define msix_pba_offset_reg(base)	(base + 0x08)
   25.59 +#define msix_enable(control)	 	control |= PCI_MSIX_FLAGS_ENABLE
   25.60 +#define msix_disable(control)	 	control &= ~PCI_MSIX_FLAGS_ENABLE
   25.61 +#define msix_table_size(control) 	((control & PCI_MSIX_FLAGS_QSIZE)+1)
   25.62 +#define multi_msix_capable		msix_table_size
   25.63 +#define msix_unmask(address)	 	(address & ~PCI_MSIX_FLAGS_BITMASK)
   25.64 +#define msix_mask(address)		(address | PCI_MSIX_FLAGS_BITMASK)
   25.65 +#define msix_is_pending(address) 	(address & PCI_MSIX_FLAGS_PENDMASK)
   25.66 +
   25.67 +/*
   25.68 + * MSI Defined Data Structures
   25.69 + */
   25.70 +#define MSI_ADDRESS_HEADER		0xfee
   25.71 +#define MSI_ADDRESS_HEADER_SHIFT	12
   25.72 +#define MSI_ADDRESS_HEADER_MASK		0xfff000
   25.73 +#define MSI_ADDRESS_DEST_ID_MASK	0xfff0000f
   25.74 +#define MSI_TARGET_CPU_MASK		0xff
   25.75 +#define MSI_TARGET_CPU_SHIFT		12
   25.76 +#define MSI_DELIVERY_MODE		0
   25.77 +#define MSI_LEVEL_MODE			1	/* Edge always assert */
   25.78 +#define MSI_TRIGGER_MODE		0	/* MSI is edge sensitive */
   25.79 +#define MSI_PHYSICAL_MODE		0
   25.80 +#define MSI_LOGICAL_MODE		1
   25.81 +#define MSI_REDIRECTION_HINT_MODE	0
   25.82 +
   25.83 +#define __LITTLE_ENDIAN_BITFIELD	1
   25.84 +
   25.85 +struct msg_data {
   25.86 +#if defined(__LITTLE_ENDIAN_BITFIELD)
   25.87 +	__u32	vector		:  8;
   25.88 +	__u32	delivery_mode	:  3;	/* 000b: FIXED | 001b: lowest prior */
   25.89 +	__u32	reserved_1	:  3;
   25.90 +	__u32	level		:  1;	/* 0: deassert | 1: assert */
   25.91 +	__u32	trigger		:  1;	/* 0: edge | 1: level */
   25.92 +	__u32	reserved_2	: 16;
   25.93 +#elif defined(__BIG_ENDIAN_BITFIELD)
   25.94 +	__u32	reserved_2	: 16;
   25.95 +	__u32	trigger		:  1;	/* 0: edge | 1: level */
   25.96 +	__u32	level		:  1;	/* 0: deassert | 1: assert */
   25.97 +	__u32	reserved_1	:  3;
   25.98 +	__u32	delivery_mode	:  3;	/* 000b: FIXED | 001b: lowest prior */
   25.99 +	__u32	vector		:  8;
  25.100 +#else
  25.101 +#error "Bitfield endianness not defined! Check your byteorder.h"
  25.102 +#endif
  25.103 +} __attribute__ ((packed));
  25.104 +
  25.105 +struct msg_address {
  25.106 +	union {
  25.107 +		struct {
  25.108 +#if defined(__LITTLE_ENDIAN_BITFIELD)
  25.109 +			__u32	reserved_1	:  2;
  25.110 +			__u32	dest_mode	:  1;	/*0:physic | 1:logic */
  25.111 +			__u32	redirection_hint:  1;  	/*0: dedicated CPU
  25.112 +							  1: lowest priority */
  25.113 +			__u32	reserved_2	:  4;
  25.114 + 			__u32	dest_id		: 24;	/* Destination ID */
  25.115 +#elif defined(__BIG_ENDIAN_BITFIELD)
  25.116 + 			__u32	dest_id		: 24;	/* Destination ID */
  25.117 +			__u32	reserved_2	:  4;
  25.118 +			__u32	redirection_hint:  1;  	/*0: dedicated CPU
  25.119 +							  1: lowest priority */
  25.120 +			__u32	dest_mode	:  1;	/*0:physic | 1:logic */
  25.121 +			__u32	reserved_1	:  2;
  25.122 +#else
  25.123 +#error "Bitfield endianness not defined! Check your byteorder.h"
  25.124 +#endif
  25.125 +      		}u;
  25.126 +       		__u32  value;
  25.127 +	}lo_address;
  25.128 +	__u32 	hi_address;
  25.129 +} __attribute__ ((packed));
  25.130 +
  25.131 +#endif /* MSI_H */
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/xen/arch/x86/hvm/vmx/vtd/pci-direct.h	Fri Jul 27 12:59:37 2007 -0700
    26.3 @@ -0,0 +1,48 @@
    26.4 +#ifndef ASM_PCI_DIRECT_H
    26.5 +#define ASM_PCI_DIRECT_H 1
    26.6 +
    26.7 +#include <xen/types.h>
    26.8 +#include <asm/io.h>
    26.9 +
   26.10 +/* Direct PCI access. This is used for PCI accesses in early boot before
   26.11 +   the PCI subsystem works. */ 
   26.12 +
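          +/*
          + * The dword written to port 0xCF8 encodes the config address:
          + * bit 31 = enable, bits 23:16 = bus, 15:11 = device (slot),
          + * 10:8 = function, 7:2 = dword-aligned register offset; the data
          + * itself is then transferred through port 0xCFC.
          + */
          +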
   26.13 +#define PDprintk(x...)
   26.14 +
   26.15 +static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
   26.16 +{
   26.17 +    u32 v; 
   26.18 +    outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
   26.19 +    v = inl(0xcfc); 
   26.20 +    if (v != 0xffffffff)
   26.21 +        PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
   26.22 +    return v;
   26.23 +}
   26.24 +
   26.25 +static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
   26.26 +{
   26.27 +    u8 v; 
   26.28 +    outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
   26.29 +    v = inb(0xcfc + (offset&3)); 
   26.30 +    PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
   26.31 +    return v;
   26.32 +}
   26.33 +
   26.34 +static inline u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
   26.35 +{
   26.36 +    u16 v; 
   26.37 +    outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
   26.38 +    v = inw(0xcfc + (offset&2)); 
   26.39 +    PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
   26.40 +    return v;
   26.41 +}
   26.42 +
   26.43 +static inline void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
   26.44 +                    u32 val)
   26.45 +{
   26.46 +    PDprintk("%x writing to %x: %x\n", slot, offset, val); 
   26.47 +    outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
   26.48 +    outl(val, 0xcfc); 
   26.49 +}
   26.50 +
   26.51 +#endif
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/xen/arch/x86/hvm/vmx/vtd/pci_regs.h	Fri Jul 27 12:59:37 2007 -0700
    27.3 @@ -0,0 +1,449 @@
    27.4 +/*
    27.5 + *	pci_regs.h
    27.6 + *
    27.7 + *	PCI standard defines
    27.8 + *	Copyright 1994, Drew Eckhardt
    27.9 + *	Copyright 1997--1999 Martin Mares <mj@ucw.cz>
   27.10 + *
   27.11 + *	For more information, please consult the following manuals (look at
   27.12 + *	http://www.pcisig.com/ for how to get them):
   27.13 + *
   27.14 + *	PCI BIOS Specification
   27.15 + *	PCI Local Bus Specification
   27.16 + *	PCI to PCI Bridge Specification
   27.17 + *	PCI System Design Guide
   27.18 + */
   27.19 +
   27.20 +#ifndef LINUX_PCI_REGS_H
   27.21 +#define LINUX_PCI_REGS_H
   27.22 +
   27.23 +/*
   27.24 + * Under PCI, each device has 256 bytes of configuration address space,
   27.25 + * of which the first 64 bytes are standardized as follows:
   27.26 + */
   27.27 +#define PCI_VENDOR_ID		0x00	/* 16 bits */
   27.28 +#define PCI_DEVICE_ID		0x02	/* 16 bits */
   27.29 +#define PCI_COMMAND		0x04	/* 16 bits */
   27.30 +#define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
   27.31 +#define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
   27.32 +#define  PCI_COMMAND_MASTER	0x4	/* Enable bus mastering */
   27.33 +#define  PCI_COMMAND_SPECIAL	0x8	/* Enable response to special cycles */
   27.34 +#define  PCI_COMMAND_INVALIDATE	0x10	/* Use memory write and invalidate */
   27.35 +#define  PCI_COMMAND_VGA_PALETTE 0x20	/* Enable palette snooping */
   27.36 +#define  PCI_COMMAND_PARITY	0x40	/* Enable parity checking */
   27.37 +#define  PCI_COMMAND_WAIT 	0x80	/* Enable address/data stepping */
   27.38 +#define  PCI_COMMAND_SERR	0x100	/* Enable SERR */
   27.39 +#define  PCI_COMMAND_FAST_BACK	0x200	/* Enable back-to-back writes */
   27.40 +#define  PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */
   27.41 +
   27.42 +#define PCI_STATUS		0x06	/* 16 bits */
   27.43 +#define  PCI_STATUS_CAP_LIST	0x10	/* Support Capability List */
    27.44 +#define  PCI_STATUS_66MHZ	0x20	/* Support 66 MHz PCI 2.1 bus */
   27.45 +#define  PCI_STATUS_UDF		0x40	/* Support User Definable Features [obsolete] */
   27.46 +#define  PCI_STATUS_FAST_BACK	0x80	/* Accept fast-back to back */
   27.47 +#define  PCI_STATUS_PARITY	0x100	/* Detected parity error */
   27.48 +#define  PCI_STATUS_DEVSEL_MASK	0x600	/* DEVSEL timing */
   27.49 +#define  PCI_STATUS_DEVSEL_FAST		0x000
   27.50 +#define  PCI_STATUS_DEVSEL_MEDIUM	0x200
   27.51 +#define  PCI_STATUS_DEVSEL_SLOW		0x400
   27.52 +#define  PCI_STATUS_SIG_TARGET_ABORT	0x800 /* Set on target abort */
   27.53 +#define  PCI_STATUS_REC_TARGET_ABORT	0x1000 /* Master received target abort */
   27.54 +#define  PCI_STATUS_REC_MASTER_ABORT	0x2000 /* Set on master abort */
   27.55 +#define  PCI_STATUS_SIG_SYSTEM_ERROR	0x4000 /* Set when we drive SERR */
   27.56 +#define  PCI_STATUS_DETECTED_PARITY	0x8000 /* Set on parity error */
   27.57 +
   27.58 +#define PCI_CLASS_REVISION	0x08	/* High 24 bits are class, low 8 revision */
   27.59 +#define PCI_REVISION_ID		0x08	/* Revision ID */
   27.60 +#define PCI_CLASS_PROG		0x09	/* Reg. Level Programming Interface */
   27.61 +#define PCI_CLASS_DEVICE	0x0a	/* Device class */
   27.62 +
   27.63 +#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
   27.64 +#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
   27.65 +#define PCI_HEADER_TYPE		0x0e	/* 8 bits */
   27.66 +#define  PCI_HEADER_TYPE_NORMAL		0
   27.67 +#define  PCI_HEADER_TYPE_BRIDGE		1
   27.68 +#define  PCI_HEADER_TYPE_CARDBUS	2
   27.69 +
   27.70 +#define PCI_BIST		0x0f	/* 8 bits */
   27.71 +#define  PCI_BIST_CODE_MASK	0x0f	/* Return result */
   27.72 +#define  PCI_BIST_START		0x40	/* 1 to start BIST, 2 secs or less */
   27.73 +#define  PCI_BIST_CAPABLE	0x80	/* 1 if BIST capable */
   27.74 +
   27.75 +/*
   27.76 + * Base addresses specify locations in memory or I/O space.
   27.77 + * Decoded size can be determined by writing a value of
   27.78 + * 0xffffffff to the register, and reading it back.  Only
   27.79 + * 1 bits are decoded.
   27.80 + */
   27.81 +#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
   27.82 +#define PCI_BASE_ADDRESS_1	0x14	/* 32 bits [htype 0,1 only] */
   27.83 +#define PCI_BASE_ADDRESS_2	0x18	/* 32 bits [htype 0 only] */
   27.84 +#define PCI_BASE_ADDRESS_3	0x1c	/* 32 bits */
   27.85 +#define PCI_BASE_ADDRESS_4	0x20	/* 32 bits */
   27.86 +#define PCI_BASE_ADDRESS_5	0x24	/* 32 bits */
   27.87 +#define  PCI_BASE_ADDRESS_SPACE		0x01	/* 0 = memory, 1 = I/O */
   27.88 +#define  PCI_BASE_ADDRESS_SPACE_IO	0x01
   27.89 +#define  PCI_BASE_ADDRESS_SPACE_MEMORY	0x00
   27.90 +#define  PCI_BASE_ADDRESS_MEM_TYPE_MASK	0x06
   27.91 +#define  PCI_BASE_ADDRESS_MEM_TYPE_32	0x00	/* 32 bit address */
   27.92 +#define  PCI_BASE_ADDRESS_MEM_TYPE_1M	0x02	/* Below 1M [obsolete] */
   27.93 +#define  PCI_BASE_ADDRESS_MEM_TYPE_64	0x04	/* 64 bit address */
   27.94 +#define  PCI_BASE_ADDRESS_MEM_PREFETCH	0x08	/* prefetchable? */
   27.95 +#define  PCI_BASE_ADDRESS_MEM_MASK	(~0x0fUL)
   27.96 +#define  PCI_BASE_ADDRESS_IO_MASK	(~0x03UL)
   27.97 +/* bit 1 is reserved if address_space = 1 */
   27.98 +
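
/*
 * A worked sketch of the sizing rule described above: write all-ones,
 * read back, mask off the type bits, and the decoded size is the two's
 * complement of what remains.  Assumes the read/write_pci_config helpers
 * from pci-direct.h; real code must restore the original BAR value, as
 * done here, and hold off concurrent config accesses.
 */
static u32 pci_bar_size(u8 bus, u8 slot, u8 func, u8 bar_off)
{
    u32 orig = read_pci_config(bus, slot, func, bar_off);
    u32 mask = (orig & PCI_BASE_ADDRESS_SPACE_IO) ?
               PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK;
    u32 sz;

    write_pci_config(bus, slot, func, bar_off, 0xffffffff);
    sz = read_pci_config(bus, slot, func, bar_off) & mask;
    write_pci_config(bus, slot, func, bar_off, orig);    /* restore */

    return sz ? (~sz + 1) : 0;    /* two's complement gives the size */
}
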
   27.99 +/* Header type 0 (normal devices) */
  27.100 +#define PCI_CARDBUS_CIS		0x28
  27.101 +#define PCI_SUBSYSTEM_VENDOR_ID	0x2c
  27.102 +#define PCI_SUBSYSTEM_ID	0x2e
  27.103 +#define PCI_ROM_ADDRESS		0x30	/* Bits 31..11 are address, 10..1 reserved */
  27.104 +#define  PCI_ROM_ADDRESS_ENABLE	0x01
  27.105 +#define PCI_ROM_ADDRESS_MASK	(~0x7ffUL)
  27.106 +
  27.107 +#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
  27.108 +
  27.109 +/* 0x35-0x3b are reserved */
  27.110 +#define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
  27.111 +#define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
  27.112 +#define PCI_MIN_GNT		0x3e	/* 8 bits */
  27.113 +#define PCI_MAX_LAT		0x3f	/* 8 bits */
  27.114 +
  27.115 +/* Header type 1 (PCI-to-PCI bridges) */
  27.116 +#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
  27.117 +#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
  27.118 +#define PCI_SUBORDINATE_BUS	0x1a	/* Highest bus number behind the bridge */
  27.119 +#define PCI_SEC_LATENCY_TIMER	0x1b	/* Latency timer for secondary interface */
  27.120 +#define PCI_IO_BASE		0x1c	/* I/O range behind the bridge */
  27.121 +#define PCI_IO_LIMIT		0x1d
  27.122 +#define  PCI_IO_RANGE_TYPE_MASK	0x0fUL	/* I/O bridging type */
  27.123 +#define  PCI_IO_RANGE_TYPE_16	0x00
  27.124 +#define  PCI_IO_RANGE_TYPE_32	0x01
  27.125 +#define  PCI_IO_RANGE_MASK	(~0x0fUL)
  27.126 +#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
  27.127 +#define PCI_MEMORY_BASE		0x20	/* Memory range behind */
  27.128 +#define PCI_MEMORY_LIMIT	0x22
  27.129 +#define  PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
  27.130 +#define  PCI_MEMORY_RANGE_MASK	(~0x0fUL)
  27.131 +#define PCI_PREF_MEMORY_BASE	0x24	/* Prefetchable memory range behind */
  27.132 +#define PCI_PREF_MEMORY_LIMIT	0x26
  27.133 +#define  PCI_PREF_RANGE_TYPE_MASK 0x0fUL
  27.134 +#define  PCI_PREF_RANGE_TYPE_32	0x00
  27.135 +#define  PCI_PREF_RANGE_TYPE_64	0x01
  27.136 +#define  PCI_PREF_RANGE_MASK	(~0x0fUL)
  27.137 +#define PCI_PREF_BASE_UPPER32	0x28	/* Upper half of prefetchable memory range */
  27.138 +#define PCI_PREF_LIMIT_UPPER32	0x2c
  27.139 +#define PCI_IO_BASE_UPPER16	0x30	/* Upper half of I/O addresses */
  27.140 +#define PCI_IO_LIMIT_UPPER16	0x32
  27.141 +/* 0x34 same as for htype 0 */
  27.142 +/* 0x35-0x3b is reserved */
  27.143 +#define PCI_ROM_ADDRESS1	0x38	/* Same as PCI_ROM_ADDRESS, but for htype 1 */
  27.144 +/* 0x3c-0x3d are same as for htype 0 */
  27.145 +#define PCI_BRIDGE_CONTROL	0x3e
  27.146 +#define  PCI_BRIDGE_CTL_PARITY	0x01	/* Enable parity detection on secondary interface */
  27.147 +#define  PCI_BRIDGE_CTL_SERR	0x02	/* The same for SERR forwarding */
  27.148 +#define  PCI_BRIDGE_CTL_NO_ISA	0x04	/* Disable bridging of ISA ports */
  27.149 +#define  PCI_BRIDGE_CTL_VGA	0x08	/* Forward VGA addresses */
  27.150 +#define  PCI_BRIDGE_CTL_MASTER_ABORT	0x20  /* Report master aborts */
  27.151 +#define  PCI_BRIDGE_CTL_BUS_RESET	0x40	/* Secondary bus reset */
  27.152 +#define  PCI_BRIDGE_CTL_FAST_BACK	0x80	/* Fast Back2Back enabled on secondary interface */
  27.153 +
  27.154 +/* Header type 2 (CardBus bridges) */
  27.155 +#define PCI_CB_CAPABILITY_LIST	0x14
  27.156 +/* 0x15 reserved */
  27.157 +#define PCI_CB_SEC_STATUS	0x16	/* Secondary status */
  27.158 +#define PCI_CB_PRIMARY_BUS	0x18	/* PCI bus number */
  27.159 +#define PCI_CB_CARD_BUS		0x19	/* CardBus bus number */
  27.160 +#define PCI_CB_SUBORDINATE_BUS	0x1a	/* Subordinate bus number */
  27.161 +#define PCI_CB_LATENCY_TIMER	0x1b	/* CardBus latency timer */
  27.162 +#define PCI_CB_MEMORY_BASE_0	0x1c
  27.163 +#define PCI_CB_MEMORY_LIMIT_0	0x20
  27.164 +#define PCI_CB_MEMORY_BASE_1	0x24
  27.165 +#define PCI_CB_MEMORY_LIMIT_1	0x28
  27.166 +#define PCI_CB_IO_BASE_0	0x2c
  27.167 +#define PCI_CB_IO_BASE_0_HI	0x2e
  27.168 +#define PCI_CB_IO_LIMIT_0	0x30
  27.169 +#define PCI_CB_IO_LIMIT_0_HI	0x32
  27.170 +#define PCI_CB_IO_BASE_1	0x34
  27.171 +#define PCI_CB_IO_BASE_1_HI	0x36
  27.172 +#define PCI_CB_IO_LIMIT_1	0x38
  27.173 +#define PCI_CB_IO_LIMIT_1_HI	0x3a
  27.174 +#define  PCI_CB_IO_RANGE_MASK	(~0x03UL)
  27.175 +/* 0x3c-0x3d are same as for htype 0 */
  27.176 +#define PCI_CB_BRIDGE_CONTROL	0x3e
  27.177 +#define  PCI_CB_BRIDGE_CTL_PARITY	0x01	/* Similar to standard bridge control register */
  27.178 +#define  PCI_CB_BRIDGE_CTL_SERR		0x02
  27.179 +#define  PCI_CB_BRIDGE_CTL_ISA		0x04
  27.180 +#define  PCI_CB_BRIDGE_CTL_VGA		0x08
  27.181 +#define  PCI_CB_BRIDGE_CTL_MASTER_ABORT	0x20
  27.182 +#define  PCI_CB_BRIDGE_CTL_CB_RESET	0x40	/* CardBus reset */
  27.183 +#define  PCI_CB_BRIDGE_CTL_16BIT_INT	0x80	/* Enable interrupt for 16-bit cards */
  27.184 +#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100	/* Prefetch enable for both memory regions */
  27.185 +#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
  27.186 +#define  PCI_CB_BRIDGE_CTL_POST_WRITES	0x400
  27.187 +#define PCI_CB_SUBSYSTEM_VENDOR_ID	0x40
  27.188 +#define PCI_CB_SUBSYSTEM_ID		0x42
  27.189 +#define PCI_CB_LEGACY_MODE_BASE		0x44	/* 16-bit PC Card legacy mode base address (ExCa) */
  27.190 +/* 0x48-0x7f reserved */
  27.191 +
  27.192 +/* Capability lists */
  27.193 +
  27.194 +#define PCI_CAP_LIST_ID		0	/* Capability ID */
  27.195 +#define  PCI_CAP_ID_PM		0x01	/* Power Management */
  27.196 +#define  PCI_CAP_ID_AGP		0x02	/* Accelerated Graphics Port */
  27.197 +#define  PCI_CAP_ID_VPD		0x03	/* Vital Product Data */
  27.198 +#define  PCI_CAP_ID_SLOTID	0x04	/* Slot Identification */
  27.199 +#define  PCI_CAP_ID_MSI		0x05	/* Message Signalled Interrupts */
  27.200 +#define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
  27.201 +#define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
  27.202 +#define  PCI_CAP_ID_HT_IRQCONF	0x08	/* HyperTransport IRQ Configuration */
  27.203 +#define  PCI_CAP_ID_SHPC 	0x0C	/* PCI Standard Hot-Plug Controller */
  27.204 +#define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
  27.205 +#define  PCI_CAP_ID_MSIX	0x11	/* MSI-X */
  27.206 +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
  27.207 +#define PCI_CAP_FLAGS		2	/* Capability defined flags (16 bits) */
  27.208 +#define PCI_CAP_SIZEOF		4
  27.209 +
  27.210 +/* Power Management Registers */
  27.211 +
  27.212 +#define PCI_PM_PMC		2	/* PM Capabilities Register */
  27.213 +#define  PCI_PM_CAP_VER_MASK	0x0007	/* Version */
  27.214 +#define  PCI_PM_CAP_PME_CLOCK	0x0008	/* PME clock required */
  27.215 +#define  PCI_PM_CAP_RESERVED    0x0010  /* Reserved field */
  27.216 +#define  PCI_PM_CAP_DSI		0x0020	/* Device specific initialization */
  27.217 +#define  PCI_PM_CAP_AUX_POWER	0x01C0	/* Auxiliary power support mask */
  27.218 +#define  PCI_PM_CAP_D1		0x0200	/* D1 power state support */
  27.219 +#define  PCI_PM_CAP_D2		0x0400	/* D2 power state support */
  27.220 +#define  PCI_PM_CAP_PME		0x0800	/* PME pin supported */
  27.221 +#define  PCI_PM_CAP_PME_MASK	0xF800	/* PME Mask of all supported states */
  27.222 +#define  PCI_PM_CAP_PME_D0	0x0800	/* PME# from D0 */
  27.223 +#define  PCI_PM_CAP_PME_D1	0x1000	/* PME# from D1 */
  27.224 +#define  PCI_PM_CAP_PME_D2	0x2000	/* PME# from D2 */
  27.225 +#define  PCI_PM_CAP_PME_D3	0x4000	/* PME# from D3 (hot) */
  27.226 +#define  PCI_PM_CAP_PME_D3cold	0x8000	/* PME# from D3 (cold) */
  27.227 +#define PCI_PM_CTRL		4	/* PM control and status register */
  27.228 +#define  PCI_PM_CTRL_STATE_MASK	0x0003	/* Current power state (D0 to D3) */
  27.229 +#define  PCI_PM_CTRL_NO_SOFT_RESET	0x0004	/* No reset for D3hot->D0 */
  27.230 +#define  PCI_PM_CTRL_PME_ENABLE	0x0100	/* PME pin enable */
  27.231 +#define  PCI_PM_CTRL_DATA_SEL_MASK	0x1e00	/* Data select (??) */
  27.232 +#define  PCI_PM_CTRL_DATA_SCALE_MASK	0x6000	/* Data scale (??) */
  27.233 +#define  PCI_PM_CTRL_PME_STATUS	0x8000	/* PME pin status */
  27.234 +#define PCI_PM_PPB_EXTENSIONS	6	/* PPB support extensions (??) */
  27.235 +#define  PCI_PM_PPB_B2_B3	0x40	/* Stop clock when in D3hot (??) */
  27.236 +#define  PCI_PM_BPCC_ENABLE	0x80	/* Bus power/clock control enable (??) */
  27.237 +#define PCI_PM_DATA_REGISTER	7	/* (??) */
  27.238 +#define PCI_PM_SIZEOF		8
  27.239 +
  27.240 +/* AGP registers */
  27.241 +
  27.242 +#define PCI_AGP_VERSION		2	/* BCD version number */
  27.243 +#define PCI_AGP_RFU		3	/* Rest of capability flags */
  27.244 +#define PCI_AGP_STATUS		4	/* Status register */
  27.245 +#define  PCI_AGP_STATUS_RQ_MASK	0xff000000	/* Maximum number of requests - 1 */
  27.246 +#define  PCI_AGP_STATUS_SBA	0x0200	/* Sideband addressing supported */
  27.247 +#define  PCI_AGP_STATUS_64BIT	0x0020	/* 64-bit addressing supported */
  27.248 +#define  PCI_AGP_STATUS_FW	0x0010	/* FW transfers supported */
  27.249 +#define  PCI_AGP_STATUS_RATE4	0x0004	/* 4x transfer rate supported */
  27.250 +#define  PCI_AGP_STATUS_RATE2	0x0002	/* 2x transfer rate supported */
  27.251 +#define  PCI_AGP_STATUS_RATE1	0x0001	/* 1x transfer rate supported */
  27.252 +#define PCI_AGP_COMMAND		8	/* Control register */
  27.253 +#define  PCI_AGP_COMMAND_RQ_MASK 0xff000000  /* Master: Maximum number of requests */
  27.254 +#define  PCI_AGP_COMMAND_SBA	0x0200	/* Sideband addressing enabled */
  27.255 +#define  PCI_AGP_COMMAND_AGP	0x0100	/* Allow processing of AGP transactions */
  27.256 +#define  PCI_AGP_COMMAND_64BIT	0x0020 	/* Allow processing of 64-bit addresses */
  27.257 +#define  PCI_AGP_COMMAND_FW	0x0010 	/* Force FW transfers */
  27.258 +#define  PCI_AGP_COMMAND_RATE4	0x0004	/* Use 4x rate */
  27.259 +#define  PCI_AGP_COMMAND_RATE2	0x0002	/* Use 2x rate */
  27.260 +#define  PCI_AGP_COMMAND_RATE1	0x0001	/* Use 1x rate */
  27.261 +#define PCI_AGP_SIZEOF		12
  27.262 +
  27.263 +/* Vital Product Data */
  27.264 +
  27.265 +#define PCI_VPD_ADDR		2	/* Address to access (15 bits!) */
  27.266 +#define  PCI_VPD_ADDR_MASK	0x7fff	/* Address mask */
  27.267 +#define  PCI_VPD_ADDR_F		0x8000	/* Write 0, 1 indicates completion */
  27.268 +#define PCI_VPD_DATA		4	/* 32-bits of data returned here */
  27.269 +
  27.270 +/* Slot Identification */
  27.271 +
  27.272 +#define PCI_SID_ESR		2	/* Expansion Slot Register */
  27.273 +#define  PCI_SID_ESR_NSLOTS	0x1f	/* Number of expansion slots available */
  27.274 +#define  PCI_SID_ESR_FIC	0x20	/* First In Chassis Flag */
  27.275 +#define PCI_SID_CHASSIS_NR	3	/* Chassis Number */
  27.276 +
  27.277 +/* Message Signalled Interrupts registers */
  27.278 +
  27.279 +#define PCI_MSI_FLAGS		2	/* Various flags */
  27.280 +#define  PCI_MSI_FLAGS_64BIT	0x80	/* 64-bit addresses allowed */
  27.281 +#define  PCI_MSI_FLAGS_QSIZE	0x70	/* Message queue size configured */
  27.282 +#define  PCI_MSI_FLAGS_QMASK	0x0e	/* Maximum queue size available */
  27.283 +#define  PCI_MSI_FLAGS_ENABLE	0x01	/* MSI feature enabled */
  27.284 +#define  PCI_MSI_FLAGS_MASKBIT	0x100	/* 64-bit mask bits allowed */
  27.285 +#define PCI_MSI_RFU		3	/* Rest of capability flags */
  27.286 +#define PCI_MSI_ADDRESS_LO	4	/* Lower 32 bits */
  27.287 +#define PCI_MSI_ADDRESS_HI	8	/* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
  27.288 +#define PCI_MSI_DATA_32		8	/* 16 bits of data for 32-bit devices */
  27.289 +#define PCI_MSI_DATA_64		12	/* 16 bits of data for 64-bit devices */
  27.290 +#define PCI_MSI_MASK_BIT	16	/* Mask bits register */
  27.291 +
  27.292 +/* CompactPCI Hotswap Register */
  27.293 +
  27.294 +#define PCI_CHSWP_CSR		2	/* Control and Status Register */
  27.295 +#define  PCI_CHSWP_DHA		0x01	/* Device Hiding Arm */
  27.296 +#define  PCI_CHSWP_EIM		0x02	/* ENUM# Signal Mask */
  27.297 +#define  PCI_CHSWP_PIE		0x04	/* Pending Insert or Extract */
  27.298 +#define  PCI_CHSWP_LOO		0x08	/* LED On / Off */
  27.299 +#define  PCI_CHSWP_PI		0x30	/* Programming Interface */
  27.300 +#define  PCI_CHSWP_EXT		0x40	/* ENUM# status - extraction */
  27.301 +#define  PCI_CHSWP_INS		0x80	/* ENUM# status - insertion */
  27.302 +
  27.303 +/* PCI-X registers */
  27.304 +
  27.305 +#define PCI_X_CMD		2	/* Modes & Features */
  27.306 +#define  PCI_X_CMD_DPERR_E	0x0001	/* Data Parity Error Recovery Enable */
  27.307 +#define  PCI_X_CMD_ERO		0x0002	/* Enable Relaxed Ordering */
  27.308 +#define  PCI_X_CMD_MAX_READ	0x000c	/* Max Memory Read Byte Count */
  27.309 +#define  PCI_X_CMD_MAX_SPLIT	0x0070	/* Max Outstanding Split Transactions */
  27.310 +#define  PCI_X_CMD_VERSION(x) 	(((x) >> 12) & 3) /* Version */
  27.311 +#define PCI_X_STATUS		4	/* PCI-X capabilities */
  27.312 +#define  PCI_X_STATUS_DEVFN	0x000000ff	/* A copy of devfn */
  27.313 +#define  PCI_X_STATUS_BUS	0x0000ff00	/* A copy of bus nr */
  27.314 +#define  PCI_X_STATUS_64BIT	0x00010000	/* 64-bit device */
  27.315 +#define  PCI_X_STATUS_133MHZ	0x00020000	/* 133 MHz capable */
  27.316 +#define  PCI_X_STATUS_SPL_DISC	0x00040000	/* Split Completion Discarded */
  27.317 +#define  PCI_X_STATUS_UNX_SPL	0x00080000	/* Unexpected Split Completion */
  27.318 +#define  PCI_X_STATUS_COMPLEX	0x00100000	/* Device Complexity */
  27.319 +#define  PCI_X_STATUS_MAX_READ	0x00600000	/* Designed Max Memory Read Count */
  27.320 +#define  PCI_X_STATUS_MAX_SPLIT	0x03800000	/* Designed Max Outstanding Split Transactions */
  27.321 +#define  PCI_X_STATUS_MAX_CUM	0x1c000000	/* Designed Max Cumulative Read Size */
  27.322 +#define  PCI_X_STATUS_SPL_ERR	0x20000000	/* Rcvd Split Completion Error Msg */
  27.323 +#define  PCI_X_STATUS_266MHZ	0x40000000	/* 266 MHz capable */
  27.324 +#define  PCI_X_STATUS_533MHZ	0x80000000	/* 533 MHz capable */
  27.325 +
  27.326 +/* PCI Express capability registers */
  27.327 +
  27.328 +#define PCI_EXP_FLAGS		2	/* Capabilities register */
  27.329 +#define PCI_EXP_FLAGS_VERS	0x000f	/* Capability version */
  27.330 +#define PCI_EXP_FLAGS_TYPE	0x00f0	/* Device/Port type */
  27.331 +#define  PCI_EXP_TYPE_ENDPOINT	0x0	/* Express Endpoint */
  27.332 +#define  PCI_EXP_TYPE_LEG_END	0x1	/* Legacy Endpoint */
  27.333 +#define  PCI_EXP_TYPE_ROOT_PORT 0x4	/* Root Port */
  27.334 +#define  PCI_EXP_TYPE_UPSTREAM	0x5	/* Upstream Port */
  27.335 +#define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
  27.336 +#define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
  27.337 +#define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
  27.338 +#define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
  27.339 +#define PCI_EXP_DEVCAP		4	/* Device capabilities */
  27.340 +#define  PCI_EXP_DEVCAP_PAYLOAD	0x07	/* Max_Payload_Size */
  27.341 +#define  PCI_EXP_DEVCAP_PHANTOM	0x18	/* Phantom functions */
  27.342 +#define  PCI_EXP_DEVCAP_EXT_TAG	0x20	/* Extended tags */
  27.343 +#define  PCI_EXP_DEVCAP_L0S	0x1c0	/* L0s Acceptable Latency */
  27.344 +#define  PCI_EXP_DEVCAP_L1	0xe00	/* L1 Acceptable Latency */
  27.345 +#define  PCI_EXP_DEVCAP_ATN_BUT	0x1000	/* Attention Button Present */
  27.346 +#define  PCI_EXP_DEVCAP_ATN_IND	0x2000	/* Attention Indicator Present */
  27.347 +#define  PCI_EXP_DEVCAP_PWR_IND	0x4000	/* Power Indicator Present */
  27.348 +#define  PCI_EXP_DEVCAP_PWR_VAL	0x3fc0000 /* Slot Power Limit Value */
  27.349 +#define  PCI_EXP_DEVCAP_PWR_SCL	0xc000000 /* Slot Power Limit Scale */
  27.350 +#define PCI_EXP_DEVCTL		8	/* Device Control */
  27.351 +#define  PCI_EXP_DEVCTL_CERE	0x0001	/* Correctable Error Reporting En. */
  27.352 +#define  PCI_EXP_DEVCTL_NFERE	0x0002	/* Non-Fatal Error Reporting Enable */
  27.353 +#define  PCI_EXP_DEVCTL_FERE	0x0004	/* Fatal Error Reporting Enable */
  27.354 +#define  PCI_EXP_DEVCTL_URRE	0x0008	/* Unsupported Request Reporting En. */
  27.355 +#define  PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
  27.356 +#define  PCI_EXP_DEVCTL_PAYLOAD	0x00e0	/* Max_Payload_Size */
  27.357 +#define  PCI_EXP_DEVCTL_EXT_TAG	0x0100	/* Extended Tag Field Enable */
  27.358 +#define  PCI_EXP_DEVCTL_PHANTOM	0x0200	/* Phantom Functions Enable */
  27.359 +#define  PCI_EXP_DEVCTL_AUX_PME	0x0400	/* Auxiliary Power PM Enable */
  27.360 +#define  PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800  /* Enable No Snoop */
  27.361 +#define  PCI_EXP_DEVCTL_READRQ	0x7000	/* Max_Read_Request_Size */
  27.362 +#define PCI_EXP_DEVSTA		10	/* Device Status */
  27.363 +#define  PCI_EXP_DEVSTA_CED	0x01	/* Correctable Error Detected */
  27.364 +#define  PCI_EXP_DEVSTA_NFED	0x02	/* Non-Fatal Error Detected */
  27.365 +#define  PCI_EXP_DEVSTA_FED	0x04	/* Fatal Error Detected */
  27.366 +#define  PCI_EXP_DEVSTA_URD	0x08	/* Unsupported Request Detected */
  27.367 +#define  PCI_EXP_DEVSTA_AUXPD	0x10	/* AUX Power Detected */
  27.368 +#define  PCI_EXP_DEVSTA_TRPND	0x20	/* Transactions Pending */
  27.369 +#define PCI_EXP_LNKCAP		12	/* Link Capabilities */
  27.370 +#define PCI_EXP_LNKCTL		16	/* Link Control */
  27.371 +#define PCI_EXP_LNKSTA		18	/* Link Status */
  27.372 +#define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
  27.373 +#define PCI_EXP_SLTCTL		24	/* Slot Control */
  27.374 +#define PCI_EXP_SLTSTA		26	/* Slot Status */
  27.375 +#define PCI_EXP_RTCTL		28	/* Root Control */
  27.376 +#define  PCI_EXP_RTCTL_SECEE	0x01	/* System Error on Correctable Error */
  27.377 +#define  PCI_EXP_RTCTL_SENFEE	0x02	/* System Error on Non-Fatal Error */
  27.378 +#define  PCI_EXP_RTCTL_SEFEE	0x04	/* System Error on Fatal Error */
  27.379 +#define  PCI_EXP_RTCTL_PMEIE	0x08	/* PME Interrupt Enable */
  27.380 +#define  PCI_EXP_RTCTL_CRSSVE	0x10	/* CRS Software Visibility Enable */
  27.381 +#define PCI_EXP_RTCAP		30	/* Root Capabilities */
  27.382 +#define PCI_EXP_RTSTA		32	/* Root Status */
  27.383 +
  27.384 +/* Extended Capabilities (PCI-X 2.0 and Express) */
  27.385 +#define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
  27.386 +#define PCI_EXT_CAP_VER(header)		((header >> 16) & 0xf)
  27.387 +#define PCI_EXT_CAP_NEXT(header)	((header >> 20) & 0xffc)
  27.388 +
  27.389 +#define PCI_EXT_CAP_ID_ERR	1
  27.390 +#define PCI_EXT_CAP_ID_VC	2
  27.391 +#define PCI_EXT_CAP_ID_DSN	3
  27.392 +#define PCI_EXT_CAP_ID_PWR	4
  27.393 +
  27.394 +/* Advanced Error Reporting */
  27.395 +#define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
  27.396 +#define  PCI_ERR_UNC_TRAIN	0x00000001	/* Training */
  27.397 +#define  PCI_ERR_UNC_DLP	0x00000010	/* Data Link Protocol */
  27.398 +#define  PCI_ERR_UNC_POISON_TLP	0x00001000	/* Poisoned TLP */
  27.399 +#define  PCI_ERR_UNC_FCP	0x00002000	/* Flow Control Protocol */
  27.400 +#define  PCI_ERR_UNC_COMP_TIME	0x00004000	/* Completion Timeout */
  27.401 +#define  PCI_ERR_UNC_COMP_ABORT	0x00008000	/* Completer Abort */
  27.402 +#define  PCI_ERR_UNC_UNX_COMP	0x00010000	/* Unexpected Completion */
  27.403 +#define  PCI_ERR_UNC_RX_OVER	0x00020000	/* Receiver Overflow */
  27.404 +#define  PCI_ERR_UNC_MALF_TLP	0x00040000	/* Malformed TLP */
  27.405 +#define  PCI_ERR_UNC_ECRC	0x00080000	/* ECRC Error Status */
  27.406 +#define  PCI_ERR_UNC_UNSUP	0x00100000	/* Unsupported Request */
  27.407 +#define PCI_ERR_UNCOR_MASK	8	/* Uncorrectable Error Mask */
  27.408 +	/* Same bits as above */
  27.409 +#define PCI_ERR_UNCOR_SEVER	12	/* Uncorrectable Error Severity */
  27.410 +	/* Same bits as above */
  27.411 +#define PCI_ERR_COR_STATUS	16	/* Correctable Error Status */
  27.412 +#define  PCI_ERR_COR_RCVR	0x00000001	/* Receiver Error Status */
  27.413 +#define  PCI_ERR_COR_BAD_TLP	0x00000040	/* Bad TLP Status */
  27.414 +#define  PCI_ERR_COR_BAD_DLLP	0x00000080	/* Bad DLLP Status */
  27.415 +#define  PCI_ERR_COR_REP_ROLL	0x00000100	/* REPLAY_NUM Rollover */
  27.416 +#define  PCI_ERR_COR_REP_TIMER	0x00001000	/* Replay Timer Timeout */
  27.417 +#define PCI_ERR_COR_MASK	20	/* Correctable Error Mask */
  27.418 +	/* Same bits as above */
  27.419 +#define PCI_ERR_CAP		24	/* Advanced Error Capabilities */
  27.420 +#define  PCI_ERR_CAP_FEP(x)	((x) & 31)	/* First Error Pointer */
  27.421 +#define  PCI_ERR_CAP_ECRC_GENC	0x00000020	/* ECRC Generation Capable */
  27.422 +#define  PCI_ERR_CAP_ECRC_GENE	0x00000040	/* ECRC Generation Enable */
  27.423 +#define  PCI_ERR_CAP_ECRC_CHKC	0x00000080	/* ECRC Check Capable */
  27.424 +#define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
  27.425 +#define PCI_ERR_HEADER_LOG	28	/* Header Log Register (16 bytes) */
  27.426 +#define PCI_ERR_ROOT_COMMAND	44	/* Root Error Command */
  27.427 +#define PCI_ERR_ROOT_STATUS	48
  27.428 +#define PCI_ERR_ROOT_COR_SRC	52
  27.429 +#define PCI_ERR_ROOT_SRC	54
  27.430 +
  27.431 +/* Virtual Channel */
  27.432 +#define PCI_VC_PORT_REG1	4
  27.433 +#define PCI_VC_PORT_REG2	8
  27.434 +#define PCI_VC_PORT_CTRL	12
  27.435 +#define PCI_VC_PORT_STATUS	14
  27.436 +#define PCI_VC_RES_CAP		16
  27.437 +#define PCI_VC_RES_CTRL		20
  27.438 +#define PCI_VC_RES_STATUS	26
  27.439 +
  27.440 +/* Power Budgeting */
  27.441 +#define PCI_PWR_DSR		4	/* Data Select Register */
  27.442 +#define PCI_PWR_DATA		8	/* Data Register */
  27.443 +#define  PCI_PWR_DATA_BASE(x)	((x) & 0xff)	    /* Base Power */
  27.444 +#define  PCI_PWR_DATA_SCALE(x)	(((x) >> 8) & 3)    /* Data Scale */
  27.445 +#define  PCI_PWR_DATA_PM_SUB(x)	(((x) >> 10) & 7)   /* PM Sub State */
  27.446 +#define  PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
  27.447 +#define  PCI_PWR_DATA_TYPE(x)	(((x) >> 15) & 7)   /* Type */
  27.448 +#define  PCI_PWR_DATA_RAIL(x)	(((x) >> 18) & 7)   /* Power Rail */
  27.449 +#define PCI_PWR_CAP		12	/* Capability */
  27.450 +#define  PCI_PWR_CAP_BUDGET(x)	((x) & 1)	/* Included in system budget */
  27.451 +
  27.452 +#endif /* LINUX_PCI_REGS_H */
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/xen/arch/x86/hvm/vmx/vtd/utils.c	Fri Jul 27 12:59:37 2007 -0700
    28.3 @@ -0,0 +1,302 @@
    28.4 +/*
    28.5 + * Copyright (c) 2006, Intel Corporation.
    28.6 + *
    28.7 + * This program is free software; you can redistribute it and/or modify it
    28.8 + * under the terms and conditions of the GNU General Public License,
    28.9 + * version 2, as published by the Free Software Foundation.
   28.10 + *
   28.11 + * This program is distributed in the hope it will be useful, but WITHOUT
   28.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   28.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   28.14 + * more details.
   28.15 + *
   28.16 + * You should have received a copy of the GNU General Public License along with
   28.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   28.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   28.19 + *
   28.20 + * Copyright (C) Allen Kay <allen.m.kay@intel.com>
   28.21 + */
   28.22 +
   28.23 +#include <xen/init.h>
   28.24 +#include <xen/bitmap.h>
   28.25 +#include <xen/irq.h>
   28.26 +#include <xen/spinlock.h>
   28.27 +#include <xen/sched.h>
   28.28 +#include <asm/delay.h>
   28.29 +#include <asm/iommu.h>
   28.30 +#include <asm/hvm/vmx/intel-iommu.h>
   28.31 +#include "dmar.h"
   28.32 +#include "pci-direct.h"
   28.33 +#include "pci_regs.h"
   28.34 +#include "msi.h"
   28.35 +
   28.36 +#include <xen/mm.h>
   28.37 +#include <xen/xmalloc.h>
   28.38 +
   28.39 +#if defined(__x86_64__)
   28.40 +void print_iommu_regs(struct acpi_drhd_unit *drhd)
   28.41 +{
   28.42 +    struct iommu *iommu = drhd->iommu;
   28.43 + 
   28.44 +    printk("---- print_iommu_regs ----\n"); 
   28.45 +    printk("print_iommu_regs: drhd->address = %lx\n", drhd->address);
   28.46 +    printk("print_iommu_regs: DMAR_VER_REG = %x\n",
   28.47 +                   dmar_readl(iommu->reg,DMAR_VER_REG));
   28.48 +    printk("print_iommu_regs: DMAR_CAP_REG = %lx\n",
   28.49 +                   dmar_readq(iommu->reg,DMAR_CAP_REG));
   28.50 +    printk("print_iommu_regs: n_fault_reg = %lx\n",
   28.51 +                   cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG)));
   28.52 +    printk("print_iommu_regs: fault_recording_offset_l = %lx\n",
   28.53 +                   cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)));
   28.54 +    printk("print_iommu_regs: fault_recording_offset_h = %lx\n",
   28.55 +                   cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8);
   28.56 +    printk("print_iommu_regs: fault_recording_reg_l = %lx\n",
   28.57 +        dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))));
   28.58 +    printk("print_iommu_regs: fault_recording_reg_h = %lx\n",
   28.59 +        dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8));
   28.60 +    printk("print_iommu_regs: DMAR_ECAP_REG = %lx\n",
   28.61 +                   dmar_readq(iommu->reg,DMAR_ECAP_REG));
   28.62 +    printk("print_iommu_regs: DMAR_GCMD_REG = %x\n",
   28.63 +                   dmar_readl(iommu->reg,DMAR_GCMD_REG));
   28.64 +    printk("print_iommu_regs: DMAR_GSTS_REG = %x\n",
   28.65 +                   dmar_readl(iommu->reg,DMAR_GSTS_REG));
   28.66 +    printk("print_iommu_regs: DMAR_RTADDR_REG = %lx\n",
   28.67 +                   dmar_readq(iommu->reg,DMAR_RTADDR_REG));
   28.68 +    printk("print_iommu_regs: DMAR_CCMD_REG = %lx\n",
   28.69 +                   dmar_readq(iommu->reg,DMAR_CCMD_REG));
   28.70 +    printk("print_iommu_regs: DMAR_FSTS_REG = %x\n",
   28.71 +                   dmar_readl(iommu->reg,DMAR_FSTS_REG));
   28.72 +    printk("print_iommu_regs: DMAR_FECTL_REG = %x\n",
   28.73 +                   dmar_readl(iommu->reg,DMAR_FECTL_REG));
   28.74 +    printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n",
   28.75 +                   dmar_readl(iommu->reg,DMAR_FEDATA_REG));
   28.76 +    printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n",
   28.77 +                   dmar_readl(iommu->reg,DMAR_FEADDR_REG));
   28.78 +    printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n",
   28.79 +                   dmar_readl(iommu->reg,DMAR_FEUADDR_REG));
   28.80 +}
   28.81 +
   28.82 +void print_vtd_entries(struct domain *d, int bus, int devfn,
   28.83 +                       unsigned long gmfn)
   28.84 +{
   28.85 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
   28.86 +    struct acpi_drhd_unit *drhd;
   28.87 +    struct iommu *iommu;
   28.88 +    struct context_entry *ctxt_entry;
   28.89 +    struct root_entry *root_entry;
   28.90 +    u64 *l3, *l2, *l1;
   28.91 +    u32 l3_index, l2_index, l1_index;
   28.92 +    u32 i = 0;
   28.93 +
   28.94 +    printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn = %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn);
   28.95 +
   28.96 +    for_each_drhd_unit(drhd) {
   28.97 +        printk("---- print_vtd_entries %d ----\n", i++);
   28.98 +
   28.99 +        if (hd->pgd == NULL) {
  28.100 +            printk("    hg->pgd == NULL\n");
  28.101 +            return;
  28.102 +        }
  28.103 +
  28.104 +        iommu = drhd->iommu;
  28.105 +        root_entry = iommu->root_entry;
  28.106 +        printk("    hd->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
  28.107 +               hd->pgd, virt_to_maddr(hd->pgd));
  28.108 +
  28.109 +        printk("    root_entry = %p\n", root_entry);
  28.110 +        if (root_entry == NULL) {
  28.111 +            printk("    root_entry == NULL\n");
  28.112 +            return;
  28.113 +        }
  28.114 +
  28.115 +        printk("    root_entry[%x] = %lx\n", bus, root_entry[bus].val);
  28.116 +        printk("    maddr_to_virt(root_entry[%x]) = %p\n",
  28.117 +            bus, maddr_to_virt(root_entry[bus].val));
  28.118 +
  28.119 +        if (root_entry[bus].val == 0) {
  28.120 +            printk("    root_entry[%x].lo == 0\n", bus);
  28.121 +            return;
  28.122 +        }
  28.123 + 
  28.124 +        ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
  28.125 +        if (ctxt_entry == NULL) {
  28.126 +            printk("    ctxt_entry == NULL\n");
  28.127 +            return;
  28.128 +        }
  28.129 +
  28.130 +        if (ctxt_entry[devfn].lo == 0) {
  28.131 +            printk("    ctxt_entry[%x].lo == 0\n", devfn);
  28.132 +            return;
  28.133 +        }
  28.134 +
  28.135 +        printk("    context = %p\n", ctxt_entry);
  28.136 +        printk("    context[%x] = %lx %lx\n",
  28.137 +               devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
  28.138 +        printk("    maddr_to_virt(context[%x].lo) = %p\n",
  28.139 +               devfn, maddr_to_virt(ctxt_entry[devfn].lo));
  28.140 +        printk("    context[%x] = %lx\n", devfn, ctxt_entry[devfn].lo); 
  28.141 +
  28.142 +        l3 = maddr_to_virt(ctxt_entry[devfn].lo);
  28.143 +        l3 = (u64*)(((u64) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
  28.144 +        printk("    l3 = %p\n", l3); 
  28.145 +        if (l3 == NULL) return;
  28.146 +
  28.147 +        l3_index = (gmfn >> 9 >> 9) & 0x1ff;
  28.148 +        printk("    l3_index = %x\n", l3_index);
  28.149 +        printk("    l3[%x] = %lx\n", l3_index, l3[l3_index]);
  28.150 +
  28.151 +        l2 = maddr_to_virt(l3[l3_index]);
  28.152 +        l2 = (u64*)(((u64) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
  28.153 +        printk("    l2 = %p\n", l2); 
  28.154 +        if (l2 == NULL) return;
  28.155 +
  28.156 +        l2_index = (gmfn >> 9) & 0x1ff;
  28.157 +        printk("    gmfn = %lx\n", gmfn);
  28.158 +        printk("    gmfn >> 9= %lx\n", gmfn >> 9);
  28.159 +        printk("    l2_index = %x\n", l2_index);
  28.160 +        printk("    l2[%x] = %lx\n", l2_index, l2[l2_index]);
  28.161 +
  28.162 +        l1 = maddr_to_virt(l2[l2_index]);
  28.163 +        l1 = (u64*)(((u64) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
  28.164 +        if (l1 == NULL) return;
  28.165 +        l1_index = gmfn & 0x1ff;
  28.166 +        printk("    l1 = %p\n", l1); 
  28.167 +        printk("    l1_index = %x\n", l1_index);
  28.168 +        printk("    l1[%x] = %lx\n", l1_index, l1[l1_index]); 
  28.169 +    }
  28.170 +}
  28.171 +
  28.172 +#else    /* !__x86_64__ */
  28.173 +
  28.174 +void print_iommu_regs(struct acpi_drhd_unit *drhd)
  28.175 +{
  28.176 +    struct iommu *iommu = drhd->iommu;
  28.177 + 
  28.178 +    printk("---- print_iommu_regs ----\n"); 
  28.179 +    printk("print_iommu_regs: drhd->address = %lx\n", drhd->address);
  28.180 +    printk("print_iommu_regs: DMAR_VER_REG = %x\n",
  28.181 +                   dmar_readl(iommu->reg,DMAR_VER_REG));
  28.182 +    printk("print_iommu_regs: DMAR_CAP_REG = %llx\n",
  28.183 +                   dmar_readq(iommu->reg,DMAR_CAP_REG));
  28.184 +    printk("print_iommu_regs: n_fault_reg = %llx\n",
  28.185 +                   cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG)));
  28.186 +    printk("print_iommu_regs: fault_recording_offset_l = %llx\n",
  28.187 +                   cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)));
  28.188 +    printk("print_iommu_regs: fault_recording_offset_h = %llx\n",
  28.189 +                   cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8);
  28.190 +    printk("print_iommu_regs: fault_recording_reg_l = %llx\n",
  28.191 +        dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))));
  28.192 +    printk("print_iommu_regs: fault_recording_reg_h = %llx\n",
  28.193 +        dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8));
  28.194 +    printk("print_iommu_regs: DMAR_ECAP_REG = %llx\n",
  28.195 +                   dmar_readq(iommu->reg,DMAR_ECAP_REG));
  28.196 +    printk("print_iommu_regs: DMAR_GCMD_REG = %x\n",
  28.197 +                   dmar_readl(iommu->reg,DMAR_GCMD_REG));
  28.198 +    printk("print_iommu_regs: DMAR_GSTS_REG = %x\n",
  28.199 +                   dmar_readl(iommu->reg,DMAR_GSTS_REG));
  28.200 +    printk("print_iommu_regs: DMAR_RTADDR_REG = %llx\n",
  28.201 +                   dmar_readq(iommu->reg,DMAR_RTADDR_REG));
  28.202 +    printk("print_iommu_regs: DMAR_CCMD_REG = %llx\n",
  28.203 +                   dmar_readq(iommu->reg,DMAR_CCMD_REG));
  28.204 +    printk("print_iommu_regs: DMAR_FSTS_REG = %x\n",
  28.205 +                   dmar_readl(iommu->reg,DMAR_FSTS_REG));
  28.206 +    printk("print_iommu_regs: DMAR_FECTL_REG = %x\n",
  28.207 +                   dmar_readl(iommu->reg,DMAR_FECTL_REG));
  28.208 +    printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n",
  28.209 +                   dmar_readl(iommu->reg,DMAR_FEDATA_REG));
  28.210 +    printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n",
  28.211 +                   dmar_readl(iommu->reg,DMAR_FEADDR_REG));
  28.212 +    printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n",
  28.213 +                   dmar_readl(iommu->reg,DMAR_FEUADDR_REG));
  28.214 +}
  28.215 +
  28.216 +void print_vtd_entries(struct domain *d, int bus, int devfn,
  28.217 +                       unsigned long gmfn)
  28.218 +{
  28.219 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
  28.220 +    struct acpi_drhd_unit *drhd;
  28.221 +    struct iommu *iommu;
  28.222 +    struct context_entry *ctxt_entry;
  28.223 +    struct root_entry *root_entry;
  28.224 +    u64 *l3, *l2, *l1;
  28.225 +    u32 l3_index, l2_index, l1_index;
  28.226 +    u32 i = 0;
  28.227 +
  28.228 +    printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn = %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn);
  28.229 +
  28.230 +    for_each_drhd_unit(drhd) {
  28.231 +        printk("---- print_vtd_entries %d ----\n", i++);
  28.232 +
  28.233 +        if (hd->pgd == NULL) {
  28.234 +            printk("    hg->pgd == NULL\n");
  28.235 +            return;
  28.236 +        }
  28.237 +
  28.238 +        iommu = drhd->iommu;
  28.239 +        root_entry = iommu->root_entry;
  28.240 +        printk("    d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
  28.241 +               hd->pgd, virt_to_maddr(hd->pgd));
  28.242 +
  28.243 +        printk("    root_entry = %p\n", root_entry);
  28.244 +        if (root_entry == NULL) {
  28.245 +            printk("    root_entry == NULL\n");
  28.246 +            return;
  28.247 +        }
  28.248 +
  28.249 +        printk("    root_entry[%x] = %llx\n", bus, root_entry[bus].val);
  28.250 +        printk("    maddr_to_virt(root_entry[%x]) = %p\n",
  28.251 +            bus, maddr_to_virt(root_entry[bus].val));
  28.252 +
  28.253 +        if (root_entry[bus].val == 0) {
  28.254 +            printk("    root_entry[%x].lo == 0\n", bus);
  28.255 +            return;
  28.256 +        }
  28.257 + 
  28.258 +        ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
  28.259 +        if (ctxt_entry == NULL) {
  28.260 +            printk("    ctxt_entry == NULL\n");
  28.261 +            return;
  28.262 +        }
  28.263 +
  28.264 +        if (ctxt_entry[devfn].lo == 0) {
  28.265 +            printk("    ctxt_entry[%x].lo == 0\n", devfn);
  28.266 +            return;
  28.267 +        }
  28.268 +
  28.269 +        printk("    context = %p\n", ctxt_entry);
  28.270 +        printk("    context[%x] = %llx %llx\n",
  28.271 +               devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
  28.272 +        printk("    maddr_to_virt(context[%x].lo) = %p\n",
  28.273 +               devfn, maddr_to_virt(ctxt_entry[devfn].lo));
  28.274 +        printk("    context[%x] = %llx\n", devfn, ctxt_entry[devfn].lo); 
  28.275 +
  28.276 +        l3 = maddr_to_virt(ctxt_entry[devfn].lo);
  28.277 +        l3 = (u64*)(((u32) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
  28.278 +        printk("    l3 = %p\n", l3); 
  28.279 +        if (l3 == NULL) return;
  28.280 +
  28.281 +        l3_index = (gmfn >> 9 >> 9) & 0x1ff;
  28.282 +        printk("    l3_index = %x\n", l3_index);
  28.283 +        printk("    l3[%x] = %llx\n", l3_index, l3[l3_index]);
  28.284 +
  28.285 +        l2 = maddr_to_virt(l3[l3_index]);
  28.286 +        l2 = (u64*)(((u32) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
  28.287 +        printk("    l2 = %p\n", l2); 
  28.288 +        if (l2 == NULL) return;
  28.289 +
  28.290 +        l2_index = (gmfn >> 9) & 0x1ff;
  28.291 +        printk("    gmfn = %lx\n", gmfn);
  28.292 +        printk("    gmfn >> 9= %lx\n", gmfn >> 9);
  28.293 +        printk("    l2_index = %x\n", l2_index);
  28.294 +        printk("    l2[%x] = %llx\n", l2_index, l2[l2_index]);
  28.295 +
  28.296 +        l1 = maddr_to_virt(l2[l2_index]);
  28.297 +        l1 = (u64*)(((u32) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
  28.298 +        if (l1 == NULL) return;
  28.299 +        l1_index = gmfn & 0x1ff;
  28.300 +        printk("    l1 = %p\n", l1); 
  28.301 +        printk("    l1_index = %x\n", l1_index);
  28.302 +        printk("    l1[%x] = %llx\n", l1_index, l1[l1_index]); 
  28.303 +    }
  28.304 +}
  28.305 +#endif    /* __x86_64__ */
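
/*
 * The index arithmetic used by both print_vtd_entries() variants above,
 * consolidated as a sketch: a guest frame number decomposes into three
 * 9-bit fields, one per level of the 3-level VT-d page table.
 */
static inline u32 vtd_level_index(unsigned long gmfn, int level)
{
    /* level 3 -> bits 26:18, level 2 -> bits 17:9, level 1 -> bits 8:0 */
    return (gmfn >> ((level - 1) * 9)) & 0x1ff;
}
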
    29.1 --- a/xen/arch/x86/hvm/vpic.c	Fri Jul 27 13:47:03 2007 +0100
    29.2 +++ b/xen/arch/x86/hvm/vpic.c	Fri Jul 27 12:59:37 2007 -0700
    29.3 @@ -177,8 +177,9 @@ static int vpic_intack(struct hvm_hw_vpi
    29.4  static void vpic_ioport_write(
    29.5      struct hvm_hw_vpic *vpic, uint32_t addr, uint32_t val)
    29.6  {
    29.7 -    int priority, cmd, irq;
    29.8 +    int priority, cmd, irq = 0, pic_eoi = 0;
    29.9      uint8_t mask;
   29.10 +    uint32_t old_addr = addr;
   29.11  
   29.12      vpic_lock(vpic);
   29.13  
   29.14 @@ -250,6 +251,13 @@ static void vpic_ioport_write(
   29.15                  vpic->isr &= ~(1 << irq);
   29.16                  if ( cmd == 7 )
   29.17                      vpic->priority_add = (irq + 1) & 7;
    29.18 +                /*
    29.19 +                 * Port 0xa0 is the slave i8259; EOIs there are for IRQs 8-15.
    29.20 +                 */
    29.21 +                if ( (old_addr & 0xa0) == 0xa0 )
    29.22 +                    irq |= 0x8;
   29.23 +                if ( irq != 0 )
   29.24 +                    pic_eoi = 1;
   29.25                  break;
   29.26              case 6: /* Set Priority                */
   29.27                  vpic->priority_add = (val + 1) & 7;
   29.28 @@ -295,6 +303,12 @@ static void vpic_ioport_write(
   29.29      vpic_update_int_output(vpic);
   29.30  
   29.31      vpic_unlock(vpic);
   29.32 +
    29.33 +    if ( pic_eoi && (irq != 0) )
   29.34 +    {
   29.35 +        hvm_dpci_eoi(irq);
   29.36 +        pic_eoi = 0;
   29.37 +    }
   29.38  }
   29.39  
   29.40  static uint32_t vpic_ioport_read(struct hvm_hw_vpic *vpic, uint32_t addr)
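
/*
 * A sketch of the EOI-to-ISA-IRQ mapping applied above: a specific-EOI
 * command encodes a 3-bit IRQ number, and writes to the slave i8259
 * (port 0xa0) cover IRQs 8-15, so the slave's IRQs are biased by 8
 * before being handed to hvm_dpci_eoi().
 */
static inline int pic_eoi_to_isa_irq(uint32_t addr, int irq)
{
    return ((addr & 0xa0) == 0xa0) ? (irq | 8) : irq;
}
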
    30.1 --- a/xen/arch/x86/i8259.c	Fri Jul 27 13:47:03 2007 +0100
    30.2 +++ b/xen/arch/x86/i8259.c	Fri Jul 27 12:59:37 2007 -0700
    30.3 @@ -83,7 +83,7 @@ BUILD_SMP_INTERRUPT(thermal_interrupt,TH
    30.4      IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
    30.5      IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
    30.6  
    30.7 -    static void (*interrupt[])(void) = {
    30.8 +    void (*interrupt[])(void) = {
    30.9          IRQLIST_16(0x0), IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
   30.10          IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
   30.11          IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
    31.1 --- a/xen/arch/x86/io_apic.c	Fri Jul 27 13:47:03 2007 +0100
    31.2 +++ b/xen/arch/x86/io_apic.c	Fri Jul 27 12:59:37 2007 -0700
    31.3 @@ -187,6 +187,68 @@ static void __modify_IO_APIC_irq (unsign
    31.4      }
    31.5  }
    31.6  
    31.7 +int real_vector[MAX_IRQ_SOURCES];
     31.8 +int fake_vector = -1;
    31.9 +
   31.10 +/*
    31.11 + * The following two functions work around a spurious-interrupt problem
    31.12 + * related to masking/unmasking of interrupts: instead of masking, we
    31.13 + * program an unused vector into the IOAPIC RTE before issuing EOI to the LAPIC.
   31.14 + */
   31.15 +static void write_fake_IO_APIC_vector (unsigned int irq)
   31.16 +{
   31.17 +    struct irq_pin_list *entry = irq_2_pin + irq;
   31.18 +    unsigned int pin, reg;
   31.19 +    unsigned long flags;
   31.20 +
   31.21 +    spin_lock_irqsave(&ioapic_lock, flags);
   31.22 +    for (;;) {
   31.23 +        pin = entry->pin;
   31.24 +        if (pin == -1)
   31.25 +            break;
   31.26 +        reg = io_apic_read(entry->apic, 0x10 + pin*2);
   31.27 +        real_vector[irq] = reg & 0xff;
   31.28 +        reg &= ~0xff;
   31.29 +
   31.30 +        if (fake_vector == -1)
   31.31 +            fake_vector = assign_irq_vector(MAX_IRQ_SOURCES-1);
   31.32 +
   31.33 +        reg |= fake_vector;
   31.34 +        io_apic_write(entry->apic, 0x10 + pin*2, reg);
   31.35 +
   31.36 +        if (!entry->next)
   31.37 +            break;
   31.38 +        entry = irq_2_pin + entry->next;
   31.39 +    }
   31.40 +    spin_unlock_irqrestore(&ioapic_lock, flags);
   31.41 +}
   31.42 +
   31.43 +static void restore_real_IO_APIC_vector (unsigned int irq)
   31.44 +{
   31.45 +    struct irq_pin_list *entry = irq_2_pin + irq;
   31.46 +    unsigned int pin, reg;
   31.47 +    unsigned long flags;
   31.48 +
   31.49 +    spin_lock_irqsave(&ioapic_lock, flags);
   31.50 +    for (;;) {
   31.51 +        pin = entry->pin;
   31.52 +        if (pin == -1)
   31.53 +            break;
   31.54 +
   31.55 +        reg = io_apic_read(entry->apic, 0x10 + pin*2);
   31.56 +        reg &= ~0xff;
   31.57 +        reg |= real_vector[irq];
   31.58 +        io_apic_write(entry->apic, 0x10 + pin*2, reg);
   31.59 +        mb();
   31.60 +        *(IO_APIC_BASE(entry->apic) + 0x10) = reg & 0xff;
   31.61 +
   31.62 +        if (!entry->next)
   31.63 +            break;
   31.64 +        entry = irq_2_pin + entry->next;
   31.65 +    }
   31.66 +    spin_unlock_irqrestore(&ioapic_lock, flags);
   31.67 +}
   31.68 +
   31.69  /* mask = 1 */
   31.70  static void __mask_IO_APIC_irq (unsigned int irq)
   31.71  {
   31.72 @@ -1359,7 +1421,7 @@ static void mask_and_ack_level_ioapic_ir
   31.73      if ( ioapic_ack_new )
   31.74          return;
   31.75  
   31.76 -    mask_IO_APIC_irq(irq);
   31.77 +    write_fake_IO_APIC_vector(irq);
   31.78  /*
   31.79   * It appears there is an erratum which affects at least version 0x11
   31.80   * of I/O APIC (that's the 82093AA and cores integrated into various
   31.81 @@ -1402,7 +1464,7 @@ static void end_level_ioapic_irq (unsign
   31.82      if ( !ioapic_ack_new )
   31.83      {
   31.84          if ( !(irq_desc[IO_APIC_VECTOR(irq)].status & IRQ_DISABLED) )
   31.85 -            unmask_IO_APIC_irq(irq);
   31.86 +            restore_real_IO_APIC_vector(irq);
   31.87          return;
   31.88      }
   31.89  
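
/*
 * The workaround above rewrites only the vector field of an IOAPIC
 * redirection entry.  In the low RTE dword read at 0x10 + pin*2, bits 7:0
 * hold the vector and bit 16 is the mask bit; swapping the vector leaves
 * the entry unmasked.  A sketch of the field update being performed:
 */
static inline unsigned int rte_set_vector(unsigned int reg, unsigned char vector)
{
    return (reg & ~0xffU) | vector;    /* replace bits 7:0 only */
}
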
    32.1 --- a/xen/arch/x86/irq.c	Fri Jul 27 13:47:03 2007 +0100
    32.2 +++ b/xen/arch/x86/irq.c	Fri Jul 27 12:59:37 2007 -0700
    32.3 @@ -16,6 +16,7 @@
    32.4  #include <xen/compat.h>
    32.5  #include <asm/current.h>
    32.6  #include <asm/smpboot.h>
    32.7 +#include <asm/iommu.h>
    32.8  
    32.9  /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
   32.10  int opt_noirqbalance = 0;
   32.11 @@ -98,6 +99,39 @@ asmlinkage void do_IRQ(struct cpu_user_r
   32.12      spin_unlock(&desc->lock);
   32.13  }
   32.14  
   32.15 +int request_irq(unsigned int irq,
   32.16 +        void (*handler)(int, void *, struct cpu_user_regs *),
   32.17 +        unsigned long irqflags, const char * devname, void *dev_id)
   32.18 +{
   32.19 +    struct irqaction * action;
   32.20 +    int retval;
   32.21 +
   32.22 +    /*
   32.23 +     * Sanity-check: shared interrupts must pass in a real dev-ID,
   32.24 +     * otherwise we'll have trouble later trying to figure out
   32.25 +     * which interrupt is which (messes up the interrupt freeing
   32.26 +     * logic etc).
   32.27 +     */
   32.28 +    if (irq >= NR_IRQS)
   32.29 +        return -EINVAL;
   32.30 +    if (!handler)
   32.31 +        return -EINVAL;
   32.32 +
   32.33 +    action = xmalloc(struct irqaction);
   32.34 +    if (!action)
   32.35 +        return -ENOMEM;
   32.36 +
   32.37 +    action->handler = handler;
   32.38 +    action->name = devname;
   32.39 +    action->dev_id = dev_id;
   32.40 +
   32.41 +    retval = setup_irq(irq, action);
   32.42 +    if (retval)
   32.43 +        xfree(action);
   32.44 +
   32.45 +    return retval;
   32.46 +}
   32.47 +
   32.48  void free_irq(unsigned int irq)
   32.49  {
   32.50      unsigned int  vector = irq_to_vector(irq);
   32.51 @@ -203,7 +237,8 @@ static void __do_IRQ_guest(int vector)
   32.52          if ( (action->ack_type != ACKTYPE_NONE) &&
   32.53               !test_and_set_bit(irq, d->pirq_mask) )
   32.54              action->in_flight++;
   32.55 -        send_guest_pirq(d, irq);
   32.56 +        if (!hvm_do_IRQ_dpci(d, irq))
   32.57 +            send_guest_pirq(d, irq);
   32.58      }
   32.59  }
   32.60  
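
/*
 * A minimal sketch of how the new request_irq() above is intended to be
 * used (the VT-d code elsewhere in this patch registers its fault handler
 * this way).  The handler name, vector and "dmar" devname here are
 * illustrative assumptions, not part of the interface.
 */
static void example_fault_handler(int irq, void *dev_id,
                                  struct cpu_user_regs *regs)
{
    /* dev_id carries back the pointer passed to request_irq() */
}

/* request_irq(vector, example_fault_handler, 0, "dmar", iommu); */
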
    33.1 --- a/xen/arch/x86/mm.c	Fri Jul 27 13:47:03 2007 +0100
    33.2 +++ b/xen/arch/x86/mm.c	Fri Jul 27 12:59:37 2007 -0700
    33.3 @@ -2628,6 +2628,9 @@ static int create_grant_va_mapping(
    33.4      if ( !okay )
    33.5              return GNTST_general_error;
    33.6  
    33.7 +    if ( iommu_found() )
    33.8 +        iommu_map_page(d, l1e_get_pfn(nl1e), l1e_get_pfn(nl1e));
    33.9 +
   33.10      if ( !paging_mode_refcounts(d) )
   33.11          put_page_from_l1e(ol1e, d);
   33.12  
   33.13 @@ -2638,9 +2641,10 @@ static int replace_grant_va_mapping(
   33.14      unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v)
   33.15  {
   33.16      l1_pgentry_t *pl1e, ol1e;
   33.17 +    struct domain *d = v->domain;
   33.18      unsigned long gl1mfn;
   33.19      int rc = 0;
   33.20 -    
   33.21 +
   33.22      pl1e = guest_map_l1e(v, addr, &gl1mfn);
   33.23      if ( !pl1e )
   33.24      {
   33.25 @@ -2666,6 +2670,9 @@ static int replace_grant_va_mapping(
   33.26          goto out;
   33.27      }
   33.28  
   33.29 +    if ( iommu_found() )
   33.30 +        iommu_unmap_page(v->domain, mfn_to_gfn(d, l1e_get_pfn(ol1e)));
   33.31 +
   33.32   out:
   33.33      guest_unmap_l1e(v, pl1e);
   33.34      return rc;
   33.35 @@ -2777,6 +2784,8 @@ int steal_page(
   33.36      if ( !(memflags & MEMF_no_refcount) )
   33.37          d->tot_pages--;
   33.38      list_del(&page->list);
   33.39 +    if (iommu_found())
   33.40 +        iommu_unmap_page(d, page_to_mfn(page));
   33.41  
   33.42      spin_unlock(&d->page_alloc_lock);
   33.43  
    34.1 --- a/xen/arch/x86/mm/p2m.c	Fri Jul 27 13:47:03 2007 +0100
    34.2 +++ b/xen/arch/x86/mm/p2m.c	Fri Jul 27 12:59:37 2007 -0700
    34.3 @@ -27,6 +27,8 @@
    34.4  #include <asm/page.h>
    34.5  #include <asm/paging.h>
    34.6  #include <asm/p2m.h>
    34.7 +#include <asm/iommu.h>
    34.8 +#include <xen/iocap.h>
    34.9  
   34.10  /* Debugging and auditing of the P2M code? */
   34.11  #define P2M_AUDIT     0
   34.12 @@ -175,7 +177,7 @@ p2m_next_level(struct domain *d, mfn_t *
   34.13  }
   34.14  
   34.15  // Returns 0 on error (out of memory)
   34.16 -static int
   34.17 +int
   34.18  set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
   34.19  {
   34.20      // XXX -- this might be able to be faster iff current->domain == d
   34.21 @@ -219,7 +221,8 @@ set_p2m_entry(struct domain *d, unsigned
   34.22      if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) ) 
   34.23          d->arch.p2m.max_mapped_pfn = gfn;
   34.24  
   34.25 -    if ( mfn_valid(mfn) )
   34.26 +    if ( mfn_valid(mfn) ||
   34.27 +         (d->iomem_caps && iomem_access_permitted(d, gfn, gfn)) )
   34.28          entry_content = l1e_from_pfn(mfn_x(mfn), l1e_flags);
   34.29      else
   34.30          entry_content = l1e_empty();
   34.31 @@ -230,6 +233,9 @@ set_p2m_entry(struct domain *d, unsigned
   34.32      /* Success */
   34.33      rv = 1;
   34.34   
   34.35 +    if (iommu_found() && is_hvm_domain(d) && mfn_valid(mfn))
   34.36 +        iommu_flush(d, gfn, (u64*)p2m_entry);
   34.37 +
   34.38   out:
   34.39      unmap_domain_page(table);
   34.40      return rv;
   34.41 @@ -264,7 +270,10 @@ int p2m_alloc_table(struct domain *d,
   34.42      struct page_info *page, *p2m_top;
   34.43      unsigned int page_count = 0;
   34.44      unsigned long gfn;
   34.45 -    
   34.46 +
   34.47 +    printk("p2m_alloc_table: CONFIG_PAGING_LEVELS = %x\n",
   34.48 +           CONFIG_PAGING_LEVELS);
   34.49 +
   34.50      p2m_lock(d);
   34.51  
   34.52      if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
   34.53 @@ -327,6 +336,11 @@ int p2m_alloc_table(struct domain *d,
   34.54              goto error;
   34.55      }
   34.56  
   34.57 +#if CONFIG_PAGING_LEVELS >= 3
   34.58 +    if (iommu_found() && is_hvm_domain(d))
   34.59 +        iommu_set_pgd(d);
   34.60 +#endif
   34.61 +
   34.62      P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
   34.63      p2m_unlock(d);
   34.64      return 0;
    35.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Jul 27 13:47:03 2007 +0100
    35.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Jul 27 12:59:37 2007 -0700
    35.3 @@ -28,6 +28,7 @@
    35.4  #include <xen/sched.h>
    35.5  #include <xen/perfc.h>
    35.6  #include <xen/domain_page.h>
    35.7 +#include <xen/iocap.h>
    35.8  #include <asm/page.h>
    35.9  #include <asm/current.h>
   35.10  #include <asm/shadow.h>
   35.11 @@ -667,7 +668,8 @@ static always_inline void
   35.12      // case of a prefetch, an invalid mfn means that we can not usefully
   35.13      // shadow anything, and so we return early.
   35.14      //
   35.15 -    if ( !mfn_valid(target_mfn) )
   35.16 +    if ( !mfn_valid(target_mfn) && 
   35.17 +         !iomem_access_permitted(d, mfn_x(target_mfn), mfn_x(target_mfn)) )
   35.18      {
   35.19          ASSERT((ft == ft_prefetch));
   35.20          *sp = shadow_l1e_empty();
   35.21 @@ -2645,7 +2647,10 @@ static int sh_page_fault(struct vcpu *v,
   35.22      //
   35.23  
   35.24  #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2
   35.25 -    if ( (regs->error_code & PFEC_reserved_bit) )
   35.26 +    if ( (regs->error_code & PFEC_reserved_bit)
   35.27 +            && !iomem_access_permitted(d,
   35.28 +                                       paging_gva_to_gfn(v, va),
   35.29 +                                       paging_gva_to_gfn(v, va)) )
   35.30      {
   35.31          /* The only reasons for reserved bits to be set in shadow entries 
   35.32           * are the two "magic" shadow_l1e entries. */
   35.33 @@ -2836,8 +2841,16 @@ static int sh_page_fault(struct vcpu *v,
   35.34      }
   35.35  
   35.36      /* Calculate the shadow entry and write it */
   35.37 -    l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
   35.38 +    if ( iomem_access_permitted(d, mfn_x(gmfn), mfn_x(gmfn)) )
   35.39 +        l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
   35.40 +                             gmfn, &sl1e, ft, 0);
   35.41 +    else
   35.42 +        l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
   35.43                               gmfn, &sl1e, ft, mmio);
   35.44 +
   35.45 +    if ( iomem_access_permitted(d, mfn_x(gmfn), mfn_x(gmfn)) )
   35.46 +        sl1e.l1 |= _PAGE_PCD | _PAGE_PWT;
   35.47 +
   35.48      r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
   35.49  
   35.50  #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
   35.51 @@ -2860,7 +2873,7 @@ static int sh_page_fault(struct vcpu *v,
   35.52          }
   35.53      }
   35.54  
   35.55 -    if ( mmio ) 
   35.56 +    if (mmio & !iomem_access_permitted(d, mfn_x(gmfn), mfn_x(gmfn)))
   35.57      {
   35.58          gpa = guest_walk_to_gpa(&gw);
   35.59          goto mmio;
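
/*
 * The cache-attribute fixup above, factored out as a sketch: MMIO frames
 * of an assigned device must bypass the caches, so shadow L1 entries for
 * permitted iomem pages are forced page-cache-disable + write-through.
 */
static inline shadow_l1e_t sh_l1e_mark_uncacheable(shadow_l1e_t sl1e)
{
    sl1e.l1 |= _PAGE_PCD | _PAGE_PWT;    /* uncacheable device memory */
    return sl1e;
}
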
    36.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Jul 27 13:47:03 2007 +0100
    36.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Jul 27 12:59:37 2007 -0700
    36.3 @@ -447,7 +447,7 @@ sh_mfn_is_a_page_table(mfn_t gmfn)
    36.4      struct domain *owner;
    36.5      unsigned long type_info;
    36.6  
    36.7 -    if ( !mfn_valid(gmfn) )
    36.8 +    if ( !mfn_valid(gmfn) || unlikely(mfn_x(gmfn) > max_page) )
    36.9          return 0;
   36.10  
   36.11      owner = page_get_owner(page);
    37.1 --- a/xen/arch/x86/mpparse.c	Fri Jul 27 13:47:03 2007 +0100
    37.2 +++ b/xen/arch/x86/mpparse.c	Fri Jul 27 12:59:37 2007 -0700
    37.3 @@ -860,13 +860,7 @@ void __devinit mp_register_lapic (
    37.4  #define MP_ISA_BUS		0
    37.5  #define MP_MAX_IOAPIC_PIN	127
    37.6  
    37.7 -static struct mp_ioapic_routing {
    37.8 -	int			apic_id;
    37.9 -	int			gsi_base;
   37.10 -	int			gsi_end;
   37.11 -	u32			pin_programmed[4];
   37.12 -} mp_ioapic_routing[MAX_IO_APICS];
   37.13 -
   37.14 +struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
   37.15  
   37.16  static int mp_find_ioapic (
   37.17  	int			gsi)
    38.1 --- a/xen/arch/x86/setup.c	Fri Jul 27 13:47:03 2007 +0100
    38.2 +++ b/xen/arch/x86/setup.c	Fri Jul 27 12:59:37 2007 -0700
    38.3 @@ -1051,6 +1051,9 @@ void __init __start_xen(unsigned long mb
    38.4          _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
    38.5      }
    38.6  
    38.7 +    if (iommu_setup() != 0)
    38.8 +        panic("iommu_setup() failed\n");
    38.9 +
   38.10      /*
   38.11       * We're going to setup domain0 using the module(s) that we stashed safely
   38.12       * above our heap. The second module, if present, is an initrd ramdisk.
    39.1 --- a/xen/common/grant_table.c	Fri Jul 27 13:47:03 2007 +0100
    39.2 +++ b/xen/common/grant_table.c	Fri Jul 27 12:59:37 2007 -0700
    39.3 @@ -949,6 +949,8 @@ gnttab_transfer(
    39.4          if ( unlikely(e->tot_pages++ == 0) )
    39.5              get_knownalive_domain(e);
    39.6          list_add_tail(&page->list, &e->page_list);
    39.7 +        if (iommu_found())
    39.8 +            iommu_map_page(e, mfn, mfn);
    39.9          page_set_owner(page, e);
   39.10  
   39.11          spin_unlock(&e->page_alloc_lock);
    40.1 --- a/xen/common/page_alloc.c	Fri Jul 27 13:47:03 2007 +0100
    40.2 +++ b/xen/common/page_alloc.c	Fri Jul 27 12:59:37 2007 -0700
    40.3 @@ -37,6 +37,7 @@
    40.4  #include <xen/numa.h>
    40.5  #include <xen/nodemask.h>
    40.6  #include <asm/page.h>
    40.7 +#include <asm/iommu.h>
    40.8  
    40.9  /*
   40.10   * Comma-separated list of hexadecimal page numbers containing bad bytes.
   40.11 @@ -804,6 +805,9 @@ int assign_pages(
   40.12          wmb(); /* Domain pointer must be visible before updating refcnt. */
   40.13          pg[i].count_info = PGC_allocated | 1;
   40.14          list_add_tail(&pg[i].list, &d->page_list);
   40.15 +
    40.16 +        if ( iommu_found() && !is_hvm_domain(d) && (dom0 != NULL) )
   40.17 +            iommu_map_page(d, page_to_mfn(&pg[i]), page_to_mfn(&pg[i]));
   40.18      }
   40.19  
   40.20      spin_unlock(&d->page_alloc_lock);
   40.21 @@ -872,7 +876,7 @@ void free_domheap_pages(struct page_info
   40.22  {
   40.23      int            i, drop_dom_ref;
   40.24      struct domain *d = page_get_owner(pg);
    40.25  
   40.27      ASSERT(!in_irq());
   40.28  
   40.29      if ( unlikely(is_xen_heap_frame(pg)) )
   40.30 @@ -897,6 +901,8 @@ void free_domheap_pages(struct page_info
   40.31          {
   40.32              BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
   40.33              list_del(&pg[i].list);
   40.34 +            if ( iommu_found() && !is_hvm_domain(d) )
   40.35 +                iommu_unmap_page(d, page_to_mfn(&pg[i]));
   40.36          }
   40.37  
   40.38          d->tot_pages -= 1 << order;
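Together with the grant_table.c hunk above, these page_alloc.c changes keep the VT-d tables in lockstep with page ownership for PV domains: every frame a domain gains is given a 1:1 gfn == mfn IOMMU mapping (PV drivers hand machine addresses to devices, so the identity mapping preserves their DMA behaviour), and the mapping is torn down before the frame can be recycled. Condensed, using the interfaces declared in asm-x86/iommu.h below:

    /* Frame assigned to a non-HVM domain: permit its devices to DMA there. */
    if ( iommu_found() && !is_hvm_domain(d) )
        iommu_map_page(d, page_to_mfn(pg), page_to_mfn(pg));  /* identity */

    /* Frame freed: revoke DMA access before the page is reused. */
    if ( iommu_found() && !is_hvm_domain(d) )
        iommu_unmap_page(d, page_to_mfn(pg));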
    41.1 --- a/xen/drivers/acpi/tables.c	Fri Jul 27 13:47:03 2007 +0100
    41.2 +++ b/xen/drivers/acpi/tables.c	Fri Jul 27 12:59:37 2007 -0700
    41.3 @@ -59,6 +59,7 @@ static char *acpi_table_signatures[ACPI_
    41.4  	[ACPI_SPMI] = "SPMI",
    41.5  	[ACPI_HPET] = "HPET",
    41.6  	[ACPI_MCFG] = "MCFG",
    41.7 +	[ACPI_DMAR] = "DMAR",
    41.8  };
    41.9  
   41.10  static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
    42.1 --- a/xen/include/asm-x86/acpi.h	Fri Jul 27 13:47:03 2007 +0100
    42.2 +++ b/xen/include/asm-x86/acpi.h	Fri Jul 27 12:59:37 2007 -0700
    42.3 @@ -178,4 +178,6 @@ extern void acpi_reserve_bootmem(void);
    42.4  extern u8 x86_acpiid_to_apicid[];
    42.5  #define MAX_LOCAL_APIC 256
    42.6  
    42.7 +extern int acpi_dmar_init(void);
    42.8 +
    42.9  #endif /*_ASM_ACPI_H*/
    43.1 --- a/xen/include/asm-x86/fixmap.h	Fri Jul 27 13:47:03 2007 +0100
    43.2 +++ b/xen/include/asm-x86/fixmap.h	Fri Jul 27 12:59:37 2007 -0700
    43.3 @@ -17,6 +17,7 @@
    43.4  #include <asm/acpi.h>
    43.5  #include <asm/page.h>
    43.6  #include <xen/kexec.h>
    43.7 +#include <asm/iommu.h>
    43.8  
    43.9  /*
   43.10   * Here we define all the compile-time 'special' virtual
   43.11 @@ -40,6 +41,8 @@ enum fixed_addresses {
   43.12      FIX_KEXEC_BASE_0,
   43.13      FIX_KEXEC_BASE_END = FIX_KEXEC_BASE_0 \
   43.14        + ((KEXEC_XEN_NO_PAGES >> 1) * KEXEC_IMAGE_NR) - 1,
   43.15 +    FIX_IOMMU_REGS_BASE_0,
   43.16 +    FIX_IOMMU_REGS_END = FIX_IOMMU_REGS_BASE_0 + MAX_IOMMUS-1,
   43.17      __end_of_fixed_addresses
   43.18  };
   43.19  
    44.1 --- a/xen/include/asm-x86/hvm/domain.h	Fri Jul 27 13:47:03 2007 +0100
    44.2 +++ b/xen/include/asm-x86/hvm/domain.h	Fri Jul 27 12:59:37 2007 -0700
    44.3 @@ -21,10 +21,12 @@
    44.4  #ifndef __ASM_X86_HVM_DOMAIN_H__
    44.5  #define __ASM_X86_HVM_DOMAIN_H__
    44.6  
    44.7 +#include <asm/iommu.h>
    44.8  #include <asm/hvm/irq.h>
    44.9  #include <asm/hvm/vpt.h>
   44.10  #include <asm/hvm/vlapic.h>
   44.11  #include <asm/hvm/io.h>
   44.12 +#include <asm/hvm/iommu.h>
   44.13  #include <public/hvm/params.h>
   44.14  #include <public/hvm/save.h>
   44.15  
   44.16 @@ -57,6 +59,8 @@ struct hvm_domain {
   44.17      uint64_t               params[HVM_NR_PARAMS];
   44.18  
   44.19      unsigned long          vmx_apic_access_mfn;
   44.20 +
   44.21 +    struct hvm_iommu       hvm_iommu;
   44.22  };
   44.23  
   44.24  #endif /* __ASM_X86_HVM_DOMAIN_H__ */
    45.1 --- a/xen/include/asm-x86/hvm/io.h	Fri Jul 27 13:47:03 2007 +0100
    45.2 +++ b/xen/include/asm-x86/hvm/io.h	Fri Jul 27 12:59:37 2007 -0700
    45.3 @@ -151,6 +151,7 @@ void send_invalidate_req(void);
    45.4  extern void handle_mmio(unsigned long gpa);
    45.5  extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
    45.6  extern void hvm_io_assist(void);
    45.7 +extern void hvm_dpci_eoi(unsigned int guest_irq);
    45.8  
    45.9  #endif /* __ASM_X86_HVM_IO_H__ */
   45.10  
    46.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.2 +++ b/xen/include/asm-x86/hvm/iommu.h	Fri Jul 27 12:59:37 2007 -0700
    46.3 @@ -0,0 +1,40 @@
    46.4 +/*
    46.5 + * Copyright (c) 2006, Intel Corporation.
    46.6 + *
    46.7 + * This program is free software; you can redistribute it and/or modify it
    46.8 + * under the terms and conditions of the GNU General Public License,
    46.9 + * version 2, as published by the Free Software Foundation.
   46.10 + *
   46.11 + * This program is distributed in the hope it will be useful, but WITHOUT
   46.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   46.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   46.14 + * more details.
   46.15 + *
   46.16 + * You should have received a copy of the GNU General Public License along with
   46.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   46.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   46.19 + *
   46.20 + * Copyright (C) Allen Kay <allen.m.kay@intel.com>
   46.21 + */
   46.22 +
   46.23 +#ifndef __ASM_X86_HVM_IOMMU_H__
   46.24 +#define __ASM_X86_HVM_IOMMU_H__
   46.25 +
   46.26 +#include <asm/iommu.h>
   46.27 +#include <asm/hvm/irq.h>
   46.28 +#include <asm/hvm/vpt.h>
   46.29 +#include <asm/hvm/vlapic.h>
   46.30 +#include <asm/hvm/io.h>
   46.31 +#include <public/hvm/params.h>
   46.32 +#include <public/hvm/save.h>
   46.33 +
   46.34 +struct hvm_iommu {
   46.35 +    spinlock_t iommu_list_lock;    /* protect iommu specific lists */
   46.36 +    struct list_head pdev_list;    /* direct accessed pci devices */
   46.37 +    struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
   46.38 +    struct dma_pte *pgd;           /* io page directory root */
   46.39 +    spinlock_t mapping_lock;       /* io page table lock */
   46.40 +    int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
   46.41 +};
   46.42 +
    46.43 +#endif /* __ASM_X86_HVM_IOMMU_H__ */
    47.1 --- a/xen/include/asm-x86/hvm/irq.h	Fri Jul 27 13:47:03 2007 +0100
    47.2 +++ b/xen/include/asm-x86/hvm/irq.h	Fri Jul 27 12:59:37 2007 -0700
    47.3 @@ -29,6 +29,16 @@
    47.4  #include <asm/hvm/vioapic.h>
    47.5  #include <public/hvm/save.h>
    47.6  
    47.7 +struct hvm_irq_mapping {
    47.8 +    uint8_t valid;
    47.9 +    uint8_t device;
   47.10 +    uint8_t intx;
   47.11 +    union {
   47.12 +        uint8_t guest_gsi;
   47.13 +        uint8_t machine_gsi;
   47.14 +    };
   47.15 +};
    47.16 +
   47.17  struct hvm_irq {
   47.18      /*
   47.19       * Virtual interrupt wires for a single PCI bus.
   47.20 @@ -88,6 +98,12 @@ struct hvm_irq {
   47.21  
   47.22      /* Last VCPU that was delivered a LowestPrio interrupt. */
   47.23      u8 round_robin_prev_vcpu;
   47.24 +
   47.25 +    /* machine irq to guest device/intx mapping */
   47.26 +    struct hvm_irq_mapping mirq[NR_IRQS];
   47.27 +    /* guest irq to guest device/intx mapping */
   47.28 +    struct hvm_irq_mapping girq[NR_IRQS];
   47.29 +    DECLARE_BITMAP(dirq_mask, NR_IRQS);
   47.30  };
   47.31  
   47.32  #define hvm_pci_intx_gsi(dev, intx)  \
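mirq[] is indexed by machine IRQ and girq[] by guest GSI; each entry names the virtual PCI device/INTx pair, so the interrupt path can translate in both directions. A sketch of the forward lookup (hypothetical helper; the real consumer is the dpci interrupt code added elsewhere in this changeset):

    /* Translate a machine IRQ into the guest GSI to inject. */
    static int machine_irq_to_guest_gsi(struct hvm_irq *hvm_irq,
                                        unsigned int machine_irq)
    {
        struct hvm_irq_mapping *m;

        if ( machine_irq >= NR_IRQS )
            return -1;
        m = &hvm_irq->mirq[machine_irq];
        if ( !m->valid )
            return -1;              /* IRQ not bound to this guest */
        return m->guest_gsi;        /* device/intx give the vPCI pin */
    }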
    48.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.2 +++ b/xen/include/asm-x86/hvm/vmx/intel-iommu.h	Fri Jul 27 12:59:37 2007 -0700
    48.3 @@ -0,0 +1,401 @@
    48.4 +/*
    48.5 + * Copyright (c) 2006, Intel Corporation.
    48.6 + *
    48.7 + * This program is free software; you can redistribute it and/or modify it
    48.8 + * under the terms and conditions of the GNU General Public License,
    48.9 + * version 2, as published by the Free Software Foundation.
   48.10 + *
   48.11 + * This program is distributed in the hope it will be useful, but WITHOUT
   48.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   48.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   48.14 + * more details.
   48.15 + *
   48.16 + * You should have received a copy of the GNU General Public License along with
   48.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   48.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   48.19 + *
   48.20 + * Copyright (C) Ashok Raj <ashok.raj@intel.com>
   48.21 + */
   48.22 +
   48.23 +#ifndef _INTEL_IOMMU_H_
   48.24 +#define _INTEL_IOMMU_H_
   48.25 +
   48.26 +#include <xen/types.h>
   48.27 +
   48.28 +/*
   48.29 + * Intel IOMMU register specification per version 1.0 public spec.
   48.30 + */
   48.31 +
   48.32 +#define    DMAR_VER_REG    0x0    /* Arch version supported by this IOMMU */
   48.33 +#define    DMAR_CAP_REG    0x8    /* Hardware supported capabilities */
   48.34 +#define    DMAR_ECAP_REG    0x10    /* Extended capabilities supported */
   48.35 +#define    DMAR_GCMD_REG    0x18    /* Global command register */
   48.36 +#define    DMAR_GSTS_REG    0x1c    /* Global status register */
   48.37 +#define    DMAR_RTADDR_REG    0x20    /* Root entry table */
   48.38 +#define    DMAR_CCMD_REG    0x28    /* Context command reg */
   48.39 +#define    DMAR_FSTS_REG    0x34    /* Fault Status register */
   48.40 +#define    DMAR_FECTL_REG    0x38    /* Fault control register */
   48.41 +#define    DMAR_FEDATA_REG    0x3c    /* Fault event interrupt data register */
   48.42 +#define    DMAR_FEADDR_REG    0x40    /* Fault event interrupt addr register */
   48.43 +#define    DMAR_FEUADDR_REG 0x44    /* Upper address register */
   48.44 +#define    DMAR_AFLOG_REG    0x58    /* Advanced Fault control */
   48.45 +#define    DMAR_PMEN_REG    0x64    /* Enable Protected Memory Region */
   48.46 +#define    DMAR_PLMBASE_REG 0x68    /* PMRR Low addr */
   48.47 +#define    DMAR_PLMLIMIT_REG 0x6c    /* PMRR low limit */
   48.48 +#define    DMAR_PHMBASE_REG 0x70    /* pmrr high base addr */
   48.49 +#define    DMAR_PHMLIMIT_REG 0x78    /* pmrr high limit */
   48.50 +#define    DMAR_IQH_REG    0x80    /* invalidation queue head */
   48.51 +#define    DMAR_IQT_REG    0x88    /* invalidation queue tail */
   48.52 +#define    DMAR_IQA_REG    0x90    /* invalidation queue addr */
   48.53 +#define    DMAR_IRTA_REG   0xB8    /* intr remap */
   48.54 +
   48.55 +#define OFFSET_STRIDE        (9)
   48.56 +#define dmar_readl(dmar, reg) readl(dmar + reg)
   48.57 +#define dmar_writel(dmar, reg, val) writel(val, dmar + reg)
   48.58 +#define dmar_readq(dmar, reg) ({ \
   48.59 +        u32 lo, hi; \
   48.60 +        lo = dmar_readl(dmar, reg); \
   48.61 +        hi = dmar_readl(dmar, reg + 4); \
   48.62 +        (((u64) hi) << 32) + lo; })
   48.63 +#define dmar_writeq(dmar, reg, val) do {\
   48.64 +        dmar_writel(dmar, reg, (u32)val); \
   48.65 +        dmar_writel(dmar, reg + 4, (u32)((u64) val >> 32)); \
   48.66 +    } while (0)
   48.67 +
   48.68 +#define VER_MAJOR(v)        (((v) & 0xf0) >> 4)
   48.69 +#define VER_MINOR(v)        ((v) & 0x0f)
   48.70 +
   48.71 +/*
   48.72 + * Decoding Capability Register
   48.73 + */
   48.74 +#define cap_read_drain(c)    (((c) >> 55) & 1)
   48.75 +#define cap_write_drain(c)    (((c) >> 54) & 1)
   48.76 +#define cap_max_amask_val(c)    (((c) >> 48) & 0x3f)
   48.77 +#define cap_num_fault_regs(c)    ((((c) >> 40) & 0xff) + 1)
   48.78 +#define cap_pgsel_inv(c)       (((c) >> 39) & 1)
   48.79 +
   48.80 +#define cap_super_page_val(c)    (((c) >> 34) & 0xf)
   48.81 +#define cap_super_offset(c)    (((find_first_bit(&cap_super_page_val(c), 4)) \
   48.82 +                    * OFFSET_STRIDE) + 21)
   48.83 +
   48.84 +#define cap_fault_reg_offset(c)    ((((c) >> 24) & 0x3ff) * 16)
   48.85 +
   48.86 +#define cap_isoch(c)        (((c) >> 23) & 1)
   48.87 +#define cap_qos(c)        (((c) >> 22) & 1)
   48.88 +#define cap_mgaw(c)        ((((c) >> 16) & 0x3f) + 1)
   48.89 +#define cap_sagaw(c)        (((c) >> 8) & 0x1f)
   48.90 +#define cap_caching_mode(c)    (((c) >> 7) & 1)
   48.91 +#define cap_phmr(c)        (((c) >> 6) & 1)
   48.92 +#define cap_plmr(c)        (((c) >> 5) & 1)
   48.93 +#define cap_rwbf(c)        (((c) >> 4) & 1)
   48.94 +#define cap_afl(c)        (((c) >> 3) & 1)
   48.95 +#define cap_ndoms(c)        (((u64)1) << (4 + 2 * ((c) & 0x7)))
   48.96 +/*
   48.97 + * Extended Capability Register
   48.98 + */
   48.99 +
  48.100 +#define ecap_niotlb_iunits(e)    ((((e) >> 24) & 0xff) + 1)
  48.101 +#define ecap_iotlb_offset(e)     ((((e) >> 8) & 0x3ff) * 16)
  48.102 +#define ecap_coherent(e)         ((e >> 0) & 0x1)
  48.103 +#define ecap_queued_inval(e)     ((e >> 1) & 0x1)
  48.104 +#define ecap_dev_iotlb(e)        ((e >> 2) & 0x1)
  48.105 +#define ecap_intr_remap(e)       ((e >> 3) & 0x1)
  48.106 +#define ecap_ext_intr(e)         ((e >> 4) & 0x1)
  48.107 +#define ecap_cache_hints(e)      ((e >> 5) & 0x1)
  48.108 +#define ecap_pass_thru(e)        ((e >> 6) & 0x1)
  48.109 +
  48.110 +#define PAGE_SHIFT_4K        (12)
  48.111 +#define PAGE_SIZE_4K        (1UL << PAGE_SHIFT_4K)
  48.112 +#define PAGE_MASK_4K        (((u64)-1) << PAGE_SHIFT_4K)
  48.113 +#define PAGE_ALIGN_4K(addr)    (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
  48.114 +
  48.115 +/* IOTLB_REG */
  48.116 +#define DMA_TLB_FLUSH_GRANU_OFFSET  60
  48.117 +#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
  48.118 +#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
  48.119 +#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
  48.120 +#define DMA_TLB_IIRG(x) (((x) >> 60) & 7) 
  48.121 +#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
  48.122 +#define DMA_TLB_DID(x) (((u64)(x & 0xffff)) << 32)
  48.123 +
  48.124 +#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
  48.125 +#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
  48.126 +#define DMA_TLB_IVT (((u64)1) << 63)
  48.127 +
  48.128 +#define DMA_TLB_IVA_ADDR(x) ((((u64)x) >> 12) << 12)
  48.129 +#define DMA_TLB_IVA_HINT(x) ((((u64)x) & 1) << 6)
  48.130 +
  48.131 +/* GCMD_REG */
  48.132 +#define DMA_GCMD_TE (((u64)1) << 31)
  48.133 +#define DMA_GCMD_SRTP (((u64)1) << 30)
  48.134 +#define DMA_GCMD_SFL (((u64)1) << 29)
  48.135 +#define DMA_GCMD_EAFL (((u64)1) << 28)
  48.136 +#define DMA_GCMD_WBF (((u64)1) << 27)
  48.137 +#define DMA_GCMD_QIE (((u64)1) << 26)
  48.138 +#define DMA_GCMD_IRE (((u64)1) << 25)
  48.139 +#define DMA_GCMD_SIRTP (((u64)1) << 24)
  48.140 +
  48.141 +/* GSTS_REG */
  48.142 +#define DMA_GSTS_TES (((u64)1) << 31)
  48.143 +#define DMA_GSTS_RTPS (((u64)1) << 30)
  48.144 +#define DMA_GSTS_FLS (((u64)1) << 29)
  48.145 +#define DMA_GSTS_AFLS (((u64)1) << 28)
  48.146 +#define DMA_GSTS_WBFS (((u64)1) << 27)
  48.147 +#define DMA_GSTS_IRTPS (((u64)1) << 24)
  48.148 +#define DMA_GSTS_QIES   (((u64)1) << 26)
  48.149 +#define DMA_GSTS_IRES   (((u64)1) << 25)
  48.150 +
  48.151 +/* CCMD_REG */
  48.152 +#define DMA_CCMD_INVL_GRANU_OFFSET  61
  48.153 +#define DMA_CCMD_ICC (((u64)1) << 63)
  48.154 +#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
  48.155 +#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
  48.156 +#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
  48.157 +#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
  48.158 +#define DMA_CCMD_CIRG(x) ((((u64)3) << 61) & x)
  48.159 +#define DMA_CCMD_MASK_NOBIT 0
  48.160 +#define DMA_CCMD_MASK_1BIT 1
  48.161 +#define DMA_CCMD_MASK_2BIT 2
  48.162 +#define DMA_CCMD_MASK_3BIT 3
  48.163 +#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
  48.164 +#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
  48.165 +
  48.166 +#define DMA_CCMD_CAIG_MASK(x) (((u64)x) & ((u64) 0x3 << 59))
  48.167 +
  48.168 +/* FECTL_REG */
  48.169 +#define DMA_FECTL_IM (((u64)1) << 31)
  48.170 +
  48.171 +/* FSTS_REG */
  48.172 +#define DMA_FSTS_PPF ((u64)2)
  48.173 +#define DMA_FSTS_PFO ((u64)1)
  48.174 +#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
  48.175 +
  48.176 +/* FRCD_REG, 32 bits access */
  48.177 +#define DMA_FRCD_F (((u64)1) << 31)
  48.178 +#define dma_frcd_type(d) ((d >> 30) & 1)
  48.179 +#define dma_frcd_fault_reason(c) (c & 0xff)
  48.180 +#define dma_frcd_source_id(c) (c & 0xffff)
  48.181 +#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
  48.182 +
  48.183 +/*
  48.184 + * 0: Present
  48.185 + * 1-11: Reserved
  48.186 + * 12-63: Context Ptr (12 - (haw-1))
  48.187 + * 64-127: Reserved
  48.188 + */
  48.189 +struct root_entry {
  48.190 +    u64    val;
  48.191 +    u64    rsvd1;
  48.192 +};
  48.193 +#define root_present(root)    ((root).val & 1)
  48.194 +#define set_root_present(root) do {(root).val |= 1;} while(0)
  48.195 +#define get_context_addr(root) ((root).val & PAGE_MASK_4K)
  48.196 +#define set_root_value(root, value) \
  48.197 +    do {(root).val |= ((value) & PAGE_MASK_4K);} while(0)
  48.198 +
  48.199 +struct context_entry {
  48.200 +    u64 lo;
  48.201 +    u64 hi;
  48.202 +};
  48.203 +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
  48.204 +#define context_present(c) ((c).lo & 1)
  48.205 +#define context_fault_disable(c) (((c).lo >> 1) & 1)
  48.206 +#define context_translation_type(c) (((c).lo >> 2) & 3)
  48.207 +#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
  48.208 +#define context_address_width(c) ((c).hi &  7)
  48.209 +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
  48.210 +
  48.211 +#define context_set_present(c) do {(c).lo |= 1;} while(0)
  48.212 +#define context_clear_present(c) do {(c).lo &= ~1;} while(0)
  48.213 +#define context_set_fault_enable(c) \
  48.214 +    do {(c).lo &= (((u64)-1) << 2) | 1;} while(0)
  48.215 +
  48.216 +#define context_set_translation_type(c, val) do { \
  48.217 +        (c).lo &= (((u64)-1) << 4) | 3; \
  48.218 +        (c).lo |= (val & 3) << 2; \
  48.219 +    } while(0)
  48.220 +#define CONTEXT_TT_MULTI_LEVEL 0
  48.221 +#define CONTEXT_TT_DEV_IOTLB   1
  48.222 +#define CONTEXT_TT_PASS_THRU   2
  48.223 +
  48.224 +#define context_set_address_root(c, val) \
  48.225 +    do {(c).lo &= 0xfff; (c).lo |= (val) & PAGE_MASK_4K ;} while(0)
  48.226 +#define context_set_address_width(c, val) \
  48.227 +    do {(c).hi &= 0xfffffff8; (c).hi |= (val) & 7;} while(0)
  48.228 +#define context_set_domain_id(c, val) \
  48.229 +    do {(c).hi &= 0xff; (c).hi |= ((val + 1) & ((1 << 16) - 1)) << 8;} while(0)
  48.230 +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while(0)
  48.231 +
  48.232 +/*
  48.233 + * 0: readable
  48.234 + * 1: writable
  48.235 + * 2-6: reserved
  48.236 + * 7: super page
  48.237 + * 8-11: available
  48.238 + * 12-63: Host physical address
  48.239 + */
  48.240 +struct dma_pte {
  48.241 +    u64 val;
  48.242 +};
  48.243 +#define dma_clear_pte(p)    do {(p).val = 0;} while(0)
  48.244 +#define dma_set_pte_readable(p) do {(p).val |= 1;} while(0)
  48.245 +#define dma_set_pte_writable(p) do {(p).val |= 2;} while(0)
  48.246 +#define dma_set_pte_superpage(p) do {(p).val |= 8;} while(0)
  48.247 +#define dma_set_pte_prot(p, prot) do { (p).val = (((p).val >> 2) << 2) | ((prot) & 3);} while (0)
  48.248 +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
  48.249 +#define dma_set_pte_addr(p, addr) do {(p).val |= ((addr) >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;} while(0)
  48.250 +#define DMA_PTE_READ (1)
  48.251 +#define DMA_PTE_WRITE (2)
  48.252 +#define dma_pte_present(p) (((p).val & 3) != 0)
  48.253 +
  48.254 +/* interrupt remap entry */
  48.255 +struct iremap_entry {
  48.256 +    struct {
  48.257 +        u64 present : 1,
  48.258 +            fpd     : 1,
  48.259 +            dm      : 1,
  48.260 +            rh      : 1,
  48.261 +            tm      : 1,
  48.262 +            dlm     : 3,
  48.263 +            avail   : 4,
  48.264 +            res_1   : 4,
  48.265 +            vector  : 8,
  48.266 +            res_2   : 8,
  48.267 +            dst     : 32;
  48.268 +    }lo;
  48.269 +    struct {
  48.270 +        u64 sid     : 16,
  48.271 +            sq      : 2,
  48.272 +            svt     : 2,
  48.273 +            res_1   : 44;
  48.274 +    }hi;
  48.275 +};
  48.276 +#define IREMAP_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct iremap_entry))
  48.277 +#define iremap_present(v) ((v).lo & 1)
  48.278 +#define iremap_fault_disable(v) (((v).lo >> 1) & 1)
  48.279 +
  48.280 +#define iremap_set_present(v) do {(v).lo |= 1;} while(0)
  48.281 +#define iremap_clear_present(v) do {(v).lo &= ~1;} while(0)
  48.282 +
  48.283 +/* queue invalidation entry */
  48.284 +struct qinval_entry {
  48.285 +    union {
  48.286 +        struct {
  48.287 +            struct {
  48.288 +                u64 type    : 4,
  48.289 +                    granu   : 2,
  48.290 +                    res_1   : 10,
  48.291 +                    did     : 16,
  48.292 +                    sid     : 16,
  48.293 +                    fm      : 2,
  48.294 +                    res_2   : 14;
  48.295 +            }lo;
  48.296 +            struct {
  48.297 +                u64 res;
  48.298 +            }hi;
  48.299 +        }cc_inv_dsc;
  48.300 +        struct {
  48.301 +            struct {
  48.302 +                u64 type    : 4,
  48.303 +                    granu   : 2,
  48.304 +                    dw      : 1,
  48.305 +                    dr      : 1,
  48.306 +                    res_1   : 8,
  48.307 +                    did     : 16,
  48.308 +                    res_2   : 32;
  48.309 +            }lo;
  48.310 +            struct {
  48.311 +                u64 am      : 6,
  48.312 +                    ih      : 1,
  48.313 +                    res_1   : 5,
  48.314 +                    addr    : 52;
  48.315 +            }hi;
  48.316 +        }iotlb_inv_dsc;
  48.317 +        struct {
  48.318 +            struct {
  48.319 +                u64 type    : 4,
  48.320 +                    res_1   : 12,
  48.321 +                    max_invs_pend: 5,
  48.322 +                    res_2   : 11,
  48.323 +                    sid     : 16,
  48.324 +                    res_3   : 16;
  48.325 +            }lo;
  48.326 +            struct {
  48.327 +                u64 size    : 1,
  48.328 +                    res_1   : 11,
  48.329 +                    addr    : 52;
  48.330 +            }hi;
  48.331 +        }dev_iotlb_inv_dsc;
  48.332 +        struct {
  48.333 +            struct {
  48.334 +                u64 type    : 4,
  48.335 +                    granu   : 1,
  48.336 +                    res_1   : 22,
  48.337 +                    im      : 5,
  48.338 +                    iidx    : 16,
  48.339 +                    res_2   : 16;
  48.340 +            }lo;
  48.341 +            struct {
  48.342 +                u64 res;
  48.343 +            }hi;
  48.344 +        }iec_inv_dsc;
  48.345 +        struct {
  48.346 +            struct {
  48.347 +                u64 type    : 4,
  48.348 +                    iflag   : 1,
  48.349 +                    sw      : 1,
  48.350 +                    fn      : 1,
  48.351 +                    res_1   : 25,
  48.352 +                    sdata   : 32;
  48.353 +            }lo;
  48.354 +            struct {
  48.355 +                u64 res_1   : 2,
  48.356 +                    saddr   : 62;
  48.357 +            }hi;
  48.358 +        }inv_wait_dsc;
  48.359 +    }q;
  48.360 +};
  48.361 +
  48.362 +struct poll_info {
  48.363 +    u64 saddr;
  48.364 +    u32 udata;
  48.365 +};
  48.366 +
  48.367 +#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry))
  48.368 +#define qinval_present(v) ((v).lo & 1)
  48.369 +#define qinval_fault_disable(v) (((v).lo >> 1) & 1)
  48.370 +
  48.371 +#define qinval_set_present(v) do {(v).lo |= 1;} while(0)
  48.372 +#define qinval_clear_present(v) do {(v).lo &= ~1;} while(0)
  48.373 +
  48.374 +#define RESERVED_VAL        0
  48.375 +
  48.376 +#define TYPE_INVAL_CONTEXT  1
  48.377 +#define TYPE_INVAL_IOTLB    2
  48.378 +#define TYPE_INVAL_DEVICE_IOTLB 3
  48.379 +#define TYPE_INVAL_IEC          4
  48.380 +#define TYPE_INVAL_WAIT         5
  48.381 +
  48.382 +#define NOTIFY_TYPE_POLL        1
  48.383 +#define NOTIFY_TYPE_INTR        1
  48.384 +#define INTERRUPT_FLAG          1
  48.385 +#define STATUS_WRITE            1
  48.386 +#define FENCE_FLAG              1
  48.387 +
  48.388 +#define IEC_GLOBAL_INVL         0
  48.389 +#define IEC_INDEX_INVL          1
  48.390 +
  48.391 +#define VTD_PAGE_TABLE_LEVEL_3  3
  48.392 +#define VTD_PAGE_TABLE_LEVEL_4  4
  48.393 +
  48.394 +typedef paddr_t dma_addr_t;
  48.395 +
  48.396 +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
  48.397 +#define MAX_IOMMUS 32
  48.398 +#define MAX_IOMMU_REGS 0xc0
  48.399 +
  48.400 +extern struct list_head acpi_drhd_units;
  48.401 +extern struct list_head acpi_rmrr_units;
  48.402 +extern struct list_head acpi_ioapic_units;
  48.403 +
  48.404 +#endif
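A usage sketch for the accessors and CAP/ECAP decode macros above, assuming an iommu whose reg field already maps the register page (struct iommu is defined in asm-x86/iommu.h below):

    u64 cap  = dmar_readq(iommu->reg, DMAR_CAP_REG);
    u64 ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);

    printk("VT-d: mgaw %d, sagaw 0x%x, caching mode %d, coherent %d\n",
           (int)cap_mgaw(cap), (u32)cap_sagaw(cap),
           (int)cap_caching_mode(cap), (int)ecap_coherent(ecap));

cap_sagaw() reports which page-table depths the hardware walks (bit 1: 3-level/39-bit, bit 2: 4-level/48-bit, matching VTD_PAGE_TABLE_LEVEL_{3,4}); ecap_coherent() == 0 requires the clflush()-based flushing sketched under asm-x86/system.h below.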
    49.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.2 +++ b/xen/include/asm-x86/iommu.h	Fri Jul 27 12:59:37 2007 -0700
    49.3 @@ -0,0 +1,86 @@
    49.4 +/*
    49.5 + * Copyright (c) 2006, Intel Corporation.
    49.6 + *
    49.7 + * This program is free software; you can redistribute it and/or modify it
    49.8 + * under the terms and conditions of the GNU General Public License,
    49.9 + * version 2, as published by the Free Software Foundation.
   49.10 + *
   49.11 + * This program is distributed in the hope it will be useful, but WITHOUT
   49.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   49.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   49.14 + * more details.
   49.15 + *
   49.16 + * You should have received a copy of the GNU General Public License along with
   49.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   49.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   49.19 + *
   49.20 + * Copyright (C) Allen Kay <allen.m.kay@intel.com>
   49.21 + */
   49.22 +
   49.23 +#ifndef _IOMMU_H_
   49.24 +#define _IOMMU_H_
   49.25 +
   49.26 +#include <xen/init.h>
   49.27 +#include <xen/bitmap.h>
   49.28 +#include <xen/irq.h>
   49.29 +#include <xen/spinlock.h>
   49.30 +#include <xen/mm.h>
   49.31 +#include <xen/xmalloc.h>
   49.32 +#include <asm/hvm/vmx/intel-iommu.h>
   49.33 +#include <public/hvm/ioreq.h>
   49.34 +
   49.35 +#define iommu_found()    (!list_empty(&acpi_drhd_units))
   49.36 +#define dev_assigned(d)  (!list_empty(&d->arch.hvm_domain.hvm_iommu.pdev_list))
   49.37 +#define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
   49.38 +#define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
   49.39 +
   49.40 +/*
   49.41 + * The PCI interface treats multi-function devices as independent
    49.42 + * devices.  The bus/slot/function address of each device is
    49.43 + * encoded in a 16-bit BDF, with the slot/function pair (devfn)
    49.43 + * in the low byte:
   49.44 + *
   49.45 + * 15:8 = bus
   49.46 + *  7:3 = slot
   49.47 + *  2:0 = function
   49.48 + */
   49.49 +#define PCI_DEVFN(slot,func)  (((slot & 0x1f) << 3) | (func & 0x07))
   49.50 +#define PCI_SLOT(devfn)       (((devfn) >> 3) & 0x1f)
   49.51 +#define PCI_FUNC(devfn)       ((devfn) & 0x07)
   49.52 +
   49.53 +struct pci_dev {
   49.54 +    struct list_head list;
   49.55 +    u8 bus;
   49.56 +    u8 devfn;
   49.57 +};
   49.58 +
   49.59 +struct g2m_ioport {
   49.60 +    struct list_head list;
   49.61 +    unsigned int gport;
   49.62 +    unsigned int mport;
   49.63 +};
   49.64 +
   49.65 +struct iommu {
   49.66 +    struct list_head list;
   49.67 +    void __iomem *reg; /* Pointer to hardware regs, virtual addr */
   49.68 +    u32	gcmd;          /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
   49.69 +    u64	cap;
   49.70 +    u64	ecap;
   49.71 +    spinlock_t lock; /* protect context, domain ids */
   49.72 +    spinlock_t register_lock; /* protect iommu register handling */
   49.73 +    struct root_entry *root_entry; /* virtual address */
   49.74 +    unsigned int vector;
   49.75 +};
   49.76 +
   49.77 +int iommu_setup(void);
   49.78 +int iommu_domain_init(struct domain *d);
   49.79 +int assign_device(struct domain *d, u8 bus, u8 devfn);
   49.80 +int release_devices(struct vcpu *v);
   49.81 +int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
   49.82 +int dpci_ioport_intercept(ioreq_t *p, int type);
   49.83 +int iommu_map_page(struct domain *d, dma_addr_t gfn, dma_addr_t mfn);
   49.84 +int iommu_unmap_page(struct domain *d, dma_addr_t gfn);
   49.85 +void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry);
   49.86 +void iommu_set_pgd(struct domain *d);
   49.87 +void iommu_domain_teardown(struct domain *d);
   49.88 +
    49.89 +#endif /* _IOMMU_H_ */
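A worked example of the encoding documented above, including the 16-bit bus/devfn ("BDF") form carried by xen_domctl_assign_device.machine_bdf (bus in bits 15:8, devfn in the low byte):

    u8  bus   = 3;
    u8  devfn = PCI_DEVFN(0x1f, 2);            /* (0x1f << 3) | 2 == 0xfa */
    u32 machine_bdf = ((u32)bus << 8) | devfn; /* == 0x03fa */

    ASSERT(PCI_SLOT(devfn) == 0x1f);
    ASSERT(PCI_FUNC(devfn) == 2);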
    50.1 --- a/xen/include/asm-x86/mpspec_def.h	Fri Jul 27 13:47:03 2007 +0100
    50.2 +++ b/xen/include/asm-x86/mpspec_def.h	Fri Jul 27 12:59:37 2007 -0700
    50.3 @@ -184,5 +184,13 @@ enum mp_bustype {
    50.4  	MP_BUS_MCA,
    50.5  	MP_BUS_NEC98
    50.6  };
    50.7 +
    50.8 +struct mp_ioapic_routing {
    50.9 +    int         apic_id;
   50.10 +    int         gsi_base;
   50.11 +    int         gsi_end;
   50.12 +    u32         pin_programmed[4];
   50.13 +};
   50.14 +
   50.15  #endif
   50.16  
    51.1 --- a/xen/include/asm-x86/p2m.h	Fri Jul 27 13:47:03 2007 +0100
    51.2 +++ b/xen/include/asm-x86/p2m.h	Fri Jul 27 12:59:37 2007 -0700
    51.3 @@ -104,6 +104,8 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
    51.4  }
    51.5  
    51.6  
    51.7 +/* Directly set p2m entry */
    51.8 +int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 flags);
    51.9  
   51.10  /* Init the datastructures for later use by the p2m code */
   51.11  void p2m_init(struct domain *d);
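Exporting set_p2m_entry() lets the new XEN_DOMCTL_memory_mapping handler install a device's MMIO range directly into an HVM guest's p2m. A loose sketch of that loop; the flag choice is an assumption for illustration, not lifted from the changeset:

    unsigned long i;

    /* Map nr_mfns machine frames at first_gfn in the guest's p2m. */
    for ( i = 0; i < nr_mfns; i++ )
        set_p2m_entry(d, first_gfn + i, _mfn(first_mfn + i),
                      __PAGE_HYPERVISOR | _PAGE_USER);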
    52.1 --- a/xen/include/asm-x86/system.h	Fri Jul 27 13:47:03 2007 +0100
    52.2 +++ b/xen/include/asm-x86/system.h	Fri Jul 27 12:59:37 2007 -0700
    52.3 @@ -14,6 +14,9 @@
    52.4  #define wbinvd() \
    52.5  	__asm__ __volatile__ ("wbinvd": : :"memory");
    52.6  
    52.7 +#define clflush(a) \
    52.8 +	__asm__ __volatile__ ("clflush (%0)": :"r"(a));
    52.9 +
   52.10  #define nop() __asm__ __volatile__ ("nop")
   52.11  
   52.12  #define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
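clflush() matters here because an IOMMU reporting ecap_coherent() == 0 does not snoop CPU caches, so context entries and DMA page-table entries must be flushed to memory before the hardware can see them. A minimal sketch, assuming a 64-byte line size for brevity (production code should derive it from CPUID):

    static void iommu_flush_cache(void *addr, unsigned int size)
    {
        unsigned int i;

        for ( i = 0; i < size; i += 64 )
            clflush((char *)addr + i);
        /* clflush is ordered only by a fence, not by program order. */
        __asm__ __volatile__ ("mfence" : : : "memory");
    }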
    53.1 --- a/xen/include/public/domctl.h	Fri Jul 27 13:47:03 2007 +0100
    53.2 +++ b/xen/include/public/domctl.h	Fri Jul 27 12:59:37 2007 -0700
    53.3 @@ -432,7 +432,45 @@ struct xen_domctl_sendtrigger {
    53.4  typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
    53.5  DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
    53.6  
    53.7 - 
    53.8 +
    53.9 +#define XEN_DOMCTL_assign_device        37
   53.10 +struct xen_domctl_assign_device {
   53.11 +    uint32_t  machine_bdf;   /* machine PCI ID of assigned device */
   53.12 +};
   53.13 +typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
   53.14 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
   53.15 +
   53.16 +
   53.17 +#define XEN_DOMCTL_irq_mapping        38
   53.18 +struct xen_domctl_irq_mapping {
   53.19 +    uint32_t machine_irq;     /* machine irq to be mapped to guest */
   53.20 +    uint32_t device;          /* pci device number */
   53.21 +    uint32_t intx;            /* intx in vPCI space */
   53.22 +};
   53.23 +typedef struct xen_domctl_irq_mapping xen_domctl_irq_mapping_t;
   53.24 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_mapping_t);
   53.25 +
   53.26 +
   53.27 +#define XEN_DOMCTL_memory_mapping   39
   53.28 +struct xen_domctl_memory_mapping {
   53.29 +    uint64_t first_gfn;       /* first hvm guest physical page in range */
   53.30 +    uint64_t first_mfn;       /* first machine page in range */
   53.31 +    uint64_t nr_mfns;         /* number of pages in range */
   53.32 +};
   53.33 +typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t;
   53.34 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t);
   53.35 +
   53.36 +
   53.37 +#define XEN_DOMCTL_ioport_mapping    40
   53.38 +struct xen_domctl_ioport_mapping {
    53.39 +    uint32_t first_gport;     /* first guest IO port */
   53.40 +    uint32_t first_mport;     /* first machine IO port */
   53.41 +    uint32_t nr_ports;        /* size of port range */
   53.42 +};
   53.43 +typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t;
   53.44 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t);
   53.45 +
   53.46 +
   53.47  struct xen_domctl {
   53.48      uint32_t cmd;
   53.49      uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
   53.50 @@ -462,6 +500,10 @@ struct xen_domctl {
   53.51          struct xen_domctl_hvmcontext        hvmcontext;
   53.52          struct xen_domctl_address_size      address_size;
   53.53          struct xen_domctl_sendtrigger       sendtrigger;
   53.54 +        struct xen_domctl_assign_device     assign_device;
   53.55 +        struct xen_domctl_irq_mapping       irq_mapping;
   53.56 +        struct xen_domctl_memory_mapping    memory_mapping;
   53.57 +        struct xen_domctl_ioport_mapping    ioport_mapping;
   53.58          uint8_t                             pad[128];
   53.59      } u;
   53.60  };
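The changeset's libxc additions (tools/libxc/xc_domain.c, outside this excerpt) expose the new domctls to the toolstack. A sketch of the shape such a wrapper takes; the entry-point name is illustrative, only the domctl payload is fixed by the header above:

    int xc_assign_device(int xc_handle, uint32_t domid, uint32_t machine_bdf)
    {
        struct xen_domctl domctl;

        domctl.cmd = XEN_DOMCTL_assign_device;
        domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        domctl.domain = (domid_t)domid;
        domctl.u.assign_device.machine_bdf = machine_bdf;

        return do_domctl(xc_handle, &domctl);
    }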
    54.1 --- a/xen/include/xen/acpi.h	Fri Jul 27 13:47:03 2007 +0100
    54.2 +++ b/xen/include/xen/acpi.h	Fri Jul 27 12:59:37 2007 -0700
    54.3 @@ -367,9 +367,79 @@ enum acpi_table_id {
    54.4  	ACPI_SPMI,
    54.5  	ACPI_HPET,
    54.6  	ACPI_MCFG,
    54.7 +	ACPI_DMAR,
    54.8  	ACPI_TABLE_COUNT
    54.9  };
   54.10  
   54.11 +/* DMA Remapping Reporting Table (DMAR) */
   54.12 +
   54.13 +#define DMAR_FLAGS_INTR_REMAP 0x1       /* intr remap supported */
   54.14 +struct acpi_table_dmar {
   54.15 +	struct acpi_table_header	header;
   54.16 +	u8				haw;	/* Host address Width */
   54.17 +	u8				flags;
   54.18 +	u8				reserved[10];
   54.19 +} __attribute__ ((packed));
   54.20 +
   54.21 +struct acpi_dmar_entry_header {
   54.22 +	u16	type;
   54.23 +	u16	length;
   54.24 +} __attribute__((packed));
   54.25 +
   54.26 +enum acpi_dmar_entry_type {
   54.27 +	ACPI_DMAR_DRHD = 0,
   54.28 +	ACPI_DMAR_RMRR,
   54.29 +	ACPI_DMAR_ATSR,
   54.30 +	ACPI_DMAR_ENTRY_COUNT
   54.31 +};
   54.32 +
   54.33 +#define DRHD_FLAGS_INCLUDE_ALL	0x1       /* drhd remaps remaining devices */
   54.34 +struct acpi_table_drhd {
   54.35 +	struct	acpi_dmar_entry_header header;
   54.36 +	u8	flags;
   54.37 +	u8	reserved;
   54.38 +	u16	segment;
   54.39 +	u64	address; /* register base address for this drhd */
   54.40 +} __attribute__ ((packed));
   54.41 +
   54.42 +struct acpi_table_rmrr {
   54.43 +	struct	acpi_dmar_entry_header header;
   54.44 +	u16	reserved;
    54.45 +	u16	segment;
   54.46 +	u64	base_address;
   54.47 +	u64	end_address;
   54.48 +} __attribute__ ((packed));
   54.49 +
   54.50 +struct acpi_table_atsr {
   54.51 +        struct  acpi_dmar_entry_header header;
   54.52 +        u8      flags;
   54.53 +        u8      reserved;
   54.54 +        u16     segment;
   54.55 +} __attribute__ ((packed));
   54.56 +
   54.57 +enum acpi_dev_scope_type {
    54.58 +	ACPI_DEV_ENDPOINT=0x01,	/* PCI Endpoint device */
   54.59 +	ACPI_DEV_P2PBRIDGE,	/* PCI-PCI Bridge */
   54.60 +	ACPI_DEV_IOAPIC,	/* IOAPIC device*/
   54.61 +	ACPI_DEV_MSI_HPET,	/* MSI capable HPET*/
   54.62 +	ACPI_DEV_ENTRY_COUNT
   54.63 +};
   54.64 +
   54.65 +struct acpi_dev_scope {
   54.66 +	u8	dev_type;
   54.67 +	u8	length;
   54.68 +	u8	reserved[2];
   54.69 +	u8	enum_id;
   54.70 +	u8	start_bus;
   54.71 +} __attribute__((packed));
   54.72 +
   54.73 +struct acpi_pci_path {
   54.74 +	u8	dev;
   54.75 +	u8	fn;
   54.76 +} __attribute__((packed));
   54.77 +
   54.78 +typedef int (*acpi_dmar_entry_handler) (struct acpi_dmar_entry_header *header, const unsigned long end);
   54.79 +
   54.80  typedef int (*acpi_table_handler) (unsigned long phys_addr, unsigned long size);
   54.81  
   54.82  extern acpi_table_handler acpi_table_ops[ACPI_TABLE_COUNT];
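DMAR sub-tables (DRHD, RMRR, ATSR) are variable-length records chained by their length fields, which is why acpi_dmar_entry_handler receives the table end for bounds checking. A sketch of the walk acpi_dmar_init() has to perform (the real parser lives in xen/arch/x86/hvm/vmx/vtd/dmar.c; this body is illustrative):

    static int dmar_walk(struct acpi_table_dmar *dmar, unsigned long size,
                         acpi_dmar_entry_handler handler)
    {
        unsigned long cur = (unsigned long)(dmar + 1);  /* first sub-table */
        unsigned long end = (unsigned long)dmar + size;
        struct acpi_dmar_entry_header *h;

        while ( cur < end )
        {
            h = (struct acpi_dmar_entry_header *)cur;
            if ( h->length < sizeof(*h) || handler(h, end) )
                return -1;          /* malformed or rejected entry */
            cur += h->length;
        }
        return 0;
    }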
    55.1 --- a/xen/include/xen/config.h	Fri Jul 27 13:47:03 2007 +0100
    55.2 +++ b/xen/include/xen/config.h	Fri Jul 27 12:59:37 2007 -0700
    55.3 @@ -60,6 +60,8 @@
    55.4  #define KERN_INFO      XENLOG_INFO
    55.5  #define KERN_DEBUG     XENLOG_DEBUG
    55.6  
    55.7 +/* VTD Specific Prefix */
    55.8 +#define VTDPREFIX       "[VT-D]"
    55.9  /* Linux 'checker' project. */
   55.10  #define __iomem
   55.11  #define __user
    56.1 --- a/xen/include/xen/irq.h	Fri Jul 27 13:47:03 2007 +0100
    56.2 +++ b/xen/include/xen/irq.h	Fri Jul 27 12:59:37 2007 -0700
    56.3 @@ -63,6 +63,9 @@ extern irq_desc_t irq_desc[NR_IRQS];
    56.4  
    56.5  extern int setup_irq(unsigned int, struct irqaction *);
    56.6  extern void free_irq(unsigned int);
    56.7 +extern int request_irq(unsigned int irq,
    56.8 +               void (*handler)(int, void *, struct cpu_user_regs *),
    56.9 +               unsigned long irqflags, const char * devname, void *dev_id);
   56.10  
   56.11  extern hw_irq_controller no_irq_type;
   56.12  extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs);
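The newly exported request_irq() is how the VT-d code can hook its fault-reporting vector. A usage sketch matching the declared signature; the handler body and names are illustrative:

    static void iommu_fault_handler(int irq, void *dev_id,
                                    struct cpu_user_regs *regs)
    {
        struct iommu *iommu = dev_id;   /* cookie passed at registration */

        printk("VT-d fault on iommu with vector %u\n", iommu->vector);
        /* A real handler would read DMAR_FSTS_REG and the fault records. */
    }

    static int iommu_hook_fault_irq(struct iommu *iommu)
    {
        return request_irq(iommu->vector, iommu_fault_handler, 0,
                           "dmar_fault", iommu);
    }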