direct-io.hg

changeset 15531:b4d5751e8783

Cleaned neo ioemu patch (50:50 merge)
author Guy Zana <guy@neocleus.com>
date Wed Aug 29 00:49:54 2007 +0300 (2007-08-29)
parents 69ec2ef3d132
children 9163bea7b7bd
files tools/ioemu/Makefile.target tools/ioemu/hw/pass-through.c tools/ioemu/hw/pass-through.h tools/ioemu/hw/pc.c tools/ioemu/vl.c tools/ioemu/vl.h
line diff
     1.1 --- a/tools/ioemu/Makefile.target	Wed Aug 29 00:48:01 2007 +0300
     1.2 +++ b/tools/ioemu/Makefile.target	Wed Aug 29 00:49:54 2007 +0300
     1.3 @@ -198,6 +198,7 @@ LIBS+=-lm
     1.4  LIBS+=-L../../libxc -lxenctrl -lxenguest
     1.5  LIBS+=-L../../xenstore -lxenstore
     1.6  LIBS+=-lpthread
     1.7 +LIBS+=-lpci
     1.8  ifndef CONFIG_USER_ONLY
     1.9  LIBS+=-lz
    1.10  endif
    1.11 @@ -401,6 +402,7 @@ VL_OBJS+= piix4acpi.o
    1.12  VL_OBJS+= xenstore.o
    1.13  VL_OBJS+= xen_platform.o
    1.14  VL_OBJS+= tpm_tis.o
    1.15 +VL_OBJS+= pass-through.o
    1.16  CPPFLAGS += -DHAS_AUDIO
    1.17  endif
    1.18  ifeq ($(TARGET_BASE_ARCH), ppc)
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/tools/ioemu/hw/pass-through.c	Wed Aug 29 00:49:54 2007 +0300
     2.3 @@ -0,0 +1,453 @@
     2.4 +/*
     2.5 + * Copyright (c) 2007, Neocleus Corporation.
     2.6 + * Copyright (c) 2007, Intel Corporation.
     2.7 + *
     2.8 + * This program is free software; you can redistribute it and/or modify it
     2.9 + * under the terms and conditions of the GNU General Public License,
    2.10 + * version 2, as published by the Free Software Foundation.
    2.11 + *
    2.12 + * This program is distributed in the hope it will be useful, but WITHOUT
    2.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    2.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    2.15 + * more details.
    2.16 + *
    2.17 + * You should have received a copy of the GNU General Public License along with
    2.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    2.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    2.20 + *
    2.21 + * Alex Novik <alex@neocleus.com>
    2.22 + * Allen Kay <allen.m.kay@intel.com>
    2.23 + * Guy Zana <guy@neocleus.com>
    2.24 + *
    2.25 + * This file implements direct PCI assignment to a HVM guest
    2.26 + *
    2.27 + */
    2.28 +#include "vl.h"
    2.29 +#include "pass-through.h"
    2.30 +#include "pci/header.h"
    2.31 +#include "pci/pci.h"
    2.32 +
    2.33 +extern FILE *logfile;
    2.34 +char *token;
    2.35 +/* Count the '[' characters found in direct_pci after its first character. */
    2.36 +int pci_devs(const char *direct_pci)
    2.37 +{
    2.38 +    int count = 0;
    2.39 +    const char *c;
    2.40 +
    2.41 +    /* skip first "[" character so that only the following ones are counted */
    2.42 +    c = direct_pci + 1;     /* NOTE(review): steps past the NUL if the string is empty */
    2.43 +    while ((c = strchr(c, '[')) != NULL) {
    2.44 +        c++;                /* resume the search just after this match */
    2.45 +        count++;
    2.46 +    }
    2.47 +    return (count);
    2.48 +}
    2.49 +/* Return the next comma-separated "0x.." field as an int, or -1 if missing. */
    2.50 +int next_token(char *direct_pci)
    2.51 +{
    2.52 +    /* global token == NULL: first call, start tokenizing direct_pci */
    2.53 +    char *field = strtok(token == NULL ? direct_pci : NULL, ",");
    2.54 +    char *hex = (field != NULL) ? strchr(field, 'x') : NULL;
    2.55 +    if (hex == NULL)    /* list exhausted, or the field has no 'x' prefix */
    2.56 +        return -1;      /* previously a NULL pointer was dereferenced here */
    2.57 +    token = hex + 1;    /* first hex digit; non-NULL marks the scan as started */
    2.58 +    return ((int) strtol(token, NULL, 16));
    2.59 +}
    2.60 +/* Read four consecutive fields of direct_pci into *seg, *bus, *dev, *func. */
    2.61 +void next_bdf(char *direct_pci, int *seg,
    2.62 +              int *bus, int *dev, int *func)
    2.63 +{
    2.64 +    *seg  = next_token(direct_pci);     /* fields are consumed in this order */
    2.65 +    *bus  = next_token(direct_pci);
    2.66 +    *dev  = next_token(direct_pci);
    2.67 +    *func = next_token(direct_pci);
    2.68 +}
    2.69 +/* Search the device's capability list for ID cap; returns its offset or 0. */
    2.70 +uint8_t find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
    2.71 +{
    2.72 +    int id;
    2.73 +    int max_cap = 48;       /* upper bound on list entries visited */
    2.74 +    int pos = PCI_CAPABILITY_LIST;
    2.75 +    int status;
    2.76 +
    2.77 +    status = pci_read_byte(pci_dev, PCI_STATUS);
    2.78 +    if ( (status & PCI_STATUS_CAP_LIST) == 0 )  /* no capability list */
    2.79 +        return 0;
    2.80 +
    2.81 +    while ( max_cap-- )
    2.82 +    {
    2.83 +        pos = pci_read_byte(pci_dev, pos);      /* follow the next pointer */
    2.84 +        if ( pos < 0x40 )                       /* treated as end of list */
    2.85 +            break;
    2.86 +
    2.87 +        pos &= ~3;                              /* drop the low two bits */
    2.88 +        id = pci_read_byte(pci_dev, pos + PCI_CAP_LIST_ID);
    2.89 +
    2.90 +        if ( id == 0xff )                       /* give up on an 0xff ID */
    2.91 +            break;
    2.92 +        if ( id == cap )
    2.93 +            return pos;
    2.94 +
    2.95 +        pos += PCI_CAP_LIST_NEXT;               /* next pointer of this entry */
    2.96 +    }
    2.97 +    return 0;
    2.98 +}
    2.99 +/* Trigger a PCIe FLR on pci_dev if its Express capability advertises FLR. */
   2.100 +void pdev_flr(struct pci_dev *pci_dev)
   2.101 +{
   2.102 +    int pos;
   2.103 +    int dev_cap;
   2.104 +    int dev_status;
   2.105 +
   2.106 +    pos = find_cap_offset(pci_dev, PCI_CAP_ID_EXP);
   2.107 +    if ( pos )      /* 0: the PCI Express capability was not found */
   2.108 +    {
   2.109 +        dev_cap = pci_read_long(pci_dev, pos + PCI_EXP_DEVCAP);
   2.110 +        if ( dev_cap & PCI_EXP_DEVCAP_FLR )
   2.111 +        {
   2.112 +            pci_write_word(pci_dev, pos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_FLR);
   2.113 +            do {    /* poll until TRPND clears; word read, a long read here is unaligned */
   2.114 +                dev_status = pci_read_word(pci_dev, pos + PCI_EXP_DEVSTA);
   2.115 +            } while (dev_status & PCI_EXP_DEVSTA_TRPND);
   2.116 +        }
   2.117 +    }
   2.118 +}
   2.119 +
   2.120 +/* Called each time an MMIO region is updated: remaps region i at guest address e_phys */
   2.121 +void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
   2.122 +                  int type)
   2.123 +{
   2.124 +    struct pt_dev *assigned_device  = (struct pt_dev *)d; 
   2.125 +    uint32_t old_ebase = assigned_device->bases[i].e_physbase;
   2.126 +    int first_map = ( assigned_device->bases[i].e_size == 0 );  /* no size recorded yet */
   2.127 +    int ret = 0;
   2.128 +
   2.129 +    assigned_device->bases[i].e_physbase = e_phys;      /* record the new placement */
   2.130 +    assigned_device->bases[i].e_size= e_size;
   2.131 +
   2.132 +    PT_LOG("e_phys=%08x maddr=%08x type=%d len=%08x index=%d\n",
   2.133 +        e_phys, assigned_device->bases[i].access.maddr, type, e_size, i);
   2.134 +
   2.135 +    if ( e_size == 0 )      /* nothing to map */
   2.136 +        return;
   2.137 +
   2.138 +    if ( !first_map )
   2.139 +    {
   2.140 +        /* Remove old mapping (at old_ebase; length is taken from the new e_size) */
   2.141 +        ret = xc_domain_memory_mapping(xc_handle, domid, old_ebase >> 12,
   2.142 +                assigned_device->bases[i].access.maddr >> 12,
   2.143 +                (e_size+0xFFF) >> 12,   /* size rounded up to 4K pages */
   2.144 +                DPCI_REMOVE_MAPPING);
   2.145 +        if ( ret != 0 )
   2.146 +        {
   2.147 +            PT_LOG("Error: remove old mapping failed!\n");
   2.148 +            return;
   2.149 +        }
   2.150 +    }
   2.151 +
   2.152 +    /* Create new mapping (addresses and size are passed as 4K page numbers) */
   2.153 +    ret = xc_domain_memory_mapping(xc_handle, domid,
   2.154 +            assigned_device->bases[i].e_physbase >> 12,
   2.155 +            assigned_device->bases[i].access.maddr >> 12,
   2.156 +            (e_size+0xFFF) >> 12,
   2.157 +            DPCI_ADD_MAPPING);
   2.158 +    if ( ret != 0 )     /* failure is only logged; there is no return value */
   2.159 +        PT_LOG("Error: create new mapping failed!\n");
   2.160 +
   2.161 +}
   2.162 +
   2.163 +/* Called each time a PIO region is updated: remaps region i at guest port e_phys */
   2.164 +void pt_ioport_map(PCIDevice *d, int i,
   2.165 +                   uint32_t e_phys, uint32_t e_size, int type)
   2.166 +{
   2.167 +    struct pt_dev *assigned_device  = (struct pt_dev *)d;
   2.168 +    uint32_t old_ebase = assigned_device->bases[i].e_physbase;
   2.169 +    int first_map = ( assigned_device->bases[i].e_size == 0 );  /* no size recorded yet */
   2.170 +    int ret = 0;
   2.171 +
   2.172 +    assigned_device->bases[i].e_physbase = e_phys;      /* record the new placement */
   2.173 +    assigned_device->bases[i].e_size= e_size;
   2.174 +
   2.175 +    PT_LOG("e_phys=%04x pio_base=%04x len=%04x index=%d\n",
   2.176 +        (uint16_t)e_phys, (uint16_t)assigned_device->bases[i].access.pio_base,
   2.177 +        (uint16_t)e_size, i);
   2.178 +
   2.179 +    if ( e_size == 0 )      /* nothing to map */
   2.180 +        return;
   2.181 +
   2.182 +    if ( !first_map )
   2.183 +    {
   2.184 +        /* Remove old mapping (at old_ebase; length is taken from the new e_size) */
   2.185 +        ret = xc_domain_ioport_mapping(xc_handle, domid, old_ebase,
   2.186 +                    assigned_device->bases[i].access.pio_base, e_size,
   2.187 +                    DPCI_REMOVE_MAPPING);
   2.188 +        if ( ret != 0 )
   2.189 +        {
   2.190 +            PT_LOG("Error: remove old mapping failed!\n");
   2.191 +            return;
   2.192 +        }
   2.193 +    }
   2.194 +
   2.195 +    /* Create new mapping from guest port e_phys to pio_base */
   2.196 +    ret = xc_domain_ioport_mapping(xc_handle, domid, e_phys,
   2.197 +                assigned_device->bases[i].access.pio_base, e_size,
   2.198 +                DPCI_ADD_MAPPING);
   2.199 +    if ( ret != 0 )     /* failure is only logged; there is no return value */
   2.200 +        PT_LOG("Error: create new mapping failed!\n");
   2.201 +
   2.202 +}
   2.203 +/* Config-space write handler: 0x0C-0x3F stay emulated, the rest go to the device. */
   2.204 +static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val,
   2.205 +                                int len)
   2.206 +{
   2.207 +    struct pt_dev *assigned_device = (struct pt_dev *)d;
   2.208 +    struct pci_dev *pci_dev = assigned_device->pci_dev;
   2.209 +
   2.210 +#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
   2.211 +    PT_LOG("(%x.%x): address=%04x val=0x%08x len=%d\n",
   2.212 +       (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
   2.213 +#endif
   2.214 +
   2.215 +    /* Pre-write hooking: these offsets never reach the real device */
   2.216 +    switch ( address ) {
   2.217 +    case 0x0C ... 0x3F:     /* only the start address is tested, not address+len */
   2.218 +        pci_default_write_config(d, address, val, len);
   2.219 +        return;
   2.220 +    }
   2.221 +
   2.222 +    /* PCI config pass-through (a len other than 1, 2 or 4 writes nothing) */
   2.223 +    switch (len){
   2.224 +    case 1:
   2.225 +        pci_write_byte(pci_dev, address, val);
   2.226 +        break;
   2.227 +    case 2:
   2.228 +        pci_write_word(pci_dev, address, val);
   2.229 +        break;
   2.230 +    case 4:
   2.231 +        pci_write_long(pci_dev, address, val);
   2.232 +        break;
   2.233 +    }
   2.234 +
   2.235 +    /* Post-write hooking: mirror the write into the emulated device as well */
   2.236 +    switch (address) {
   2.237 +    case 0x04:                  /* CMD register (enable IO access trap) */
   2.238 +        pci_default_write_config(d, address, val, len);
   2.239 +        break;
   2.240 +    }
   2.241 +}
   2.242 +/* Config-space read handler: 0x0C-0x3F come from the emulation, the rest from the device. */
   2.243 +static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
   2.244 +{
   2.245 +    struct pt_dev *assigned_device = (struct pt_dev *)d;
   2.246 +    struct pci_dev *pci_dev = assigned_device->pci_dev;
   2.247 +    uint32_t val = 0xFF;    /* returned as-is when len is not 1, 2 or 4 */
   2.248 +
   2.249 +    /* Pre-hooking: these offsets are answered without touching the device */
   2.250 +    switch ( address ) {
   2.251 +    case 0x0C ... 0x3F:     /* only the start address is tested, not address+len */
   2.252 +        val = pci_default_read_config(d, address, len);
   2.253 +        goto exit;
   2.254 +    }
   2.255 +
   2.256 +    switch ( len ) {        /* pass the read through at the requested width */
   2.257 +    case 1:
   2.258 +        val = pci_read_byte(pci_dev, address);
   2.259 +        break;
   2.260 +    case 2:
   2.261 +        val = pci_read_word(pci_dev, address);
   2.262 +        break;
   2.263 +    case 4:
   2.264 +        val = pci_read_long(pci_dev, address);
   2.265 +        break;
   2.266 +    }
   2.267 +
   2.268 +exit:
   2.269 +
   2.270 +#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
   2.271 +    PT_LOG("(%x.%x): address=%04x val=0x%08x len=%d\n",
   2.272 +       (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
   2.273 +#endif
   2.274 +
   2.275 +    return val;
   2.276 +}
   2.277 +/* Register the real device's BARs and expansion ROM as emulated regions; returns 0. */
   2.278 +static int pt_register_regions(struct pt_dev *assigned_device)
   2.279 +{
   2.280 +    int i = 0;
   2.281 +    uint32_t bar_data = 0;
   2.282 +    struct pci_dev *pci_dev = assigned_device->pci_dev;
   2.283 +    PCIDevice *d = &assigned_device->dev;
   2.284 +
   2.285 +    /* Register PIO/MMIO BARs */
   2.286 +    for ( i=0; i < PCI_BAR_ENTRIES; i++ )
   2.287 +    {
   2.288 +        if ( pci_dev->base_addr[i] )    /* BARs with a zero base are skipped */
   2.289 +        {
   2.290 +            assigned_device->bases[i].e_physbase = pci_dev->base_addr[i];
   2.291 +            assigned_device->bases[i].access.u = pci_dev->base_addr[i];
   2.292 +
   2.293 +            /* Register current region; its type comes from the cached BAR word */
   2.294 +            bar_data = *((uint32_t*)(d->config + PCI_BASE_ADDRESS_0) + i);
   2.295 +            if ( bar_data & PCI_ADDRESS_SPACE_IO )
   2.296 +                pci_register_io_region((PCIDevice *)assigned_device, i,
   2.297 +                    (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_IO,
   2.298 +                    pt_ioport_map);
   2.299 +            else if ( bar_data & PCI_ADDRESS_SPACE_MEM_PREFETCH )
   2.300 +                pci_register_io_region((PCIDevice *)assigned_device, i,
   2.301 +                    (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM_PREFETCH,
   2.302 +                    pt_iomem_map);
   2.303 +            else
   2.304 +                pci_register_io_region((PCIDevice *)assigned_device, i, 
   2.305 +                    (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM,
   2.306 +                    pt_iomem_map);
   2.307 +
   2.308 +            PT_LOG("IO region registered (size=0x%08x base_addr=0x%08x)\n",
   2.309 +                (uint32_t)(pci_dev->size[i]),
   2.310 +                (uint32_t)(pci_dev->base_addr[i]));
   2.311 +        }
   2.312 +    }
   2.313 +
   2.314 +    /* Register expansion ROM address (only when both base and size are non-zero) */
   2.315 +    if ( pci_dev->rom_base_addr && pci_dev->rom_size )
   2.316 +    {
   2.317 +        assigned_device->bases[PCI_ROM_SLOT].e_physbase =
   2.318 +            pci_dev->rom_base_addr;
   2.319 +        assigned_device->bases[PCI_ROM_SLOT].access.maddr =
   2.320 +            pci_dev->rom_base_addr;
   2.321 +        pci_register_io_region((PCIDevice *)assigned_device, PCI_ROM_SLOT,
   2.322 +            pci_dev->rom_size, PCI_ADDRESS_SPACE_MEM_PREFETCH,
   2.323 +            pt_iomem_map);
   2.324 +
   2.325 +        PT_LOG("Expansion ROM registered (size=0x%08x base_addr=0x%08x)\n",
   2.326 +            (uint32_t)(pci_dev->rom_size), (uint32_t)(pci_dev->rom_base_addr));
   2.327 +    }
   2.328 +
   2.329 +    return 0;
   2.330 +}
   2.331 +/* Attach host device r_bus:r_dev.r_func to e_bus as a pass-through device; NULL on failure. */
   2.332 +struct pt_dev * register_real_device(PCIBus *e_bus,
   2.333 +        const char *e_dev_name, int e_devfn, uint8_t r_bus, uint8_t r_dev,
   2.334 +        uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access)
   2.335 +{
   2.336 +    int rc, i;
   2.337 +    struct pt_dev *assigned_device = NULL;
   2.338 +    struct pci_dev *pci_dev;
   2.339 +    struct pci_config_cf8 machine_bdf;
   2.340 +    uint8_t e_device, e_intx;
   2.341 +
   2.342 +    PT_LOG("Assigning real physical device %02x:%02x.%x ...\n",
   2.343 +        r_bus, r_dev, r_func);
   2.344 +
   2.345 +    /* Find real device structure in the list produced by the libpci bus scan */
   2.346 +    for (pci_dev = pci_access->devices; pci_dev != NULL;
   2.347 +         pci_dev = pci_dev->next)
   2.348 +    {
   2.349 +        if ((r_bus == pci_dev->bus) && (r_dev == pci_dev->dev)
   2.350 +            && (r_func == pci_dev->func))
   2.351 +            break;
   2.352 +    }
   2.353 +    if ( pci_dev == NULL )
   2.354 +    {
   2.355 +        PT_LOG("Error: couldn't locate device in libpci structures\n");
   2.356 +        return NULL;
   2.357 +    }
   2.358 +
   2.359 +    /* Register device with our config-space read/write handlers */
   2.360 +    assigned_device = (struct pt_dev *) pci_register_device(e_bus, e_dev_name,
   2.361 +                                sizeof(struct pt_dev), e_devfn,
   2.362 +                                pt_pci_read_config, pt_pci_write_config);
   2.363 +    if ( assigned_device == NULL )
   2.364 +    {
   2.365 +        PT_LOG("Error: couldn't register real device\n");
   2.366 +        return NULL;
   2.367 +    }
   2.368 +
   2.369 +    assigned_device->pci_dev = pci_dev;
   2.370 +
   2.371 +    /* Issue PCIe FLR */
   2.372 +    pdev_flr(pci_dev);
   2.373 +
   2.374 +    /* Tell XEN vmm to change iommu settings */
   2.375 +    machine_bdf.value = 0;      /* clear every bit: reserved/enable were left uninitialized */
   2.376 +    machine_bdf.bus = r_bus;
   2.377 +    machine_bdf.dev = r_dev;
   2.378 +    machine_bdf.func = r_func;
   2.379 +    rc = xc_assign_device(xc_handle, domid, machine_bdf.value);
   2.380 +    if ( rc < 0 )       /* logged only; registration carries on */
   2.381 +        PT_LOG("Error: xc_domain_assign_device error %d\n", rc);
   2.382 +
   2.383 +    /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
   2.384 +    for ( i = 0; i < PCI_CONFIG_SIZE; i++ )
   2.385 +        assigned_device->dev.config[i] = pci_read_byte(pci_dev, i);
   2.386 +
   2.387 +    /* Handle real device's MMIO/PIO BARs */
   2.388 +    pt_register_regions(assigned_device);
   2.389 +    
   2.390 +    /* Bind interrupt */
   2.391 +    e_device = (assigned_device->dev.devfn >> 3) & 0x1f;
   2.392 +    e_intx = assigned_device->dev.config[0x3d]-1;   /* config byte 0x3d minus one */
   2.393 +
   2.394 +    if ( PT_MACHINE_IRQ_AUTO == machine_irq )       /* use the IRQ libpci reports */
   2.395 +        machine_irq = pci_dev->irq;
   2.396 +
   2.397 +    /* bind machine_irq to device */
   2.398 +    if ( 0 != machine_irq )
   2.399 +    {
   2.400 +        rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, machine_irq, 0,
   2.401 +                                       e_device, e_intx);
   2.402 +        if ( rc < 0 )
   2.403 +        {
   2.404 +            /* TBD: unregister device in case of an error */
   2.405 +            PT_LOG("Error: Binding of interrupt failed! rc=%d\n", rc);
   2.406 +        }
   2.407 +    }
   2.408 +    else {
   2.409 +        /* Disable PCI intx assertion (set bit 10 of the command register at 0x04) */
   2.410 +        assigned_device->dev.config[0x05] |= 0x04;
   2.411 +        pci_write_word(pci_dev, 0x04,
   2.412 +            *(uint16_t *)(&assigned_device->dev.config[0x04]));
   2.413 +    }
   2.414 +
   2.415 +    PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n", 
   2.416 +        r_bus, r_dev, r_func);
   2.417 +
   2.418 +    return assigned_device;
   2.419 +}
   2.420 +/* Scan the host PCI bus and assign every device listed in direct_pci; 0 or -1. */
   2.421 +int pt_init(PCIBus *e_bus, char *direct_pci)
   2.422 +{
   2.423 +    int i, ndevs;
   2.424 +    int seg, b, d, f;       /* seg is parsed but not used below */
   2.425 +    struct pt_dev *pt_dev;
   2.426 +    struct pci_access *pci_access;
   2.427 +
   2.428 +    /* Initialize libpci */
   2.429 +    pci_access = pci_alloc();
   2.430 +    if ( pci_access == NULL )
   2.431 +    {
   2.432 +        PT_LOG("pci_access is NULL\n");
   2.433 +        return -1;
   2.434 +    }
   2.435 +    pci_init(pci_access);
   2.436 +    pci_scan_bus(pci_access);
   2.437 +
   2.438 +    /* Count first: next_bdf() cuts direct_pci at each ',' through strtok() */
   2.439 +    ndevs = pci_devs(direct_pci);
   2.440 +    for ( i = 0; i < ndevs; i++ )       /* assign given devices to guest */
   2.441 +    {
   2.442 +        /* Get next device bdf (segment, bus, device, function) */
   2.443 +        next_bdf(direct_pci, &seg, &b, &d, &f);
   2.444 +
   2.445 +        /* Register real device with the emulated bus */
   2.446 +        pt_dev = register_real_device(e_bus, "DIRECT PCI", PT_VIRT_DEVFN_AUTO,
   2.447 +            b, d, f, PT_MACHINE_IRQ_AUTO, pci_access);
   2.448 +        if ( pt_dev == NULL )
   2.449 +        {
   2.450 +            PT_LOG("Error: Registration failed (%02x:%02x.%x)\n", b, d, f);
   2.451 +            return -1;
   2.452 +        }
   2.453 +    }
   2.454 +
   2.455 +    return 0;       /* success: every listed device was registered */
   2.456 +}
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/tools/ioemu/hw/pass-through.h	Wed Aug 29 00:49:54 2007 +0300
     3.3 @@ -0,0 +1,89 @@
     3.4 +/*
     3.5 + * Copyright (c) 2007, Neocleus Corporation.
     3.6 + * Copyright (c) 2007, Intel Corporation.
     3.7 + *
     3.8 + * This program is free software; you can redistribute it and/or modify it
     3.9 + * under the terms and conditions of the GNU General Public License,
    3.10 + * version 2, as published by the Free Software Foundation.
    3.11 + *
    3.12 + * This program is distributed in the hope it will be useful, but WITHOUT
    3.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    3.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    3.15 + * more details.
    3.16 + *
    3.17 + * You should have received a copy of the GNU General Public License along with
    3.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    3.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    3.20 + */
    3.21 +#ifndef __PASSTHROUGH_H__
    3.22 +#define __PASSTHROUGH_H__
    3.23 +
    3.24 +#include "vl.h"
    3.25 +#include "pci/header.h"
    3.26 +#include "pci/pci.h"
    3.27 +
    3.28 +/* Log access: comment out PT_LOGGING_ENABLED to compile PT_LOG() away */
    3.29 +#define PT_LOGGING_ENABLED
    3.30 +
    3.31 +#ifdef PT_LOGGING_ENABLED
    3.32 +#define PT_LOG(_f, _a...)   fprintf(logfile, "%s: " _f, __func__, ##_a)
    3.33 +#else
    3.34 +#define PT_LOG(_f, _a...)
    3.35 +#endif
    3.36 +
    3.37 +/* Some compilation flags */
    3.38 +// #define PT_DEBUG_PCI_CONFIG_ACCESS
    3.39 +
    3.40 +#define PT_MACHINE_IRQ_AUTO (0xFFFFFFFF)
    3.41 +#define PT_VIRT_DEVFN_AUTO  (-1)
    3.42 +
    3.43 +/* Misc PCI constants that should be moved to a separate library :) */
    3.44 +#define PCI_CONFIG_SIZE         (256)       /* config bytes mirrored per device */
    3.45 +#define PCI_EXP_DEVCAP_FLR      (1 << 28)   /* tested against the DEVCAP register */
    3.46 +#define PCI_EXP_DEVCTL_FLR      (0x1b)      /* NOTE(review): written whole to DEVCTL; PCIe puts Initiate FLR at bit 15 - confirm */
    3.47 +#define PCI_BAR_ENTRIES         (6)         /* number of BARs walked at registration */
    3.48 +
    3.49 +struct pt_region {      /* per-region mapping state kept by the map callbacks */
    3.50 +    /* Virtual phys base & size (e_size == 0 means the region is not mapped yet) */
    3.51 +    uint32_t e_physbase;
    3.52 +    uint32_t e_size;
    3.53 +    /* Index of region in qemu */
    3.54 +    uint32_t memory_index;
    3.55 +    /* Translation of the emulated address: all members alias one 32-bit value */
    3.56 +    union {
    3.57 +        uint32_t maddr;         /* read by the MMIO mapping callback */
    3.58 +        uint32_t pio_base;      /* read by the PIO mapping callback */
    3.59 +        uint32_t u;             /* type-neutral alias used when filling in BARs */
    3.60 +    } access;
    3.61 +};
    3.62 +
    3.63 +/*
    3.64 +    This structure holds the context of the mapping functions
    3.65 +    and data that is relevant for qemu device management.
    3.66 +*/
    3.67 +struct pt_dev {
    3.68 +    PCIDevice dev;                              /* first member: PCIDevice * is cast to struct pt_dev * */
    3.69 +    struct pci_dev *pci_dev;                     /* libpci struct */
    3.70 +    struct pt_region bases[PCI_NUM_REGIONS];    /* Access regions */
    3.71 +};
    3.72 +
    3.73 +/* Used for formatting PCI BDF into cf8 format; value overlays the bit-fields below */
    3.74 +struct pci_config_cf8 {
    3.75 +    union {
    3.76 +        unsigned int value;             /* the packed 32-bit word */
    3.77 +        struct {                        /* NOTE(review): relies on low-bit-first bit-field layout - confirm for target compilers */
    3.78 +            unsigned int reserved1:2;
    3.79 +            unsigned int reg:6;
    3.80 +            unsigned int func:3;
    3.81 +            unsigned int dev:5;
    3.82 +            unsigned int bus:8;
    3.83 +            unsigned int reserved2:7;
    3.84 +            unsigned int enable:1;
    3.85 +        };
    3.86 +    };
    3.87 +};
    3.88 +
    3.89 +int pt_init(PCIBus * e_bus, char * direct_pci);
    3.90 +
    3.91 +#endif /* __PASSTHROUGH_H__ */
    3.92 +
     4.1 --- a/tools/ioemu/hw/pc.c	Wed Aug 29 00:48:01 2007 +0300
     4.2 +++ b/tools/ioemu/hw/pc.c	Wed Aug 29 00:49:54 2007 +0300
     4.3 @@ -465,7 +465,7 @@ static void pc_init1(uint64_t ram_size, 
     4.4                       DisplayState *ds, const char **fd_filename, int snapshot,
     4.5                       const char *kernel_filename, const char *kernel_cmdline,
     4.6                       const char *initrd_filename,
     4.7 -                     int pci_enabled)
     4.8 +                     int pci_enabled, const char *direct_pci)
     4.9  {
    4.10  #ifndef NOBIOS
    4.11      char buf[1024];
    4.12 @@ -480,6 +480,7 @@ static void pc_init1(uint64_t ram_size, 
    4.13      int piix3_devfn = -1;
    4.14      CPUState *env;
    4.15      NICInfo *nd;
    4.16 +    int rc;
    4.17  
    4.18      linux_boot = (kernel_filename != NULL);
    4.19  
    4.20 @@ -665,6 +666,17 @@ static void pc_init1(uint64_t ram_size, 
    4.21          }
    4.22      }
    4.23  
    4.24 +    /* Pass-through Initialization */
    4.25 +    if ( pci_enabled && direct_pci )
    4.26 +    {
    4.27 +        rc = pt_init(pci_bus, direct_pci); 
    4.28 +        if ( rc < 0 )
    4.29 +        {
    4.30 +            fprintf(logfile, "Error: Initialization failed for pass-through devices\n");
    4.31 +            exit(1);
    4.32 +        }
    4.33 +    }
    4.34 +
    4.35      rtc_state = rtc_init(0x70, 8);
    4.36  
    4.37      register_ioport_read(0x92, 1, 1, ioport92_read, NULL);
    4.38 @@ -801,12 +813,14 @@ static void pc_init_pci(uint64_t ram_siz
    4.39                          int snapshot, 
    4.40                          const char *kernel_filename, 
    4.41                          const char *kernel_cmdline,
    4.42 -                        const char *initrd_filename)
    4.43 +                        const char *initrd_filename,
    4.44 +                        const char *direct_pci)
    4.45  {
    4.46      pc_init1(ram_size, vga_ram_size, boot_device,
    4.47               ds, fd_filename, snapshot,
    4.48               kernel_filename, kernel_cmdline,
    4.49 -             initrd_filename, 1);
    4.50 +             initrd_filename, 1,
    4.51 +             direct_pci);
    4.52  }
    4.53  
    4.54  static void pc_init_isa(uint64_t ram_size, int vga_ram_size, char *boot_device,
    4.55 @@ -814,12 +828,13 @@ static void pc_init_isa(uint64_t ram_siz
    4.56                          int snapshot, 
    4.57                          const char *kernel_filename, 
    4.58                          const char *kernel_cmdline,
    4.59 -                        const char *initrd_filename)
    4.60 +                        const char *initrd_filename,
    4.61 +                        const char *unused)
    4.62  {
    4.63      pc_init1(ram_size, vga_ram_size, boot_device,
    4.64               ds, fd_filename, snapshot,
    4.65               kernel_filename, kernel_cmdline,
    4.66 -             initrd_filename, 0);
    4.67 +             initrd_filename, 0, NULL);
    4.68  }
    4.69  
    4.70  QEMUMachine pc_machine = {
     5.1 --- a/tools/ioemu/vl.c	Wed Aug 29 00:48:01 2007 +0300
     5.2 +++ b/tools/ioemu/vl.c	Wed Aug 29 00:49:54 2007 +0300
     5.3 @@ -133,6 +133,7 @@ static DisplayState display_state;
     5.4  int nographic;
     5.5  int vncviewer;
     5.6  int vncunused;
     5.7 +int is_nativedom;
     5.8  const char* keyboard_layout = NULL;
     5.9  int64_t ticks_per_sec;
    5.10  char *boot_device = NULL;
    5.11 @@ -6497,6 +6498,9 @@ enum {
    5.12      QEMU_OPTION_acpi,
    5.13      QEMU_OPTION_vncviewer,
    5.14      QEMU_OPTION_vncunused,
    5.15 +
    5.16 +    QEMU_OPTION_nativedom,
    5.17 +    QEMU_OPTION_pci,
    5.18  };
    5.19  
    5.20  typedef struct QEMUOption {
    5.21 @@ -6594,6 +6598,10 @@ const QEMUOption qemu_options[] = {
    5.22      { "d", HAS_ARG, QEMU_OPTION_d },
    5.23      { "vcpus", 1, QEMU_OPTION_vcpus },
    5.24      { "acpi", 0, QEMU_OPTION_acpi },
    5.25 +
    5.26 +    { "nativedom", HAS_ARG, QEMU_OPTION_nativedom },
    5.27 +    { "pci", HAS_ARG, QEMU_OPTION_pci},
    5.28 +    
    5.29      { NULL },
    5.30  };
    5.31  
    5.32 @@ -7059,7 +7067,8 @@ int main(int argc, char **argv)
    5.33  #endif
    5.34  
    5.35      char qemu_dm_logfilename[128];
    5.36 -    
    5.37 +    const char *direct_pci = NULL;
    5.38 +
    5.39      /* Ensure that SIGUSR2 is blocked by default when a new thread is created,
    5.40         then only the threads that use the signal unblock it -- this fixes a
    5.41         race condition in Qcow support where the AIO signal is misdelivered.  */
    5.42 @@ -7557,6 +7566,12 @@ int main(int argc, char **argv)
    5.43              case QEMU_OPTION_vncunused:
    5.44                  vncunused++;
    5.45                  break;
    5.46 +            case QEMU_OPTION_nativedom:
    5.47 +                is_nativedom=1;
    5.48 +                break;
    5.49 +            case QEMU_OPTION_pci:
    5.50 +                direct_pci = optarg;
    5.51 +                break;
    5.52              }
    5.53          }
    5.54      }
    5.55 @@ -7614,6 +7629,21 @@ int main(int argc, char **argv)
    5.56  #endif
    5.57  
    5.58  #ifdef CONFIG_DM
    5.59 +    xc_handle = xc_interface_open();
    5.60 +
    5.61 +    /* Compute the RAM size of nativedom */    
    5.62 +    if ( is_nativedom )
    5.63 +    {
    5.64 +        if ( !xc_is_nativedom_enabled(xc_handle) )
    5.65 +        {
    5.66 +            fprintf(logfile, "Error: NativeDom is not enabled. Use the enable_nativedom=1 boot parameter\n");
    5.67 +            exit(1);
    5.68 +        }
    5.69 +
    5.70 +        /* Recompute the size of RAM */
    5.71 +        ram_size = xc_get_nativedom_last_mfn(xc_handle) << 12;
    5.72 +    }
    5.73 +
    5.74      bdrv_init();
    5.75      xenstore_parse_domain_config(domid);
    5.76  #endif /* CONFIG_DM */
    5.77 @@ -7712,8 +7742,6 @@ int main(int argc, char **argv)
    5.78  
    5.79  #ifdef CONFIG_DM
    5.80  
    5.81 -    xc_handle = xc_interface_open();
    5.82 -
    5.83  #if defined(__i386__) || defined(__x86_64__)
    5.84  
    5.85      if (qemu_map_cache_init()) {
    5.86 @@ -7923,7 +7951,8 @@ int main(int argc, char **argv)
    5.87  
    5.88      machine->init(ram_size, vga_ram_size, boot_device,
    5.89                    ds, fd_filename, snapshot,
    5.90 -                  kernel_filename, kernel_cmdline, initrd_filename);
    5.91 +                  kernel_filename, kernel_cmdline, initrd_filename,
    5.92 +                  direct_pci);
    5.93      free(boot_device);
    5.94  
    5.95      /* init USB devices */
     6.1 --- a/tools/ioemu/vl.h	Wed Aug 29 00:48:01 2007 +0300
     6.2 +++ b/tools/ioemu/vl.h	Wed Aug 29 00:49:54 2007 +0300
     6.3 @@ -727,7 +727,7 @@ typedef void QEMUMachineInitFunc(uint64_
     6.4                                   char *boot_device,
     6.5               DisplayState *ds, const char **fd_filename, int snapshot,
     6.6               const char *kernel_filename, const char *kernel_cmdline,
     6.7 -             const char *initrd_filename);
     6.8 +             const char *initrd_filename, const char *direct_pci);
     6.9  
    6.10  typedef struct QEMUMachine {
    6.11      const char *name;