From e7911109f4321e9ba0cc56a253b653600aa46bea Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Wed, 31 Dec 2008 16:00:20 +0000 Subject: [PATCH] disable qemu PCI devices in HVM domains Magic ioport (0x10) protocol for negotating with guest PV drivers during startup, and allowing PV drivers to disable hardware emulations thus preventing guest from seeing the same device through two paths. Protocol and implementation from the Citrix Xenserver product line. Documentation (protocol spec) will follow in a moment. Contributed-By: Steven Smith Signed-off-by: Ian Jackson --- block-raw-posix.c | 5 ++ hw/ide.c | 48 ++++++++++ hw/pc.h | 2 + hw/pci.c | 43 +++++++++ hw/xen_platform.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++ i386-dm/exec-dm.c | 13 ++- qemu-xen.h | 7 +- vl.c | 59 ++++++++++--- xenstore.c | 28 ++++++ 9 files changed, 408 insertions(+), 16 deletions(-) diff --git a/block-raw-posix.c b/block-raw-posix.c index 044d1f485..f705c51cf 100644 --- a/block-raw-posix.c +++ b/block-raw-posix.c @@ -51,6 +51,7 @@ #include #include #include +#include #endif #ifdef __FreeBSD__ #include @@ -792,6 +793,10 @@ static void raw_close(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; if (s->fd >= 0) { +#ifndef CONFIG_STUBDOM + /* Invalidate buffer cache for this device. */ + ioctl(s->fd, BLKFLSBUF, 0); +#endif close(s->fd); s->fd = -1; if (s->aligned_buf != NULL) diff --git a/hw/ide.c b/hw/ide.c index bf97d6917..da7057cab 100644 --- a/hw/ide.c +++ b/hw/ide.c @@ -501,6 +501,7 @@ typedef struct PCIIDEState { int type; /* see IDE_TYPE_xxx */ } PCIIDEState; +static PCIIDEState *principal_ide_controller; #if defined(__ia64__) #include @@ -2906,6 +2907,47 @@ static void ide_reset(IDEState *s) s->media_changed = 0; } +/* Unplug all of the IDE hard disks, starting at index @start in the + table. */ +static void _ide_unplug_harddisks(int start) +{ + IDEState *s; + int i, j; + + if (!principal_ide_controller) { + fprintf(stderr, "No principal controller?\n"); + return; + } + for (i = start; i < 4; i++) { + s = principal_ide_controller->ide_if + i; + if (!s->bs) + continue; /* drive not present */ + if (s->is_cdrom) + continue; /* cdrom */ + /* Is a hard disk, unplug it. */ + for (j = 0; j < nb_drives; j++) + if (drives_table[j].bdrv == s->bs) + drives_table[j].bdrv = NULL; + bdrv_close(s->bs); + s->bs = NULL; + ide_reset(s); + } +} + +/* Unplug all hard disks except for the primary master (which will + almost always be the boot device). */ +void ide_unplug_aux_harddisks(void) +{ + _ide_unplug_harddisks(1); +} + +/* Unplug all hard disks, including the boot device. */ +void ide_unplug_harddisks(void) +{ + _ide_unplug_harddisks(0); +} + + struct partition { uint8_t boot_ind; /* 0x80 - active */ uint8_t head; /* starting head */ @@ -3423,6 +3465,9 @@ void pci_cmd646_ide_init(PCIBus *bus, BlockDriverState **hd_table, sizeof(PCIIDEState), -1, NULL, NULL); + if (principal_ide_controller) + abort(); + principal_ide_controller = d; d->type = IDE_TYPE_CMD646; pci_conf = d->dev.config; pci_conf[0x00] = 0x95; // CMD646 @@ -3557,6 +3602,9 @@ void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn, sizeof(PCIIDEState), devfn, NULL, NULL); + if (principal_ide_controller) + abort(); + principal_ide_controller = d; d->type = IDE_TYPE_PIIX3; pci_conf = d->dev.config; diff --git a/hw/pc.h b/hw/pc.h index 02b70188a..455306d2b 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -150,6 +150,8 @@ void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn, qemu_irq *pic); void pci_piix4_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn, qemu_irq *pic); +void ide_unplug_harddisks(void); +void ide_unplug_aux_harddisks(void); /* ne2000.c */ diff --git a/hw/pci.c b/hw/pci.c index 2d4099b32..e72c6694c 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -26,6 +26,9 @@ #include "console.h" #include "net.h" +#include "exec-all.h" +#include "qemu-xen.h" + //#define DEBUG_PCI struct PCIBus { @@ -648,6 +651,46 @@ void pci_nic_init(PCIBus *bus, NICInfo *nd, int devfn) } } +void pci_unplug_netifs(void) +{ + PCIBus *bus; + PCIDevice *dev; + PCIIORegion *region; + int x; + int i; + + /* We only support one PCI bus */ + for (bus = first_bus; bus; bus = NULL) { + for (x = 0; x < 256; x++) { + dev = bus->devices[x]; + if (dev && + dev->config[0xa] == 0 && + dev->config[0xb] == 2) { + /* Found a netif. Remove it from the bus. Note that + we don't free it here, since there could still be + references to it floating around. There are only + ever one or two structures leaked, and it's not + worth finding them all. */ + bus->devices[x] = NULL; + for (i = 0; i < PCI_NUM_REGIONS; i++) { + region = &dev->io_regions[i]; + if (region->addr == (uint32_t)-1 || + region->size == 0) + continue; + fprintf(logfile, "region type %d at [%x,%x).\n", + region->type, region->addr, + region->addr+region->size); + if (region->type == PCI_ADDRESS_SPACE_IO) { + isa_unassign_ioport(region->addr, region->size); + } else if (region->type == PCI_ADDRESS_SPACE_MEM) { + unregister_iomem(region->addr); + } + } + } + } + } +} + typedef struct { PCIDevice dev; PCIBus *bus; diff --git a/hw/xen_platform.c b/hw/xen_platform.c index 937b802c6..84a2b1917 100644 --- a/hw/xen_platform.c +++ b/hw/xen_platform.c @@ -24,13 +24,20 @@ */ #include "hw.h" +#include "pc.h" #include "pci.h" #include "irq.h" #include "qemu-xen.h" +#include #include +static int drivers_blacklisted; +static uint16_t driver_product_version; +static int throttling_disabled; extern FILE *logfile; +static char log_buffer[4096]; +static int log_buffer_off; #define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */ @@ -41,6 +48,88 @@ typedef struct PCIXenPlatformState uint64_t vga_stolen_ram; } PCIXenPlatformState; +/* We throttle access to dom0 syslog, to avoid DOS attacks. This is + modelled as a token bucket, with one token for every byte of log. + The bucket size is 128KB (->1024 lines of 128 bytes each) and + refills at 256B/s. It starts full. The guest is blocked if no + tokens are available when it tries to generate a log message. */ +#define BUCKET_MAX_SIZE (128*1024) +#define BUCKET_FILL_RATE 256 + +static void throttle(unsigned count) +{ + static unsigned available; + static struct timespec last_refil; + static int started; + static int warned; + + struct timespec waiting_for, now; + double delay; + struct timespec ts; + + if (throttling_disabled) + return; + + if (!started) { + clock_gettime(CLOCK_MONOTONIC, &last_refil); + available = BUCKET_MAX_SIZE; + started = 1; + } + + if (count > BUCKET_MAX_SIZE) { + fprintf(logfile, "tried to get %d tokens, but bucket size is %d\n", + BUCKET_MAX_SIZE, count); + exit(1); + } + + if (available < count) { + /* The bucket is empty. Refil it */ + + /* When will it be full enough to handle this request? */ + delay = (double)(count - available) / BUCKET_FILL_RATE; + waiting_for = last_refil; + waiting_for.tv_sec += delay; + waiting_for.tv_nsec += (delay - (int)delay) * 1e9; + if (waiting_for.tv_nsec >= 1000000000) { + waiting_for.tv_nsec -= 1000000000; + waiting_for.tv_sec++; + } + + /* How long do we have to wait? (might be negative) */ + clock_gettime(CLOCK_MONOTONIC, &now); + ts.tv_sec = waiting_for.tv_sec - now.tv_sec; + ts.tv_nsec = waiting_for.tv_nsec - now.tv_nsec; + if (ts.tv_nsec < 0) { + ts.tv_sec--; + ts.tv_nsec += 1000000000; + } + + /* Wait for it. */ + if (ts.tv_sec > 0 || + (ts.tv_sec == 0 && ts.tv_nsec > 0)) { + if (!warned) { + fprintf(logfile, "throttling guest access to syslog"); + warned = 1; + } + while (nanosleep(&ts, &ts) < 0 && errno == EINTR) + ; + } + + /* Refil */ + clock_gettime(CLOCK_MONOTONIC, &now); + delay = (now.tv_sec - last_refil.tv_sec) + + (now.tv_nsec - last_refil.tv_nsec) * 1.0e-9; + available += BUCKET_FILL_RATE * delay; + if (available > BUCKET_MAX_SIZE) + available = BUCKET_MAX_SIZE; + last_refil = now; + } + + assert(available >= count); + + available -= count; +} + static uint32_t xen_platform_ioport_readb(void *opaque, uint32_t addr) { PCIXenPlatformState *s = opaque; @@ -68,6 +157,19 @@ static void xen_platform_ioport_writeb(void *opaque, uint32_t addr, uint32_t val d->platform_flags = val & PFFLAG_ROM_LOCK; break; } + case 8: + { + if (val == '\n' || log_buffer_off == sizeof(log_buffer) - 1) { + /* Flush buffer */ + log_buffer[log_buffer_off] = 0; + throttle(log_buffer_off); + fprintf(logfile, "%s\n", log_buffer); + log_buffer_off = 0; + break; + } + log_buffer[log_buffer_off++] = val; + } + break; default: break; } @@ -163,6 +265,113 @@ static void platform_mmio_map(PCIDevice *d, int region_num, cpu_register_physical_memory(addr, 0x1000000, mmio_io_addr); } +#define UNPLUG_ALL_IDE_DISKS 1 +#define UNPLUG_ALL_NICS 2 +#define UNPLUG_AUX_IDE_DISKS 4 + +static void platform_fixed_ioport_write2(void *opaque, uint32_t addr, uint32_t val) +{ + switch (addr - 0x10) { + case 0: + /* Unplug devices. Value is a bitmask of which devices to + unplug, with bit 0 the IDE devices, bit 1 the network + devices, and bit 2 the non-primary-master IDE devices. */ + if (val & UNPLUG_ALL_IDE_DISKS) + ide_unplug_harddisks(); + if (val & UNPLUG_ALL_NICS) { + pci_unplug_netifs(); + net_tap_shutdown_all(); + } + if (val & UNPLUG_AUX_IDE_DISKS) { + ide_unplug_aux_harddisks(); + } + break; + case 2: + switch (val) { + case 1: + fprintf(logfile, "Citrix Windows PV drivers loaded in guest\n"); + break; + case 0: + fprintf(logfile, "Guest claimed to be running PV product 0?\n"); + break; + default: + fprintf(logfile, "Unknown PV product %d loaded in guest\n", val); + break; + } + driver_product_version = val; + break; + } +} + +static void platform_fixed_ioport_write4(void *opaque, uint32_t addr, + uint32_t val) +{ + switch (addr - 0x10) { + case 0: + /* PV driver version */ + if (driver_product_version == 0) { + fprintf(logfile, + "Drivers tried to set their version number (%d) before setting the product number?\n", + val); + return; + } + fprintf(logfile, "PV driver build %d\n", val); + if (xenstore_pv_driver_build_blacklisted(driver_product_version, + val)) { + fprintf(logfile, "Drivers are blacklisted!\n"); + drivers_blacklisted = 1; + } + break; + } +} + + +static void platform_fixed_ioport_write1(void *opaque, uint32_t addr, uint32_t val) +{ + switch (addr - 0x10) { + case 2: + /* Send bytes to syslog */ + if (val == '\n' || log_buffer_off == sizeof(log_buffer) - 1) { + /* Flush buffer */ + log_buffer[log_buffer_off] = 0; + throttle(log_buffer_off); + fprintf(logfile, "%s\n", log_buffer); + log_buffer_off = 0; + break; + } + log_buffer[log_buffer_off++] = val; + break; + } +} + +static uint32_t platform_fixed_ioport_read2(void *opaque, uint32_t addr) +{ + switch (addr - 0x10) { + case 0: + if (drivers_blacklisted) { + /* The drivers will recognise this magic number and refuse + * to do anything. */ + return 0xd249; + } else { + /* Magic value so that you can identify the interface. */ + return 0x49d2; + } + default: + return 0xffff; + } +} + +static uint32_t platform_fixed_ioport_read1(void *opaque, uint32_t addr) +{ + switch (addr - 0x10) { + case 2: + /* Version number */ + return 1; + default: + return 0xff; + } +} + struct pci_config_header { uint16_t vendor_id; uint16_t device_id; @@ -224,6 +433,7 @@ void pci_xen_platform_init(PCIBus *bus) { PCIXenPlatformState *d; struct pci_config_header *pch; + struct stat stbuf; printf("Register xen platform.\n"); d = (PCIXenPlatformState *)pci_register_device( @@ -255,4 +465,13 @@ void pci_xen_platform_init(PCIBus *bus) register_savevm("platform", 0, 2, xen_pci_save, xen_pci_load, d); printf("Done register platform.\n"); + register_ioport_write(0x10, 16, 4, platform_fixed_ioport_write4, NULL); + register_ioport_write(0x10, 16, 2, platform_fixed_ioport_write2, NULL); + register_ioport_write(0x10, 16, 1, platform_fixed_ioport_write1, NULL); + register_ioport_read(0x10, 16, 2, platform_fixed_ioport_read2, NULL); + register_ioport_read(0x10, 16, 1, platform_fixed_ioport_read1, NULL); + + if (stat("/etc/disable-guest-log-throttle", &stbuf) == 0) + throttling_disabled = 1; + } diff --git a/i386-dm/exec-dm.c b/i386-dm/exec-dm.c index 6dbd4607a..20ac93c0d 100644 --- a/i386-dm/exec-dm.c +++ b/i386-dm/exec-dm.c @@ -272,7 +272,7 @@ void cpu_abort(CPUState *env, const char *fmt, ...) /* XXX: Simple implementation. Fix later */ #define MAX_MMIO 32 -struct mmio_space { +static struct mmio_space { target_phys_addr_t start; unsigned long size; unsigned long io_index; @@ -418,6 +418,17 @@ int iomem_index(target_phys_addr_t addr) return 0; } +void unregister_iomem(target_phys_addr_t start) +{ + int index = iomem_index(start); + if (index) { + fprintf(logfile, "squash iomem [%lx, %lx).\n", mmio[index].start, + mmio[index].start + mmio[index].size); + mmio[index].start = mmio[index].size = 0; + } +} + + #if defined(__i386__) || defined(__x86_64__) #define phys_ram_addr(x) (qemu_map_cache(x)) #elif defined(__ia64__) diff --git a/qemu-xen.h b/qemu-xen.h index 8375b7933..44467a3fd 100644 --- a/qemu-xen.h +++ b/qemu-xen.h @@ -26,8 +26,11 @@ void xen_vga_populate_vram(uint64_t vram_addr); void xen_vga_vram_map(uint64_t vram_addr, int copy); #endif - +void ide_unplug_harddisks(void); +void net_tap_shutdown_all(void); +void pci_unplug_netifs(void); void destroy_hvm_domain(void); +void unregister_iomem(target_phys_addr_t start); #ifdef __ia64__ static inline void xc_domain_shutdown_hook(int xc_handle, uint32_t domid) @@ -88,6 +91,8 @@ char *xenstore_vm_read(int domid, const char *key, unsigned int *len); char *xenstore_device_model_read(int domid, char *key, unsigned int *len); char *xenstore_read_battery_data(int battery_status); int xenstore_refresh_battery_status(void); +int xenstore_pv_driver_build_blacklisted(uint16_t product_number, + uint32_t build_nr); /* xenfbfront.c */ int xenfb_pv_display_init(DisplayState *ds); diff --git a/vl.c b/vl.c index ac38c2abe..ae9170d83 100644 --- a/vl.c +++ b/vl.c @@ -275,6 +275,20 @@ uint8_t qemu_uuid[16]; #include "xen-vl-extra.c" +typedef struct IOHandlerRecord { + int fd; + IOCanRWHandler *fd_read_poll; + IOHandler *fd_read; + IOHandler *fd_write; + int deleted; + void *opaque; + /* temporary data */ + struct pollfd *ufd; + struct IOHandlerRecord *next; +} IOHandlerRecord; + +static IOHandlerRecord *first_io_handler; + /***********************************************************/ /* x86 ISA bus support */ @@ -4355,6 +4369,7 @@ void do_info_slirp(void) typedef struct TAPState { VLANClientState *vc; int fd; + struct TAPState *next; char down_script[1024]; char script_arg[1024]; } TAPState; @@ -4392,6 +4407,34 @@ static void tap_send(void *opaque) } } +static TAPState *head_net_tap; + +void net_tap_shutdown_all(void) +{ + struct IOHandlerRecord **pioh, *ioh; + + while (head_net_tap) { + pioh = &first_io_handler; + for (;;) { + ioh = *pioh; + if (ioh == NULL) + break; + if (ioh->fd == head_net_tap->fd) { + *pioh = ioh->next; + qemu_free(ioh); + break; + } + pioh = &ioh->next; + } + if (!ioh) + fprintf(stderr, + "warning: can't find iohandler for %d to close it properly.\n", + head_net_tap->fd); + close(head_net_tap->fd); + head_net_tap = head_net_tap->next; + } +} + /* fd support */ static TAPState *net_tap_fd_init(VLANState *vlan, int fd) @@ -4403,6 +4446,8 @@ static TAPState *net_tap_fd_init(VLANState *vlan, int fd) return NULL; s->fd = fd; s->vc = qemu_new_vlan_client(vlan, tap_receive, NULL, s); + s->next = head_net_tap; + head_net_tap = s; qemu_set_fd_handler(s->fd, tap_send, NULL, s); snprintf(s->vc->info_str, sizeof(s->vc->info_str), "tap: fd=%d", fd); return s; @@ -6133,20 +6178,6 @@ static void dumb_display_init(DisplayState *ds) #define MAX_IO_HANDLERS 64 -typedef struct IOHandlerRecord { - int fd; - IOCanRWHandler *fd_read_poll; - IOHandler *fd_read; - IOHandler *fd_write; - int deleted; - void *opaque; - /* temporary data */ - struct pollfd *ufd; - struct IOHandlerRecord *next; -} IOHandlerRecord; - -static IOHandlerRecord *first_io_handler; - /* XXX: fd_read_poll should be suppressed, but an API change is necessary in the character devices to suppress fd_can_read(). */ int qemu_set_fd_handler2(int fd, diff --git a/xenstore.c b/xenstore.c index 86e8b639b..e57fd6658 100644 --- a/xenstore.c +++ b/xenstore.c @@ -778,6 +778,34 @@ void xenstore_record_dm(char *subpath, char *state) free(path); } +int +xenstore_pv_driver_build_blacklisted(uint16_t product_nr, + uint32_t build_nr) +{ + char *buf = NULL; + char *tmp; + const char *product; + + switch (product_nr) { + case 1: + product = "xensource-windows"; + break; + default: + /* Don't know what product this is -> we can't blacklist + * it. */ + return 0; + } + if (asprintf(&buf, "/mh/driver-blacklist/%s/%d", product, build_nr) < 0) + return 0; + tmp = xs_read(xsh, XBT_NULL, buf, NULL); + free(tmp); + free(buf); + if (tmp == NULL) + return 0; + else + return 1; +} + void xenstore_record_dm_state(char *state) { xenstore_record_dm("state", state); -- 2.39.5