ia64/xen-unstable

changeset 14748:1a347b19142a

PV-on-HVM: Add new ioreq 'invalidate' for zapping ioemu-dm mapcache
after balloon operations in an HVM guest.

This removes the I/O port hack from the guest OS, and from ioemu.

Also we flush on reservation *increases* as well as decreases. This is
necessary until qemu-dm can demand-fault page mappings into existing
valid buckets.

Signed-off-by: Steven Hand <steven@xensource.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Apr 05 14:29:18 2007 +0100 (2007-04-05)
parents c29a4adc65c6
children 40d4150764ad
files linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c tools/ioemu/hw/xen_platform.c tools/ioemu/target-i386-dm/helper2.c unmodified_drivers/linux-2.6/platform-pci/platform-pci.c xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/io.c xen/arch/x86/hvm/platform.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmx.c xen/include/asm-x86/hvm/io.h xen/include/asm-x86/hvm/support.h xen/include/public/hvm/ioreq.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Thu Apr 05 14:02:55 2007 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Thu Apr 05 14:29:18 2007 +0100
     1.3 @@ -306,14 +306,6 @@ static int decrease_reservation(unsigned
     1.4  		balloon_append(pfn_to_page(pfn));
     1.5  	}
     1.6  
     1.7 -#ifndef CONFIG_XEN
     1.8 -	/* XXX Temporary hack. */
     1.9 -	{
    1.10 -		extern void xen_invalidate_foreign_mappings(void);
    1.11 -		xen_invalidate_foreign_mappings(); 
    1.12 -	}
    1.13 -#endif
    1.14 -
    1.15  	set_xen_guest_handle(reservation.extent_start, frame_list);
    1.16  	reservation.nr_extents   = nr_pages;
    1.17  	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
     2.1 --- a/tools/ioemu/hw/xen_platform.c	Thu Apr 05 14:02:55 2007 +0100
     2.2 +++ b/tools/ioemu/hw/xen_platform.c	Thu Apr 05 14:29:18 2007 +0100
     2.3 @@ -29,16 +29,10 @@
     2.4  
     2.5  extern FILE *logfile;
     2.6  
     2.7 -static void platform_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     2.8 -{
     2.9 -    if (val == 0)
    2.10 -        qemu_invalidate_map_cache();
    2.11 -}
    2.12 -
    2.13  static void platform_ioport_map(PCIDevice *pci_dev, int region_num,
    2.14                                  uint32_t addr, uint32_t size, int type)
    2.15  {
    2.16 -    register_ioport_write(addr, 1, 1, platform_ioport_write, NULL);
    2.17 +    /* nothing yet */
    2.18  }
    2.19  
    2.20  static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr)
     3.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Thu Apr 05 14:02:55 2007 +0100
     3.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Thu Apr 05 14:29:18 2007 +0100
     3.3 @@ -506,8 +506,11 @@ void __handle_ioreq(CPUState *env, ioreq
     3.4          cpu_ioreq_xchg(env, req);
     3.5          break;
     3.6      case IOREQ_TYPE_TIMEOFFSET:
     3.7 -	cpu_ioreq_timeoffset(env, req);
     3.8 -	break;
     3.9 +        cpu_ioreq_timeoffset(env, req);
    3.10 +        break;
    3.11 +    case IOREQ_TYPE_INVALIDATE:
    3.12 +        qemu_invalidate_map_cache();
    3.13 +        break;
    3.14      default:
    3.15          hw_error("Invalid ioreq type 0x%x\n", req->type);
    3.16      }
     4.1 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Thu Apr 05 14:02:55 2007 +0100
     4.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Thu Apr 05 14:29:18 2007 +0100
     4.3 @@ -208,14 +208,6 @@ static uint64_t get_callback_via(struct 
     4.4  		((uint64_t)(pin - 1) & 3));
     4.5  }
     4.6  
     4.7 -/* Invalidate foreign mappings (e.g., in qemu-based device model). */
     4.8 -static uint16_t invlmap_port;
     4.9 -void xen_invalidate_foreign_mappings(void)
    4.10 -{
    4.11 -	outb(0, invlmap_port);
    4.12 -}
    4.13 -EXPORT_SYMBOL(xen_invalidate_foreign_mappings);
    4.14 -
    4.15  static int __devinit platform_pci_init(struct pci_dev *pdev,
    4.16  				       const struct pci_device_id *ent)
    4.17  {
    4.18 @@ -240,8 +232,6 @@ static int __devinit platform_pci_init(s
    4.19  		return -ENOENT;
    4.20  	}
    4.21  
    4.22 -	invlmap_port = ioaddr;
    4.23 -
    4.24  	if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL)
    4.25  	{
    4.26  		printk(KERN_ERR ":MEM I/O resource 0x%lx @ 0x%lx busy\n",
     5.1 --- a/xen/arch/x86/hvm/hvm.c	Thu Apr 05 14:02:55 2007 +0100
     5.2 +++ b/xen/arch/x86/hvm/hvm.c	Thu Apr 05 14:29:18 2007 +0100
     5.3 @@ -521,32 +521,19 @@ static hvm_hypercall_t *hvm_hypercall_ta
     5.4      HYPERCALL(hvm_op)
     5.5  };
     5.6  
     5.7 -int hvm_do_hypercall(struct cpu_user_regs *pregs)
     5.8 +static void __hvm_do_hypercall(struct cpu_user_regs *pregs)
     5.9  {
    5.10 -    if ( unlikely(ring_3(pregs)) )
    5.11 -    {
    5.12 -        pregs->eax = -EPERM;
    5.13 -        return 0;
    5.14 -    }
    5.15 -
    5.16      if ( (pregs->eax >= NR_hypercalls) || !hvm_hypercall_table[pregs->eax] )
    5.17      {
    5.18          if ( pregs->eax != __HYPERVISOR_grant_table_op )
    5.19              gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %d.\n",
    5.20                       current->domain->domain_id, current->vcpu_id, pregs->eax);
    5.21          pregs->eax = -ENOSYS;
    5.22 -        return 0;
    5.23 +        return;
    5.24      }
    5.25  
    5.26 -    /* Check for preemption: EIP will be modified from this dummy value. */
    5.27 -    pregs->eip = 0xF0F0F0FF;
    5.28 -
    5.29      pregs->eax = hvm_hypercall_table[pregs->eax](
    5.30          pregs->ebx, pregs->ecx, pregs->edx, pregs->esi, pregs->edi);
    5.31 -
    5.32 -    /* XXX: put fake IO instr here to inform the emulator to flush mapcache */
    5.33 -
    5.34 -    return (pregs->eip != 0xF0F0F0FF); /* preempted? */
    5.35  }
    5.36  
    5.37  #else /* defined(__x86_64__) */
    5.38 @@ -606,14 +593,8 @@ static hvm_hypercall_t *hvm_hypercall32_
    5.39      HYPERCALL(event_channel_op)
    5.40  };
    5.41  
    5.42 -int hvm_do_hypercall(struct cpu_user_regs *pregs)
    5.43 +static void __hvm_do_hypercall(struct cpu_user_regs *pregs)
    5.44  {
    5.45 -    if ( unlikely(ring_3(pregs)) )
    5.46 -    {
    5.47 -        pregs->rax = -EPERM;
    5.48 -        return 0;
    5.49 -    }
    5.50 -
    5.51      pregs->rax = (uint32_t)pregs->eax; /* mask in case compat32 caller */
    5.52      if ( (pregs->rax >= NR_hypercalls) || !hvm_hypercall64_table[pregs->rax] )
    5.53      {
    5.54 @@ -621,12 +602,9 @@ int hvm_do_hypercall(struct cpu_user_reg
    5.55              gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %ld.\n",
    5.56                       current->domain->domain_id, current->vcpu_id, pregs->rax);
    5.57          pregs->rax = -ENOSYS;
    5.58 -        return 0;
    5.59 +        return;
    5.60      }
    5.61  
    5.62 -    /* Check for preemption: RIP will be modified from this dummy value. */
    5.63 -    pregs->rip = 0xF0F0F0FF;
    5.64 -
    5.65      if ( current->arch.paging.mode->guest_levels == 4 )
    5.66      {
    5.67          pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
    5.68 @@ -643,14 +621,41 @@ int hvm_do_hypercall(struct cpu_user_reg
    5.69                                                         (uint32_t)pregs->esi,
    5.70                                                         (uint32_t)pregs->edi);
    5.71      }
    5.72 -
    5.73 -    /* XXX: put fake IO instr here to inform the emulator to flush mapcache */
    5.74 -
    5.75 -    return (pregs->rip != 0xF0F0F0FF); /* preempted? */
    5.76  }
    5.77  
    5.78  #endif /* defined(__x86_64__) */
    5.79  
    5.80 +int hvm_do_hypercall(struct cpu_user_regs *pregs)
    5.81 +{
    5.82 +    int flush, preempted;
    5.83 +    unsigned long old_eip;
    5.84 +
    5.85 +    if ( unlikely(ring_3(pregs)) )
    5.86 +    {
    5.87 +        pregs->eax = -EPERM;
    5.88 +        return 0;
    5.89 +    }
    5.90 +
    5.91 +    /*
    5.92 +     * NB. In future flush only on decrease_reservation.
    5.93 +     * For now we also need to flush when pages are added, as qemu-dm is not
    5.94 +     * yet capable of faulting pages into an existing valid mapcache bucket.
    5.95 +     */
    5.96 +    flush = ((uint32_t)pregs->eax == __HYPERVISOR_memory_op);
    5.97 +
    5.98 +    /* Check for preemption: RIP will be modified from this dummy value. */
    5.99 +    old_eip = pregs->eip;
   5.100 +    pregs->eip = 0xF0F0F0FF;
   5.101 +
   5.102 +    __hvm_do_hypercall(pregs);
   5.103 +
   5.104 +    preempted = (pregs->eip != 0xF0F0F0FF);
   5.105 +    pregs->eip = old_eip;
   5.106 +
   5.107 +    return (preempted ? HVM_HCALL_preempted :
   5.108 +            flush ? HVM_HCALL_invalidate : HVM_HCALL_completed);
   5.109 +}
   5.110 +
   5.111  void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3)
   5.112  {
   5.113      v->arch.hvm_vcpu.hw_cr3 = guest_cr3;
     6.1 --- a/xen/arch/x86/hvm/io.c	Thu Apr 05 14:02:55 2007 +0100
     6.2 +++ b/xen/arch/x86/hvm/io.c	Thu Apr 05 14:29:18 2007 +0100
     6.3 @@ -845,10 +845,17 @@ void hvm_io_assist(void)
     6.4  
     6.5      p->state = STATE_IOREQ_NONE;
     6.6  
     6.7 -    if ( p->type == IOREQ_TYPE_PIO )
     6.8 +    switch ( p->type )
     6.9 +    {
    6.10 +    case IOREQ_TYPE_INVALIDATE:
    6.11 +        goto out;
    6.12 +    case IOREQ_TYPE_PIO:
    6.13          hvm_pio_assist(regs, p, io_opp);
    6.14 -    else
    6.15 +        break;
    6.16 +    default:
    6.17          hvm_mmio_assist(regs, p, io_opp);
    6.18 +        break;
    6.19 +    }
    6.20  
    6.21      /* Copy register changes back into current guest state. */
    6.22      hvm_load_cpu_guest_regs(v, regs);
    6.23 @@ -861,6 +868,7 @@ void hvm_io_assist(void)
    6.24          mark_dirty(d, gmfn);
    6.25      }
    6.26  
    6.27 + out:
    6.28      vcpu_end_shutdown_deferral(v);
    6.29  }
    6.30  
     7.1 --- a/xen/arch/x86/hvm/platform.c	Thu Apr 05 14:02:55 2007 +0100
     7.2 +++ b/xen/arch/x86/hvm/platform.c	Thu Apr 05 14:29:18 2007 +0100
     7.3 @@ -941,6 +941,34 @@ void send_timeoffset_req(unsigned long t
     7.4          printk("Unsuccessful timeoffset update\n");
     7.5  }
     7.6  
     7.7 +/* Ask ioemu mapcache to invalidate mappings. */
     7.8 +void send_invalidate_req(void)
     7.9 +{
    7.10 +    struct vcpu *v = current;
    7.11 +    vcpu_iodata_t *vio;
    7.12 +    ioreq_t *p;
    7.13 +
    7.14 +    vio = get_vio(v->domain, v->vcpu_id);
    7.15 +    if ( vio == NULL )
    7.16 +    {
    7.17 +        printk("bad shared page: %lx\n", (unsigned long) vio);
    7.18 +        domain_crash_synchronous();
    7.19 +    }
    7.20 +
    7.21 +    p = &vio->vp_ioreq;
    7.22 +    if ( p->state != STATE_IOREQ_NONE )
    7.23 +        printk("WARNING: send invalidate req with something "
    7.24 +               "already pending (%d)?\n", p->state);
    7.25 +
    7.26 +    p->type = IOREQ_TYPE_INVALIDATE;
    7.27 +    p->size = 4;
    7.28 +    p->dir = IOREQ_WRITE;
    7.29 +    p->data = ~0UL; /* flush all */
    7.30 +    p->io_count++;
    7.31 +
    7.32 +    hvm_send_assist_req(v);
    7.33 +}
    7.34 +
    7.35  static void mmio_operands(int type, unsigned long gpa,
    7.36                            struct hvm_io_op *mmio_op,
    7.37                            unsigned char op_size)
     8.1 --- a/xen/arch/x86/hvm/svm/svm.c	Thu Apr 05 14:02:55 2007 +0100
     8.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Thu Apr 05 14:29:18 2007 +0100
     8.3 @@ -2166,7 +2166,7 @@ asmlinkage void svm_vmexit_handler(struc
     8.4      unsigned long eip;
     8.5      struct vcpu *v = current;
     8.6      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     8.7 -    int inst_len;
     8.8 +    int inst_len, rc;
     8.9  
    8.10      exit_reason = vmcb->exitcode;
    8.11      save_svm_cpu_user_regs(v, regs);
    8.12 @@ -2275,8 +2275,13 @@ asmlinkage void svm_vmexit_handler(struc
    8.13          inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
    8.14          ASSERT(inst_len > 0);
    8.15          HVMTRACE_1D(VMMCALL, v, regs->eax);
    8.16 -        if ( !hvm_do_hypercall(regs) )
    8.17 -            __update_guest_eip(vmcb, inst_len); /* not preempted */
    8.18 +        rc = hvm_do_hypercall(regs);
    8.19 +        if ( rc != HVM_HCALL_preempted )
    8.20 +        {
    8.21 +            __update_guest_eip(vmcb, inst_len);
    8.22 +            if ( rc == HVM_HCALL_invalidate )
    8.23 +                send_invalidate_req();
    8.24 +        }
    8.25          break;
    8.26  
    8.27      case VMEXIT_CR0_READ:
     9.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 05 14:02:55 2007 +0100
     9.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 05 14:29:18 2007 +0100
     9.3 @@ -2626,10 +2626,16 @@ asmlinkage void vmx_vmexit_handler(struc
     9.4      }
     9.5      case EXIT_REASON_VMCALL:
     9.6      {
     9.7 +        int rc;
     9.8          HVMTRACE_1D(VMMCALL, v, regs->eax);
     9.9          inst_len = __get_instruction_length(); /* Safe: VMCALL */
    9.10 -        if ( !hvm_do_hypercall(regs) )
    9.11 -            __update_guest_eip(inst_len); /* not preempted */
    9.12 +        rc = hvm_do_hypercall(regs);
    9.13 +        if ( rc != HVM_HCALL_preempted )
    9.14 +        {
    9.15 +            __update_guest_eip(inst_len);
    9.16 +            if ( rc == HVM_HCALL_invalidate )
    9.17 +                send_invalidate_req();
    9.18 +        }
    9.19          break;
    9.20      }
    9.21      case EXIT_REASON_CR_ACCESS:
    10.1 --- a/xen/include/asm-x86/hvm/io.h	Thu Apr 05 14:02:55 2007 +0100
    10.2 +++ b/xen/include/asm-x86/hvm/io.h	Thu Apr 05 14:29:18 2007 +0100
    10.3 @@ -147,6 +147,7 @@ static inline int irq_masked(unsigned lo
    10.4  extern void send_pio_req(unsigned long port, unsigned long count, int size,
    10.5                           paddr_t value, int dir, int df, int value_is_ptr);
    10.6  void send_timeoffset_req(unsigned long timeoff);
    10.7 +void send_invalidate_req(void);
    10.8  extern void handle_mmio(unsigned long gpa);
    10.9  extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
   10.10  extern void hvm_io_assist(void);
    11.1 --- a/xen/include/asm-x86/hvm/support.h	Thu Apr 05 14:02:55 2007 +0100
    11.2 +++ b/xen/include/asm-x86/hvm/support.h	Thu Apr 05 14:29:18 2007 +0100
    11.3 @@ -228,6 +228,9 @@ int hvm_copy_from_guest_virt(void *buf, 
    11.4  void hvm_print_line(struct vcpu *v, const char c);
    11.5  void hlt_timer_fn(void *data);
    11.6  
    11.7 +#define HVM_HCALL_completed  0 /* hypercall completed - no further action */
    11.8 +#define HVM_HCALL_preempted  1 /* hypercall preempted - re-execute VMCALL */
    11.9 +#define HVM_HCALL_invalidate 2 /* invalidate ioemu-dm memory cache        */
   11.10  int hvm_do_hypercall(struct cpu_user_regs *pregs);
   11.11  
   11.12  void hvm_hlt(unsigned long rflags);
    12.1 --- a/xen/include/public/hvm/ioreq.h	Thu Apr 05 14:02:55 2007 +0100
    12.2 +++ b/xen/include/public/hvm/ioreq.h	Thu Apr 05 14:29:18 2007 +0100
    12.3 @@ -40,6 +40,7 @@
    12.4  #define IOREQ_TYPE_XCHG         5
    12.5  #define IOREQ_TYPE_ADD          6
    12.6  #define IOREQ_TYPE_TIMEOFFSET   7
    12.7 +#define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
    12.8  
    12.9  /*
   12.10   * VMExit dispatcher should cooperate with instruction decoder to