direct-io.hg

changeset 10257:b198bbfeec10

[PAE] Fix support for pgdirs above 4GB. This requires an interface change: %cr3
is extended so that the high-order address bits are held at the bottom of the
%cr3 value. Guests that understand this interface change publish the fact by
setting the option 'PAE=yes[extended-cr3]' in their __xen_guest section.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Jun 02 11:46:24 2006 +0100 (2006-06-02)
parents 1f619b6adbc6
children bcc2c9ea60d8
files linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S linux-2.6-xen-sparse/drivers/xen/core/smpboot.c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/traps.c xen/include/public/arch-x86_32.h xen/include/public/arch-x86_64.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S	Fri Jun 02 09:36:28 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S	Fri Jun 02 11:46:24 2006 +0100
     1.3 @@ -173,7 +173,7 @@ ENTRY(cpu_gdt_table)
     1.4  	.ascii	         "|pae_pgdir_above_4gb"
     1.5  	.ascii	         "|supervisor_mode_kernel"
     1.6  #ifdef CONFIG_X86_PAE
     1.7 -	.ascii	",PAE=yes"
     1.8 +	.ascii	",PAE=yes[extended-cr3]"
     1.9  #else
    1.10  	.ascii	",PAE=no"
    1.11  #endif
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Fri Jun 02 09:36:28 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Fri Jun 02 11:46:24 2006 +0100
     2.3 @@ -208,7 +208,7 @@ void cpu_initialize_context(unsigned int
     2.4  	ctxt.failsafe_callback_cs  = __KERNEL_CS;
     2.5  	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
     2.6  
     2.7 -	ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
     2.8 +	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
     2.9  #else /* __x86_64__ */
    2.10  	ctxt.user_regs.cs = __KERNEL_CS;
    2.11  	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
    2.12 @@ -220,7 +220,7 @@ void cpu_initialize_context(unsigned int
    2.13  	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
    2.14  	ctxt.syscall_callback_eip  = (unsigned long)system_call;
    2.15  
    2.16 -	ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
    2.17 +	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
    2.18  
    2.19  	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
    2.20  #endif
     3.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h	Fri Jun 02 09:36:28 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h	Fri Jun 02 11:46:24 2006 +0100
     3.3 @@ -116,10 +116,12 @@ extern struct task_struct * FASTCALL(__s
     3.4  	__asm__ ( \
     3.5  		"movl %%cr3,%0\n\t" \
     3.6  		:"=r" (__dummy)); \
     3.7 -	machine_to_phys(__dummy); \
     3.8 +	__dummy = xen_cr3_to_pfn(__dummy); \
     3.9 +	mfn_to_pfn(__dummy) << PAGE_SHIFT; \
    3.10  })
    3.11  #define write_cr3(x) ({						\
    3.12 -	maddr_t __dummy = phys_to_machine(x);			\
    3.13 +	unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT);	\
    3.14 +	__dummy = xen_pfn_to_cr3(__dummy);			\
    3.15  	__asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy));	\
    3.16  })
    3.17  
     4.1 --- a/xen/arch/x86/dom0_ops.c	Fri Jun 02 09:36:28 2006 +0100
     4.2 +++ b/xen/arch/x86/dom0_ops.c	Fri Jun 02 11:46:24 2006 +0100
     4.3 @@ -467,7 +467,7 @@ void arch_getdomaininfo_ctxt(
     4.4      if ( hvm_guest(v) )
     4.5          c->flags |= VGCF_HVM_GUEST;
     4.6  
     4.7 -    c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table);
     4.8 +    c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
     4.9  
    4.10      c->vm_assist = v->domain->vm_assist;
    4.11  }
     5.1 --- a/xen/arch/x86/domain.c	Fri Jun 02 09:36:28 2006 +0100
     5.2 +++ b/xen/arch/x86/domain.c	Fri Jun 02 11:46:24 2006 +0100
     5.3 @@ -259,7 +259,7 @@ int arch_set_info_guest(
     5.4      struct vcpu *v, struct vcpu_guest_context *c)
     5.5  {
     5.6      struct domain *d = v->domain;
     5.7 -    unsigned long phys_basetab = INVALID_MFN;
     5.8 +    unsigned long cr3_pfn;
     5.9      int i, rc;
    5.10  
    5.11      if ( !(c->flags & VGCF_HVM_GUEST) )
    5.12 @@ -322,12 +322,8 @@ int arch_set_info_guest(
    5.13  
    5.14      if ( !(c->flags & VGCF_HVM_GUEST) )
    5.15      {
    5.16 -        phys_basetab = c->ctrlreg[3];
    5.17 -        phys_basetab =
    5.18 -            (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) |
    5.19 -            (phys_basetab & ~PAGE_MASK);
    5.20 -
    5.21 -        v->arch.guest_table = pagetable_from_paddr(phys_basetab);
    5.22 +        cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3]));
    5.23 +        v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
    5.24      }
    5.25  
    5.26      if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
    5.27 @@ -342,7 +338,7 @@ int arch_set_info_guest(
    5.28      }
    5.29      else if ( shadow_mode_refcounts(d) )
    5.30      {
    5.31 -        if ( !get_page(mfn_to_page(phys_basetab>>PAGE_SHIFT), d) )
    5.32 +        if ( !get_page(mfn_to_page(cr3_pfn), d) )
    5.33          {
    5.34              destroy_gdt(v);
    5.35              return -EINVAL;
    5.36 @@ -350,7 +346,7 @@ int arch_set_info_guest(
    5.37      }
    5.38      else
    5.39      {
    5.40 -        if ( !get_page_and_type(mfn_to_page(phys_basetab>>PAGE_SHIFT), d,
    5.41 +        if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
    5.42                                  PGT_base_page_table) )
    5.43          {
    5.44              destroy_gdt(v);
     6.1 --- a/xen/arch/x86/traps.c	Fri Jun 02 09:36:28 2006 +0100
     6.2 +++ b/xen/arch/x86/traps.c	Fri Jun 02 11:46:24 2006 +0100
     6.3 @@ -1034,8 +1034,8 @@ static int emulate_privileged_op(struct 
     6.4              break;
     6.5              
     6.6          case 3: /* Read CR3 */
     6.7 -            *reg = pfn_to_paddr(mfn_to_gmfn(v->domain,
     6.8 -                                    pagetable_get_pfn(v->arch.guest_table)));
     6.9 +            *reg = xen_pfn_to_cr3(mfn_to_gmfn(
    6.10 +                v->domain, pagetable_get_pfn(v->arch.guest_table)));
    6.11              break;
    6.12  
    6.13          case 4: /* Read CR4 */
    6.14 @@ -1085,7 +1085,7 @@ static int emulate_privileged_op(struct 
    6.15          case 3: /* Write CR3 */
    6.16              LOCK_BIGLOCK(v->domain);
    6.17              cleanup_writable_pagetable(v->domain);
    6.18 -            (void)new_guest_cr3(gmfn_to_mfn(v->domain, paddr_to_pfn(*reg)));
    6.19 +            (void)new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
    6.20              UNLOCK_BIGLOCK(v->domain);
    6.21              break;
    6.22  
     7.1 --- a/xen/include/public/arch-x86_32.h	Fri Jun 02 09:36:28 2006 +0100
     7.2 +++ b/xen/include/public/arch-x86_32.h	Fri Jun 02 11:46:24 2006 +0100
     7.3 @@ -158,6 +158,14 @@ struct vcpu_guest_context {
     7.4  typedef struct vcpu_guest_context vcpu_guest_context_t;
     7.5  DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
     7.6  
     7.7 +/*
     7.8 + * Page-directory addresses above 4GB do not fit into architectural %cr3.
     7.9 + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
    7.10 + * must use the following accessor macros to pack/unpack valid MFNs.
    7.11 + */
    7.12 +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
    7.13 +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
    7.14 +
    7.15  struct arch_shared_info {
    7.16      unsigned long max_pfn;                  /* max pfn that appears in table */
    7.17      /* Frame containing list of mfns containing list of mfns containing p2m. */
     8.1 --- a/xen/include/public/arch-x86_64.h	Fri Jun 02 09:36:28 2006 +0100
     8.2 +++ b/xen/include/public/arch-x86_64.h	Fri Jun 02 11:46:24 2006 +0100
     8.3 @@ -234,6 +234,9 @@ struct vcpu_guest_context {
     8.4  typedef struct vcpu_guest_context vcpu_guest_context_t;
     8.5  DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
     8.6  
     8.7 +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
     8.8 +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
     8.9 +
    8.10  struct arch_shared_info {
    8.11      unsigned long max_pfn;                  /* max pfn that appears in table */
    8.12      /* Frame containing list of mfns containing list of mfns containing p2m. */