ia64/xen-unstable

changeset 13288:7c5eea5feebd

Introduce the _DOMF_compat domain flag and its supporting infrastructure,
along with several conditionals for operations that must distinguish
between native and compatibility-mode guests.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Emmanuel Ackaouy <ack@xensource.com>
date Fri Jan 05 17:32:00 2007 +0000 (2007-01-05)
parents c75883680f28
children 5a690aa51fb5
files config/x86_64.mk tools/libxc/xc_linux_build.c xen/arch/x86/boot/x86_64.S xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/setup.c xen/arch/x86/traps.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c xen/common/Makefile xen/common/elf.c xen/common/elf32.c xen/include/asm-x86/config.h xen/include/asm-x86/desc.h xen/include/asm-x86/ldt.h xen/include/asm-x86/mm.h xen/include/asm-x86/regs.h xen/include/asm-x86/x86_32/regs.h xen/include/asm-x86/x86_64/regs.h xen/include/public/arch-x86/xen-x86_64.h xen/include/public/arch-x86/xen.h xen/include/xen/elf.h xen/include/xen/sched.h
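
For orientation before the diff: the heart of the patch is a per-domain flag,
_DOMF_compat, plus an IS_COMPAT() predicate (see the xen/include/xen/sched.h
hunk at the end) that the rest of the tree branches on. A minimal sketch of
the pattern, with the flag plumbing reduced to a plain struct and the selector
values taken from the asm-x86/desc.h hunk below:

    /* Sketch only: flag handling reduced to a plain bit test; the tree
     * itself uses set_bit()/test_bit() on d->domain_flags.             */
    #define _DOMF_compat 6
    #define DOMF_compat  (1UL << _DOMF_compat)

    struct domain_sketch { unsigned long domain_flags; };

    #define IS_COMPAT(d) (((d)->domain_flags & DOMF_compat) != 0)

    /* Typical call site: choose the flat kernel CS for the guest type. */
    static unsigned int guest_kernel_cs(const struct domain_sketch *d)
    {
        return IS_COMPAT(d) ? 0xe019   /* FLAT_COMPAT_KERNEL_CS, ring 1 */
                            : 0xe033;  /* FLAT_KERNEL_CS, ring-3 64-bit */
    }
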
line diff
     1.1 --- a/config/x86_64.mk	Fri Jan 05 17:24:55 2007 +0000
     1.2 +++ b/config/x86_64.mk	Fri Jan 05 17:32:00 2007 +0000
     1.3 @@ -2,6 +2,7 @@ CONFIG_X86 := y
     1.4  CONFIG_X86_64 := y
     1.5  CONFIG_X86_$(XEN_OS) := y
     1.6  
     1.7 +CONFIG_COMPAT := y
     1.8  CONFIG_HVM := y
     1.9  CONFIG_MIGRATE := y
    1.10  CONFIG_XCUTILS := y
     2.1 --- a/tools/libxc/xc_linux_build.c	Fri Jan 05 17:24:55 2007 +0000
     2.2 +++ b/tools/libxc/xc_linux_build.c	Fri Jan 05 17:32:00 2007 +0000
     2.3 @@ -595,6 +595,7 @@ static int compat_check(int xc_handle, s
     2.4          return 0;
     2.5      }
     2.6  
     2.7 +#ifndef __x86_64__//temp
     2.8      if (strstr(xen_caps, "xen-3.0-x86_32p")) {
     2.9          if (dsi->pae_kernel == PAEKERN_bimodal) {
    2.10              dsi->pae_kernel = PAEKERN_extended_cr3;
    2.11 @@ -612,6 +613,7 @@ static int compat_check(int xc_handle, s
    2.12              return 0;
    2.13          }
    2.14      }
    2.15 +#endif
    2.16  
    2.17      return 1;
    2.18  }
     3.1 --- a/xen/arch/x86/boot/x86_64.S	Fri Jan 05 17:24:55 2007 +0000
     3.2 +++ b/xen/arch/x86/boot/x86_64.S	Fri Jan 05 17:32:00 2007 +0000
     3.3 @@ -224,15 +224,34 @@ high_start:
     3.4          .align PAGE_SIZE, 0
     3.5  ENTRY(gdt_table)
     3.6          .quad 0x0000000000000000     /* unused */
     3.7 -        .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 code, compatibility */
     3.8 -        .quad 0x00af9a000000ffff     /* 0xe010 ring 0 code, 64-bit mode   */
     3.9 -        .quad 0x00cf92000000ffff     /* 0xe018 ring 0 data                */
    3.10 +        .quad 0x00af9a000000ffff     /* 0xe008 ring 0 code, 64-bit mode   */
    3.11 +        .quad 0x00cf92000000ffff     /* 0xe010 ring 0 data                */
    3.12 +        .quad 0x0000000000000000     /* reserved                          */
    3.13          .quad 0x00cffa000000ffff     /* 0xe023 ring 3 code, compatibility */
    3.14          .quad 0x00cff2000000ffff     /* 0xe02b ring 3 data                */
    3.15          .quad 0x00affa000000ffff     /* 0xe033 ring 3 code, 64-bit mode   */
    3.16 -        .quad 0x0000000000000000     /* unused                            */
    3.17 +        .quad 0x00cf9a000000ffff     /* 0xe038 ring 0 code, compatibility */
    3.18 +        .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
    3.19          .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
    3.20  
    3.21 +#ifdef CONFIG_COMPAT
    3.22 +        .align PAGE_SIZE, 0
    3.23 +/* NB. Even rings != 0 get access to the full 4Gb, as only the            */
    3.24 +/*     (compatibility) machine->physical mapping table lives there.       */
    3.25 +ENTRY(compat_gdt_table)
    3.26 +        .quad 0x0000000000000000     /* unused */
    3.27 +        .quad 0x00af9a000000ffff     /* 0xe008 ring 0 code, 64-bit mode   */
    3.28 +        .quad 0x00cf92000000ffff     /* 0xe010 ring 0 data                */
    3.29 +        .quad 0x00cfba000000ffff     /* 0xe019 ring 1 code, compatibility */
    3.30 +        .quad 0x00cfb2000000ffff     /* 0xe021 ring 1 data                */
    3.31 +        .quad 0x00cffa000000ffff     /* 0xe02b ring 3 code, compatibility */
    3.32 +        .quad 0x00cff2000000ffff     /* 0xe033 ring 3 data                */
    3.33 +        .quad 0x00cf9a000000ffff     /* 0xe038 ring 0 code, compatibility */
    3.34 +        .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
    3.35 +        .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
    3.36 +# undef LIMIT
    3.37 +#endif
    3.38 +
    3.39  /* Initial PML4 -- level-4 page table. */
    3.40          .align PAGE_SIZE, 0
    3.41  ENTRY(idle_pg_table)
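
The new compat_gdt_table replaces the native table's reserved slot and 64-bit
ring-3 code segment with ring-1 code and data segments, giving a compatibility
guest kernel a ring to run in (these become the FLAT_COMPAT_* selectors in the
asm-x86/desc.h hunk below). The quads use the standard x86 descriptor
encoding; a small standalone decoder (illustrative, not from the tree) shows
why 0x00af9a000000ffff reads as 64-bit ring-0 code while 0x00cf9a000000ffff
is its compatibility-mode twin:

    #include <stdint.h>
    #include <stdio.h>

    /* Decode the fields of a GDT descriptor quad that matter here. */
    static void decode(uint64_t d)
    {
        unsigned int dpl  = (d >> 45) & 3;  /* descriptor privilege level */
        unsigned int code = (d >> 43) & 1;  /* 1 = code, 0 = data (S=1)   */
        unsigned int l    = (d >> 53) & 1;  /* L: 64-bit code segment     */
        unsigned int db   = (d >> 54) & 1;  /* D/B: 32-bit default size   */

        printf("%016llx: ring %u %s, %s\n", (unsigned long long)d, dpl,
               code ? "code" : "data",
               l ? "64-bit" : (db ? "compatibility (32-bit)" : "16-bit"));
    }

    int main(void)
    {
        decode(0x00af9a000000ffffULL);  /* ring 0 code, 64-bit mode      */
        decode(0x00cf9a000000ffffULL);  /* ring 0 code, compatibility    */
        decode(0x00cfba000000ffffULL);  /* ring 1 code, compatibility    */
        decode(0x00cff2000000ffffULL);  /* ring 3 data                   */
        return 0;
    }
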
     4.1 --- a/xen/arch/x86/domain.c	Fri Jan 05 17:24:55 2007 +0000
     4.2 +++ b/xen/arch/x86/domain.c	Fri Jan 05 17:32:00 2007 +0000
     4.3 @@ -283,17 +283,18 @@ int arch_set_info_guest(
     4.4  
     4.5      if ( !is_hvm_vcpu(v) )
     4.6      {
     4.7 -        fixup_guest_stack_selector(c->user_regs.ss);
     4.8 -        fixup_guest_stack_selector(c->kernel_ss);
     4.9 -        fixup_guest_code_selector(c->user_regs.cs);
    4.10 +        fixup_guest_stack_selector(d, c->user_regs.ss);
    4.11 +        fixup_guest_stack_selector(d, c->kernel_ss);
    4.12 +        fixup_guest_code_selector(d, c->user_regs.cs);
    4.13  
    4.14 -#ifdef __i386__
    4.15 -        fixup_guest_code_selector(c->event_callback_cs);
    4.16 -        fixup_guest_code_selector(c->failsafe_callback_cs);
    4.17 -#endif
    4.18 +        if ( CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d) )
    4.19 +        {
    4.20 +            fixup_guest_code_selector(d, c->event_callback_cs);
    4.21 +            fixup_guest_code_selector(d, c->failsafe_callback_cs);
    4.22 +        }
    4.23  
    4.24          for ( i = 0; i < 256; i++ )
    4.25 -            fixup_guest_code_selector(c->trap_ctxt[i].cs);
    4.26 +            fixup_guest_code_selector(d, c->trap_ctxt[i].cs);
    4.27  
    4.28          /* LDT safety checks. */
    4.29          if ( ((c->ldt_base & (PAGE_SIZE-1)) != 0) || 
    4.30 @@ -489,27 +490,30 @@ static void load_segments(struct vcpu *n
    4.31              all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
    4.32      }
    4.33  
    4.34 -    /* This can only be non-zero if selector is NULL. */
    4.35 -    if ( nctxt->fs_base )
    4.36 -        wrmsr(MSR_FS_BASE,
    4.37 -              nctxt->fs_base,
    4.38 -              nctxt->fs_base>>32);
    4.39 +    if ( !IS_COMPAT(n->domain) )
    4.40 +    {
    4.41 +        /* This can only be non-zero if selector is NULL. */
    4.42 +        if ( nctxt->fs_base )
    4.43 +            wrmsr(MSR_FS_BASE,
    4.44 +                  nctxt->fs_base,
    4.45 +                  nctxt->fs_base>>32);
    4.46  
    4.47 -    /* Most kernels have non-zero GS base, so don't bother testing. */
    4.48 -    /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
    4.49 -    wrmsr(MSR_SHADOW_GS_BASE,
    4.50 -          nctxt->gs_base_kernel,
    4.51 -          nctxt->gs_base_kernel>>32);
    4.52 +        /* Most kernels have non-zero GS base, so don't bother testing. */
    4.53 +        /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
    4.54 +        wrmsr(MSR_SHADOW_GS_BASE,
    4.55 +              nctxt->gs_base_kernel,
    4.56 +              nctxt->gs_base_kernel>>32);
    4.57  
    4.58 -    /* This can only be non-zero if selector is NULL. */
    4.59 -    if ( nctxt->gs_base_user )
    4.60 -        wrmsr(MSR_GS_BASE,
    4.61 -              nctxt->gs_base_user,
    4.62 -              nctxt->gs_base_user>>32);
    4.63 +        /* This can only be non-zero if selector is NULL. */
    4.64 +        if ( nctxt->gs_base_user )
    4.65 +            wrmsr(MSR_GS_BASE,
    4.66 +                  nctxt->gs_base_user,
    4.67 +                  nctxt->gs_base_user>>32);
    4.68  
    4.69 -    /* If in kernel mode then switch the GS bases around. */
    4.70 -    if ( n->arch.flags & TF_kernel_mode )
    4.71 -        __asm__ __volatile__ ( "swapgs" );
    4.72 +        /* If in kernel mode then switch the GS bases around. */
    4.73 +        if ( (n->arch.flags & TF_kernel_mode) )
    4.74 +            __asm__ __volatile__ ( "swapgs" );
    4.75 +    }
    4.76  
    4.77      if ( unlikely(!all_segs_okay) )
    4.78      {
    4.79 @@ -520,6 +524,55 @@ static void load_segments(struct vcpu *n
    4.80              (unsigned long *)nctxt->kernel_sp;
    4.81          unsigned long cs_and_mask, rflags;
    4.82  
    4.83 +        if ( IS_COMPAT(n->domain) )
    4.84 +        {
    4.85 +            unsigned int *esp = ring_1(regs) ?
    4.86 +                                (unsigned int *)regs->rsp :
    4.87 +                                (unsigned int *)nctxt->kernel_sp;
    4.88 +            unsigned int cs_and_mask, eflags;
    4.89 +            int ret = 0;
    4.90 +
    4.91 +            /* CS longword also contains full evtchn_upcall_mask. */
    4.92 +            cs_and_mask = (unsigned short)regs->cs |
    4.93 +                ((unsigned int)n->vcpu_info->evtchn_upcall_mask << 16);
    4.94 +            /* Fold upcall mask into RFLAGS.IF. */
    4.95 +            eflags  = regs->_eflags & ~X86_EFLAGS_IF;
    4.96 +            eflags |= !n->vcpu_info->evtchn_upcall_mask << 9;
    4.97 +
    4.98 +            if ( !ring_1(regs) )
    4.99 +            {
   4.100 +                ret  = put_user(regs->ss,       esp-1);
   4.101 +                ret |= put_user(regs->_esp,     esp-2);
   4.102 +                esp -= 2;
   4.103 +            }
   4.104 +
   4.105 +            if ( ret |
   4.106 +                 put_user(eflags,              esp-1) |
   4.107 +                 put_user(cs_and_mask,         esp-2) |
   4.108 +                 put_user(regs->_eip,          esp-3) |
   4.109 +                 put_user(nctxt->user_regs.gs, esp-4) |
   4.110 +                 put_user(nctxt->user_regs.fs, esp-5) |
   4.111 +                 put_user(nctxt->user_regs.es, esp-6) |
   4.112 +                 put_user(nctxt->user_regs.ds, esp-7) )
   4.113 +            {
   4.114 +                gdprintk(XENLOG_ERR, "Error while creating compat "
   4.115 +                         "failsafe callback frame.\n");
   4.116 +                domain_crash(n->domain);
   4.117 +            }
   4.118 +
   4.119 +            if ( test_bit(_VGCF_failsafe_disables_events,
   4.120 +                          &n->arch.guest_context.flags) )
   4.121 +                n->vcpu_info->evtchn_upcall_mask = 1;
   4.122 +
   4.123 +            regs->entry_vector  = TRAP_syscall;
   4.124 +            regs->_eflags      &= 0xFFFCBEFFUL;
   4.125 +            regs->ss            = FLAT_COMPAT_KERNEL_SS;
   4.126 +            regs->_esp          = (unsigned long)(esp-7);
   4.127 +            regs->cs            = FLAT_COMPAT_KERNEL_CS;
   4.128 +            regs->_eip          = nctxt->failsafe_callback_eip;
   4.129 +            return;
   4.130 +        }
   4.131 +
   4.132          if ( !(n->arch.flags & TF_kernel_mode) )
   4.133              toggle_guest_mode(n);
   4.134          else
   4.135 @@ -581,7 +634,7 @@ static void save_segments(struct vcpu *v
   4.136      if ( regs->es )
   4.137          dirty_segment_mask |= DIRTY_ES;
   4.138  
   4.139 -    if ( regs->fs )
   4.140 +    if ( regs->fs || IS_COMPAT(v->domain) )
   4.141      {
   4.142          dirty_segment_mask |= DIRTY_FS;
   4.143          ctxt->fs_base = 0; /* != 0 selector kills fs_base */
   4.144 @@ -591,7 +644,7 @@ static void save_segments(struct vcpu *v
   4.145          dirty_segment_mask |= DIRTY_FS_BASE;
   4.146      }
   4.147  
   4.148 -    if ( regs->gs )
   4.149 +    if ( regs->gs || IS_COMPAT(v->domain) )
   4.150      {
   4.151          dirty_segment_mask |= DIRTY_GS;
   4.152          ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
   4.153 @@ -726,6 +779,23 @@ void context_switch(struct vcpu *prev, s
   4.154      {
   4.155          __context_switch();
   4.156  
   4.157 +#ifdef CONFIG_COMPAT
   4.158 +        if ( is_idle_vcpu(prev)
   4.159 +             || IS_COMPAT(prev->domain) != IS_COMPAT(next->domain) )
   4.160 +        {
   4.161 +            uint32_t efer_lo, efer_hi;
   4.162 +
   4.163 +            local_flush_tlb_one(GDT_VIRT_START(next) + FIRST_RESERVED_GDT_BYTE);
   4.164 +
   4.165 +            rdmsr(MSR_EFER, efer_lo, efer_hi);
   4.166 +            if ( !IS_COMPAT(next->domain) == !(efer_lo & EFER_SCE) )
   4.167 +            {
   4.168 +                efer_lo ^= EFER_SCE;
   4.169 +                wrmsr(MSR_EFER, efer_lo, efer_hi);
   4.170 +            }
   4.171 +        }
   4.172 +#endif
   4.173 +
   4.174          /* Re-enable interrupts before restoring state which may fault. */
   4.175          local_irq_enable();
   4.176  
   4.177 @@ -938,6 +1008,10 @@ void domain_relinquish_resources(struct 
   4.178                  put_page(mfn_to_page(pfn));
   4.179              else
   4.180                  put_page_and_type(mfn_to_page(pfn));
   4.181 +#ifdef __x86_64__
   4.182 +            if ( pfn == pagetable_get_pfn(v->arch.guest_table_user) )
   4.183 +                v->arch.guest_table_user = pagetable_null();
   4.184 +#endif
   4.185              v->arch.guest_table = pagetable_null();
   4.186          }
   4.187  
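
The compat branch added to load_segments() mirrors the native failsafe
callback path but builds a frame of 32-bit words on the guest kernel stack. A
hypothetical struct view of what the put_user() sequence lays out (lowest
address first; the final regs->_esp points at ds):

    /* Hypothetical layout helper matching the put_user() calls above. */
    struct compat_failsafe_frame {
        uint32_t ds, es, fs, gs; /* saved data segment selectors        */
        uint32_t eip;            /* interrupted EIP                     */
        uint32_t cs_and_mask;    /* CS in the low half, the full
                                    evtchn_upcall_mask above bit 16     */
        uint32_t eflags;         /* EFLAGS with IF folded from the mask */
        uint32_t esp, ss;        /* present only when ring 3 was
                                    interrupted (the !ring_1() case)    */
    };
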
     5.1 --- a/xen/arch/x86/domain_build.c	Fri Jan 05 17:24:55 2007 +0000
     5.2 +++ b/xen/arch/x86/domain_build.c	Fri Jan 05 17:32:00 2007 +0000
     5.3 @@ -319,10 +319,38 @@ int construct_dom0(struct domain *d,
     5.4  
     5.5      nr_pages = compute_dom0_nr_pages();
     5.6  
     5.7 -    if ( (rc = parseelfimage(&dsi)) != 0 )
     5.8 -        return rc;
     5.9 +    rc = parseelfimage(&dsi);
    5.10 +#ifdef CONFIG_COMPAT
    5.11 +    if ( rc == -ENOSYS
    5.12 +         && (rc = parseelf32image(&dsi)) == 0 )
    5.13 +    {
    5.14 +        l1_pgentry_t gdt_l1e;
    5.15 +
    5.16 +        set_bit(_DOMF_compat, &d->domain_flags);
    5.17 +
    5.18 +        if ( nr_pages != (unsigned int)nr_pages )
    5.19 +            nr_pages = UINT_MAX;
    5.20  
    5.21 -    xen_pae  = (CONFIG_PAGING_LEVELS == 3);
    5.22 +        /*
    5.23 +         * Map compatibility Xen segments into every VCPU's GDT. See
    5.24 +         * arch_domain_create() for further comments.
    5.25 +         */
    5.26 +        gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
    5.27 +                                PAGE_HYPERVISOR);
    5.28 +        for ( i = 0; i < MAX_VIRT_CPUS; i++ )
    5.29 +            d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
    5.30 +                                     FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
    5.31 +        local_flush_tlb_one(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
    5.32 +    }
    5.33 +#endif
    5.34 +    if ( rc != 0)
    5.35 +    {
    5.36 +        if ( rc == -ENOSYS )
    5.37 +            printk("DOM0 image is not a Xen-compatible Elf image.\n");
    5.38 +       return rc;
    5.39 +    }
    5.40 +
    5.41 +    xen_pae  = (CONFIG_PAGING_LEVELS == 3) || IS_COMPAT(d);
    5.42      if (dsi.pae_kernel == PAEKERN_bimodal)
    5.43          dom0_pae = xen_pae; 
    5.44      else
    5.45 @@ -338,7 +366,13 @@ int construct_dom0(struct domain *d,
    5.46              dsi.pae_kernel == PAEKERN_bimodal) )
    5.47              set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
    5.48  
    5.49 -    if ( (p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES)) != NULL )
    5.50 +#ifdef CONFIG_COMPAT
    5.51 +    if ( IS_COMPAT(d) )
    5.52 +        p = xen_elf32note_string(&dsi, XEN_ELFNOTE_FEATURES);
    5.53 +    else
    5.54 +#endif
    5.55 +        p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES);
    5.56 +    if ( p != NULL )
    5.57      {
    5.58          parse_features(p,
    5.59                         dom0_features_supported,
    5.60 @@ -590,6 +624,12 @@ int construct_dom0(struct domain *d,
    5.61          return -EINVAL;
    5.62      }
    5.63  
    5.64 +    if ( IS_COMPAT(d) )
    5.65 +    {
    5.66 +        v->arch.guest_context.failsafe_callback_cs = FLAT_COMPAT_KERNEL_CS;
    5.67 +        v->arch.guest_context.event_callback_cs    = FLAT_COMPAT_KERNEL_CS;
    5.68 +    }
    5.69 +
    5.70      /* WARNING: The new domain must have its 'processor' field filled in! */
    5.71      maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
    5.72      l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
    5.73 @@ -599,6 +639,8 @@ int construct_dom0(struct domain *d,
    5.74      l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
    5.75          l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
    5.76      v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
    5.77 +    if ( IS_COMPAT(d) )
    5.78 +        v->arch.guest_table_user = v->arch.guest_table;
    5.79  
    5.80      l4tab += l4_table_offset(dsi.v_start);
    5.81      mfn = alloc_spfn;
    5.82 @@ -711,10 +753,20 @@ int construct_dom0(struct domain *d,
    5.83      write_ptbase(v);
    5.84  
    5.85      /* Copy the OS image and free temporary buffer. */
    5.86 -    (void)loadelfimage(&dsi);
    5.87 -
    5.88 -    hypercall_page =
    5.89 -        xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &hypercall_page_defined);
    5.90 +#ifdef CONFIG_COMPAT
    5.91 +    if ( IS_COMPAT(d) )
    5.92 +    {
    5.93 +        (void)loadelf32image(&dsi);
    5.94 +        hypercall_page =
    5.95 +            xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &hypercall_page_defined);
    5.96 +    }
    5.97 +    else
    5.98 +#endif
    5.99 +    {
   5.100 +        (void)loadelfimage(&dsi);
   5.101 +        hypercall_page =
   5.102 +            xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &hypercall_page_defined);
   5.103 +    }
   5.104      if ( hypercall_page_defined )
   5.105      {
   5.106          if ( (hypercall_page < dsi.v_start) || (hypercall_page >= v_end) )
   5.107 @@ -747,7 +799,7 @@ int construct_dom0(struct domain *d,
   5.108      si->mfn_list     = vphysmap_start;
   5.109      sprintf(si->magic, "xen-%i.%i-x86_%d%s",
   5.110              xen_major_version(), xen_minor_version(),
   5.111 -            BITS_PER_LONG, xen_pae ? "p" : "");
   5.112 +            !IS_COMPAT(d) ? BITS_PER_LONG : 32, xen_pae ? "p" : "");
   5.113  
   5.114      /* Write the phys->machine and machine->phys table entries. */
   5.115      for ( pfn = 0; pfn < d->tot_pages; pfn++ )
   5.116 @@ -819,9 +871,11 @@ int construct_dom0(struct domain *d,
   5.117       *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
   5.118       */
   5.119      regs = &v->arch.guest_context.user_regs;
   5.120 -    regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS;
   5.121 -    regs->ss = FLAT_KERNEL_SS;
   5.122 -    regs->cs = FLAT_KERNEL_CS;
   5.123 +    regs->ds = regs->es = regs->fs = regs->gs = !IS_COMPAT(d)
   5.124 +                                                ? FLAT_KERNEL_DS
   5.125 +                                                : FLAT_COMPAT_KERNEL_DS;
   5.126 +    regs->ss = !IS_COMPAT(d) ? FLAT_KERNEL_SS : FLAT_COMPAT_KERNEL_SS;
   5.127 +    regs->cs = !IS_COMPAT(d) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
   5.128      regs->eip = dsi.v_kernentry;
   5.129      regs->esp = vstack_end;
   5.130      regs->esi = vstartinfo_start;
   5.131 @@ -906,13 +960,28 @@ int elf_sanity_check(const Elf_Ehdr *ehd
   5.132           (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
   5.133           (ehdr->e_type != ET_EXEC) )
   5.134      {
   5.135 -        printk("DOM0 image is not a Xen-compatible Elf image.\n");
   5.136          return 0;
   5.137      }
   5.138  
   5.139      return 1;
   5.140  }
   5.141  
   5.142 +#ifdef CONFIG_COMPAT
   5.143 +int elf32_sanity_check(const Elf32_Ehdr *ehdr)
   5.144 +{
   5.145 +    if ( !IS_ELF(*ehdr) ||
   5.146 +         (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
   5.147 +         (ehdr->e_machine != EM_386) ||
   5.148 +         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
   5.149 +         (ehdr->e_type != ET_EXEC) )
   5.150 +    {
   5.151 +        return 0;
   5.152 +    }
   5.153 +
   5.154 +    return 1;
   5.155 +}
   5.156 +#endif
   5.157 +
   5.158  /*
   5.159   * Local variables:
   5.160   * mode: C
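
Two details knit this file together with xen/common/elf.c below: the error
message moves out of elf_sanity_check() into the one caller that wants it, and
parseelfimage() now returns -ENOSYS rather than -EINVAL for a non-native
image, which construct_dom0() treats as the cue to retry with the 32-bit
parser. The probe-and-fallback, reduced to a sketch:

    /* Sketch of the fallback added to construct_dom0(). */
    rc = parseelfimage(&dsi);                 /* native ELF parser      */
    #ifdef CONFIG_COMPAT
    if ( rc == -ENOSYS && (rc = parseelf32image(&dsi)) == 0 )
        set_bit(_DOMF_compat, &d->domain_flags); /* mark 32-on-64 dom0  */
    #endif
    if ( rc != 0 )
        return rc;                            /* not a Xen ELF at all   */
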
     6.1 --- a/xen/arch/x86/mm.c	Fri Jan 05 17:24:55 2007 +0000
     6.2 +++ b/xen/arch/x86/mm.c	Fri Jan 05 17:32:00 2007 +0000
     6.3 @@ -433,7 +433,7 @@ static int alloc_segdesc_page(struct pag
     6.4      descs = map_domain_page(page_to_mfn(page));
     6.5  
     6.6      for ( i = 0; i < 512; i++ )
     6.7 -        if ( unlikely(!check_descriptor(&descs[i])) )
     6.8 +        if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
     6.9              goto fail;
    6.10  
    6.11      unmap_domain_page(descs);
    6.12 @@ -2835,7 +2835,7 @@ long do_update_descriptor(u64 pa, u64 de
    6.13      mfn = gmfn_to_mfn(dom, gmfn);
    6.14      if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
    6.15           !mfn_valid(mfn) ||
    6.16 -         !check_descriptor(&d) )
    6.17 +         !check_descriptor(dom, &d) )
    6.18      {
    6.19          UNLOCK_BIGLOCK(dom);
    6.20          return -EINVAL;
     7.1 --- a/xen/arch/x86/setup.c	Fri Jan 05 17:24:55 2007 +0000
     7.2 +++ b/xen/arch/x86/setup.c	Fri Jan 05 17:32:00 2007 +0000
     7.3 @@ -791,6 +791,9 @@ void arch_get_xen_caps(xen_capabilities_
     7.4  #elif defined(CONFIG_X86_64)
     7.5  
     7.6      p += sprintf(p, "xen-%d.%d-x86_64 ", major, minor);
     7.7 +#ifdef CONFIG_COMPAT
     7.8 +    p += sprintf(p, "xen-%d.%d-x86_32p ", major, minor);
     7.9 +#endif
    7.10      if ( hvm_enabled )
    7.11      {
    7.12          p += sprintf(p, "hvm-%d.%d-x86_32 ", major, minor);
     8.1 --- a/xen/arch/x86/traps.c	Fri Jan 05 17:24:55 2007 +0000
     8.2 +++ b/xen/arch/x86/traps.c	Fri Jan 05 17:32:00 2007 +0000
     8.3 @@ -382,7 +382,7 @@ static int do_guest_trap(
     8.4      if ( TI_GET_IF(ti) )
     8.5          tb->flags |= TBF_INTERRUPT;
     8.6  
     8.7 -    if ( unlikely(null_trap_bounce(tb)) )
     8.8 +    if ( unlikely(null_trap_bounce(v, tb)) )
     8.9          gdprintk(XENLOG_WARNING, "Unhandled %s fault/trap [#%d] in "
    8.10                   "domain %d on VCPU %d [ec=%04x]\n",
    8.11                   trapstr(trapnr), trapnr, v->domain->domain_id, v->vcpu_id,
    8.12 @@ -673,7 +673,7 @@ void propagate_page_fault(unsigned long 
    8.13      tb->eip        = ti->address;
    8.14      if ( TI_GET_IF(ti) )
    8.15          tb->flags |= TBF_INTERRUPT;
    8.16 -    if ( unlikely(null_trap_bounce(tb)) )
    8.17 +    if ( unlikely(null_trap_bounce(v, tb)) )
    8.18      {
    8.19          printk("Unhandled page fault in domain %d on VCPU %d (ec=%04X)\n",
    8.20                 v->domain->domain_id, v->vcpu_id, error_code);
    8.21 @@ -1785,6 +1785,13 @@ void set_tss_desc(unsigned int n, void *
    8.22          (unsigned long)addr,
    8.23          offsetof(struct tss_struct, __cacheline_filler) - 1,
    8.24          9);
    8.25 +#ifdef CONFIG_COMPAT
    8.26 +    _set_tssldt_desc(
    8.27 +        compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
    8.28 +        (unsigned long)addr,
    8.29 +        offsetof(struct tss_struct, __cacheline_filler) - 1,
    8.30 +        11);
    8.31 +#endif
    8.32  }
    8.33  
    8.34  void __init trap_init(void)
    8.35 @@ -1859,7 +1866,7 @@ long do_set_trap_table(XEN_GUEST_HANDLE(
    8.36          if ( cur.address == 0 )
    8.37              break;
    8.38  
    8.39 -        fixup_guest_code_selector(cur.cs);
    8.40 +        fixup_guest_code_selector(current->domain, cur.cs);
    8.41  
    8.42          memcpy(&dst[cur.vector], &cur, sizeof(cur));
    8.43  
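
Note the asymmetry in set_tss_desc(): the native GDT entry is built with type
9 (available TSS) while the compat mirror gets type 11 (busy TSS), presumably
because ltr is only ever executed against gdt_table and sets the busy bit
there, so the compat copy must already carry the state TR holds whenever
compat_gdt_table is the live GDT. The architectural type values involved (the
tree passes them as bare numbers):

    /* x86 system-descriptor types used by _set_tssldt_desc() callers. */
    enum x86_sys_desc_type {
        SYS_DESC_ldt       = 2,   /* load_LDT() in asm-x86/ldt.h       */
        SYS_DESC_tss_avail = 9,   /* gdt_table entry above             */
        SYS_DESC_tss_busy  = 11,  /* compat_gdt_table entry above      */
    };
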
     9.1 --- a/xen/arch/x86/x86_32/mm.c	Fri Jan 05 17:24:55 2007 +0000
     9.2 +++ b/xen/arch/x86/x86_32/mm.c	Fri Jan 05 17:32:00 2007 +0000
     9.3 @@ -230,7 +230,7 @@ long do_stack_switch(unsigned long ss, u
     9.4      int nr = smp_processor_id();
     9.5      struct tss_struct *t = &init_tss[nr];
     9.6  
     9.7 -    fixup_guest_stack_selector(ss);
     9.8 +    fixup_guest_stack_selector(current->domain, ss);
     9.9  
    9.10      current->arch.guest_context.kernel_ss = ss;
    9.11      current->arch.guest_context.kernel_sp = esp;
    9.12 @@ -241,7 +241,7 @@ long do_stack_switch(unsigned long ss, u
    9.13  }
    9.14  
    9.15  /* Returns TRUE if given descriptor is valid for GDT or LDT. */
    9.16 -int check_descriptor(struct desc_struct *d)
    9.17 +int check_descriptor(const struct domain *dom, struct desc_struct *d)
    9.18  {
    9.19      unsigned long base, limit;
    9.20      u32 a = d->a, b = d->b;
    9.21 @@ -261,8 +261,8 @@ int check_descriptor(struct desc_struct 
    9.22       * gates (consider a call gate pointing at another kernel descriptor with 
    9.23       * DPL 0 -- this would get the OS ring-0 privileges).
    9.24       */
    9.25 -    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) )
    9.26 -        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13);
    9.27 +    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
    9.28 +        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);
    9.29  
    9.30      if ( !(b & _SEGMENT_S) )
    9.31      {
    9.32 @@ -284,8 +284,8 @@ int check_descriptor(struct desc_struct 
    9.33  
    9.34          /* Validate and fix up the target code selector. */
    9.35          cs = a >> 16;
    9.36 -        fixup_guest_code_selector(cs);
    9.37 -        if ( !guest_gate_selector_okay(cs) )
    9.38 +        fixup_guest_code_selector(dom, cs);
    9.39 +        if ( !guest_gate_selector_okay(dom, cs) )
    9.40              goto bad;
    9.41          a = d->a = (d->a & 0xffffU) | (cs << 16);
    9.42  
    10.1 --- a/xen/arch/x86/x86_32/traps.c	Fri Jan 05 17:24:55 2007 +0000
    10.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Jan 05 17:32:00 2007 +0000
    10.3 @@ -296,7 +296,7 @@ void init_int80_direct_trap(struct vcpu 
    10.4       * switch to the Xen stack and we need to swap back to the guest
    10.5       * kernel stack before passing control to the system call entry point.
    10.6       */
    10.7 -    if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) ||
    10.8 +    if ( TI_GET_IF(ti) || !guest_gate_selector_okay(v->domain, ti->cs) ||
    10.9           supervisor_mode_kernel )
   10.10      {
   10.11          v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
   10.12 @@ -326,7 +326,7 @@ static long register_guest_callback(stru
   10.13      long ret = 0;
   10.14      struct vcpu *v = current;
   10.15  
   10.16 -    fixup_guest_code_selector(reg->address.cs);
   10.17 +    fixup_guest_code_selector(v->domain, reg->address.cs);
   10.18  
   10.19      switch ( reg->type )
   10.20      {
    11.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Fri Jan 05 17:24:55 2007 +0000
    11.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Fri Jan 05 17:32:00 2007 +0000
    11.3 @@ -58,12 +58,16 @@ void __dummy__(void)
    11.4      OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
    11.5      OFFSET(VCPU_event_addr, struct vcpu,
    11.6             arch.guest_context.event_callback_eip);
    11.7 +    OFFSET(VCPU_event_sel, struct vcpu,
    11.8 +           arch.guest_context.event_callback_cs);
    11.9      OFFSET(VCPU_failsafe_addr, struct vcpu,
   11.10             arch.guest_context.failsafe_callback_eip);
   11.11 +    OFFSET(VCPU_failsafe_sel, struct vcpu,
   11.12 +           arch.guest_context.failsafe_callback_cs);
   11.13      OFFSET(VCPU_syscall_addr, struct vcpu,
   11.14             arch.guest_context.syscall_callback_eip);
   11.15 -    OFFSET(VCPU_kernel_sp, struct vcpu,
   11.16 -           arch.guest_context.kernel_sp);
   11.17 +    OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
   11.18 +    OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
   11.19      OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
   11.20      OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
   11.21      OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
    12.1 --- a/xen/arch/x86/x86_64/mm.c	Fri Jan 05 17:24:55 2007 +0000
    12.2 +++ b/xen/arch/x86/x86_64/mm.c	Fri Jan 05 17:32:00 2007 +0000
    12.3 @@ -231,7 +231,7 @@ long subarch_memory_op(int op, XEN_GUEST
    12.4  
    12.5  long do_stack_switch(unsigned long ss, unsigned long esp)
    12.6  {
    12.7 -    fixup_guest_stack_selector(ss);
    12.8 +    fixup_guest_stack_selector(current->domain, ss);
    12.9      current->arch.guest_context.kernel_ss = ss;
   12.10      current->arch.guest_context.kernel_sp = esp;
   12.11      return 0;
   12.12 @@ -291,7 +291,7 @@ long do_set_segment_base(unsigned int wh
   12.13  
   12.14  
   12.15  /* Returns TRUE if given descriptor is valid for GDT or LDT. */
   12.16 -int check_descriptor(struct desc_struct *d)
   12.17 +int check_descriptor(const struct domain *dom, struct desc_struct *d)
   12.18  {
   12.19      u32 a = d->a, b = d->b;
   12.20      u16 cs;
   12.21 @@ -301,8 +301,8 @@ int check_descriptor(struct desc_struct 
   12.22          goto good;
   12.23  
   12.24      /* Check and fix up the DPL. */
   12.25 -    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) )
   12.26 -        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13);
   12.27 +    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
   12.28 +        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);
   12.29  
   12.30      /* All code and data segments are okay. No base/limit checking. */
   12.31      if ( (b & _SEGMENT_S) )
   12.32 @@ -318,8 +318,8 @@ int check_descriptor(struct desc_struct 
   12.33  
   12.34      /* Validate and fix up the target code selector. */
   12.35      cs = a >> 16;
   12.36 -    fixup_guest_code_selector(cs);
   12.37 -    if ( !guest_gate_selector_okay(cs) )
   12.38 +    fixup_guest_code_selector(dom, cs);
   12.39 +    if ( !guest_gate_selector_okay(dom, cs) )
   12.40          goto bad;
   12.41      a = d->a = (d->a & 0xffffU) | (cs << 16);
   12.42  
    13.1 --- a/xen/arch/x86/x86_64/traps.c	Fri Jan 05 17:24:55 2007 +0000
    13.2 +++ b/xen/arch/x86/x86_64/traps.c	Fri Jan 05 17:32:00 2007 +0000
    13.3 @@ -178,6 +178,8 @@ asmlinkage void do_double_fault(struct c
    13.4  
    13.5  void toggle_guest_mode(struct vcpu *v)
    13.6  {
    13.7 +    if ( IS_COMPAT(v->domain) )
    13.8 +        return;
    13.9      v->arch.flags ^= TF_kernel_mode;
   13.10      __asm__ __volatile__ ( "swapgs" );
   13.11      update_cr3(v);
    14.1 --- a/xen/common/Makefile	Fri Jan 05 17:24:55 2007 +0000
    14.2 +++ b/xen/common/Makefile	Fri Jan 05 17:32:00 2007 +0000
    14.3 @@ -3,6 +3,7 @@ obj-y += bitmap.o
    14.4  obj-y += domctl.o
    14.5  obj-y += domain.o
    14.6  obj-y += elf.o
    14.7 +obj-$(CONFIG_COMPAT) += elf32.o
    14.8  obj-y += event_channel.o
    14.9  obj-y += grant_table.o
   14.10  obj-y += kernel.o
    15.1 --- a/xen/common/elf.c	Fri Jan 05 17:24:55 2007 +0000
    15.2 +++ b/xen/common/elf.c	Fri Jan 05 17:32:00 2007 +0000
    15.3 @@ -203,7 +203,7 @@ int parseelfimage(struct domain_setup_in
    15.4      int h, virt_base_defined, elf_pa_off_defined, virt_entry_defined;
    15.5  
    15.6      if ( !elf_sanity_check(ehdr) )
    15.7 -        return -EINVAL;
    15.8 +        return -ENOSYS;
    15.9  
   15.10      if ( (ehdr->e_phoff + (ehdr->e_phnum*ehdr->e_phentsize)) > image_len )
   15.11      {
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xen/common/elf32.c	Fri Jan 05 17:32:00 2007 +0000
    16.3 @@ -0,0 +1,19 @@
    16.4 +/******************************************************************************
    16.5 + * elf32.c
    16.6 + *
    16.7 + * Stub to support 32-bit ELF images on 64-bit platforms.
    16.8 + */
    16.9 +
   16.10 +#include <xen/config.h>
   16.11 +#undef ELFSIZE
   16.12 +#define ELFSIZE 32
   16.13 +#include <xen/types.h>
   16.14 +#include <xen/elf.h>
   16.15 +
   16.16 +#define xen_elfnote_string xen_elf32note_string
   16.17 +#define xen_elfnote_numeric xen_elf32note_numeric
   16.18 +#define parseelfimage parseelf32image
   16.19 +#define loadelfimage loadelf32image
   16.20 +#define elf_sanity_check elf32_sanity_check
   16.21 +
   16.22 +#include "elf.c"
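
elf32.c is the entire 32-bit loader: it renames the public entry points,
forces ELFSIZE to 32 so that xen/elf.h selects the Elf32_* types, and then
textually includes elf.c, compiling the same source a second time. The same
build-one-source-twice pattern in miniature (hypothetical files, not from the
tree):

    /* sum.c -- generic source, compiled once per word width. */
    #include <stdint.h>
    #ifndef WIDTH
    #define WIDTH 64
    #endif
    #if WIDTH == 32
    typedef uint32_t word_t;
    #else
    typedef uint64_t word_t;
    #endif

    word_t checksum(const word_t *p, unsigned int n)
    {
        word_t s = 0;
        while ( n-- )
            s += *p++;
        return s;
    }

    /* sum32.c -- the analogue of elf32.c: retarget, rename, re-include. */
    #undef  WIDTH
    #define WIDTH 32
    #define word_t   word32_t      /* keep the typedefs from clashing   */
    #define checksum checksum32    /* rename the public entry point     */
    #include "sum.c"
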
    17.1 --- a/xen/include/asm-x86/config.h	Fri Jan 05 17:24:55 2007 +0000
    17.2 +++ b/xen/include/asm-x86/config.h	Fri Jan 05 17:32:00 2007 +0000
    17.3 @@ -87,6 +87,7 @@
    17.4  #if defined(__x86_64__)
    17.5  
    17.6  #define CONFIG_X86_64 1
    17.7 +#define CONFIG_COMPAT 1
    17.8  
    17.9  #define asmlinkage
   17.10  
   17.11 @@ -181,13 +182,21 @@
   17.12  #define DIRECTMAP_VIRT_START    (PML4_ADDR(262))
   17.13  #define DIRECTMAP_VIRT_END      (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
   17.14  
   17.15 +#define __HYPERVISOR_COMPAT_VIRT_START 0xF5800000
   17.16 +#define HYPERVISOR_COMPAT_VIRT_START   \
   17.17 +    mk_unsigned_long(__HYPERVISOR_COMPAT_VIRT_START)
   17.18 +#define MACH2PHYS_COMPAT_VIRT_START    HYPERVISOR_COMPAT_VIRT_START
   17.19 +#define MACH2PHYS_COMPAT_VIRT_END      0xFFE00000
   17.20 +#define MACH2PHYS_COMPAT_NR_ENTRIES    \
   17.21 +    ((MACH2PHYS_COMPAT_VIRT_END-MACH2PHYS_COMPAT_VIRT_START)>>2)
   17.22 +
   17.23  #define PGT_base_page_table     PGT_l4_page_table
   17.24  
   17.25 -#define __HYPERVISOR_CS64 0xe010
   17.26 -#define __HYPERVISOR_CS32 0xe008
   17.27 +#define __HYPERVISOR_CS64 0xe008
   17.28 +#define __HYPERVISOR_CS32 0xe038
   17.29  #define __HYPERVISOR_CS   __HYPERVISOR_CS64
   17.30  #define __HYPERVISOR_DS64 0x0000
   17.31 -#define __HYPERVISOR_DS32 0xe018
   17.32 +#define __HYPERVISOR_DS32 0xe010
   17.33  #define __HYPERVISOR_DS   __HYPERVISOR_DS64
   17.34  
   17.35  /* For generic assembly code: use macros to define operation/operand sizes. */
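
The >>2 in MACH2PHYS_COMPAT_NR_ENTRIES reflects the 4-byte entries of a
32-bit guest's machine-to-physical table. A quick standalone check of the
constants (illustrative only):

    #include <assert.h>

    int main(void)
    {
        unsigned long start = 0xF5800000UL, end = 0xFFE00000UL;
        unsigned long entries = (end - start) >> 2;   /* 4-byte entries */

        assert(entries == 0x02980000UL);
        /* One entry per 4KiB machine page, so this window can describe
         * up to 166GiB of machine memory to a compat guest.            */
        assert((unsigned long long)entries * 4096 == 166ULL << 30);
        return 0;
    }
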
    18.1 --- a/xen/include/asm-x86/desc.h	Fri Jan 05 17:24:55 2007 +0000
    18.2 +++ b/xen/include/asm-x86/desc.h	Fri Jan 05 17:32:00 2007 +0000
    18.3 @@ -18,31 +18,76 @@
    18.4  
    18.5  #define LDT_ENTRY_SIZE 8
    18.6  
    18.7 +#if defined(__x86_64__)
    18.8 +
    18.9 +#define FLAT_COMPAT_RING1_CS 0xe019  /* GDT index 259 */
   18.10 +#define FLAT_COMPAT_RING1_DS 0xe021  /* GDT index 260 */
   18.11 +#define FLAT_COMPAT_RING1_SS 0xe021  /* GDT index 260 */
   18.12 +#define FLAT_COMPAT_RING3_CS 0xe02b  /* GDT index 261 */
   18.13 +#define FLAT_COMPAT_RING3_DS 0xe033  /* GDT index 262 */
   18.14 +#define FLAT_COMPAT_RING3_SS 0xe033  /* GDT index 262 */
   18.15 +
   18.16 +#define FLAT_COMPAT_KERNEL_DS FLAT_COMPAT_RING1_DS
   18.17 +#define FLAT_COMPAT_KERNEL_CS FLAT_COMPAT_RING1_CS
   18.18 +#define FLAT_COMPAT_KERNEL_SS FLAT_COMPAT_RING1_SS
   18.19 +#define FLAT_COMPAT_USER_DS   FLAT_COMPAT_RING3_DS
   18.20 +#define FLAT_COMPAT_USER_CS   FLAT_COMPAT_RING3_CS
   18.21 +#define FLAT_COMPAT_USER_SS   FLAT_COMPAT_RING3_SS
   18.22 +
   18.23 +#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
   18.24 +#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
   18.25 +
   18.26 +#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
   18.27 +#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
   18.28 +
   18.29 +#elif defined(__i386__)
   18.30 +
   18.31 +#define FLAT_COMPAT_KERNEL_CS FLAT_KERNEL_CS
   18.32 +#define FLAT_COMPAT_KERNEL_DS FLAT_KERNEL_DS
   18.33 +#define FLAT_COMPAT_KERNEL_SS FLAT_KERNEL_SS
   18.34 +#define FLAT_COMPAT_USER_CS   FLAT_USER_CS
   18.35 +#define FLAT_COMPAT_USER_DS   FLAT_USER_DS
   18.36 +#define FLAT_COMPAT_USER_SS   FLAT_USER_SS
   18.37 +
   18.38 +#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
   18.39 +
   18.40 +#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
   18.41 +#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
   18.42 +
   18.43 +#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
   18.44 +#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
   18.45 +
   18.46 +#endif
   18.47 +
   18.48 +#ifndef __ASSEMBLY__
   18.49 +
   18.50  #define load_TR(n)  __asm__ __volatile__ ("ltr  %%ax" : : "a" (__TSS(n)<<3) )
   18.51  
   18.52  #if defined(__x86_64__)
   18.53 -#define GUEST_KERNEL_RPL 3
   18.54 +#define GUEST_KERNEL_RPL(d) (!IS_COMPAT(d) ? 3 : 1)
   18.55  #elif defined(__i386__)
   18.56 -#define GUEST_KERNEL_RPL 1
   18.57 +#define GUEST_KERNEL_RPL(d) ((void)(d), 1)
   18.58  #endif
   18.59  
   18.60  /* Fix up the RPL of a guest segment selector. */
   18.61 -#define __fixup_guest_selector(sel)                             \
   18.62 -    ((sel) = (((sel) & 3) >= GUEST_KERNEL_RPL) ? (sel) :        \
   18.63 -     (((sel) & ~3) | GUEST_KERNEL_RPL))
   18.64 +#define __fixup_guest_selector(d, sel)                             \
   18.65 +({                                                                 \
   18.66 +    uint16_t _rpl = GUEST_KERNEL_RPL(d);                           \
   18.67 +    (sel) = (((sel) & 3) >= _rpl) ? (sel) : (((sel) & ~3) | _rpl); \
   18.68 +})
   18.69  
   18.70  /* Stack selectors don't need fixing up if the kernel runs in ring 0. */
   18.71  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
   18.72 -#define fixup_guest_stack_selector(ss) ((void)0)
   18.73 +#define fixup_guest_stack_selector(d, ss) ((void)0)
   18.74  #else
   18.75 -#define fixup_guest_stack_selector(ss) __fixup_guest_selector(ss)
   18.76 +#define fixup_guest_stack_selector(d, ss) __fixup_guest_selector(d, ss)
   18.77  #endif
   18.78  
   18.79  /*
   18.80   * Code selectors are always fixed up. It allows the Xen exit stub to detect
   18.81   * return to guest context, even when the guest kernel runs in ring 0.
   18.82   */
   18.83 -#define fixup_guest_code_selector(cs)  __fixup_guest_selector(cs)
   18.84 +#define fixup_guest_code_selector(d, cs)  __fixup_guest_selector(d, cs)
   18.85  
   18.86  /*
   18.87   * We need this function because enforcing the correct guest kernel RPL is
   18.88 @@ -57,11 +102,15 @@
   18.89   * DPL < CPL then they'll be cleared automatically. If SS RPL or DPL differs
   18.90   * from CS RPL then we'll #GP.
   18.91   */
   18.92 -#define guest_gate_selector_okay(sel)                                   \
   18.93 +#define guest_gate_selector_okay(d, sel)                                \
   18.94      ((((sel)>>3) < FIRST_RESERVED_GDT_ENTRY) || /* Guest seg? */        \
   18.95 -     ((sel) == FLAT_KERNEL_CS) ||               /* Xen default seg? */  \
   18.96 +     ((sel) == (!IS_COMPAT(d) ?                                         \
   18.97 +                FLAT_KERNEL_CS :                /* Xen default seg? */  \
   18.98 +                FLAT_COMPAT_KERNEL_CS)) ||      /* Xen default compat seg? */  \
   18.99       ((sel) & 4))                               /* LDT seg? */
  18.100  
  18.101 +#endif /* __ASSEMBLY__ */
  18.102 +
  18.103  /* These are bitmasks for the high 32 bits of a descriptor table entry. */
  18.104  #define _SEGMENT_TYPE    (15<< 8)
  18.105  #define _SEGMENT_EC      ( 1<<10) /* Expand-down or Conforming segment */
  18.106 @@ -81,12 +130,6 @@ struct desc_struct {
  18.107  
  18.108  #if defined(__x86_64__)
  18.109  
  18.110 -#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
  18.111 -#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
  18.112 -
  18.113 -#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
  18.114 -#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
  18.115 -
  18.116  typedef struct {
  18.117      u64 a, b;
  18.118  } idt_entry_t;
  18.119 @@ -118,14 +161,6 @@ do {                                    
  18.120  
  18.121  #elif defined(__i386__)
  18.122  
  18.123 -#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
  18.124 -
  18.125 -#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
  18.126 -#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
  18.127 -
  18.128 -#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
  18.129 -#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
  18.130 -
  18.131  typedef struct desc_struct idt_entry_t;
  18.132  
  18.133  #define _set_gate(gate_addr,type,dpl,addr) \
  18.134 @@ -155,6 +190,11 @@ do { \
  18.135  #endif
  18.136  
  18.137  extern struct desc_struct gdt_table[];
  18.138 +#ifdef CONFIG_COMPAT
  18.139 +extern struct desc_struct compat_gdt_table[];
  18.140 +#else
  18.141 +# define compat_gdt_table gdt_table
  18.142 +#endif
  18.143  
  18.144  struct Xgt_desc_struct {
  18.145      unsigned short size;
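
GUEST_KERNEL_RPL is now a function of the domain: 3 for native 64-bit guests,
1 for compat guests (and unconditionally 1 on 32-bit Xen). A worked example
of the selector fixup this drives (standalone sketch, not code from the
tree):

    /* Standalone rendering of __fixup_guest_selector()'s arithmetic.  */
    static unsigned short fixup(unsigned short sel, unsigned short rpl)
    {
        return ((sel & 3) >= rpl) ? sel : ((sel & ~3) | rpl);
    }

    /* fixup(0x0810, 3) == 0x0813  native guest kernel forced to RPL 3 */
    /* fixup(0x0810, 1) == 0x0811  compat guest kernel forced to RPL 1 */
    /* fixup(0x0813, 1) == 0x0813  RPL already sufficient: unchanged   */
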
    19.1 --- a/xen/include/asm-x86/ldt.h	Fri Jan 05 17:24:55 2007 +0000
    19.2 +++ b/xen/include/asm-x86/ldt.h	Fri Jan 05 17:32:00 2007 +0000
    19.3 @@ -17,7 +17,8 @@ static inline void load_LDT(struct vcpu 
    19.4      else
    19.5      {
    19.6          cpu = smp_processor_id();
    19.7 -        desc = gdt_table + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
    19.8 +        desc = (!IS_COMPAT(v->domain) ? gdt_table : compat_gdt_table)
    19.9 +               + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
   19.10          _set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
   19.11          __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
   19.12      }
    20.1 --- a/xen/include/asm-x86/mm.h	Fri Jan 05 17:24:55 2007 +0000
    20.2 +++ b/xen/include/asm-x86/mm.h	Fri Jan 05 17:32:00 2007 +0000
    20.3 @@ -244,7 +244,7 @@ unsigned long
    20.4  pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab);
    20.5  #endif /* CONFIG_PAGING_LEVELS == 3 */
    20.6  
    20.7 -int check_descriptor(struct desc_struct *d);
    20.8 +int check_descriptor(const struct domain *, struct desc_struct *d);
    20.9  
   20.10  /*
   20.11   * The MPT (machine->physical mapping table) is an array of word-sized
    21.1 --- a/xen/include/asm-x86/regs.h	Fri Jan 05 17:24:55 2007 +0000
    21.2 +++ b/xen/include/asm-x86/regs.h	Fri Jan 05 17:32:00 2007 +0000
    21.3 @@ -38,7 +38,8 @@ enum EFLAGS {
    21.4      ASSERT(diff < STACK_SIZE);                                                \
    21.5      /* If a guest frame, it must be have guest privs (unless HVM guest).   */ \
    21.6      /* We permit CS==0 which can come from an uninitialised trap entry. */    \
    21.7 -    ASSERT((diff != 0) || vm86_mode(r) || ((r->cs&3) >= GUEST_KERNEL_RPL) ||  \
    21.8 +    ASSERT((diff != 0) || vm86_mode(r) ||                                     \
    21.9 +           ((r->cs&3) >= GUEST_KERNEL_RPL(current->domain)) ||                \
   21.10             (r->cs == 0) || is_hvm_vcpu(current));                             \
   21.11      /* If not a guest frame, it must be a hypervisor frame. */                \
   21.12      ASSERT((diff == 0) || (!vm86_mode(r) && (r->cs == __HYPERVISOR_CS)));     \
    22.1 --- a/xen/include/asm-x86/x86_32/regs.h	Fri Jan 05 17:24:55 2007 +0000
    22.2 +++ b/xen/include/asm-x86/x86_32/regs.h	Fri Jan 05 17:32:00 2007 +0000
    22.3 @@ -17,7 +17,7 @@
    22.4      ((dpl) >= (vm86_mode(r) ? 3 : ((r)->cs & 3)))
    22.5  
    22.6  /* Check for null trap callback handler: Is the selector null (0-3)? */
    22.7 -#define null_trap_bounce(tb) (((tb)->cs & ~3) == 0)
    22.8 +#define null_trap_bounce(v, tb) (((tb)->cs & ~3) == 0)
    22.9  
   22.10  /* Number of bytes of on-stack execution state to be context-switched. */
   22.11  #define CTXT_SWITCH_STACK_BYTES (sizeof(struct cpu_user_regs))
    23.1 --- a/xen/include/asm-x86/x86_64/regs.h	Fri Jan 05 17:24:55 2007 +0000
    23.2 +++ b/xen/include/asm-x86/x86_64/regs.h	Fri Jan 05 17:32:00 2007 +0000
    23.3 @@ -11,13 +11,16 @@
    23.4  #define ring_3(r)    (((r)->cs & 3) == 3)
    23.5  
    23.6  #define guest_kernel_mode(v, r)   \
    23.7 -    (ring_3(r) && ((v)->arch.flags & TF_kernel_mode))
    23.8 +    (!IS_COMPAT((v)->domain) ? \
    23.9 +     ring_3(r) && ((v)->arch.flags & TF_kernel_mode) : \
   23.10 +     ring_1(r))
   23.11  
   23.12  #define permit_softint(dpl, v, r) \
   23.13      ((dpl) >= (guest_kernel_mode(v, r) ? 1 : 3))
   23.14  
   23.15  /* Check for null trap callback handler: Is the EIP null? */
   23.16 -#define null_trap_bounce(tb) ((tb)->eip == 0)
   23.17 +#define null_trap_bounce(v, tb) \
   23.18 +    (!IS_COMPAT((v)->domain) ? (tb)->eip == 0 : ((tb)->cs & ~3) == 0)
   23.19  
   23.20  /* Number of bytes of on-stack execution state to be context-switched. */
   23.21  /* NB. Segment registers and bases are not saved/restored on x86/64 stack. */
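
These two macros capture the central ring difference: a native 64-bit guest
kernel really runs in ring 3, with Xen tracking kernel-vs-user via
TF_kernel_mode, while a compat guest kernel genuinely occupies ring 1, so the
CS RPL alone decides. null_trap_bounce() changes for the same reason: native
callbacks are registered by EIP only (a 64-bit guest has a single flat CS),
compat callbacks by CS:EIP, so the null test moves from the EIP to the
selector. A sketch of the mode test:

    /* Sketch of the two notions of "guest kernel mode" on x86-64.     */
    static int sketch_guest_kernel_mode(int compat, unsigned int cs_rpl,
                                        int tf_kernel_mode)
    {
        return compat ? (cs_rpl == 1)                    /* real ring 1 */
                      : (cs_rpl == 3 && tf_kernel_mode); /* ring 3+flag */
    }
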
    24.1 --- a/xen/include/public/arch-x86/xen-x86_64.h	Fri Jan 05 17:24:55 2007 +0000
    24.2 +++ b/xen/include/public/arch-x86/xen-x86_64.h	Fri Jan 05 17:32:00 2007 +0000
    24.3 @@ -141,7 +141,10 @@ struct iret_context {
    24.4  
    24.5  #ifdef __GNUC__
    24.6  /* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
    24.7 -#define __DECL_REG(name) union { uint64_t r ## name, e ## name; }
    24.8 +#define __DECL_REG(name) union { \
    24.9 +    uint64_t r ## name, e ## name; \
   24.10 +    uint32_t _e ## name; \
   24.11 +}
   24.12  #else
   24.13  /* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
   24.14  #define __DECL_REG(name) uint64_t r ## name
    25.1 --- a/xen/include/public/arch-x86/xen.h	Fri Jan 05 17:24:55 2007 +0000
    25.2 +++ b/xen/include/public/arch-x86/xen.h	Fri Jan 05 17:32:00 2007 +0000
    25.3 @@ -141,8 +141,18 @@ struct vcpu_guest_context {
    25.4  #else
    25.5      unsigned long event_callback_eip;
    25.6      unsigned long failsafe_callback_eip;
    25.7 +#ifdef __XEN__
    25.8 +    union {
    25.9 +        unsigned long syscall_callback_eip;
   25.10 +        struct {
   25.11 +            unsigned int event_callback_cs;    /* compat CS of event cb     */
   25.12 +            unsigned int failsafe_callback_cs; /* compat CS of failsafe cb  */
   25.13 +        };
   25.14 +    };
   25.15 +#else
   25.16      unsigned long syscall_callback_eip;
   25.17  #endif
   25.18 +#endif
   25.19      unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
   25.20  #ifdef __x86_64__
   25.21      /* Segment base addresses. */
    26.1 --- a/xen/include/xen/elf.h	Fri Jan 05 17:24:55 2007 +0000
    26.2 +++ b/xen/include/xen/elf.h	Fri Jan 05 17:32:00 2007 +0000
    26.3 @@ -525,6 +525,15 @@ extern unsigned long long xen_elfnote_nu
    26.4  					      int type, int *defined);
    26.5  extern const char *xen_elfnote_string(struct domain_setup_info *dsi, int type);
    26.6  
    26.7 +#ifdef CONFIG_COMPAT
    26.8 +extern int elf32_sanity_check(const Elf32_Ehdr *ehdr);
    26.9 +extern int loadelf32image(struct domain_setup_info *);
   26.10 +extern int parseelf32image(struct domain_setup_info *);
   26.11 +extern unsigned long long xen_elf32note_numeric(struct domain_setup_info *,
   26.12 +						int type, int *defined);
   26.13 +extern const char *xen_elf32note_string(struct domain_setup_info *, int type);
   26.14 +#endif
   26.15 +
   26.16  #ifdef Elf_Ehdr
   26.17  extern int elf_sanity_check(const Elf_Ehdr *ehdr);
   26.18  #endif
    27.1 --- a/xen/include/xen/sched.h	Fri Jan 05 17:24:55 2007 +0000
    27.2 +++ b/xen/include/xen/sched.h	Fri Jan 05 17:32:00 2007 +0000
    27.3 @@ -422,6 +422,9 @@ extern struct domain *domain_list;
    27.4   /* Domain is paused by the hypervisor? */
    27.5  #define _DOMF_paused           5
    27.6  #define DOMF_paused            (1UL<<_DOMF_paused)
    27.7 + /* Domain is a compatibility one? */
    27.8 +#define _DOMF_compat           6
    27.9 +#define DOMF_compat            (1UL<<_DOMF_compat)
   27.10  
   27.11  static inline int vcpu_runnable(struct vcpu *v)
   27.12  {
   27.13 @@ -458,6 +461,13 @@ static inline void vcpu_unblock(struct v
   27.14  
   27.15  #define IS_PRIV(_d) ((_d)->is_privileged)
   27.16  
   27.17 +#ifdef CONFIG_COMPAT
   27.18 +#define IS_COMPAT(_d)                                       \
   27.19 +    (test_bit(_DOMF_compat, &(_d)->domain_flags))
   27.20 +#else
   27.21 +#define IS_COMPAT(_d) 0
   27.22 +#endif
   27.23 +
   27.24  #define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))
   27.25  
   27.26  #define is_hvm_domain(d) ((d)->is_hvm)
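
A closing note on the IS_COMPAT() fallback: without CONFIG_COMPAT it expands
to the constant 0, so every compat conditional introduced above folds away at
compile time. A sketch of the effect (hypothetical helper, using the FLAT_*
names from the asm-x86/desc.h hunk):

    /* With IS_COMPAT(d) a constant 0, the compiler drops the compat arm. */
    static unsigned long pick_kernel_cs(const struct domain *d)
    {
        return IS_COMPAT(d) ? FLAT_COMPAT_KERNEL_CS : FLAT_KERNEL_CS;
        /* On 32-bit Xen this is just "return FLAT_KERNEL_CS"; there the
         * desc.h hunk #defines FLAT_COMPAT_KERNEL_CS to the same value
         * anyway.                                                      */
    }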