ia64/xen-unstable

changeset 362:8d23d3ad0f20

bitkeeper revision 1.165.1.1 (3e9c3ccaCJe7Z8jxplsENPEQ5oFIFw)

Many files:
Partial checkin of virtualised LDT support.
author kaf24@scramble.cl.cam.ac.uk
date Tue Apr 15 17:09:30 2003 +0000 (2003-04-15)
parents c9a8a90c8a96
children 2eb189eacf01
files xen/arch/i386/mm.c xen/arch/i386/process.c xen/arch/i386/traps.c xen/common/memory.c xen/include/asm-i386/desc.h xen/include/asm-i386/system.h xen/include/xeno/config.h xen/include/xeno/mm.h xen/include/xeno/sched.h xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c xenolinux-2.4.21-pre4-sparse/include/asm-xeno/desc.h xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h xenolinux-2.4.21-pre4-sparse/include/asm-xeno/mmu_context.h xenolinux-2.4.21-pre4-sparse/include/asm-xeno/pgalloc.h xenolinux-2.4.21-pre4-sparse/include/asm-xeno/pgtable.h xenolinux-2.4.21-pre4-sparse/include/asm-xeno/processor.h
     1.1 --- a/xen/arch/i386/mm.c	Mon Apr 14 16:31:59 2003 +0000
     1.2 +++ b/xen/arch/i386/mm.c	Tue Apr 15 17:09:30 2003 +0000
     1.3 @@ -116,12 +116,12 @@ long do_stack_switch(unsigned long ss, u
     1.4  
     1.5  
     1.6  /* Returns TRUE if given descriptor is valid for GDT or LDT. */
     1.7 -static int check_descriptor(unsigned long a, unsigned long b)
     1.8 +int check_descriptor(unsigned long a, unsigned long b)
     1.9  {
    1.10      unsigned long base, limit;
    1.11  
    1.12      /* A not-present descriptor will always fault, so is safe. */
    1.13 -    if ( !(a & _SEGMENT_P) ) 
    1.14 +    if ( !(b & _SEGMENT_P) ) 
    1.15          goto good;
    1.16  
    1.17      /*
    1.18 @@ -130,10 +130,10 @@ static int check_descriptor(unsigned lon
    1.19       * gates (consider a call gate pointing at another guestos descriptor with 
    1.20       * DPL 0 -- this would get the OS ring-0 privileges).
    1.21       */
    1.22 -    if ( (a & _SEGMENT_DPL) == 0 )
    1.23 +    if ( (b & _SEGMENT_DPL) == 0 )
    1.24          goto bad;
    1.25  
    1.26 -    if ( !(a & _SEGMENT_S) )
    1.27 +    if ( !(b & _SEGMENT_S) )
    1.28      {
    1.29          /*
    1.30           * System segment:
    1.31 @@ -148,15 +148,15 @@ static int check_descriptor(unsigned lon
    1.32           */
    1.33  
    1.34          /* Disallow everything but call gates. */
    1.35 -        if ( (a & _SEGMENT_TYPE) != 0xc00 )
    1.36 +        if ( (b & _SEGMENT_TYPE) != 0xc00 )
    1.37              goto bad;
    1.38  
    1.39          /* Can't allow far jump to a Xen-private segment. */
    1.40 -        if ( !VALID_CODESEL(b>>16) )
    1.41 +        if ( !VALID_CODESEL(a>>16) )
    1.42              goto bad;
    1.43  
    1.44          /* Reserved bits must be zero. */
    1.45 -        if ( (a & 0xe0) != 0 )
    1.46 +        if ( (b & 0xe0) != 0 )
    1.47              goto bad;
    1.48          
    1.49          /* No base/limit check is needed for a call gate. */
    1.50 @@ -164,10 +164,10 @@ static int check_descriptor(unsigned lon
    1.51      }
    1.52      
    1.53      /* Check that base/limit do not overlap Xen-private space. */
    1.54 -    base  = (a&(0xff<<24)) | ((a&0xff)<<16) | (b>>16);
    1.55 -    limit = (a&0xf0000) | (b&0xffff);
    1.56 +    base  = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    1.57 +    limit = (b&0xf0000) | (a&0xffff);
    1.58      limit++; /* We add one because limit is inclusive. */
    1.59 -    if ( (a & _SEGMENT_G) )
    1.60 +    if ( (b & _SEGMENT_G) )
    1.61          limit <<= 12;
    1.62      if ( ((base + limit) <= base) || 
    1.63           ((base + limit) >= PAGE_OFFSET) )
    1.64 @@ -214,7 +214,7 @@ long do_set_gdt(unsigned long *frame_lis
    1.65              /* Check all potential GDT entries in the page. */
    1.66              gdt_page = map_domain_mem(frames[0] << PAGE_SHIFT);
    1.67              for ( i = 0; i < 512; i++ )
    1.68 -                if ( !check_descriptor(gdt_page[i*2], gdt_page[i*2]+1) )
    1.69 +                if ( !check_descriptor(gdt_page[i*2], gdt_page[i*2+1]) )
    1.70                      goto out;
    1.71              unmap_domain_mem(gdt_page);
    1.72          }
    1.73 @@ -247,9 +247,9 @@ long do_set_gdt(unsigned long *frame_lis
    1.74      flush_tlb();
    1.75  
    1.76      /* Copy over first entries of the new GDT. */
    1.77 -    memcpy((void *)PERDOMAIN_VIRT_START, gdt_table, FIRST_DOMAIN_GDT_ENTRY*8);
    1.78 +    memcpy((void *)GDT_VIRT_START, gdt_table, FIRST_DOMAIN_GDT_ENTRY*8);
    1.79      
    1.80 -    SET_GDT_ADDRESS(current, PERDOMAIN_VIRT_START);
    1.81 +    SET_GDT_ADDRESS(current, GDT_VIRT_START);
    1.82      SET_GDT_ENTRIES(current, (entries*8)-1);
    1.83      __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
    1.84  
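Note: the check_descriptor() changes above fix which 32-bit word each test inspects. For a descriptor pair (a, b) read from guest memory, 'a' is the low word (limit[15:0], base[15:0]) and 'b' is the high word carrying base[31:24], G, P, DPL, S, the type field, and base[23:16]. A standalone sketch of the corrected base/limit unpacking (the G-bit position is the standard IA-32 bit 23 of the high word, not taken from this tree's headers):

    /* Illustration only: decode the base/limit of a GDT/LDT entry
     * from its two 32-bit words, mirroring the corrected logic. */
    #include <stdio.h>

    static void decode_descriptor(unsigned long a, unsigned long b)
    {
        unsigned long base, limit;
        base  = (b & (0xffUL << 24)) | ((b & 0xff) << 16) | (a >> 16);
        limit = (b & 0xf0000) | (a & 0xffff);
        limit++;                     /* stored limit is inclusive   */
        if ( b & (1UL << 23) )       /* G: limit counts 4kB pages   */
            limit <<= 12;
        printf("base=%08lx size=%08lx\n", base, limit);
    }

    int main(void)
    {
        /* 4MB data segment at 0x00100000, page-granular (G=1). */
        decode_descriptor(0x000003ff, 0x00c0f210);
        return 0;
    }
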
     2.1 --- a/xen/arch/i386/process.c	Mon Apr 14 16:31:59 2003 +0000
     2.2 +++ b/xen/arch/i386/process.c	Tue Apr 15 17:09:30 2003 +0000
     2.3 @@ -216,25 +216,6 @@ void show_regs(struct pt_regs * regs)
     2.4      show_trace(&regs->esp);
     2.5  }
     2.6  
     2.7 -/*
     2.8 - * No need to lock the MM as we are the last user
     2.9 - */
    2.10 -void release_segments(struct mm_struct *mm)
    2.11 -{
    2.12 -#if 0
    2.13 -    void * ldt = mm.context.segments;
    2.14 -
    2.15 -    /*
    2.16 -     * free the LDT
    2.17 -     */
    2.18 -    if (ldt) {
    2.19 -        mm.context.segments = NULL;
    2.20 -        clear_LDT();
    2.21 -        vfree(ldt);
    2.22 -    }
    2.23 -#endif
    2.24 -}
    2.25 -
    2.26  
    2.27  /*
    2.28   * Free current thread data structures etc..
    2.29 @@ -258,48 +239,8 @@ void flush_thread(void)
    2.30  
    2.31  void release_thread(struct task_struct *dead_task)
    2.32  {
    2.33 -#if 0
    2.34 -    if (dead_task->mm) {
    2.35 -        void * ldt = dead_task->mm.context.segments;
    2.36 -
    2.37 -        // temporary debugging check
    2.38 -        if (ldt) {
    2.39 -            printk("WARNING: dead process %8s still has LDT? <%p>\n",
    2.40 -                   dead_task->comm, ldt);
    2.41 -            BUG();
    2.42 -        }
    2.43 -    }
    2.44 -#endif
    2.45  }
    2.46  
    2.47 -/*
    2.48 - * we do not have to muck with descriptors here, that is
    2.49 - * done in switch_mm() as needed.
    2.50 - */
    2.51 -void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
    2.52 -{
    2.53 -#if 0
    2.54 -    struct mm_struct * old_mm;
    2.55 -    void *old_ldt, *ldt;
    2.56 -
    2.57 -    ldt = NULL;
    2.58 -    old_mm = current->mm;
    2.59 -    if (old_mm && (old_ldt = old_mm.context.segments) != NULL) {
    2.60 -        /*
    2.61 -         * Completely new LDT, we initialize it from the parent:
    2.62 -         */
    2.63 -        ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
    2.64 -        if (!ldt)
    2.65 -            printk(KERN_WARNING "ldt allocation failed\n");
    2.66 -        else
    2.67 -            memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
    2.68 -    }
    2.69 -    new_mm.context.segments = ldt;
    2.70 -    new_mm.context.cpuvalid = ~0UL;	/* valid on all CPU's - they can't have stale data */
    2.71 -#endif
    2.72 -}
    2.73 -
    2.74 -
    2.75  void new_thread(struct task_struct *p,
    2.76                  unsigned long start_pc,
    2.77                  unsigned long start_stack,
    2.78 @@ -395,7 +336,7 @@ void __switch_to(struct task_struct *pre
    2.79  
    2.80      /* Switch GDT and LDT. */
    2.81      __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
    2.82 -//    __load_LDT(0);
    2.83 +    load_LDT();
    2.84  
    2.85      /*
    2.86       * Restore %fs and %gs.
     3.1 --- a/xen/arch/i386/traps.c	Mon Apr 14 16:31:59 2003 +0000
     3.2 +++ b/xen/arch/i386/traps.c	Tue Apr 15 17:09:30 2003 +0000
     3.3 @@ -17,7 +17,7 @@
     3.4  #include <xeno/delay.h>
     3.5  #include <xeno/spinlock.h>
     3.6  #include <xeno/irq.h>
     3.7 -
     3.8 +#include <asm/domain_page.h>
     3.9  #include <asm/system.h>
    3.10  #include <asm/io.h>
    3.11  #include <asm/atomic.h>
    3.12 @@ -188,22 +188,13 @@ static void inline do_trap(int trapnr, c
    3.13  {
    3.14      struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
    3.15      trap_info_t *ti;
    3.16 -    unsigned long addr, fixup;
    3.17 +    unsigned long fixup;
    3.18  
    3.19      if (!(regs->xcs & 3))
    3.20          goto fault_in_hypervisor;
    3.21  
    3.22      ti = current->thread.traps + trapnr;
    3.23 -    if ( trapnr == 14 )
    3.24 -    {
    3.25 -        /* page fault pushes %cr2 */
    3.26 -        gtb->flags = GTBF_TRAP_CR2;
    3.27 -        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (gtb->cr2) : );
    3.28 -    }
    3.29 -    else
    3.30 -    {
    3.31 -        gtb->flags = use_error_code ? GTBF_TRAP : GTBF_TRAP_NOCODE;
    3.32 -    }
    3.33 +    gtb->flags = use_error_code ? GTBF_TRAP : GTBF_TRAP_NOCODE;
    3.34      gtb->error_code = error_code;
    3.35      gtb->cs         = ti->cs;
    3.36      gtb->eip        = ti->address;
    3.37 @@ -217,29 +208,10 @@ static void inline do_trap(int trapnr, c
    3.38          return;
    3.39      }
    3.40  
    3.41 -    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
    3.42 -
    3.43 -    if ( (trapnr == 14) && (addr >= PAGE_OFFSET) )
    3.44 -    {
    3.45 -        unsigned long page;
    3.46 -        unsigned long *pde;
    3.47 -        pde = (unsigned long *)idle_pg_table[smp_processor_id()];
    3.48 -        page = pde[addr >> L2_PAGETABLE_SHIFT];
    3.49 -        printk("*pde = %08lx\n", page);
    3.50 -        if ( page & _PAGE_PRESENT )
    3.51 -        {
    3.52 -            page &= PAGE_MASK;
    3.53 -            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
    3.54 -            printk(" *pte = %08lx\n", page);
    3.55 -        }
    3.56 -    }
    3.57 -
    3.58      show_registers(regs);
    3.59      panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
    3.60 -          "[error_code=%08x]\n"
    3.61 -          "Faulting linear address might be %08lx\n",
    3.62 -          smp_processor_id(), trapnr, str,
    3.63 -          error_code, addr);
    3.64 +          "[error_code=%08x]\n",
    3.65 +          smp_processor_id(), trapnr, str, error_code);
    3.66  }
    3.67  
    3.68  #define DO_ERROR_NOCODE(trapnr, str, name) \
    3.69 @@ -265,14 +237,134 @@ DO_ERROR_NOCODE( 9, "coprocessor segment
    3.70  DO_ERROR(10, "invalid TSS", invalid_TSS)
    3.71  DO_ERROR(11, "segment not present", segment_not_present)
    3.72  DO_ERROR(12, "stack segment", stack_segment)
    3.73 -DO_ERROR(14, "page fault", page_fault)
    3.74  /* Vector 15 reserved by Intel */
    3.75  DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
    3.76  DO_ERROR(17, "alignment check", alignment_check)
    3.77  DO_ERROR_NOCODE(18, "machine check", machine_check)
    3.78  DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
    3.79  
    3.80 -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
    3.81 +asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
    3.82 +{
    3.83 +    struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
    3.84 +    trap_info_t *ti;
    3.85 +    l2_pgentry_t *pl2e;
    3.86 +    l1_pgentry_t *pl1e;
    3.87 +    unsigned long addr, off, fixup, l2e, l1e, *ldt_page;
    3.88 +    struct task_struct *p = current;
    3.89 +    struct pfn_info *page;
    3.90 +    int i;
    3.91 +
    3.92 +    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
    3.93 +
    3.94 +    if ( unlikely(!(regs->xcs & 3)) )
    3.95 +        goto fault_in_hypervisor;
    3.96 +
    3.97 +    if ( unlikely(addr > PAGE_OFFSET) )
    3.98 +        goto fault_in_xen_space;
    3.99 +
   3.100 + bounce_fault:
   3.101 +
   3.102 +    if ( (regs->xcs &3) == 1 )
   3.103 +        printk("Fault at %08x (%08x)\n", addr, regs->eip); /* XXX */
   3.104 +
   3.105 +    ti = p->thread.traps + 14;
   3.106 +    gtb->flags = GTBF_TRAP_CR2; /* page fault pushes %cr2 */
   3.107 +    gtb->cr2        = addr;
   3.108 +    gtb->error_code = error_code;
   3.109 +    gtb->cs         = ti->cs;
   3.110 +    gtb->eip        = ti->address;
   3.111 +    return; 
   3.112 +
   3.113 +
   3.114 + fault_in_xen_space:
   3.115 +
   3.116 +    if ( (addr < LDT_VIRT_START) || 
   3.117 +         (addr >= (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) )
   3.118 +        goto bounce_fault;
   3.119 +
   3.120 +    off  = addr - LDT_VIRT_START;
   3.121 +    addr = p->mm.ldt_base + off;
   3.122 +
   3.123 +    spin_lock_irq(&p->page_lock);
   3.124 +
   3.125 +    pl2e  = map_domain_mem(pagetable_val(p->mm.pagetable));
   3.126 +    l2e   = l2_pgentry_val(pl2e[l2_table_offset(addr)]);
   3.127 +    unmap_domain_mem(pl2e);
   3.128 +    if ( !(l2e & _PAGE_PRESENT) )
   3.129 +        goto unlock_and_bounce_fault;
   3.130 +
   3.131 +    pl1e  = map_domain_mem(l2e & PAGE_MASK);
   3.132 +    l1e   = l1_pgentry_val(pl1e[l1_table_offset(addr)]);
   3.133 +    unmap_domain_mem(pl1e);
   3.134 +    if ( !(l1e & _PAGE_PRESENT) )
   3.135 +        goto unlock_and_bounce_fault;
   3.136 +
   3.137 +    page = frame_table + (l1e >> PAGE_SHIFT);
   3.138 +    if ( (page->flags & PG_type_mask) != PGT_ldt_page )
   3.139 +    {
   3.140 +        if ( page->type_count != 0 )
   3.141 +        { /* XXX */
   3.142 +            printk("BOGO TYPE %08lx %ld\n", page->flags, page->type_count);
   3.143 +            goto unlock_and_bounce_fault;
   3.144 +        }
   3.145 +        /* Check all potential LDT entries in the page. */
   3.146 +        ldt_page = map_domain_mem(l1e & PAGE_MASK);
   3.147 +        for ( i = 0; i < 512; i++ )
   3.148 +            if ( !check_descriptor(ldt_page[i*2], ldt_page[i*2+1]) )
   3.149 +            { /* XXX */
   3.150 +                printk("Bad desc!!!!!\n");
   3.151 +                goto unlock_and_bounce_fault;
   3.152 +            }
   3.153 +        unmap_domain_mem(ldt_page);
   3.154 +        page->flags &= ~PG_type_mask;
   3.155 +        page->flags |= PGT_ldt_page;
   3.156 +        get_page_type(page);
   3.157 +        get_page_tot(page);
   3.158 +    }
   3.159 +
   3.160 +    p->mm.perdomain_pt[l1_table_offset(off)+16] = mk_l1_pgentry(l1e);
   3.161 +
   3.162 +    spin_unlock_irq(&p->page_lock);
   3.163 +    return;
   3.164 +
   3.165 +
   3.166 + unlock_and_bounce_fault:
   3.167 +
   3.168 +    spin_unlock_irq(&p->page_lock);
   3.169 +    goto bounce_fault;
   3.170 +
   3.171 +
   3.172 + fault_in_hypervisor:
   3.173 +
   3.174 +    if ( (fixup = search_exception_table(regs->eip)) != 0 )
   3.175 +    {
   3.176 +        regs->eip = fixup;
   3.177 +        return;
   3.178 +    }
   3.179 +
   3.180 +    if ( addr >= PAGE_OFFSET )
   3.181 +    {
   3.182 +        unsigned long page;
   3.183 +        unsigned long *pde;
   3.184 +        pde = (unsigned long *)idle_pg_table[smp_processor_id()];
   3.185 +        page = pde[addr >> L2_PAGETABLE_SHIFT];
   3.186 +        printk("*pde = %08lx\n", page);
   3.187 +        if ( page & _PAGE_PRESENT )
   3.188 +        {
   3.189 +            page &= PAGE_MASK;
   3.190 +            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
   3.191 +            printk(" *pte = %08lx\n", page);
   3.192 +        }
   3.193 +    }
   3.194 +
   3.195 +    show_registers(regs);
   3.196 +    panic("CPU%d FATAL PAGE FAULT\n"
   3.197 +          "[error_code=%08x]\n"
   3.198 +          "Faulting linear address might be %08lx\n",
   3.199 +          smp_processor_id(), error_code, addr);
   3.200 +}
   3.201 +
   3.202 +asmlinkage void do_general_protection(struct pt_regs *regs, long error_code)
   3.203  {
   3.204      struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
   3.205      trap_info_t *ti;
   3.206 @@ -315,7 +407,7 @@ asmlinkage void do_general_protection(st
   3.207              return;
   3.208          }
   3.209      }
   3.210 -
   3.211 +    
   3.212      /* Pass on GPF as is. */
   3.213      ti = current->thread.traps + 13;
   3.214      gtb->flags      = GTBF_TRAP;
   3.215 @@ -328,6 +420,7 @@ asmlinkage void do_general_protection(st
   3.216  
   3.217      if ( (fixup = search_exception_table(regs->eip)) != 0 )
   3.218      {
   3.219 +        printk("Hmmmm %08lx -> %08lx (%04lx)\n", regs->eip, fixup, error_code);
   3.220          regs->eip = fixup;
   3.221          return;
   3.222      }
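Note: the fault_in_xen_space path above services lazy faults on the per-domain LDT shadow. A fault at LDT_VIRT_START+off is resolved by walking the guest's own page tables at ldt_base+off, vetting every descriptor in the page, typing the frame PGT_ldt_page, and mapping it at perdomain_pt[(off>>12)+16]; the +16 is because the LDT window begins 64kB (16 pages) into the per-domain area, after the GDT window. A standalone sketch of that demux arithmetic, with an assumed window base (the real LDT_VIRT_START comes from config.h below):

    /* Illustration only: turn a fault in the LDT shadow window into
     * a guest page-table walk and a perdomain_pt slot. */
    #include <stdio.h>

    #define LDT_VIRT_START_ 0xfc410000UL   /* assumed example value */

    static void demux_ldt_fault(unsigned long fault_addr,
                                unsigned long ldt_base)
    {
        unsigned long off  = fault_addr - LDT_VIRT_START_;
        unsigned long gva  = ldt_base + off;   /* address to walk   */
        unsigned int  slot = (unsigned int)(off >> 12) + 16;
        /* +16: the LDT window sits 16 pages past PERDOMAIN_VIRT_START. */
        printf("walk guest VA %08lx, fill perdomain_pt[%u]\n", gva, slot);
    }

    int main(void)
    {
        demux_ldt_fault(LDT_VIRT_START_ + 0x1008, 0x00c0a000UL);
        return 0;
    }
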
     4.1 --- a/xen/common/memory.c	Mon Apr 14 16:31:59 2003 +0000
     4.2 +++ b/xen/common/memory.c	Tue Apr 15 17:09:30 2003 +0000
     4.3 @@ -176,7 +176,7 @@
     4.4  #include <asm/uaccess.h>
     4.5  #include <asm/domain_page.h>
     4.6  
     4.7 -#if 0
     4.8 +#if 1
     4.9  #define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a )
    4.10  #else
    4.11  #define MEM_LOG(_f, _a...) ((void)0)
    4.12 @@ -621,10 +621,15 @@ static int mod_l1_entry(unsigned long pa
    4.13  static int do_extended_command(unsigned long ptr, unsigned long val)
    4.14  {
    4.15      int err = 0;
    4.16 +    unsigned int cmd = val & PGEXT_CMD_MASK;
    4.17      unsigned long pfn = ptr >> PAGE_SHIFT;
    4.18      struct pfn_info *page = frame_table + pfn;
    4.19  
    4.20 -    switch ( (val & PGEXT_CMD_MASK) )
    4.21 +    /* 'ptr' must be in range except where it isn't a machine address. */
    4.22 +    if ( (pfn >= max_page) && (cmd != PGEXT_SET_LDT) )
    4.23 +        return 1;
    4.24 +
    4.25 +    switch ( cmd )
    4.26      {
    4.27      case PGEXT_PIN_L1_TABLE:
    4.28          err = get_l1_table(pfn);
    4.29 @@ -695,6 +700,42 @@ static int do_extended_command(unsigned 
    4.30          __flush_tlb_one(val & ~PGEXT_CMD_MASK);
    4.31          break;
    4.32  
    4.33 +    case PGEXT_SET_LDT:
    4.34 +    {
    4.35 +        int i;
    4.36 +        unsigned long ents = val >> PGEXT_CMD_SHIFT;
    4.37 +        if ( ((ptr & (PAGE_SIZE-1)) != 0) || 
    4.38 +             (ents > 8192) ||
    4.39 +             ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
    4.40 +             ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
    4.41 +        {
    4.42 +            err = 1;
    4.43 +            MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents);
    4.44 +        }
    4.45 +        else if ( (current->mm.ldt_ents != ents) || 
    4.46 +                  (current->mm.ldt_base != ptr) )
    4.47 +        {
    4.48 +            if ( current->mm.ldt_ents != 0 )
    4.49 +            {
    4.50 +                /* Tear down the old LDT. */
    4.51 +                for ( i = 16; i < 32; i++ )
    4.52 +                {
    4.53 +                    pfn = l1_pgentry_to_pagenr(current->mm.perdomain_pt[i]);
    4.54 +                    if ( pfn == 0 ) continue;
    4.55 +                    current->mm.perdomain_pt[i] = mk_l1_pgentry(0);
    4.56 +                    page = frame_table + pfn;
    4.57 +                    put_page_type(page);
    4.58 +                    put_page_tot(page);                
    4.59 +                }
    4.60 +                tlb_flush[smp_processor_id()] = 1;
    4.61 +            }
    4.62 +            current->mm.ldt_base = ptr;
    4.63 +            current->mm.ldt_ents = ents;
    4.64 +            load_LDT();
    4.65 +        }
    4.66 +        break;
    4.67 +    }
    4.68 +
    4.69      default:
    4.70          MEM_LOG("Invalid extended pt command 0x%08lx", val & PGEXT_CMD_MASK);
    4.71          err = 1;
    4.72 @@ -710,6 +751,7 @@ int do_process_page_updates(page_update_
    4.73      unsigned long flags, pfn;
    4.74      struct pfn_info *page;
    4.75      int err = 0, i;
    4.76 +    unsigned int cmd;
    4.77  
    4.78      for ( i = 0; i < count; i++ )
    4.79      {
    4.80 @@ -718,8 +760,11 @@ int do_process_page_updates(page_update_
    4.81              kill_domain_with_errmsg("Cannot read page update request");
    4.82          } 
    4.83  
    4.84 +        cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
    4.85 +
    4.86 +        /* All normal commands must have 'ptr' in range. */
    4.87          pfn = req.ptr >> PAGE_SHIFT;
    4.88 -        if ( pfn >= max_page )
    4.89 +        if ( (pfn >= max_page) && (cmd != PGREQ_EXTENDED_COMMAND) )
    4.90          {
    4.91              MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
    4.92              kill_domain_with_errmsg("Page update request out of range");
    4.93 @@ -729,7 +774,7 @@ int do_process_page_updates(page_update_
    4.94  
    4.95          /* Least significant bits of 'ptr' demux the operation type. */
    4.96          spin_lock_irq(&current->page_lock);
    4.97 -        switch ( req.ptr & (sizeof(l1_pgentry_t)-1) )
    4.98 +        switch ( cmd )
    4.99          {
   4.100              /*
   4.101               * PGREQ_NORMAL: Normal update to any level of page table.
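Note: a SET_LDT request is encoded with the low two bits of 'ptr' selecting PGREQ_EXTENDED_COMMAND and with 'val' carrying PGEXT_SET_LDT in its command bits plus the entry count above PGEXT_CMD_SHIFT; queue_set_ldt() in hypervisor.c below does exactly this. A standalone sketch of the packing; the numeric values of the three constants are placeholders, since the real definitions live in hypervisor-if.h, which is not part of this diff:

    #include <stdio.h>

    /* Placeholder values; see asm/hypervisor-ifs/hypervisor-if.h for
     * the real ones.  Only the packing scheme is taken from this diff. */
    #define PGREQ_EXTENDED_COMMAND 3
    #define PGEXT_SET_LDT          6
    #define PGEXT_CMD_SHIFT        8

    int main(void)
    {
        unsigned long ldt_base = 0x00c0a000UL; /* page-aligned guest VA */
        unsigned long ents     = 32;           /* number of LDT entries */
        unsigned long ptr = ldt_base | PGREQ_EXTENDED_COMMAND;
        unsigned long val = PGEXT_SET_LDT | (ents << PGEXT_CMD_SHIFT);
        printf("req.ptr=%08lx req.val=%08lx\n", ptr, val);
        return 0;
    }
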
     5.1 --- a/xen/include/asm-i386/desc.h	Mon Apr 14 16:31:59 2003 +0000
     5.2 +++ b/xen/include/asm-i386/desc.h	Tue Apr 15 17:09:30 2003 +0000
     5.3 @@ -1,14 +1,15 @@
     5.4  #ifndef __ARCH_DESC_H
     5.5  #define __ARCH_DESC_H
     5.6  
     5.7 +#define LDT_ENTRY_SIZE 8
     5.8 +
     5.9  #define __FIRST_TSS_ENTRY 8
    5.10  #define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY+1)
    5.11  
    5.12  #define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
    5.13  #define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
    5.14  
    5.15 -#define load_TR(n)  __asm__ __volatile__ ( "ltr  %%ax" : : "a" (__TSS(n)<<3) )
    5.16 -#define __load_LDT(n) __asm__ __volatile__ ( "lldt %%ax" : : "a" (n) )
    5.17 +#define load_TR(n)  __asm__ __volatile__ ("ltr  %%ax" : : "a" (__TSS(n)<<3) )
    5.18  
    5.19  /* Guest OS must provide its own code selectors, or use the one we provide. */
    5.20  #define VALID_CODESEL(_s) \
     6.1 --- a/xen/include/asm-i386/system.h	Mon Apr 14 16:31:59 2003 +0000
     6.2 +++ b/xen/include/asm-i386/system.h	Tue Apr 15 17:09:30 2003 +0000
     6.3 @@ -33,50 +33,6 @@ extern void FASTCALL(__switch_to(struct 
     6.4                       :"memory");                                        \
     6.5  } while (0)
     6.6  
     6.7 -#define _set_base(addr,base) do { unsigned long __pr; \
     6.8 -__asm__ __volatile__ ("movw %%dx,%1\n\t" \
     6.9 -	"rorl $16,%%edx\n\t" \
    6.10 -	"movb %%dl,%2\n\t" \
    6.11 -	"movb %%dh,%3" \
    6.12 -	:"=&d" (__pr) \
    6.13 -	:"m" (*((addr)+2)), \
    6.14 -	 "m" (*((addr)+4)), \
    6.15 -	 "m" (*((addr)+7)), \
    6.16 -         "0" (base) \
    6.17 -        ); } while(0)
    6.18 -
    6.19 -#define _set_limit(addr,limit) do { unsigned long __lr; \
    6.20 -__asm__ __volatile__ ("movw %%dx,%1\n\t" \
    6.21 -	"rorl $16,%%edx\n\t" \
    6.22 -	"movb %2,%%dh\n\t" \
    6.23 -	"andb $0xf0,%%dh\n\t" \
    6.24 -	"orb %%dh,%%dl\n\t" \
    6.25 -	"movb %%dl,%2" \
    6.26 -	:"=&d" (__lr) \
    6.27 -	:"m" (*(addr)), \
    6.28 -	 "m" (*((addr)+6)), \
    6.29 -	 "0" (limit) \
    6.30 -        ); } while(0)
    6.31 -
    6.32 -#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
    6.33 -#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 )
    6.34 -
    6.35 -static inline unsigned long _get_base(char * addr)
    6.36 -{
    6.37 -	unsigned long __base;
    6.38 -	__asm__("movb %3,%%dh\n\t"
    6.39 -		"movb %2,%%dl\n\t"
    6.40 -		"shll $16,%%edx\n\t"
    6.41 -		"movw %1,%%dx"
    6.42 -		:"=&d" (__base)
    6.43 -		:"m" (*((addr)+2)),
    6.44 -		 "m" (*((addr)+4)),
    6.45 -		 "m" (*((addr)+7)));
    6.46 -	return __base;
    6.47 -}
    6.48 -
    6.49 -#define get_base(ldt) _get_base( ((char *)&(ldt)) )
    6.50 -
    6.51  /*
    6.52   * Load a segment. Fall back on loading the zero
    6.53   * segment if something goes wrong..
     7.1 --- a/xen/include/xeno/config.h	Mon Apr 14 16:31:59 2003 +0000
     7.2 +++ b/xen/include/xeno/config.h	Tue Apr 15 17:09:30 2003 +0000
     7.3 @@ -78,6 +78,10 @@
     7.4  /* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
     7.5  #define PERDOMAIN_VIRT_START  (DIRECTMAP_VIRT_END)
     7.6  #define PERDOMAIN_VIRT_END    (PERDOMAIN_VIRT_START + (4*1024*1024))
     7.7 +#define GDT_VIRT_START        (PERDOMAIN_VIRT_START)
     7.8 +#define GDT_VIRT_END          (GDT_VIRT_START + (64*1024))
     7.9 +#define LDT_VIRT_START        (GDT_VIRT_END)
    7.10 +#define LDT_VIRT_END          (LDT_VIRT_START + (64*1024))
    7.11  /* Penultimate 4MB of virtual address space used for domain page mappings. */
    7.12  #define MAPCACHE_VIRT_START   (PERDOMAIN_VIRT_END)
    7.13  #define MAPCACHE_VIRT_END     (MAPCACHE_VIRT_START + (4*1024*1024))
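Note: these constants give each domain two fixed 64kB windows at the bottom of its 4MB per-domain area. The LDT fault handler in traps.c and load_LDT() in sched.h both depend on the LDT window starting exactly 16 pages above PERDOMAIN_VIRT_START:

    PERDOMAIN_VIRT_START + 0x00000 .. + 0x0ffff   GDT window (16 pages)
    PERDOMAIN_VIRT_START + 0x10000 .. + 0x1ffff   LDT window (16 pages)
    PERDOMAIN_VIRT_START + 0x20000 .. + 4MB       remainder of the area
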
     8.1 --- a/xen/include/xeno/mm.h	Mon Apr 14 16:31:59 2003 +0000
     8.2 +++ b/xen/include/xeno/mm.h	Tue Apr 15 17:09:30 2003 +0000
     8.3 @@ -134,6 +134,8 @@ extern unsigned int free_pfns;
     8.4  extern unsigned long max_page;
     8.5  void init_frametable(unsigned long nr_pages);
     8.6  
     8.7 +int check_descriptor(unsigned long a, unsigned long b);
     8.8 +
     8.9  /*
    8.10   * The MPT (machine->physical mapping table) is an array of word-sized
    8.11   * values, indexed on machine frame number. It is expected that guest OSes
     9.1 --- a/xen/include/xeno/sched.h	Mon Apr 14 16:31:59 2003 +0000
     9.2 +++ b/xen/include/xeno/sched.h	Tue Apr 15 17:09:30 2003 +0000
     9.3 @@ -29,8 +29,8 @@ struct mm_struct {
     9.4       */
     9.5      l1_pgentry_t *perdomain_pt;
     9.6      pagetable_t  pagetable;
     9.7 -    /* Current LDT descriptor. */
     9.8 -    unsigned long ldt[2];
     9.9 +    /* Current LDT details. */
    9.10 +    unsigned long ldt_base, ldt_ents;
    9.11      /* Next entry is passed to LGDT on domain switch. */
    9.12      char gdt[6];
    9.13  };
    9.14 @@ -283,4 +283,26 @@ void cpu_idle(void);   /* Idle loop. */
    9.15  
    9.16  extern void update_process_times(int user);
    9.17  
    9.18 +#include <asm/desc.h>
    9.19 +static inline void load_LDT(void)
    9.20 +{
    9.21 +    unsigned int cpu;
    9.22 +    struct desc_struct *desc;
    9.23 +    unsigned long ents;
    9.24 +
    9.25 +    if ( (ents = current->mm.ldt_ents) == 0 )
    9.26 +    {
    9.27 +        __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );
    9.28 +    }
    9.29 +    else
    9.30 +    {
    9.31 +        cpu = smp_processor_id();
    9.32 +        desc = (struct desc_struct *)GET_GDT_ADDRESS(current) + __LDT(cpu);
    9.33 +        desc->a = ((LDT_VIRT_START&0xffff)<<16) | (ents*8-1);
    9.34 +        desc->b = (LDT_VIRT_START&(0xff<<24)) | 0x8200 | 
    9.35 +            ((LDT_VIRT_START&0xff0000)>>16);
    9.36 +        __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
    9.37 +    }
    9.38 +}
    9.39 +
    9.40  #endif
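Note: load_LDT() above hand-packs an LDT system descriptor into the per-domain GDT. Word 'a' takes base[15:0] in its top half and the byte-granular limit (ents*8-1) below; word 'b' takes base[31:24], then 0x8200 (P=1, system type 2 = LDT), then base[23:16]. A standalone round-trip check of that packing, using an illustrative base address in place of LDT_VIRT_START:

    /* Illustration only: pack and verify an LDT system descriptor
     * the way load_LDT() does.  The base value is an example. */
    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long base = 0xfc410000UL;  /* e.g. LDT_VIRT_START */
        unsigned long ents = 32;
        unsigned long a = ((base & 0xffff) << 16) | (ents * 8 - 1);
        unsigned long b = (base & (0xffUL << 24)) | 0x8200
                        | ((base & 0xff0000) >> 16);
        /* Recover the base to confirm the packing round-trips. */
        unsigned long back = (b & (0xffUL << 24)) | ((b & 0xff) << 16)
                           | (a >> 16);
        assert(back == base);
        printf("a=%08lx b=%08lx limit=%04lx\n", a, b, a & 0xffff);
        return 0;
    }
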
    10.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S	Mon Apr 14 16:31:59 2003 +0000
    10.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S	Tue Apr 15 17:09:30 2003 +0000
    10.3 @@ -133,6 +133,55 @@ 6:	pushl %ss;	\
    10.4  	movl $-8192, reg; \
    10.5  	andl %esp, reg
    10.6  
    10.7 +ENTRY(lcall7)
    10.8 +	pushfl			# We get a different stack layout with call
    10.9 +	pushl %eax		# gates, which has to be cleaned up later..
   10.10 +	SAVE_ALL
   10.11 +	movl EIP(%esp),%eax	# due to call gates, this is eflags, not eip..
   10.12 +	movl CS(%esp),%edx	# this is eip..
   10.13 +	movl EFLAGS(%esp),%ecx	# and this is cs..
   10.14 +	movl %eax,EFLAGS(%esp)	#
   10.15 +	andl $~(NT_MASK|TF_MASK|DF_MASK), %eax
   10.16 +	pushl %eax
   10.17 +	popfl
   10.18 +	movl %edx,EIP(%esp)	# Now we move them to their "normal" places
   10.19 +	movl %ecx,CS(%esp)	#
   10.20 +	movl %esp,%ebx
   10.21 +	pushl %ebx
   10.22 +	andl $-8192,%ebx	# GET_CURRENT
   10.23 +	movl exec_domain(%ebx),%edx	# Get the execution domain
   10.24 +	movl 4(%edx),%edx	# Get the lcall7 handler for the domain
   10.25 +	pushl $0x7
   10.26 +	call *%edx
   10.27 +	addl $4, %esp
   10.28 +	popl %eax
   10.29 +	jmp ret_from_sys_call
   10.30 +
   10.31 +ENTRY(lcall27)
   10.32 +	pushfl			# We get a different stack layout with call
   10.33 +	pushl %eax		# gates, which has to be cleaned up later..
   10.34 +	SAVE_ALL
   10.35 +	movl EIP(%esp),%eax	# due to call gates, this is eflags, not eip..
   10.36 +	movl CS(%esp),%edx	# this is eip..
   10.37 +	movl EFLAGS(%esp),%ecx	# and this is cs..
   10.38 +	movl %eax,EFLAGS(%esp)	#
   10.39 +	andl $~(NT_MASK|TF_MASK|DF_MASK), %eax
   10.40 +	pushl %eax
   10.41 +	popfl
   10.42 +	movl %edx,EIP(%esp)	# Now we move them to their "normal" places
   10.43 +	movl %ecx,CS(%esp)	#
   10.44 +	movl %esp,%ebx
   10.45 +	pushl %ebx
   10.46 +	andl $-8192,%ebx	# GET_CURRENT
   10.47 +	movl exec_domain(%ebx),%edx	# Get the execution domain
   10.48 +	movl 4(%edx),%edx	# Get the lcall7 handler for the domain
   10.49 +	pushl $0x27
   10.50 +	call *%edx
   10.51 +	addl $4, %esp
   10.52 +	popl %eax
   10.53 +	jmp ret_from_sys_call
   10.54 +
   10.55 +        
   10.56  ENTRY(ret_from_fork)
   10.57  	pushl %ebx
   10.58  	call SYMBOL_NAME(schedule_tail)
    11.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S	Mon Apr 14 16:31:59 2003 +0000
    11.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S	Tue Apr 15 17:09:30 2003 +0000
    11.3 @@ -57,11 +57,14 @@ ENTRY(stack_start)
    11.4  ENTRY(empty_zero_page)
    11.5  
    11.6  .org 0x2000
    11.7 +ENTRY(default_ldt)
    11.8 +
    11.9 +.org 0x3000
   11.10  ENTRY(cpu0_pte_quicklist)
   11.11  
   11.12 -.org 0x2400
   11.13 +.org 0x3400
   11.14  ENTRY(cpu0_pgd_quicklist)
   11.15          
   11.16 -.org 0x2800
   11.17 +.org 0x3800
   11.18  ENTRY(stext)
   11.19  ENTRY(_stext)
    12.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c	Mon Apr 14 16:31:59 2003 +0000
    12.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c	Tue Apr 15 17:09:30 2003 +0000
    12.3 @@ -9,18 +9,161 @@
    12.4  #include <linux/sched.h>
    12.5  #include <linux/string.h>
    12.6  #include <linux/mm.h>
    12.7 +#include <linux/smp.h>
    12.8 +#include <linux/smp_lock.h>
    12.9 +#include <linux/vmalloc.h>
   12.10 +
   12.11 +#include <asm/uaccess.h>
   12.12 +#include <asm/system.h>
   12.13 +#include <asm/ldt.h>
   12.14 +#include <asm/desc.h>
   12.15  
   12.16  /*
   12.17 - * XXX KAF (28/7/02): This stuff is only used for DOS emulation, and is
   12.18 - * the default way of finding current TCB in linuxthreads. Supporting
   12.19 - * table update svia the hypervisor is feasible, but a hassle: for now,
   12.20 - * recompiling linuxthreads is the most sensible option.
   12.21 - * 
   12.22 - * Oh, this may become an issue depending on what JVM we use for
   12.23 - * running the xeno-daemon.
   12.24 + * read_ldt() is not really atomic - this is not a problem since
   12.25 + * synchronization of reads and writes done to the LDT has to be
   12.26 + * assured by user-space anyway. Writes are atomic, to protect
   12.27 + * the security checks done on new descriptors.
   12.28   */
   12.29 +static int read_ldt(void * ptr, unsigned long bytecount)
   12.30 +{
   12.31 +    int err;
   12.32 +    unsigned long size;
   12.33 +    struct mm_struct * mm = current->mm;
   12.34 +
   12.35 +    err = 0;
   12.36 +    if (!mm->context.segments)
   12.37 +        goto out;
   12.38 +
   12.39 +    size = LDT_ENTRIES*LDT_ENTRY_SIZE;
   12.40 +    if (size > bytecount)
   12.41 +        size = bytecount;
   12.42 +
   12.43 +    err = size;
   12.44 +    if (copy_to_user(ptr, mm->context.segments, size))
   12.45 +        err = -EFAULT;
   12.46 + out:
   12.47 +    return err;
   12.48 +}
   12.49 +
   12.50 +static int read_default_ldt(void * ptr, unsigned long bytecount)
   12.51 +{
   12.52 +    int err;
   12.53 +    unsigned long size;
   12.54 +    void *address;
   12.55 +
   12.56 +    err = 0;
   12.57 +    address = &default_ldt[0];
   12.58 +    size = sizeof(struct desc_struct);
   12.59 +    if (size > bytecount)
   12.60 +        size = bytecount;
   12.61 +
   12.62 +    err = size;
   12.63 +    if (copy_to_user(ptr, address, size))
   12.64 +        err = -EFAULT;
   12.65 +
   12.66 +    return err;
   12.67 +}
   12.68 +
   12.69 +static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
   12.70 +{
   12.71 +    struct mm_struct * mm = current->mm;
   12.72 +    __u32 entry_1, entry_2, *lp;
   12.73 +    unsigned long phys_lp;
   12.74 +    int error;
   12.75 +    struct modify_ldt_ldt_s ldt_info;
   12.76 +
   12.77 +    error = -EINVAL;
   12.78 +    if (bytecount != sizeof(ldt_info))
   12.79 +        goto out;
   12.80 +    error = -EFAULT; 	
   12.81 +    if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
   12.82 +        goto out;
   12.83 +
   12.84 +    error = -EINVAL;
   12.85 +    if (ldt_info.entry_number >= LDT_ENTRIES)
   12.86 +        goto out;
   12.87 +    if (ldt_info.contents == 3) {
   12.88 +        if (oldmode)
   12.89 +            goto out;
   12.90 +        if (ldt_info.seg_not_present == 0)
   12.91 +            goto out;
   12.92 +    }
   12.93 +
   12.94 +    down_write(&mm->mmap_sem);
   12.95 +    if (!mm->context.segments) {
   12.96 +        void * segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
   12.97 +        error = -ENOMEM;
   12.98 +        if (!segments)
   12.99 +            goto out_unlock;
  12.100 +        memset(segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
  12.101 +        make_pages_readonly(segments, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE);
  12.102 +        wmb();
  12.103 +        mm->context.segments = segments;
  12.104 +        mm->context.cpuvalid = 1UL << smp_processor_id();
  12.105 +        load_LDT(mm);
  12.106 +        flush_page_update_queue();
  12.107 +    }
  12.108 +
  12.109 +    lp = (__u32 *)((ldt_info.entry_number<<3) + (char *)mm->context.segments);
  12.110 +    phys_lp = arbitrary_virt_to_phys(lp);
  12.111 +
  12.112 +    /* Allow LDTs to be cleared by the user. */
  12.113 +    if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
  12.114 +        if (oldmode ||
  12.115 +            (ldt_info.contents == 0		&&
  12.116 +             ldt_info.read_exec_only == 1	&&
  12.117 +             ldt_info.seg_32bit == 0		&&
  12.118 +             ldt_info.limit_in_pages == 0	&&
  12.119 +             ldt_info.seg_not_present == 1	&&
  12.120 +             ldt_info.useable == 0 )) {
  12.121 +            entry_1 = 0;
  12.122 +            entry_2 = 0;
  12.123 +            goto install;
  12.124 +        }
  12.125 +    }
  12.126 +
  12.127 +    entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
  12.128 +        (ldt_info.limit & 0x0ffff);
  12.129 +    entry_2 = (ldt_info.base_addr & 0xff000000) |
  12.130 +        ((ldt_info.base_addr & 0x00ff0000) >> 16) |
  12.131 +        (ldt_info.limit & 0xf0000) |
  12.132 +        ((ldt_info.read_exec_only ^ 1) << 9) |
  12.133 +        (ldt_info.contents << 10) |
  12.134 +        ((ldt_info.seg_not_present ^ 1) << 15) |
  12.135 +        (ldt_info.seg_32bit << 22) |
  12.136 +        (ldt_info.limit_in_pages << 23) |
  12.137 +        0x7000;
  12.138 +    if (!oldmode)
  12.139 +        entry_2 |= (ldt_info.useable << 20);
  12.140 +
  12.141 +    /* Install the new entry ...  */
  12.142 + install:
  12.143 +    HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2);
  12.144 +    error = 0;
  12.145 +
  12.146 + out_unlock:
  12.147 +    up_write(&mm->mmap_sem);
  12.148 + out:
  12.149 +    return error;
  12.150 +}
  12.151  
  12.152  asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
  12.153  {
  12.154 -    return -ENOSYS;
  12.155 +    int ret = -ENOSYS;
  12.156 +
  12.157 +    switch (func) {
  12.158 +    case 0:
  12.159 +        ret = read_ldt(ptr, bytecount);
  12.160 +        break;
  12.161 +    case 1:
  12.162 +        ret = write_ldt(ptr, bytecount, 1);
  12.163 +        break;
  12.164 +    case 2:
  12.165 +        ret = read_default_ldt(ptr, bytecount);
  12.166 +        break;
  12.167 +    case 0x11:
  12.168 +        ret = write_ldt(ptr, bytecount, 0);
  12.169 +        break;
  12.170 +    }
  12.171 +    return ret;
  12.172  }
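Note: the func values demuxed above follow the standard Linux modify_ldt interface: 0 reads the LDT, 2 reads the default LDT, 1 writes in old mode and 0x11 in new mode. A hedged userspace sketch exercising the new-mode write path via the raw syscall; the request struct is declared locally (matching the i386 layout of struct modify_ldt_ldt_s) to keep the sketch self-contained:

    /* Illustration only: install one LDT data segment from userspace,
     * driving the write_ldt() path above (i386, 2.4-era layout). */
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    struct ldt_entry_req {                /* = modify_ldt_ldt_s */
        unsigned int  entry_number;
        unsigned long base_addr;
        unsigned int  limit;
        unsigned int  seg_32bit:1;
        unsigned int  contents:2;
        unsigned int  read_exec_only:1;
        unsigned int  limit_in_pages:1;
        unsigned int  seg_not_present:1;
        unsigned int  useable:1;
    };

    int main(void)
    {
        static char buf[4096];
        struct ldt_entry_req req = {
            .entry_number = 0,
            .base_addr    = (unsigned long)buf,
            .limit        = sizeof(buf) - 1,
            .seg_32bit    = 1,
            .contents     = 0,            /* data segment */
            .useable      = 1,
        };
        if (syscall(SYS_modify_ldt, 0x11, &req, sizeof(req)) != 0)
            perror("modify_ldt");
        return 0;
    }
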
    13.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c	Mon Apr 14 16:31:59 2003 +0000
    13.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c	Tue Apr 15 17:09:30 2003 +0000
    13.3 @@ -140,6 +140,8 @@ void release_segments(struct mm_struct *
    13.4      if (ldt) {
    13.5          mm->context.segments = NULL;
    13.6          clear_LDT();
    13.7 +        make_pages_writeable(ldt, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE);
    13.8 +        flush_page_update_queue();
    13.9          vfree(ldt);
   13.10      }
   13.11  }
   13.12 @@ -225,10 +227,15 @@ void copy_segments(struct task_struct *p
   13.13           * Completely new LDT, we initialize it from the parent:
   13.14           */
   13.15          ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
   13.16 -        if (!ldt)
   13.17 +        if ( ldt == NULL )
   13.18 +        {
   13.19              printk(KERN_WARNING "ldt allocation failed\n");
   13.20 +        }
   13.21          else
   13.22 +        {
   13.23              memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
   13.24 +            make_pages_readonly(ldt, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE);
   13.25 +        }
   13.26      }
   13.27      new_mm->context.segments = ldt;
   13.28      new_mm->context.cpuvalid = ~0UL;	/* valid on all CPU's - they can't have stale data */
   13.29 @@ -335,6 +342,10 @@ void __switch_to(struct task_struct *pre
   13.30      struct thread_struct *prev = &prev_p->thread,
   13.31          *next = &next_p->thread;
   13.32  
   13.33 +    __cli();
   13.34 +
   13.35 +    MULTICALL_flush_page_update_queue();
   13.36 +
   13.37      /*
   13.38       * This is basically 'unlazy_fpu', except that we queue a multicall to 
   13.39       * indicate FPU task switch, rather than synchronously trapping to Xen.
   13.40 @@ -356,7 +367,7 @@ void __switch_to(struct task_struct *pre
   13.41  
   13.42      /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */
   13.43      execute_multicall_list();
   13.44 -    sti(); /* matches 'cli' in switch_mm() */
   13.45 +    __sti();
   13.46  
   13.47      /*
   13.48       * Save away %fs and %gs. No need to save %es and %ds, as
    14.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c	Mon Apr 14 16:31:59 2003 +0000
    14.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c	Tue Apr 15 17:09:30 2003 +0000
    14.3 @@ -968,6 +968,9 @@ void __init cpu_init (void)
    14.4  
    14.5      HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0);
    14.6  
    14.7 +    load_LDT(&init_mm);
    14.8 +    flush_page_update_queue();
    14.9 +
   14.10      /* Force FPU initialization. */
   14.11      current->flags &= ~PF_USEDFPU;
   14.12      current->used_math = 0;
    15.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c	Mon Apr 14 16:31:59 2003 +0000
    15.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c	Tue Apr 15 17:09:30 2003 +0000
    15.3 @@ -42,6 +42,8 @@
    15.4  #include <linux/module.h>
    15.5  
    15.6  asmlinkage int system_call(void);
    15.7 +asmlinkage void lcall7(void);
    15.8 +asmlinkage void lcall27(void);
    15.9  
   15.10  asmlinkage void divide_error(void);
   15.11  asmlinkage void debug(void);
   15.12 @@ -530,6 +532,26 @@ asmlinkage void math_state_restore(struc
   15.13  	current->flags |= PF_USEDFPU;	/* So we fnsave on switch_to() */
   15.14  }
   15.15  
   15.16 +
   15.17 +#define _set_gate(gate_addr,type,dpl,addr) \
   15.18 +do { \
   15.19 +  int __d0, __d1; \
   15.20 +  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
   15.21 +	"movw %4,%%dx\n\t" \
   15.22 +	"movl %%eax,%0\n\t" \
   15.23 +	"movl %%edx,%1" \
   15.24 +	:"=m" (*((long *) (gate_addr))), \
   15.25 +	 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
   15.26 +	:"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
   15.27 +	 "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \
   15.28 +} while (0)
   15.29 +
   15.30 +static void __init set_call_gate(void *a, void *addr)
   15.31 +{
   15.32 +	_set_gate(a,12,3,addr);
   15.33 +}
   15.34 +
   15.35 +
   15.36  static trap_info_t trap_table[] = {
   15.37      {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
   15.38      {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
   15.39 @@ -561,5 +583,15 @@ void __init trap_init(void)
   15.40  {
   15.41      HYPERVISOR_set_trap_table(trap_table);    
   15.42      HYPERVISOR_set_fast_trap(SYSCALL_VECTOR);
   15.43 +
   15.44 +    /*
   15.45 +     * The default LDT is a single-entry callgate to lcall7 for iBCS and a
   15.46 +     * callgate to lcall27 for Solaris/x86 binaries.
   15.47 +     */
   15.48 +    clear_page(&default_ldt[0]);
   15.49 +    set_call_gate(&default_ldt[0],lcall7);
   15.50 +    set_call_gate(&default_ldt[4],lcall27);
   15.51 +    __make_page_readonly(&default_ldt[0]);
   15.52 +
   15.53      cpu_init();
   15.54  }
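Note: the two gates installed here (descriptor type 12, a 32-bit call gate, at DPL 3) are reached with far calls through selectors 0x7 (LDT entry 0, TI=1, RPL=3) and 0x27 (LDT entry 4), which is why the entry.S handlers push $0x7 and $0x27. An illustrative fragment of how an iBCS binary enters through the first gate; the offset operand is ignored for call gates:

    /* Illustration only: enter the kernel through the lcall7 gate,
     * as an iBCS binary would (selector 0x7 = LDT entry 0). */
    static inline void enter_lcall7(void)
    {
        __asm__ __volatile__ ( "lcall $0x7, $0x0" : : : "memory" );
    }
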
    16.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c	Mon Apr 14 16:31:59 2003 +0000
    16.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c	Tue Apr 15 17:09:30 2003 +0000
    16.3 @@ -81,12 +81,6 @@ static void DEBUG_disallow_pt_read(unsig
    16.4  
    16.5  
    16.6  /*
    16.7 - * This is the current pagetable base pointer, which is updated
    16.8 - * on context switch.
    16.9 - */
   16.10 -unsigned long pt_baseptr;
   16.11 -
   16.12 -/*
   16.13   * MULTICALL_flush_page_update_queue:
   16.14   *   This is a version of the flush which queues as part of a multicall.
   16.15   */
   16.16 @@ -232,3 +226,13 @@ void queue_pte_unpin(unsigned long ptr)
   16.17      increment_index();
   16.18      spin_unlock_irqrestore(&update_lock, flags);
   16.19  }
   16.20 +
   16.21 +void queue_set_ldt(unsigned long ptr, unsigned long len)
   16.22 +{
   16.23 +    unsigned long flags;
   16.24 +    spin_lock_irqsave(&update_lock, flags);
   16.25 +    update_queue[idx].ptr  = PGREQ_EXTENDED_COMMAND | ptr;
   16.26 +    update_queue[idx].val  = PGEXT_SET_LDT | (len << PGEXT_CMD_SHIFT);
   16.27 +    increment_index();
   16.28 +    spin_unlock_irqrestore(&update_lock, flags);
   16.29 +}
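Note: as with the other queue_* helpers, queue_set_ldt() only appends to the per-CPU update queue; Xen acts on it at the next flush (explicit, or via the context-switch multicall). A minimal sketch of the usage pattern, assuming the declarations from asm-xeno/hypervisor.h are in scope:

    /* Sketch of the batched-update pattern used by load_LDT() and
     * cpu_init() elsewhere in this changeset. */
    static void ldt_update_example(void *segments, int count)
    {
        queue_set_ldt((unsigned long)segments, count);
        flush_page_update_queue();   /* hypercall happens here */
    }
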
    17.1 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/desc.h	Mon Apr 14 16:31:59 2003 +0000
    17.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/desc.h	Tue Apr 15 17:09:30 2003 +0000
    17.3 @@ -3,9 +3,37 @@
    17.4  
    17.5  #include <asm/ldt.h>
    17.6  
    17.7 -#define __LDT(_X)     (0)
    17.8 +#ifndef __ASSEMBLY__
    17.9 +
   17.10 +struct desc_struct {
   17.11 +	unsigned long a,b;
   17.12 +};
   17.13 +
   17.14 +struct Xgt_desc_struct {
   17.15 +	unsigned short size;
   17.16 +	unsigned long address __attribute__((packed));
   17.17 +};
   17.18 +
   17.19 +extern struct desc_struct default_ldt[];
   17.20  
   17.21 -#define clear_LDT()   ((void)0)
   17.22 -#define load_LDT(_mm) ((void)0)
   17.23 +static inline void clear_LDT(void)
   17.24 +{
   17.25 +    queue_set_ldt((unsigned long)&default_ldt[0], 5);
   17.26 +}
   17.27 +
   17.28 +static inline void load_LDT(struct mm_struct *mm)
   17.29 +{
   17.30 +    void *segments = mm->context.segments;
   17.31 +    int count = LDT_ENTRIES;
   17.32  
   17.33 -#endif
   17.34 +    if (!segments) {
   17.35 +        segments = &default_ldt[0];
   17.36 +        count = 5;
   17.37 +    }
   17.38 +         
   17.39 +    queue_set_ldt((unsigned long)segments, count);
   17.40 +}
   17.41 +
   17.42 +#endif /* __ASSEMBLY__ */
   17.43 +
   17.44 +#endif /* __ARCH_DESC_H__ */
    18.1 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h	Mon Apr 14 16:31:59 2003 +0000
    18.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h	Tue Apr 15 17:09:30 2003 +0000
    18.3 @@ -11,6 +11,7 @@
    18.4  
    18.5  #include <asm/hypervisor-ifs/hypervisor-if.h>
    18.6  #include <asm/ptrace.h>
    18.7 +#include <asm/page.h>
    18.8  
    18.9  /* arch/xeno/kernel/setup.c */
   18.10  union start_info_union
   18.11 @@ -42,7 +43,7 @@ void queue_pgd_pin(unsigned long ptr);
   18.12  void queue_pgd_unpin(unsigned long ptr);
   18.13  void queue_pte_pin(unsigned long ptr);
   18.14  void queue_pte_unpin(unsigned long ptr);
   18.15 -
   18.16 +void queue_set_ldt(unsigned long ptr, unsigned long bytes);
   18.17  #define PT_UPDATE_DEBUG 0
   18.18  
   18.19  #if PT_UPDATE_DEBUG > 0
   18.20 @@ -119,6 +120,10 @@ extern page_update_debug_t update_debug_
   18.21   printk("PTE UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p));    \
   18.22   queue_pte_unpin(_p);                                             \
   18.23  })   
   18.24 +#define queue_set_ldt(_p,_l) ({                                        \
   18.25 + printk("SETL LDT %s %d: %08lx %d\n", __FILE__, __LINE__, (_p), (_l)); \
   18.26 + queue_set_ldt((_p), (_l));                                            \
   18.27 +})   
   18.28  #endif
   18.29  
   18.30  void _flush_page_update_queue(void);
    19.1 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/mmu_context.h	Mon Apr 14 16:31:59 2003 +0000
    19.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/mmu_context.h	Tue Apr 15 17:09:30 2003 +0000
    19.3 @@ -34,7 +34,6 @@ extern pgd_t *cur_pgd;
    19.4  
    19.5  static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
    19.6  {
    19.7 -	cli(); /* protect flush_update_queue multicall */
    19.8  	if (prev != next) {
    19.9  		/* stop flush ipis for the previous mm */
   19.10  		clear_bit(cpu, &prev->cpu_vm_mask);
   19.11 @@ -52,7 +51,6 @@ static inline void switch_mm(struct mm_s
   19.12  		/* Re-load page tables */
   19.13  		cur_pgd = next->pgd;
   19.14  		queue_pt_switch(__pa(cur_pgd));
   19.15 -		MULTICALL_flush_page_update_queue();
   19.16  	}
   19.17  #ifdef CONFIG_SMP
   19.18  	else {
   19.19 @@ -74,8 +72,7 @@ static inline void switch_mm(struct mm_s
   19.20  #define activate_mm(prev, next) \
   19.21  do { \
   19.22  	switch_mm((prev),(next),NULL,smp_processor_id()); \
   19.23 -	execute_multicall_list(); \
   19.24 -	sti(); /* matches 'cli' in switch_mm() */ \
   19.25 +	flush_page_update_queue(); \
   19.26  } while ( 0 )
   19.27  
   19.28  #endif
    20.1 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/pgalloc.h	Mon Apr 14 16:31:59 2003 +0000
    20.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/pgalloc.h	Tue Apr 15 17:09:30 2003 +0000
    20.3 @@ -73,19 +73,13 @@ out_oom:
    20.4  static inline pgd_t *get_pgd_slow(void)
    20.5  {
    20.6  	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
    20.7 -	pgd_t *kpgd;
    20.8 -	pmd_t *kpmd;
    20.9 -	pte_t *kpte;
   20.10  
   20.11  	if (pgd) {
   20.12  		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
   20.13  		memcpy(pgd + USER_PTRS_PER_PGD,
   20.14  			init_mm.pgd + USER_PTRS_PER_PGD,
   20.15  			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
   20.16 -		kpgd = pgd_offset_k((unsigned long)pgd);
   20.17 -		kpmd = pmd_offset(kpgd, (unsigned long)pgd);
   20.18 -		kpte = pte_offset(kpmd, (unsigned long)pgd);
   20.19 -		queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)&~_PAGE_RW);
   20.20 +                __make_page_readonly(pgd);
   20.21  		queue_pgd_pin(__pa(pgd));
   20.22  
   20.23  	}
   20.24 @@ -117,14 +111,8 @@ static inline void free_pgd_slow(pgd_t *
   20.25  		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
   20.26  	kmem_cache_free(pae_pgd_cachep, pgd);
   20.27  #else
   20.28 -	pgd_t *kpgd;
   20.29 -	pmd_t *kpmd;
   20.30 -	pte_t *kpte;
   20.31  	queue_pgd_unpin(__pa(pgd));
   20.32 -	kpgd = pgd_offset_k((unsigned long)pgd);
   20.33 -	kpmd = pmd_offset(kpgd, (unsigned long)pgd);
   20.34 -	kpte = pte_offset(kpmd, (unsigned long)pgd);
   20.35 -	queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)|_PAGE_RW);
   20.36 +        __make_page_writeable(pgd);
   20.37  	free_page((unsigned long)pgd);
   20.38  #endif
   20.39  }
   20.40 @@ -141,18 +129,12 @@ static inline void free_pgd_fast(pgd_t *
   20.41  static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
   20.42  {
   20.43      pte_t *pte;
   20.44 -    pgd_t *kpgd;
   20.45 -    pmd_t *kpmd;
   20.46 -    pte_t *kpte;
   20.47  
   20.48      pte = (pte_t *) __get_free_page(GFP_KERNEL);
   20.49      if (pte)
   20.50      {
   20.51          clear_page(pte);
   20.52 -        kpgd = pgd_offset_k((unsigned long)pte);
   20.53 -        kpmd = pmd_offset(kpgd, (unsigned long)pte);
   20.54 -        kpte = pte_offset(kpmd, (unsigned long)pte);
   20.55 -        queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)&~_PAGE_RW);
   20.56 +        __make_page_readonly(pte);
   20.57          queue_pte_pin(__pa(pte));
   20.58      }
   20.59      return pte;
   20.60 @@ -172,14 +154,8 @@ static inline pte_t *pte_alloc_one_fast(
   20.61  
   20.62  static __inline__ void pte_free_slow(pte_t *pte)
   20.63  {
   20.64 -    pgd_t *kpgd;
   20.65 -    pmd_t *kpmd;
   20.66 -    pte_t *kpte;
   20.67      queue_pte_unpin(__pa(pte));
   20.68 -    kpgd = pgd_offset_k((unsigned long)pte);
   20.69 -    kpmd = pmd_offset(kpgd, (unsigned long)pte);
   20.70 -    kpte = pte_offset(kpmd, (unsigned long)pte);
   20.71 -    queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)|_PAGE_RW);
   20.72 +    __make_page_writeable(pte);
   20.73      free_page((unsigned long)pte);
   20.74  }
   20.75  
    21.1 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/pgtable.h	Mon Apr 14 16:31:59 2003 +0000
    21.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/pgtable.h	Tue Apr 15 17:09:30 2003 +0000
    21.3 @@ -99,6 +99,7 @@ extern void pgtable_cache_init(void);
    21.4  #ifndef __ASSEMBLY__
    21.5  /* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */
    21.6  #define VMALLOC_OFFSET	(4*1024*1024)
    21.7 +extern void * high_memory;
    21.8  #define VMALLOC_START	(((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
    21.9  						~(VMALLOC_OFFSET-1))
   21.10  #define VMALLOC_VMADDR(x) ((unsigned long)(x))
   21.11 @@ -291,6 +292,71 @@ static inline pte_t pte_modify(pte_t pte
   21.12  struct page;
   21.13  int change_page_attr(struct page *, int, pgprot_t prot);
   21.14  
   21.15 +static inline void __make_page_readonly(void *va)
   21.16 +{
   21.17 +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
   21.18 +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
   21.19 +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
   21.20 +    queue_l1_entry_update(__pa(pte), (*(unsigned long *)pte)&~_PAGE_RW);
   21.21 +}
   21.22 +
   21.23 +static inline void __make_page_writeable(void *va)
   21.24 +{
   21.25 +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
   21.26 +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
   21.27 +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
   21.28 +    queue_l1_entry_update(__pa(pte), (*(unsigned long *)pte)|_PAGE_RW);
   21.29 +}
   21.30 +
   21.31 +static inline void make_page_readonly(void *va)
   21.32 +{
   21.33 +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
   21.34 +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
   21.35 +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
   21.36 +    queue_l1_entry_update(__pa(pte), (*(unsigned long *)pte)&~_PAGE_RW);
   21.37 +    if ( (unsigned long)va >= VMALLOC_START )
   21.38 +        __make_page_readonly(machine_to_virt(
   21.39 +            *(unsigned long *)pte&PAGE_MASK));
   21.40 +}
   21.41 +
   21.42 +static inline void make_page_writeable(void *va)
   21.43 +{
   21.44 +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
   21.45 +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
   21.46 +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
   21.47 +    queue_l1_entry_update(__pa(pte), (*(unsigned long *)pte)|_PAGE_RW);
   21.48 +    if ( (unsigned long)va >= VMALLOC_START )
   21.49 +        __make_page_writeable(machine_to_virt(
   21.50 +            *(unsigned long *)pte&PAGE_MASK));
   21.51 +}
   21.52 +
   21.53 +static inline void make_pages_readonly(void *va, unsigned int nr)
   21.54 +{
   21.55 +    while ( nr-- != 0 )
   21.56 +    {
   21.57 +        make_page_readonly(va);
   21.58 +        va = (void *)((unsigned long)va + PAGE_SIZE);
   21.59 +    }
   21.60 +}
   21.61 +
   21.62 +static inline void make_pages_writeable(void *va, unsigned int nr)
   21.63 +{
   21.64 +    while ( nr-- != 0 )
   21.65 +    {
   21.66 +        make_page_writeable(va);
   21.67 +        va = (void *)((unsigned long)va + PAGE_SIZE);
   21.68 +    }
   21.69 +}
   21.70 +
   21.71 +static inline unsigned long arbitrary_virt_to_phys(void *va)
   21.72 +{
   21.73 +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
   21.74 +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
   21.75 +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
   21.76 +    unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK;
   21.77 +    return pa | ((unsigned long)va & (PAGE_SIZE-1));
   21.78 +}
   21.79 +
   21.80  #endif /* !__ASSEMBLY__ */
   21.81  
   21.82  /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
    22.1 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/processor.h	Mon Apr 14 16:31:59 2003 +0000
    22.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/processor.h	Tue Apr 15 17:09:30 2003 +0000
    22.3 @@ -381,7 +381,7 @@ struct thread_struct {
    22.4  	0,0,0,0, /* esp,ebp,esi,edi */				\
    22.5  	0,0,0,0,0,0, /* es,cs,ss */				\
    22.6  	0,0,0,0,0,0, /* ds,fs,gs */				\
    22.7 -	__LDT(0),0, /* ldt */					\
    22.8 +	0,0, /* ldt */						\
    22.9  	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */		\
   22.10  	{~0, } /* ioperm */					\
   22.11  }