ia64/xen-unstable

changeset 3630:d55d523078f7

bitkeeper revision 1.1159.212.77 (4202221693AFbvFZWeMHHIjQfbzTIQ)

More x86_64 progress. Many more gaps filled in. Next step is DOM0
construction.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author kaf24@scramble.cl.cam.ac.uk
date Thu Feb 03 13:07:34 2005 +0000 (2005-02-03)
parents 07d5c9548534
children 677cb76cff18
files linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c xen/arch/x86/boot/x86_64.S xen/arch/x86/domain.c xen/arch/x86/memory.c xen/arch/x86/setup.c xen/arch/x86/traps.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c xen/include/asm-x86/desc.h xen/include/asm-x86/mm.h xen/include/asm-x86/processor.h xen/include/asm-x86/x86_64/regs.h xen/include/public/arch-x86_32.h xen/include/public/arch-x86_64.h
line diff
     1.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c	Thu Feb 03 09:11:33 2005 +0000
     1.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c	Thu Feb 03 13:07:34 2005 +0000
     1.3 @@ -174,13 +174,11 @@ static int privcmd_ioctl(struct inode *i
     1.4  
     1.5      case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN:
     1.6      {
     1.7 -	unsigned long m2p_start_mfn = 
     1.8 -	    HYPERVISOR_shared_info->arch.mfn_to_pfn_start;
     1.9 -
    1.10 -	if( put_user( m2p_start_mfn, (unsigned long *) data ) )
    1.11 -	    ret = -EFAULT;
    1.12 -	else
    1.13 -	    ret = 0;
    1.14 +        unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
    1.15 +        pgd_t *pgd = pgd_offset_k(m2pv);
    1.16 +        pmd_t *pmd = pmd_offset(pgd, m2pv);
    1.17 +        unsigned long m2p_start_mfn = pmd_val(*pmd) >> PAGE_SHIFT;
    1.18 +        ret = put_user(m2p_start_mfn, (unsigned long *)data) ? -EFAULT: 0;
    1.19      }
    1.20      break;
    1.21  
     2.1 --- a/xen/arch/x86/boot/x86_64.S	Thu Feb 03 09:11:33 2005 +0000
     2.2 +++ b/xen/arch/x86/boot/x86_64.S	Thu Feb 03 13:07:34 2005 +0000
     2.3 @@ -249,16 +249,11 @@ ENTRY(cpu0_stack)    # Initial stack is 
     2.4  ENTRY(stext)
     2.5  ENTRY(_stext)
     2.6  
     2.7 -.globl switch_to, do_iopl
     2.8 -switch_to:
     2.9 -do_iopl: 
    2.10 -.globl copy_from_user, copy_to_user, copy_user_generic, new_thread
    2.11 +.globl copy_from_user, copy_to_user, copy_user_generic
    2.12  copy_from_user: 
    2.13  copy_to_user:
    2.14  copy_user_generic:
    2.15 -new_thread:
    2.16  .globl __get_user_1, __get_user_4, __get_user_8
    2.17  __get_user_1:
    2.18  __get_user_4:
    2.19  __get_user_8:
    2.20 -        
     3.1 --- a/xen/arch/x86/domain.c	Thu Feb 03 09:11:33 2005 +0000
     3.2 +++ b/xen/arch/x86/domain.c	Thu Feb 03 13:07:34 2005 +0000
     3.3 @@ -261,9 +261,8 @@ static void continue_nonidle_task(struct
     3.4  void arch_do_createdomain(struct exec_domain *ed)
     3.5  {
     3.6      struct domain *d = ed->domain;
     3.7 -#ifdef ARCH_HAS_FAST_TRAP
     3.8 +
     3.9      SET_DEFAULT_FAST_TRAP(&ed->thread);
    3.10 -#endif
    3.11  
    3.12      if ( d->id == IDLE_DOMAIN_ID )
    3.13      {
    3.14 @@ -276,7 +275,6 @@ void arch_do_createdomain(struct exec_do
    3.15          d->shared_info = (void *)alloc_xenheap_page();
    3.16          memset(d->shared_info, 0, PAGE_SIZE);
    3.17          ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
    3.18 -        d->shared_info->arch.mfn_to_pfn_start = m2p_start_mfn;
    3.19          SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
    3.20          machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
    3.21                                 PAGE_SHIFT] = INVALID_P2M_ENTRY;
    3.22 @@ -453,10 +451,8 @@ int arch_final_setup_guestos(struct exec
    3.23             &c->trap_ctxt,
    3.24             sizeof(d->thread.traps));
    3.25  
    3.26 -#ifdef ARCH_HAS_FAST_TRAP
    3.27      if ( (rc = (int)set_fast_trap(d, c->fast_trap_idx)) != 0 )
    3.28          return rc;
    3.29 -#endif
    3.30  
    3.31      d->mm.ldt_base = c->ldt_base;
    3.32      d->mm.ldt_ents = c->ldt_ents;
    3.33 @@ -498,8 +494,6 @@ int arch_final_setup_guestos(struct exec
    3.34      return 0;
    3.35  }
    3.36  
    3.37 -#if defined(__i386__) /* XXX */
    3.38 -
    3.39  void new_thread(struct exec_domain *d,
    3.40                  unsigned long start_pc,
    3.41                  unsigned long start_stack,
    3.42 @@ -515,8 +509,8 @@ void new_thread(struct exec_domain *d,
    3.43       *          ESI = start_info
    3.44       *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
    3.45       */
    3.46 -    ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS;
    3.47 -    ec->cs = FLAT_RING1_CS;
    3.48 +    ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_GUESTOS_DS;
    3.49 +    ec->cs = FLAT_GUESTOS_CS;
    3.50      ec->eip = start_pc;
    3.51      ec->esp = start_stack;
    3.52      ec->esi = start_info;
    3.53 @@ -530,18 +524,19 @@ void new_thread(struct exec_domain *d,
    3.54   * This special macro can be used to load a debugging register
    3.55   */
    3.56  #define loaddebug(thread,register) \
    3.57 -		__asm__("movl %0,%%db" #register  \
    3.58 +		__asm__("mov %0,%%db" #register  \
    3.59  			: /* no output */ \
    3.60  			:"r" (thread->debugreg[register]))
    3.61  
    3.62 -
    3.63  void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p)
    3.64  {
    3.65      struct thread_struct *next = &next_p->thread;
    3.66      struct tss_struct *tss = init_tss + smp_processor_id();
    3.67      execution_context_t *stack_ec = get_execution_context();
    3.68      int i;
    3.69 +#ifdef CONFIG_VMX
    3.70      unsigned long vmx_domain = next_p->thread.arch_vmx.flags; 
    3.71 +#endif
    3.72  
    3.73      __cli();
    3.74  
    3.75 @@ -573,7 +568,9 @@ void switch_to(struct exec_domain *prev_
    3.76              loaddebug(next, 7);
    3.77          }
    3.78  
    3.79 -         if (vmx_domain) {
    3.80 +#ifdef CONFIG_VMX
    3.81 +        if ( vmx_domain )
    3.82 +        {
    3.83              /* Switch page tables. */
    3.84              write_ptbase(&next_p->mm);
    3.85   
    3.86 @@ -583,13 +580,16 @@ void switch_to(struct exec_domain *prev_
    3.87  
    3.88              __sti();
    3.89              return;
    3.90 -         }
    3.91 +        }
    3.92 +#endif
    3.93   
    3.94          SET_FAST_TRAP(&next_p->thread);
    3.95  
    3.96 +#ifdef __i386__
    3.97          /* Switch the guest OS ring-1 stack. */
    3.98          tss->esp1 = next->guestos_sp;
    3.99          tss->ss1  = next->guestos_ss;
   3.100 +#endif
   3.101  
   3.102          /* Switch page tables. */
   3.103          write_ptbase(&next_p->mm);
   3.104 @@ -632,8 +632,6 @@ long do_iopl(domid_t domain, unsigned in
   3.105      return 0;
   3.106  }
   3.107  
   3.108 -#endif
   3.109 -
   3.110  unsigned long hypercall_create_continuation(
   3.111      unsigned int op, unsigned int nr_args, ...)
   3.112  {
     4.1 --- a/xen/arch/x86/memory.c	Thu Feb 03 09:11:33 2005 +0000
     4.2 +++ b/xen/arch/x86/memory.c	Thu Feb 03 13:07:34 2005 +0000
     4.3 @@ -168,31 +168,10 @@ void __init init_frametable(void)
     4.4  
     4.5  void arch_init_memory(void)
     4.6  {
     4.7 -#ifdef __i386__ /* XXX */
     4.8 -    unsigned long i;
     4.9 -
    4.10 -    /*
    4.11 -     * We are rather picky about the layout of 'struct pfn_info'. The
    4.12 -     * count_info and domain fields must be adjacent, as we perform atomic
    4.13 -     * 64-bit operations on them. Also, just for sanity, we assert the size
    4.14 -     * of the structure here.
    4.15 -     */
    4.16 -    if ( (offsetof(struct pfn_info, u.inuse.domain) != 
    4.17 -          (offsetof(struct pfn_info, count_info) + sizeof(u32))) ||
    4.18 -         (sizeof(struct pfn_info) != 24) )
    4.19 -    {
    4.20 -        printk("Weird pfn_info layout (%ld,%ld,%d)\n",
    4.21 -               offsetof(struct pfn_info, count_info),
    4.22 -               offsetof(struct pfn_info, u.inuse.domain),
    4.23 -               sizeof(struct pfn_info));
    4.24 -        for ( ; ; ) ;
    4.25 -    }
    4.26 +    extern void subarch_init_memory(struct domain *);
    4.27  
    4.28      memset(percpu_info, 0, sizeof(percpu_info));
    4.29  
    4.30 -    /* Initialise to a magic of 0x55555555 so easier to spot bugs later. */
    4.31 -    memset(machine_to_phys_mapping, 0x55, 4<<20);
    4.32 -
    4.33      /*
    4.34       * Initialise our DOMID_XEN domain.
    4.35       * Any Xen-heap pages that we will allow to be mapped will have
    4.36 @@ -211,16 +190,7 @@ void arch_init_memory(void)
    4.37      atomic_set(&dom_io->refcnt, 1);
    4.38      dom_io->id = DOMID_IO;
    4.39  
    4.40 -    /* M2P table is mappable read-only by privileged domains. */
    4.41 -    for ( i = 0; i < 1024; i++ )
    4.42 -    {
    4.43 -        frame_table[m2p_start_mfn+i].count_info        = PGC_allocated | 1;
    4.44 -	/* gdt to make sure it's only mapped read-only by non-privileged
    4.45 -	   domains. */
    4.46 -        frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
    4.47 -        frame_table[m2p_start_mfn+i].u.inuse.domain    = dom_xen;
    4.48 -    }
    4.49 -#endif
    4.50 +    subarch_init_memory(dom_xen);
    4.51  }
    4.52  
    4.53  static void __invalidate_shadow_ldt(struct exec_domain *d)
     5.1 --- a/xen/arch/x86/setup.c	Thu Feb 03 09:11:33 2005 +0000
     5.2 +++ b/xen/arch/x86/setup.c	Thu Feb 03 13:07:34 2005 +0000
     5.3 @@ -459,6 +459,9 @@ static void __init start_of_day(void)
     5.4  #endif
     5.5  
     5.6      watchdog_on = 1;
     5.7 +#ifdef __x86_64__ /* x86_32 uses low mappings when building DOM0. */
     5.8 +    zap_low_mappings();
     5.9 +#endif
    5.10  }
    5.11  
    5.12  void __init __start_xen(multiboot_info_t *mbi)
     6.1 --- a/xen/arch/x86/traps.c	Thu Feb 03 09:11:33 2005 +0000
     6.2 +++ b/xen/arch/x86/traps.c	Thu Feb 03 13:07:34 2005 +0000
     6.3 @@ -329,26 +329,8 @@ asmlinkage int do_page_fault(struct xen_
     6.4  
     6.5      DEBUGGER_trap_fatal(TRAP_page_fault, regs);
     6.6  
     6.7 -#ifdef __i386__
     6.8 -    if ( addr >= PAGE_OFFSET )
     6.9 -    {
    6.10 -        unsigned long page;
    6.11 -        page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
    6.12 -        printk("*pde = %p\n", page);
    6.13 -        if ( page & _PAGE_PRESENT )
    6.14 -        {
    6.15 -            page &= PAGE_MASK;
    6.16 -            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
    6.17 -            printk(" *pte = %p\n", page);
    6.18 -        }
    6.19 -#ifdef MEMORY_GUARD
    6.20 -        if ( !(regs->error_code & 1) )
    6.21 -            printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
    6.22 -#endif
    6.23 -    }
    6.24 -#endif /* __i386__ */
    6.25 -
    6.26      show_registers(regs);
    6.27 +    show_page_walk(addr);
    6.28      panic("CPU%d FATAL PAGE FAULT\n"
    6.29            "[error_code=%04x]\n"
    6.30            "Faulting linear address might be %p\n",
    6.31 @@ -749,7 +731,6 @@ void __init trap_init(void)
    6.32      set_intr_gate(TRAP_deferred_nmi,&nmi);
    6.33  
    6.34  #if defined(__i386__)
    6.35 -    set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
    6.36      _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
    6.37  #elif defined(__x86_64__)
    6.38      _set_gate(idt_table+HYPERCALL_VECTOR, 14, 3, &hypercall);
     7.1 --- a/xen/arch/x86/x86_32/mm.c	Thu Feb 03 09:11:33 2005 +0000
     7.2 +++ b/xen/arch/x86/x86_32/mm.c	Thu Feb 03 13:07:34 2005 +0000
     7.3 @@ -27,8 +27,6 @@
     7.4  #include <asm/fixmap.h>
     7.5  #include <asm/domain_page.h>
     7.6  
     7.7 -unsigned long m2p_start_mfn;
     7.8 -
     7.9  /* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
    7.10  int map_pages(
    7.11      pagetable_t *pt,
    7.12 @@ -97,16 +95,16 @@ void __init paging_init(void)
    7.13      /* Allocate and map the machine-to-phys table. */
    7.14      if ( (pg = alloc_domheap_pages(NULL, 10)) == NULL )
    7.15          panic("Not enough memory to bootstrap Xen.\n");
    7.16 -    m2p_start_mfn = page_to_pfn(pg);
    7.17 -    idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
    7.18 +    idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] =
    7.19          mk_l2_pgentry(page_to_phys(pg) | __PAGE_HYPERVISOR | _PAGE_PSE);
    7.20 +    memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20);
    7.21  
    7.22      /* Xen 4MB mappings can all be GLOBAL. */
    7.23      if ( cpu_has_pge )
    7.24      {
    7.25          for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
    7.26          {
    7.27 -             l2e = l2_pgentry_val(idle_pg_table[v >> L2_PAGETABLE_SHIFT]);
    7.28 +             l2e = l2_pgentry_val(idle_pg_table[l2_table_offset(v)]);
    7.29               if ( l2e & _PAGE_PSE )
    7.30                   l2e |= _PAGE_GLOBAL;
    7.31               idle_pg_table[v >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(l2e);
    7.32 @@ -116,23 +114,22 @@ void __init paging_init(void)
    7.33      /* Create page table for ioremap(). */
    7.34      ioremap_pt = (void *)alloc_xenheap_page();
    7.35      clear_page(ioremap_pt);
    7.36 -    idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] = 
    7.37 +    idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] =
    7.38          mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);
    7.39  
    7.40      /* Create read-only mapping of MPT for guest-OS use. */
    7.41 -    idle_pg_table[RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
    7.42 +    idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] =
    7.43          mk_l2_pgentry(l2_pgentry_val(
    7.44 -            idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT]) & 
    7.45 -                      ~_PAGE_RW);
    7.46 +            idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]) & ~_PAGE_RW);
    7.47  
    7.48      /* Set up mapping cache for domain pages. */
    7.49      mapcache = (unsigned long *)alloc_xenheap_page();
    7.50      clear_page(mapcache);
    7.51 -    idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
    7.52 +    idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] =
    7.53          mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);
    7.54  
    7.55      /* Set up linear page table mapping. */
    7.56 -    idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
    7.57 +    idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] =
    7.58          mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
    7.59  }
    7.60  
    7.61 @@ -144,6 +141,39 @@ void __init zap_low_mappings(void)
    7.62      flush_tlb_all_pge();
    7.63  }
    7.64  
    7.65 +void subarch_init_memory(struct domain *dom_xen)
    7.66 +{
    7.67 +    unsigned long i, m2p_start_mfn;
    7.68 +
    7.69 +    /*
    7.70 +     * We are rather picky about the layout of 'struct pfn_info'. The
    7.71 +     * count_info and domain fields must be adjacent, as we perform atomic
    7.72 +     * 64-bit operations on them. Also, just for sanity, we assert the size
    7.73 +     * of the structure here.
    7.74 +     */
    7.75 +    if ( (offsetof(struct pfn_info, u.inuse.domain) != 
    7.76 +          (offsetof(struct pfn_info, count_info) + sizeof(u32))) ||
    7.77 +         (sizeof(struct pfn_info) != 24) )
    7.78 +    {
    7.79 +        printk("Weird pfn_info layout (%ld,%ld,%d)\n",
    7.80 +               offsetof(struct pfn_info, count_info),
    7.81 +               offsetof(struct pfn_info, u.inuse.domain),
    7.82 +               sizeof(struct pfn_info));
    7.83 +        for ( ; ; ) ;
    7.84 +    }
    7.85 +
    7.86 +    /* M2P table is mappable read-only by privileged domains. */
    7.87 +    m2p_start_mfn = l2_pgentry_to_pagenr(
    7.88 +        idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]);
    7.89 +    for ( i = 0; i < 1024; i++ )
    7.90 +    {
    7.91 +        frame_table[m2p_start_mfn+i].count_info        = PGC_allocated | 1;
    7.92 +	/* gdt to make sure it's only mapped read-only by non-privileged
    7.93 +	   domains. */
    7.94 +        frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
    7.95 +        frame_table[m2p_start_mfn+i].u.inuse.domain    = dom_xen;
    7.96 +    }
    7.97 +}
    7.98  
    7.99  /*
   7.100   * Allows shooting down of borrowed page-table use on specific CPUs.
     8.1 --- a/xen/arch/x86/x86_32/traps.c	Thu Feb 03 09:11:33 2005 +0000
     8.2 +++ b/xen/arch/x86/x86_32/traps.c	Thu Feb 03 13:07:34 2005 +0000
     8.3 @@ -117,6 +117,25 @@ void show_registers(struct xen_regs *reg
     8.4      show_stack((unsigned long *)&regs->esp);
     8.5  } 
     8.6  
     8.7 +void show_page_walk(unsigned long addr)
     8.8 +{
     8.9 +    unsigned long page;
    8.10 +
    8.11 +    if ( addr < PAGE_OFFSET )
    8.12 +        return;
    8.13 +
    8.14 +    printk("Pagetable walk from %p:\n", addr);
    8.15 +    
    8.16 +    page = l2_pgentry_val(idle_pg_table[l2_table_offset(addr)]);
    8.17 +    printk(" L2 = %p %s\n", page, (page & _PAGE_PSE) ? "(4MB)" : "");
    8.18 +    if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
    8.19 +        return;
    8.20 +
    8.21 +    page &= PAGE_MASK;
    8.22 +    page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
    8.23 +    printk("  L1 = %p\n", page);
    8.24 +}
    8.25 +
    8.26  #define DOUBLEFAULT_STACK_SIZE 1024
    8.27  static struct tss_struct doublefault_tss;
    8.28  static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    8.29 @@ -173,6 +192,8 @@ void __init doublefault_init(void)
    8.30      tss->bitmap = IOBMP_INVALID_OFFSET;
    8.31      _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
    8.32                       (unsigned long)tss, 235, 9);
    8.33 +
    8.34 +    set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
    8.35  }
    8.36  
    8.37  long set_fast_trap(struct exec_domain *p, int idx)
     9.1 --- a/xen/arch/x86/x86_64/entry.S	Thu Feb 03 09:11:33 2005 +0000
     9.2 +++ b/xen/arch/x86/x86_64/entry.S	Thu Feb 03 13:07:34 2005 +0000
     9.3 @@ -126,6 +126,10 @@ ENTRY(spurious_interrupt_bug)
     9.4          movl  $TRAP_spurious_int,4(%rsp)
     9.5  	jmp   error_code
     9.6  
     9.7 +ENTRY(double_fault)
     9.8 +        movl  $TRAP_double_fault,4(%rsp)
     9.9 +        jmp   error_code
    9.10 +
    9.11  ENTRY(nmi)
    9.12          iret
    9.13  
    9.14 @@ -140,7 +144,7 @@ ENTRY(exception_table)
    9.15          .quad SYMBOL_NAME(do_bounds)
    9.16          .quad SYMBOL_NAME(do_invalid_op)
    9.17          .quad SYMBOL_NAME(math_state_restore)
    9.18 -        .quad 0 # double fault
    9.19 +        .quad SYMBOL_NAME(do_double_fault)
    9.20          .quad SYMBOL_NAME(do_coprocessor_segment_overrun)
    9.21          .quad SYMBOL_NAME(do_invalid_TSS)
    9.22          .quad SYMBOL_NAME(do_segment_not_present)
    10.1 --- a/xen/arch/x86/x86_64/mm.c	Thu Feb 03 09:11:33 2005 +0000
    10.2 +++ b/xen/arch/x86/x86_64/mm.c	Thu Feb 03 13:07:34 2005 +0000
    10.3 @@ -27,8 +27,6 @@
    10.4  #include <asm/fixmap.h>
    10.5  #include <asm/domain_page.h>
    10.6  
    10.7 -unsigned long m2p_start_mfn; /* XXX Kill this (in 32-bit code also). */
    10.8 -
    10.9  void *safe_page_alloc(void)
   10.10  {
   10.11      extern int early_boot;
   10.12 @@ -117,34 +115,86 @@ void __set_fixmap(
   10.13  void __init paging_init(void)
   10.14  {
   10.15      void *newpt;
   10.16 +    unsigned long i, p, max;
   10.17  
   10.18 -    /* Allocate and map the machine-to-phys table. */
   10.19 -    /* XXX TODO XXX */
   10.20 +    /* Map all of physical memory. */
   10.21 +    max = (max_page + (1UL << L2_PAGETABLE_SHIFT) - 1UL) &
   10.22 +        ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
   10.23 +    map_pages(idle_pg_table, PAGE_OFFSET, 0, max, PAGE_HYPERVISOR);
   10.24  
   10.25 -    /* Create page table for ioremap(). */
   10.26 -    newpt = (void *)alloc_xenheap_page();
   10.27 -    clear_page(newpt);
   10.28 -    idle_pg_table[IOREMAP_VIRT_START >> L4_PAGETABLE_SHIFT] = 
   10.29 -        mk_l4_pgentry(__pa(newpt) | __PAGE_HYPERVISOR);
   10.30 +    /*
   10.31 +     * Allocate and map the machine-to-phys table.
   10.32 +     * This also ensures L3 is present for ioremap().
   10.33 +     */
   10.34 +    for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
   10.35 +    {
   10.36 +        p = alloc_boot_pages(1UL << L2_PAGETABLE_SHIFT,
   10.37 +                             1UL << L2_PAGETABLE_SHIFT);
   10.38 +        if ( p == 0 )
   10.39 +            panic("Not enough memory for m2p table\n");
   10.40 +        map_pages(idle_pg_table, RDWR_MPT_VIRT_START + i*8, p, 
   10.41 +                  1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR);
   10.42 +        memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55,
   10.43 +               1UL << L2_PAGETABLE_SHIFT);
   10.44 +    }
   10.45  
   10.46      /* Create read-only mapping of MPT for guest-OS use. */
   10.47      newpt = (void *)alloc_xenheap_page();
   10.48      clear_page(newpt);
   10.49 -    idle_pg_table[RO_MPT_VIRT_START >> L4_PAGETABLE_SHIFT] = 
   10.50 +    idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] =
   10.51          mk_l4_pgentry((__pa(newpt) | __PAGE_HYPERVISOR | _PAGE_USER) &
   10.52                        ~_PAGE_RW);
   10.53 -    /* XXX TODO: Copy appropriate L3 entries from RDWR_MPT_VIRT_START XXX */
   10.54 +    /* Copy the L3 mappings from the RDWR_MPT area. */
   10.55 +    p  = l4_pgentry_val(idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)]);
   10.56 +    p &= PAGE_MASK;
   10.57 +    p += l3_table_offset(RDWR_MPT_VIRT_START) * sizeof(l3_pgentry_t);
   10.58 +    newpt = (void *)((unsigned long)newpt +
   10.59 +                     (l3_table_offset(RO_MPT_VIRT_START) *
   10.60 +                      sizeof(l3_pgentry_t)));
   10.61 +    memcpy(newpt, __va(p),
   10.62 +           (RDWR_MPT_VIRT_END - RDWR_MPT_VIRT_START) >> L3_PAGETABLE_SHIFT);
   10.63  
   10.64      /* Set up linear page table mapping. */
   10.65 -    idle_pg_table[LINEAR_PT_VIRT_START >> L4_PAGETABLE_SHIFT] =
   10.66 +    idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
   10.67          mk_l4_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
   10.68  }
   10.69  
   10.70  void __init zap_low_mappings(void)
   10.71  {
   10.72      idle_pg_table[0] = mk_l4_pgentry(0);
   10.73 +    flush_tlb_all_pge();
   10.74  }
   10.75  
   10.76 +void subarch_init_memory(struct domain *dom_xen)
   10.77 +{
   10.78 +    unsigned long i, v, m2p_start_mfn;
   10.79 +    l3_pgentry_t l3e;
   10.80 +    l2_pgentry_t l2e;
   10.81 +
   10.82 +    /* M2P table is mappable read-only by privileged domains. */
   10.83 +    for ( v  = RDWR_MPT_VIRT_START; 
   10.84 +          v != RDWR_MPT_VIRT_END;
   10.85 +          v += 1 << L2_PAGETABLE_SHIFT )
   10.86 +    {
   10.87 +        l3e = l4_pgentry_to_l3(idle_pg_table[l4_table_offset(v)])[
   10.88 +            l3_table_offset(v)];
   10.89 +        if ( !(l3_pgentry_val(l3e) & _PAGE_PRESENT) )
   10.90 +            continue;
   10.91 +        l2e = l3_pgentry_to_l2(l3e)[l2_table_offset(v)];
   10.92 +        if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
   10.93 +            continue;
   10.94 +        m2p_start_mfn = l2_pgentry_to_pagenr(l2e);
   10.95 +
   10.96 +        for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   10.97 +        {
   10.98 +            frame_table[m2p_start_mfn+i].count_info        = PGC_allocated | 1;
   10.99 +            /* gdt to make sure it's only mapped read-only by non-privileged
  10.100 +               domains. */
  10.101 +            frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
  10.102 +            frame_table[m2p_start_mfn+i].u.inuse.domain    = dom_xen;
  10.103 +        }
  10.104 +    }
  10.105 +}
  10.106  
  10.107  /*
  10.108   * Allows shooting down of borrowed page-table use on specific CPUs.
  10.109 @@ -165,19 +215,10 @@ void synchronise_pagetables(unsigned lon
  10.110  
  10.111  long do_stack_switch(unsigned long ss, unsigned long esp)
  10.112  {
  10.113 -#if 0
  10.114 -    int nr = smp_processor_id();
  10.115 -    struct tss_struct *t = &init_tss[nr];
  10.116 -
  10.117 -    /* We need to do this check as we load and use SS on guest's behalf. */
  10.118 -    if ( (ss & 3) == 0 )
  10.119 +    if ( (ss & 3) != 3 )
  10.120          return -EPERM;
  10.121 -
  10.122      current->thread.guestos_ss = ss;
  10.123      current->thread.guestos_sp = esp;
  10.124 -    t->ss1  = ss;
  10.125 -    t->esp1 = esp;
  10.126 -#endif
  10.127      return 0;
  10.128  }
  10.129  
    11.1 --- a/xen/arch/x86/x86_64/traps.c	Thu Feb 03 09:11:33 2005 +0000
    11.2 +++ b/xen/arch/x86/x86_64/traps.c	Thu Feb 03 13:07:34 2005 +0000
    11.3 @@ -6,6 +6,7 @@
    11.4  #include <xen/errno.h>
    11.5  #include <xen/mm.h>
    11.6  #include <xen/irq.h>
    11.7 +#include <xen/console.h>
    11.8  
    11.9  static int kstack_depth_to_print = 8*20;
   11.10  
   11.11 @@ -97,8 +98,82 @@ void show_registers(struct xen_regs *reg
   11.12      show_stack((unsigned long *)regs->rsp);
   11.13  } 
   11.14  
   11.15 +void show_page_walk(unsigned long addr)
   11.16 +{
   11.17 +    unsigned long page = read_cr3();
   11.18 +    
   11.19 +    printk("Pagetable walk from %p:\n", addr);
   11.20 +
   11.21 +    page &= PAGE_MASK;
   11.22 +    page = ((unsigned long *) __va(page))[l4_table_offset(addr)];
   11.23 +    printk(" L4 = %p\n", page);
   11.24 +    if ( !(page & _PAGE_PRESENT) )
   11.25 +        return;
   11.26 +
   11.27 +    page &= PAGE_MASK;
   11.28 +    page = ((unsigned long *) __va(page))[l3_table_offset(addr)];
   11.29 +    printk("  L3 = %p\n", page);
   11.30 +    if ( !(page & _PAGE_PRESENT) )
   11.31 +        return;
   11.32 +
   11.33 +    page &= PAGE_MASK;
   11.34 +    page = ((unsigned long *) __va(page))[l2_table_offset(addr)];
   11.35 +    printk("   L2 = %p %s\n", page, (page & _PAGE_PSE) ? "(2MB)" : "");
   11.36 +    if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
   11.37 +        return;
   11.38 +
   11.39 +    page &= PAGE_MASK;
   11.40 +    page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
   11.41 +    printk("    L1 = %p\n", page);
   11.42 +}
   11.43 +
   11.44 +#define DOUBLEFAULT_STACK_SIZE 1024
   11.45 +static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
   11.46 +asmlinkage void double_fault(void);
   11.47 +
   11.48 +asmlinkage void do_double_fault(struct xen_regs *regs)
   11.49 +{
   11.50 +    /* Disable the NMI watchdog. It's useless now. */
   11.51 +    watchdog_on = 0;
   11.52 +
   11.53 +    /* Find information saved during fault and dump it to the console. */
   11.54 +    printk("************************************\n");
   11.55 +    printk("EIP:    %04lx:[<%p>]      \nEFLAGS: %p\n",
   11.56 +           0xffff & regs->cs, regs->rip, regs->eflags);
   11.57 +    printk("rax: %p   rbx: %p   rcx: %p   rdx: %p\n",
   11.58 +           regs->rax, regs->rbx, regs->rcx, regs->rdx);
   11.59 +    printk("rsi: %p   rdi: %p   rbp: %p   rsp: %p\n",
   11.60 +           regs->rsi, regs->rdi, regs->rbp, regs->rsp);
   11.61 +    printk("r8:  %p   r9:  %p   r10: %p   r11: %p\n",
   11.62 +           regs->r8,  regs->r9,  regs->r10, regs->r11);
   11.63 +    printk("r12: %p   r13: %p   r14: %p   r15: %p\n",
   11.64 +           regs->r12, regs->r13, regs->r14, regs->r15);
   11.65 +    printk("************************************\n");
   11.66 +    printk("CPU%d DOUBLE FAULT -- system shutdown\n",
   11.67 +           logical_smp_processor_id());
   11.68 +    printk("System needs manual reset.\n");
   11.69 +    printk("************************************\n");
   11.70 +
   11.71 +    /* Lock up the console to prevent spurious output from other CPUs. */
   11.72 +    console_force_lock();
   11.73 +
   11.74 +    /* Wait for manual reset. */
   11.75 +    for ( ; ; )
   11.76 +        __asm__ __volatile__ ( "hlt" );
   11.77 +}
   11.78 +
   11.79  void __init doublefault_init(void)
   11.80  {
   11.81 +    int i;
   11.82 +
   11.83 +    /* Initialise IST1 for each CPU. Note the handler is non-reentrant. */
   11.84 +    for ( i = 0; i < NR_CPUS; i++ )
   11.85 +        init_tss[i].ist[0] = (unsigned long)
   11.86 +            &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
   11.87 +
   11.88 +    /* Set interrupt gate for double faults, specifying IST1. */
   11.89 +    set_intr_gate(TRAP_double_fault, &double_fault);
   11.90 +    idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
   11.91  }
   11.92  
   11.93  void *decode_reg(struct xen_regs *regs, u8 b)
    12.1 --- a/xen/include/asm-x86/desc.h	Thu Feb 03 09:11:33 2005 +0000
    12.2 +++ b/xen/include/asm-x86/desc.h	Thu Feb 03 13:07:34 2005 +0000
    12.3 @@ -18,6 +18,7 @@
    12.4        (((_s)>>3) >  LAST_RESERVED_GDT_ENTRY) ||                            \
    12.5        ((_s)&4)) &&                                                         \
    12.6       (((_s)&3) == 1))
    12.7 +#define VALID_CODESEL(_s) ((_s) == FLAT_GUESTOS_CS || VALID_SEL(_s))
    12.8  
    12.9  /* These are bitmasks for the high 32 bits of a descriptor table entry. */
   12.10  #define _SEGMENT_TYPE    (15<< 8)
   12.11 @@ -42,8 +43,6 @@ struct desc_struct {
   12.12  #define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
   12.13  #define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
   12.14  
   12.15 -#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS64 || VALID_SEL(_s))
   12.16 -
   12.17  typedef struct {
   12.18      u64 a, b;
   12.19  } idt_entry_t;
   12.20 @@ -83,8 +82,6 @@ do {                                    
   12.21  #define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
   12.22  #define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
   12.23  
   12.24 -#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s))
   12.25 -
   12.26  typedef struct desc_struct idt_entry_t;
   12.27  
   12.28  #define _set_gate(gate_addr,type,dpl,addr) \
    13.1 --- a/xen/include/asm-x86/mm.h	Thu Feb 03 09:11:33 2005 +0000
    13.2 +++ b/xen/include/asm-x86/mm.h	Thu Feb 03 13:07:34 2005 +0000
    13.3 @@ -222,10 +222,7 @@ void synchronise_pagetables(unsigned lon
    13.4   */
    13.5  #undef  phys_to_machine_mapping
    13.6  
    13.7 -/* Don't call virt_to_phys on this: it isn't direct mapped.  Using
    13.8 -   m2p_start_mfn instead. */
    13.9  #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
   13.10 -extern unsigned long m2p_start_mfn;
   13.11  #define phys_to_machine_mapping ((unsigned long *)PERDOMAIN_VIRT_START)
   13.12  
   13.13  #define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn)
    14.1 --- a/xen/include/asm-x86/processor.h	Thu Feb 03 09:11:33 2005 +0000
    14.2 +++ b/xen/include/asm-x86/processor.h	Thu Feb 03 13:07:34 2005 +0000
    14.3 @@ -428,7 +428,7 @@ struct thread_struct {
    14.4      u8 *io_bitmap; /* Pointer to task's IO bitmap or NULL */
    14.5  
    14.6      /* Trap info. */
    14.7 -#ifdef __i386__
    14.8 +#ifdef ARCH_HAS_FAST_TRAP
    14.9      int                fast_trap_idx;
   14.10      struct desc_struct fast_trap_desc;
   14.11  #endif
   14.12 @@ -442,7 +442,7 @@ struct thread_struct {
   14.13  extern idt_entry_t idt_table[];
   14.14  extern idt_entry_t *idt_tables[];
   14.15  
   14.16 -#if defined(__i386__)
   14.17 +#ifdef ARCH_HAS_FAST_TRAP
   14.18  
   14.19  #define SET_DEFAULT_FAST_TRAP(_p) \
   14.20      (_p)->fast_trap_idx = 0x20;   \
   14.21 @@ -466,6 +466,13 @@ extern idt_entry_t *idt_tables[];
   14.22  
   14.23  long set_fast_trap(struct exec_domain *p, int idx);
   14.24  
   14.25 +#else
   14.26 +
   14.27 +#define SET_DEFAULT_FAST_TRAP(_p) ((void)0)
   14.28 +#define CLEAR_FAST_TRAP(_p)       ((void)0)
   14.29 +#define SET_FAST_TRAP(_p)         ((void)0)
   14.30 +#define set_fast_trap(_p, _i)     (0)
   14.31 +
   14.32  #endif
   14.33  
   14.34  #define INIT_THREAD { 0 }
   14.35 @@ -636,6 +643,7 @@ void show_guest_stack();
   14.36  void show_trace(unsigned long *esp);
   14.37  void show_stack(unsigned long *esp);
   14.38  void show_registers(struct xen_regs *regs);
   14.39 +void show_page_walk(unsigned long addr);
   14.40  asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs);
   14.41  
   14.42  #endif /* !__ASSEMBLY__ */
    15.1 --- a/xen/include/asm-x86/x86_64/regs.h	Thu Feb 03 09:11:33 2005 +0000
    15.2 +++ b/xen/include/asm-x86/x86_64/regs.h	Thu Feb 03 13:07:34 2005 +0000
    15.3 @@ -9,8 +9,8 @@ struct xen_regs
    15.4      u64 r14;
    15.5      u64 r13;
    15.6      u64 r12;
    15.7 -    u64 rbp;
    15.8 -    u64 rbx;
    15.9 +    union { u64 rbp; u32 ebp; } __attribute__ ((packed));
   15.10 +    union { u64 rbx; u32 ebx; } __attribute__ ((packed));
   15.11      /* NB. Above here is C callee-saves. */
   15.12      u64 r11;
   15.13      u64 r10;	
   15.14 @@ -25,8 +25,8 @@ struct xen_regs
   15.15      u32 entry_vector;
   15.16      union { u64 rip; u64 eip; } __attribute__ ((packed));
   15.17      u64 cs;
   15.18 -    u64 eflags;
   15.19 -    u64 rsp;
   15.20 +    union { u64 rflags; u64 eflags; } __attribute__ ((packed));
   15.21 +    union { u64 rsp; u64 esp; } __attribute__ ((packed));
   15.22      u64 ss;
   15.23  } __attribute__ ((packed));
   15.24  
    16.1 --- a/xen/include/public/arch-x86_32.h	Thu Feb 03 09:11:33 2005 +0000
    16.2 +++ b/xen/include/public/arch-x86_32.h	Thu Feb 03 13:07:34 2005 +0000
    16.3 @@ -137,9 +137,8 @@ typedef struct {
    16.4  } PACKED full_execution_context_t;
    16.5  
    16.6  typedef struct {
    16.7 -    u64 mfn_to_pfn_start;      /* MFN of start of m2p table */
    16.8 -    u64 pfn_to_mfn_frame_list; /* MFN of a table of MFNs that 
    16.9 -				  make up p2m table */
   16.10 +    /* MFN of a table of MFNs that make up p2m table */
   16.11 +    u64 pfn_to_mfn_frame_list;
   16.12  } PACKED arch_shared_info_t;
   16.13  
   16.14  #define ARCH_HAS_FAST_TRAP
    17.1 --- a/xen/include/public/arch-x86_64.h	Thu Feb 03 09:11:33 2005 +0000
    17.2 +++ b/xen/include/public/arch-x86_64.h	Thu Feb 03 13:07:34 2005 +0000
    17.3 @@ -44,15 +44,22 @@
    17.4  
    17.5  #define FLAT_RING3_CS32 0x0823  /* GDT index 260 */
    17.6  #define FLAT_RING3_CS64 0x082b  /* GDT index 261 */
    17.7 -#define FLAT_RING3_DS   0x0833  /* GDT index 262 */
    17.8 +#define FLAT_RING3_DS32 0x0833  /* GDT index 262 */
    17.9 +#define FLAT_RING3_DS64 0x0000
   17.10  
   17.11 -#define FLAT_GUESTOS_DS   FLAT_RING3_DS
   17.12 -#define FLAT_GUESTOS_CS   FLAT_RING3_CS64
   17.13 +#define FLAT_GUESTOS_DS64 FLAT_RING3_DS64
   17.14 +#define FLAT_GUESTOS_DS32 FLAT_RING3_DS32
   17.15 +#define FLAT_GUESTOS_DS   FLAT_GUESTOS_DS64
   17.16 +#define FLAT_GUESTOS_CS64 FLAT_RING3_CS64
   17.17  #define FLAT_GUESTOS_CS32 FLAT_RING3_CS32
   17.18 +#define FLAT_GUESTOS_CS   FLAT_GUESTOS_CS64
   17.19  
   17.20 -#define FLAT_USER_DS      FLAT_RING3_DS
   17.21 -#define FLAT_USER_CS      FLAT_RING3_CS64
   17.22 -#define FLAT_USER_CS32    FLAT_RING3_CS32
   17.23 +#define FLAT_USER_DS64 FLAT_RING3_DS64
   17.24 +#define FLAT_USER_DS32 FLAT_RING3_DS32
   17.25 +#define FLAT_USER_DS   FLAT_USER_DS64
   17.26 +#define FLAT_USER_CS64 FLAT_RING3_CS64
   17.27 +#define FLAT_USER_CS32 FLAT_RING3_CS32
   17.28 +#define FLAT_USER_CS   FLAT_USER_CS64
   17.29  
   17.30  /* And the trap vector is... */
   17.31  #define TRAP_INSTR "syscall"
   17.32 @@ -89,22 +96,27 @@ typedef struct
   17.33      unsigned long r14;
   17.34      unsigned long r13;
   17.35      unsigned long r12;
   17.36 -    unsigned long rbp;
   17.37 -    unsigned long rbx;
   17.38 +    union { unsigned long rbp, ebp; } PACKED;
   17.39 +    union { unsigned long rbx, ebx; } PACKED;
   17.40      unsigned long r11;
   17.41      unsigned long r10;
   17.42      unsigned long r9;
   17.43      unsigned long r8;
   17.44 -    unsigned long rax;
   17.45 -    unsigned long rcx;
   17.46 -    unsigned long rdx;
   17.47 -    unsigned long rsi;
   17.48 -    unsigned long rdi;
   17.49 -    unsigned long rip;
   17.50 +    union { unsigned long rax, eax; } PACKED;
   17.51 +    union { unsigned long rcx, ecx; } PACKED;
   17.52 +    union { unsigned long rdx, edx; } PACKED;
   17.53 +    union { unsigned long rsi, esi; } PACKED;
   17.54 +    union { unsigned long rdi, edi; } PACKED;
   17.55 +    unsigned long _unused;
   17.56 +    union { unsigned long rip, eip; } PACKED;
   17.57      unsigned long cs;
   17.58 -    unsigned long eflags;
   17.59 -    unsigned long rsp;
   17.60 +    union { unsigned long rflags, eflags; } PACKED;
   17.61 +    union { unsigned long rsp, esp; } PACKED;
   17.62      unsigned long ss;
   17.63 +    unsigned long es;
   17.64 +    unsigned long ds;
   17.65 +    unsigned long fs;
   17.66 +    unsigned long gs;
   17.67  } PACKED execution_context_t;
   17.68  
   17.69  typedef u64 tsc_timestamp_t; /* RDTSC timestamp */
   17.70 @@ -132,9 +144,8 @@ typedef struct {
   17.71  } PACKED full_execution_context_t;
   17.72  
   17.73  typedef struct {
   17.74 -    u64 mfn_to_pfn_start;      /* MFN of start of m2p table */
   17.75 -    u64 pfn_to_mfn_frame_list; /* MFN of a table of MFNs that 
   17.76 -				  make up p2m table */
   17.77 +    /* MFN of a table of MFNs that make up p2m table */
   17.78 +    u64 pfn_to_mfn_frame_list;
   17.79  } PACKED arch_shared_info_t;
   17.80  
   17.81  #endif /* !__ASSEMBLY__ */