ia64/xen-unstable

changeset 48:5e3054a6afc0

bitkeeper revision 1.7.1.10 (3dff74698s12BxyOGasH_9k2KXdErw)

Merge boulderdash.cl.cam.ac.uk:/usr/groups/xeno/BK/xeno
into boulderdash.cl.cam.ac.uk:/local/scratch/smh22/andy/xeno.bk
author smh22@boulderdash.cl.cam.ac.uk
date Tue Dec 17 19:00:57 2002 +0000 (2002-12-17)
parents 34208c741acc c89b11899064
children 8ce741992076
files xen-2.4.16/arch/i386/entry.S xen-2.4.16/arch/i386/traps.c xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
line diff
     1.1 --- a/xen-2.4.16/arch/i386/entry.S	Tue Dec 17 19:00:45 2002 +0000
     1.2 +++ b/xen-2.4.16/arch/i386/entry.S	Tue Dec 17 19:00:57 2002 +0000
     1.3 @@ -302,6 +302,9 @@ FAULT11:movl %eax,4(%esi)
     1.4          movl EFLAGS+4(%esp),%eax
     1.5  FAULT12:movl %eax,8(%esi)
     1.6          /* Rewrite our stack frame and return to ring 1. */
     1.7 +        /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
     1.8 +        andl $0xfffcbeff,%eax
     1.9 +        movl %eax,EFLAGS+4(%esp)
    1.10          movl %ds,OLDSS+4(%esp)
    1.11          movl %esi,OLDESP+4(%esp)
    1.12          movzwl %es:GTB_CS(%edx),%eax
    1.13 @@ -521,6 +524,8 @@ ENTRY(hypervisor_call_table)
    1.14          .long SYMBOL_NAME(kill_domain)
    1.15          .long SYMBOL_NAME(do_dom0_op)
    1.16          .long SYMBOL_NAME(do_network_op)
    1.17 +        .long SYMBOL_NAME(do_set_debugreg)
    1.18 +        .long SYMBOL_NAME(do_get_debugreg)
    1.19          .rept NR_syscalls-(.-hypervisor_call_table)/4
    1.20          .long SYMBOL_NAME(sys_ni_syscall)
    1.21  	.endr
     2.1 --- a/xen-2.4.16/arch/i386/traps.c	Tue Dec 17 19:00:45 2002 +0000
     2.2 +++ b/xen-2.4.16/arch/i386/traps.c	Tue Dec 17 19:00:57 2002 +0000
     2.3 @@ -273,7 +273,26 @@ asmlinkage void do_general_protection(st
     2.4      if (!(regs->xcs & 3) || (error_code & 1))
     2.5          goto gp_in_kernel;
     2.6  
     2.7 -    if ( (error_code & 2) )
     2.8 +    /*
     2.9 +     * Cunning trick to allow arbitrary "INT n" handling.
    2.10 +     * 
    2.11 +     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
    2.12 +     * instruction from trapping to the appropriate vector, when that might not 
    2.13 +     * be expected by Xen or the guest OS. For example, that entry might be for
    2.14 +     * a fault handler (unlike traps, faults don't increment EIP), or might
    2.15 +     * expect an error code on the stack (which a software trap never
    2.16 +     * provides), or might be a hardware interrupt handler that doesn't like
    2.17 +     * being called spuriously.  
    2.18 +     * 
    2.19 +     * Instead, a GPF occurs with the faulting IDT vector in the error code.
    2.20 +     * Bit 1 is set to indicate that an IDT entry caused the fault.
    2.21 +     * Bit 0 is clear to indicate that it's a software fault, not hardware.
    2.22 +     * 
    2.23 +     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is okay
    2.24 +     * because they can only be triggered by an explicit DPL-checked instruction.
    2.25 +     * The DPL specified by the guest OS for these vectors is NOT CHECKED!!
    2.26 +     */
    2.27 +    if ( (error_code & 3) == 2 )
    2.28      {
    2.29          /* This fault must be due to <INT n> instruction. */
    2.30          ti = current->thread.traps + (error_code>>3);
    2.31 @@ -385,53 +404,42 @@ asmlinkage void math_state_restore(struc
    2.32  }
    2.33  
    2.34  
    2.35 -/*
    2.36 - * Our handling of the processor debug registers is non-trivial.
    2.37 - * We do not clear them on entry and exit from the kernel. Therefore
    2.38 - * it is possible to get a watchpoint trap here from inside the kernel.
    2.39 - * However, the code in ./ptrace.c has ensured that the user can
    2.40 - * only set watchpoints on userspace addresses. Therefore the in-kernel
    2.41 - * watchpoint trap can only occur in code which is reading/writing
    2.42 - * from user space. Such code must not hold kernel locks (since it
    2.43 - * can equally take a page fault), therefore it is safe to call
    2.44 - * force_sig_info even though that claims and releases locks.
    2.45 - * 
    2.46 - * Code in ./signal.c ensures that the debug control register
    2.47 - * is restored before we deliver any signal, and therefore that
    2.48 - * user code runs with the correct debug control register even though
    2.49 - * we clear it here.
    2.50 - *
    2.51 - * Being careful here means that we don't have to be as careful in a
    2.52 - * lot of more complicated places (task switching can be a bit lazy
    2.53 - * about restoring all the debug state, and ptrace doesn't have to
    2.54 - * find every occurrence of the TF bit that could be saved away even
    2.55 - * by user code)
    2.56 - */
    2.57  asmlinkage void do_debug(struct pt_regs * regs, long error_code)
    2.58  {
    2.59      unsigned int condition;
    2.60      struct task_struct *tsk = current;
    2.61 +    struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
    2.62  
    2.63      __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
    2.64  
    2.65      /* Mask out spurious debug traps due to lazy DR7 setting */
    2.66 -    if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
    2.67 -        if (!tsk->thread.debugreg[7])
    2.68 -            goto clear_dr7;
    2.69 +    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
    2.70 +         (tsk->thread.debugreg[7] == 0) )
    2.71 +    {
    2.72 +        __asm__("movl %0,%%db7" : : "r" (0));
    2.73 +        return;
    2.74      }
    2.75  
    2.76 -    /* Save debug status register where ptrace can see it */
    2.77 +    if ( (regs->xcs & 3) == 0 )
    2.78 +    {
    2.79 +        /* Clear TF just for absolute sanity. */
    2.80 +        regs->eflags &= ~EF_TF;
    2.81 +        /*
    2.82 +         * Basically, we ignore watchpoints when they trigger in
    2.83 +         * the hypervisor. This may happen when a buffer is passed
    2.84 +         * to us which previously had a watchpoint set on it.
    2.85 +         * No need to bump EIP; the only faulting trap is an
    2.86 +         * instruction breakpoint, which can't happen to us.
    2.87 +         */
    2.88 +        return;
    2.89 +    }
    2.90 +
    2.91 +    /* Save debug status register where guest OS can peek at it */
    2.92      tsk->thread.debugreg[6] = condition;
    2.93  
    2.94 -    panic("trap up to OS here, pehaps\n");
    2.95 -
    2.96 -    /* Disable additional traps. They'll be re-enabled when
    2.97 -     * the signal is delivered.
    2.98 -     */
    2.99 - clear_dr7:
   2.100 -    __asm__("movl %0,%%db7"
   2.101 -            : /* no output */
   2.102 -            : "r" (0));
   2.103 +    gtb->flags = GTBF_TRAP_NOCODE;
   2.104 +    gtb->cs    = tsk->thread.traps[1].cs;
   2.105 +    gtb->eip   = tsk->thread.traps[1].address;
   2.106  }
   2.107  
   2.108  
   2.109 @@ -516,9 +524,9 @@ void __init trap_init(void)
   2.110      set_trap_gate(0,&divide_error);
   2.111      set_trap_gate(1,&debug);
   2.112      set_intr_gate(2,&nmi);
   2.113 -    set_system_gate(3,&int3);	/* int3-5 can be called from all */
   2.114 -    set_system_gate(4,&overflow);
   2.115 -    set_system_gate(5,&bounds);
   2.116 +    set_system_gate(3,&int3);     /* usable from all privilege levels */
   2.117 +    set_system_gate(4,&overflow); /* usable from all privilege levels */
   2.118 +    set_trap_gate(5,&bounds);
   2.119      set_trap_gate(6,&invalid_op);
   2.120      set_trap_gate(7,&device_not_available);
   2.121      set_trap_gate(8,&double_fault);
   2.122 @@ -534,27 +542,6 @@ void __init trap_init(void)
   2.123      set_trap_gate(18,&machine_check);
   2.124      set_trap_gate(19,&simd_coprocessor_error);
   2.125  
   2.126 -    /*
   2.127 -     * Cunning trick to allow arbitrary "INT n" handling.
   2.128 -     * 
   2.129 -     * 1. 3 <= N <= 5 is trivial, as these are intended to be explicit.
   2.130 -     * 
   2.131 -     * 2. All others, we set gate DPL == 0. Any use of "INT n" will thus
   2.132 -     *    cause a GPF with CS:EIP pointing at the faulting instruction.
   2.133 -     *    We can then peek at the instruction at check if it is of the
   2.134 -     *    form "0xCD <imm8>". If so, we fake out an exception to the
   2.135 -     *    guest OS. If the protected read page faults, we patch that up as
   2.136 -     *    a page fault to the guest OS.
   2.137 -     *    [NB. Of course we check the "soft DPL" to check that guest OS
   2.138 -     *     wants to handle a particular 'n'. If not, we pass the GPF up
   2.139 -     *     to the guest OS untouched.]
   2.140 -     * 
   2.141 -     * 3. For efficiency, we may want to allow direct traps by the guest
   2.142 -     *    OS for certain critical vectors (eg. 0x80 in Linux). These must
   2.143 -     *    therefore not be mapped by hardware interrupts, and so we'd need
   2.144 -     *    a static list of them, which we add to on demand.
   2.145 -     */
   2.146 -
   2.147      /* Only ring 1 can access monitor services. */
   2.148      _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,15,1,&hypervisor_call);
   2.149  
   2.150 @@ -594,3 +581,69 @@ long do_fpu_taskswitch(void)
   2.151      stts();
   2.152      return 0;
   2.153  }
   2.154 +
   2.155 +
   2.156 +long do_set_debugreg(int reg, unsigned long value)
   2.157 +{
   2.158 +    int i;
   2.159 +
   2.160 +    switch ( reg )
   2.161 +    {
   2.162 +    case 0: 
   2.163 +        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
   2.164 +        __asm__ ( "movl %0, %%db0" : : "r" (value) );
   2.165 +        break;
   2.166 +    case 1: 
   2.167 +        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
   2.168 +        __asm__ ( "movl %0, %%db1" : : "r" (value) );
   2.169 +        break;
   2.170 +    case 2: 
   2.171 +        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
   2.172 +        __asm__ ( "movl %0, %%db2" : : "r" (value) );
   2.173 +        break;
   2.174 +    case 3:
   2.175 +        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
   2.176 +        __asm__ ( "movl %0, %%db3" : : "r" (value) );
   2.177 +        break;
   2.178 +    case 6:
   2.179 +        /*
   2.180 +         * DR6: Bits 4-11,16-31 reserved (set to 1).
   2.181 +         *      Bit 12 reserved (set to 0).
   2.182 +         */
   2.183 +        value &= 0xffffefff; /* reserved bits => 0 */
   2.184 +        value |= 0xffff0ff0; /* reserved bits => 1 */
   2.185 +        __asm__ ( "movl %0, %%db6" : : "r" (value) );
   2.186 +        break;
   2.187 +    case 7:
   2.188 +        /*
   2.189 +         * DR7: Bit 10 reserved (set to 1).
   2.190 +         *      Bits 11-12,14-15 reserved (set to 0).
   2.191 +         * Privileged bits:
   2.192 +         *      GD (bit 13): must be 0.
   2.193 +         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be binary 10 (I/O breakpoint).
   2.194 +         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be binary 10 (undefined length).
   2.195 +         */
   2.196 +        /* DR7 == 0 => debugging disabled for this domain. */
   2.197 +        if ( value != 0 )
   2.198 +        {
   2.199 +            value &= 0xffff27ff; /* reserved bits => 0 */
   2.200 +            value |= 0x00000400; /* reserved bits => 1 */
   2.201 +            if ( (value & (1<<13)) != 0 ) return -EPERM;
   2.202 +            for ( i = 0; i < 16; i += 2 )
   2.203 +                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
   2.204 +        }
   2.205 +        __asm__ ( "movl %0, %%db7" : : "r" (value) );
   2.206 +        break;
   2.207 +    default:
   2.208 +        return -EINVAL;
   2.209 +    }
   2.210 +
   2.211 +    current->thread.debugreg[reg] = value;
   2.212 +    return 0;
   2.213 +}
   2.214 +
   2.215 +unsigned long do_get_debugreg(int reg)
   2.216 +{
   2.217 +    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
   2.218 +    return current->thread.debugreg[reg];
   2.219 +}
     3.1 --- a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h	Tue Dec 17 19:00:45 2002 +0000
     3.2 +++ b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h	Tue Dec 17 19:00:57 2002 +0000
     3.3 @@ -40,6 +40,8 @@ typedef struct
     3.4  #define __HYPERVISOR_exit            8
     3.5  #define __HYPERVISOR_dom0_op         9
     3.6  #define __HYPERVISOR_network_op     10
     3.7 +#define __HYPERVISOR_set_debugreg   11
     3.8 +#define __HYPERVISOR_get_debugreg   12
     3.9  
    3.10  #define TRAP_INSTR "int $0x82"
    3.11  
     4.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c	Tue Dec 17 19:00:45 2002 +0000
     4.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c	Tue Dec 17 19:00:57 2002 +0000
     4.3 @@ -187,6 +187,7 @@ void flush_thread(void)
     4.4      struct task_struct *tsk = current;
     4.5  
     4.6      memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
     4.7 +
     4.8      /*
     4.9       * Forget coprocessor state..
    4.10       */
    4.11 @@ -307,14 +308,6 @@ void dump_thread(struct pt_regs * regs, 
    4.12  }
    4.13  
    4.14  /*
    4.15 - * This special macro can be used to load a debugging register
    4.16 - */
    4.17 -#define loaddebug(thread,register) \
    4.18 -		__asm__("movl %0,%%db" #register  \
    4.19 -			: /* no output */ \
    4.20 -			:"r" (thread->debugreg[register]))
    4.21 -
    4.22 -/*
    4.23   *	switch_to(x,yn) should switch tasks from x to y.
    4.24   *
    4.25   * We fsave/fwait so that an exception goes off at the right time
    4.26 @@ -359,20 +352,19 @@ void __switch_to(struct task_struct *pre
    4.27      loadsegment(fs, next->fs);
    4.28      loadsegment(gs, next->gs);
    4.29  
    4.30 -#if 0
    4.31      /*
    4.32       * Now maybe reload the debug registers
    4.33       */
    4.34 -    if (next->debugreg[7]){
    4.35 -        loaddebug(next, 0);
    4.36 -        loaddebug(next, 1);
    4.37 -        loaddebug(next, 2);
    4.38 -        loaddebug(next, 3);
    4.39 +    if ( next->debugreg[7] != 0 )
    4.40 +    {
    4.41 +        HYPERVISOR_set_debugreg(0, next->debugreg[0]);
    4.42 +        HYPERVISOR_set_debugreg(1, next->debugreg[1]);
    4.43 +        HYPERVISOR_set_debugreg(2, next->debugreg[2]);
    4.44 +        HYPERVISOR_set_debugreg(3, next->debugreg[3]);
    4.45          /* no 4 and 5 */
    4.46 -        loaddebug(next, 6);
    4.47 -        loaddebug(next, 7);
    4.48 +        HYPERVISOR_set_debugreg(6, next->debugreg[6]);
    4.49 +        HYPERVISOR_set_debugreg(7, next->debugreg[7]);
    4.50      }
    4.51 -#endif
    4.52  }
    4.53  
    4.54  asmlinkage int sys_fork(struct pt_regs regs)
     5.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c	Tue Dec 17 19:00:45 2002 +0000
     5.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c	Tue Dec 17 19:00:57 2002 +0000
     5.3 @@ -693,6 +693,14 @@ int do_signal(struct pt_regs *regs, sigs
     5.4  			}
     5.5  		}
     5.6  
     5.7 +                /* Reenable any watchpoints before delivering the
     5.8 +                 * signal to user space. The processor register will
     5.9 +                 * have been cleared if the watchpoint triggered
    5.10 +                 * inside the kernel.
    5.11 +                 */
    5.12 +                if ( current->thread.debugreg[7] != 0 )
    5.13 +                    HYPERVISOR_set_debugreg(7, current->thread.debugreg[7]);
    5.14 +
    5.15  		/* Whee!  Actually deliver the signal.  */
    5.16  		handle_signal(signr, ka, &info, oldset, regs);
    5.17  		return 1;
     6.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c	Tue Dec 17 19:00:45 2002 +0000
     6.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c	Tue Dec 17 19:00:57 2002 +0000
     6.3 @@ -324,11 +324,60 @@ gp_in_kernel:
     6.4  
     6.5  asmlinkage void do_debug(struct pt_regs * regs, long error_code)
     6.6  {
     6.7 -    /*
     6.8 -     * We don't mess with breakpoints, so the only way this exception
     6.9 -     * type can occur is through single-step mode.
    6.10 +    unsigned int condition;
    6.11 +    struct task_struct *tsk = current;
    6.12 +    siginfo_t info;
    6.13 +
    6.14 +    condition = HYPERVISOR_get_debugreg(6);
    6.15 +
    6.16 +    /* Mask out spurious debug traps due to lazy DR7 setting */
    6.17 +    if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
    6.18 +        if (!tsk->thread.debugreg[7])
    6.19 +            goto clear_dr7;
    6.20 +    }
    6.21 +
    6.22 +    /* Save debug status register where ptrace can see it */
    6.23 +    tsk->thread.debugreg[6] = condition;
    6.24 +
    6.25 +    /* Mask out spurious TF errors due to lazy TF clearing */
    6.26 +    if (condition & DR_STEP) {
    6.27 +        /*
    6.28 +         * The TF error should be masked out only if the current
    6.29 +         * process is not traced and if the TRAP flag has been set
    6.30 +         * previously by a tracing process (condition detected by
    6.31 +         * the PT_DTRACE flag); remember that the i386 TRAP flag
    6.32 +         * can be modified by the process itself in user mode,
    6.33 +         * allowing programs to debug themselves without the ptrace()
    6.34 +         * interface.
    6.35 +         */
    6.36 +        if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
    6.37 +            goto clear_TF;
    6.38 +    }
    6.39 +
    6.40 +    /* Ok, finally something we can handle */
    6.41 +    tsk->thread.trap_no = 1;
    6.42 +    tsk->thread.error_code = error_code;
    6.43 +    info.si_signo = SIGTRAP;
    6.44 +    info.si_errno = 0;
    6.45 +    info.si_code = TRAP_BRKPT;
    6.46 +        
    6.47 +    /* If this is a kernel mode trap, save the user PC on entry to 
    6.48 +     * the kernel; that's what the debugger can make sense of.
    6.49       */
    6.50 +    info.si_addr = ((regs->xcs & 3) == 0) ? (void *)tsk->thread.eip : 
    6.51 +                                            (void *)regs->eip;
    6.52 +    force_sig_info(SIGTRAP, &info, tsk);
    6.53 +
    6.54 +    /* Disable additional traps. They'll be re-enabled when
    6.55 +     * the signal is delivered.
    6.56 +     */
    6.57 + clear_dr7:
    6.58 +    HYPERVISOR_set_debugreg(7, 0);
    6.59 +    return;
    6.60 +
    6.61 + clear_TF:
    6.62      regs->eflags &= ~TF_MASK;
    6.63 +    return;
    6.64  }
    6.65  
    6.66  
     7.1 --- a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h	Tue Dec 17 19:00:45 2002 +0000
     7.2 +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h	Tue Dec 17 19:00:57 2002 +0000
     7.3 @@ -163,4 +163,26 @@ static inline int HYPERVISOR_network_op(
     7.4      return ret;
     7.5  }
     7.6  
     7.7 +static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value)
     7.8 +{
     7.9 +    int ret;
    7.10 +    __asm__ __volatile__ (
    7.11 +        TRAP_INSTR
    7.12 +        : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg),
    7.13 +        "b" (reg), "c" (value) );
    7.14 +
    7.15 +    return ret;
    7.16 +}
    7.17 +
    7.18 +static inline unsigned long HYPERVISOR_get_debugreg(int reg)
    7.19 +{
    7.20 +    unsigned long ret;
    7.21 +    __asm__ __volatile__ (
    7.22 +        TRAP_INSTR
    7.23 +        : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg),
    7.24 +        "b" (reg) );
    7.25 +
    7.26 +    return ret;
    7.27 +}
    7.28 +
    7.29  #endif /* __HYPERVISOR_H__ */