ia64/xen-unstable

changeset 16412:8c305873f2b8

x86: Make IDT/GDT/LDT updates safe.

This involves either determining that the entry will not be
read/written while the update takes place, or atomically making the
entry 'present', or doing the entire write atomically, as appropriate.

This issue was raised, and the original patch provided, by Jan Beulich.

Signed-off-by: Keir Fraser <keir.fraser@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Nov 21 11:38:51 2007 +0000 (2007-11-21)
parents ec0bc82cebfd
children 81aa410fa662
files xen/arch/x86/mm.c xen/arch/x86/traps.c xen/arch/x86/x86_32/seg_fixup.c xen/include/asm-x86/desc.h xen/include/asm-x86/system.h
line diff
     1.1 --- a/xen/arch/x86/mm.c	Wed Nov 21 09:52:04 2007 +0000
     1.2 +++ b/xen/arch/x86/mm.c	Wed Nov 21 11:38:51 2007 +0000
     1.3 @@ -3007,7 +3007,8 @@ long set_gdt(struct vcpu *v,
     1.4          return -EINVAL;
     1.5  
     1.6      /* Check the pages in the new GDT. */
     1.7 -    for ( i = 0; i < nr_pages; i++ ) {
     1.8 +    for ( i = 0; i < nr_pages; i++ )
     1.9 +    {
    1.10          mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
    1.11          if ( !mfn_valid(mfn) ||
    1.12               !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
    1.13 @@ -3073,23 +3074,15 @@ long do_update_descriptor(u64 pa, u64 de
    1.14  
    1.15      *(u64 *)&d = desc;
    1.16  
    1.17 -    LOCK_BIGLOCK(dom);
    1.18 -
    1.19      mfn = gmfn_to_mfn(dom, gmfn);
    1.20      if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
    1.21           !mfn_valid(mfn) ||
    1.22           !check_descriptor(dom, &d) )
    1.23 -    {
    1.24 -        UNLOCK_BIGLOCK(dom);
    1.25          return -EINVAL;
    1.26 -    }
    1.27  
    1.28      page = mfn_to_page(mfn);
    1.29      if ( unlikely(!get_page(page, dom)) )
    1.30 -    {
    1.31 -        UNLOCK_BIGLOCK(dom);
    1.32          return -EINVAL;
    1.33 -    }
    1.34  
    1.35      /* Check if the given frame is in use in an unsafe context. */
    1.36      switch ( page->u.inuse.type_info & PGT_type_mask )
    1.37 @@ -3112,7 +3105,7 @@ long do_update_descriptor(u64 pa, u64 de
    1.38  
    1.39      /* All is good so make the update. */
    1.40      gdt_pent = map_domain_page(mfn);
    1.41 -    memcpy(&gdt_pent[offset], &d, 8);
    1.42 +    atomic_write64((uint64_t *)&gdt_pent[offset], *(uint64_t *)&d);
    1.43      unmap_domain_page(gdt_pent);
    1.44  
    1.45      put_page_type(page);
    1.46 @@ -3122,8 +3115,6 @@ long do_update_descriptor(u64 pa, u64 de
    1.47   out:
    1.48      put_page(page);
    1.49  
    1.50 -    UNLOCK_BIGLOCK(dom);
    1.51 -
    1.52      return ret;
    1.53  }
    1.54  
     2.1 --- a/xen/arch/x86/traps.c	Wed Nov 21 09:52:04 2007 +0000
     2.2 +++ b/xen/arch/x86/traps.c	Wed Nov 21 11:38:51 2007 +0000
     2.3 @@ -2583,7 +2583,10 @@ void set_system_gate(unsigned int n, voi
     2.4  
     2.5  void set_task_gate(unsigned int n, unsigned int sel)
     2.6  {
     2.7 +    idt_table[n].b = 0;
     2.8 +    wmb(); /* disable gate /then/ rewrite */
     2.9      idt_table[n].a = sel << 16;
    2.10 +    wmb(); /* rewrite /then/ enable gate */
    2.11      idt_table[n].b = 0x8500;
    2.12  }
    2.13  
     3.1 --- a/xen/arch/x86/x86_32/seg_fixup.c	Wed Nov 21 09:52:04 2007 +0000
     3.2 +++ b/xen/arch/x86/x86_32/seg_fixup.c	Wed Nov 21 11:38:51 2007 +0000
     3.3 @@ -153,7 +153,7 @@ static unsigned char twobyte_decode[256]
     3.4   *  @base  (OUT): Decoded linear base address.
     3.5   *  @limit (OUT): Decoded segment limit, in bytes. 0 == unlimited (4GB).
     3.6   */
     3.7 -int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
     3.8 +static int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
     3.9  {
    3.10      struct vcpu *d = current;
    3.11      unsigned long *table, a, b;
    3.12 @@ -204,7 +204,7 @@ int get_baselimit(u16 seg, unsigned long
    3.13  }
    3.14  
    3.15  /* Turn a segment+offset into a linear address. */
    3.16 -int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
    3.17 +static int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
    3.18  {
    3.19      unsigned long base, limit;
    3.20  
    3.21 @@ -216,10 +216,14 @@ int linearise_address(u16 seg, unsigned 
    3.22  
    3.23      *linear = base + off;
    3.24  
    3.25 +    /* Conservatively check 32 bytes from returned linear base. */
    3.26 +    if ( !access_ok(linear, 32) )
    3.27 +        return 0;
    3.28 +
    3.29      return 1;
    3.30  }
    3.31  
    3.32 -int fixup_seg(u16 seg, unsigned long offset)
    3.33 +static int fixup_seg(u16 seg, unsigned long offset)
    3.34  {
    3.35      struct vcpu *d = current;
    3.36      unsigned long *table, a, b, base, limit;
    3.37 @@ -303,9 +307,8 @@ int fixup_seg(u16 seg, unsigned long off
    3.38      a &= ~0x0ffff; a |= limit & 0x0ffff;
    3.39      b &= ~0xf0000; b |= limit & 0xf0000;
    3.40      b ^= _SEGMENT_EC; /* grows-up <-> grows-down */
    3.41 -    /* NB. These can't fault. Checked readable above; must also be writable. */
    3.42 -    table[2*idx+0] = a;
    3.43 -    table[2*idx+1] = b;
    3.44 +    /* NB. This can't fault. Checked readable above; must also be writable. */
    3.45 +    atomic_write64((uint64_t *)&table[2*idx], ((uint64_t)b<<32) | a);
    3.46      return 1;
    3.47  }
    3.48  
     4.1 --- a/xen/include/asm-x86/desc.h	Wed Nov 21 09:52:04 2007 +0000
     4.2 +++ b/xen/include/asm-x86/desc.h	Wed Nov 21 11:38:51 2007 +0000
     4.3 @@ -143,6 +143,11 @@ typedef struct {
     4.4  
     4.5  #define _set_gate(gate_addr,type,dpl,addr)               \
     4.6  do {                                                     \
     4.7 +    (gate_addr)->a = 0;                                  \
     4.8 +    wmb(); /* disable gate /then/ rewrite */             \
     4.9 +    (gate_addr)->b =                                     \
    4.10 +        ((unsigned long)(addr) >> 32);                   \
    4.11 +    wmb(); /* rewrite /then/ enable gate */              \
    4.12      (gate_addr)->a =                                     \
    4.13          (((unsigned long)(addr) & 0xFFFF0000UL) << 32) | \
    4.14          ((unsigned long)(dpl) << 45) |                   \
    4.15 @@ -150,49 +155,53 @@ do {                                    
    4.16          ((unsigned long)(addr) & 0xFFFFUL) |             \
    4.17          ((unsigned long)__HYPERVISOR_CS64 << 16) |       \
    4.18          (1UL << 47);                                     \
    4.19 -    (gate_addr)->b =                                     \
    4.20 -        ((unsigned long)(addr) >> 32);                   \
    4.21  } while (0)
    4.22  
    4.23  #define _set_tssldt_desc(desc,addr,limit,type)           \
    4.24  do {                                                     \
    4.25 +    (desc)[0].b = (desc)[1].b = 0;                       \
    4.26 +    wmb(); /* disable entry /then/ rewrite */            \
    4.27      (desc)[0].a =                                        \
    4.28          ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF);   \
    4.29 +    (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32);  \
    4.30 +    wmb(); /* rewrite /then/ enable entry */             \
    4.31      (desc)[0].b =                                        \
    4.32          ((u32)(addr) & 0xFF000000U) |                    \
    4.33          ((u32)(type) << 8) | 0x8000U |                   \
    4.34          (((u32)(addr) & 0x00FF0000U) >> 16);             \
    4.35 -    (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32);  \
    4.36 -    (desc)[1].b = 0;                                     \
    4.37  } while (0)
    4.38  
    4.39  #elif defined(__i386__)
    4.40  
    4.41  typedef struct desc_struct idt_entry_t;
    4.42  
    4.43 -#define _set_gate(gate_addr,type,dpl,addr) \
    4.44 -do { \
    4.45 -  int __d0, __d1; \
    4.46 -  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
    4.47 - "movw %4,%%dx\n\t" \
    4.48 - "movl %%eax,%0\n\t" \
    4.49 - "movl %%edx,%1" \
    4.50 - :"=m" (*((long *) (gate_addr))), \
    4.51 -  "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
    4.52 - :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
    4.53 -  "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
    4.54 +#define _set_gate(gate_addr,type,dpl,addr)               \
    4.55 +do {                                                     \
    4.56 +    (gate_addr)->b = 0;                                  \
    4.57 +    wmb(); /* disable gate /then/ rewrite */             \
    4.58 +    (gate_addr)->a =                                     \
    4.59 +        ((unsigned long)(addr) & 0xFFFFUL) |             \
    4.60 +        ((unsigned long)__HYPERVISOR_CS << 16);          \
    4.61 +    wmb(); /* rewrite /then/ enable gate */              \
    4.62 +    (gate_addr)->b =                                     \
    4.63 +        ((unsigned long)(addr) & 0xFFFF0000UL) |         \
    4.64 +        ((unsigned long)(dpl) << 13) |                   \
    4.65 +        ((unsigned long)(type) << 8) |                   \
    4.66 +        (1UL << 15);                                     \
    4.67  } while (0)
    4.68  
    4.69 -#define _set_tssldt_desc(n,addr,limit,type) \
    4.70 -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
    4.71 - "movw %%ax,2(%2)\n\t" \
    4.72 - "rorl $16,%%eax\n\t" \
    4.73 - "movb %%al,4(%2)\n\t" \
    4.74 - "movb %4,5(%2)\n\t" \
    4.75 - "movb $0,6(%2)\n\t" \
    4.76 - "movb %%ah,7(%2)\n\t" \
    4.77 - "rorl $16,%%eax" \
    4.78 - : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type|0x80))
    4.79 +#define _set_tssldt_desc(desc,addr,limit,type)           \
    4.80 +do {                                                     \
    4.81 +    (desc)->b = 0;                                       \
    4.82 +    wmb(); /* disable entry /then/ rewrite */            \
    4.83 +    (desc)->a =                                          \
    4.84 +        ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF);   \
    4.85 +    wmb(); /* rewrite /then/ enable entry */             \
    4.86 +    (desc)->b =                                          \
    4.87 +        ((u32)(addr) & 0xFF000000U) |                    \
    4.88 +        ((u32)(type) << 8) | 0x8000U |                   \
    4.89 +        (((u32)(addr) & 0x00FF0000U) >> 16);             \
    4.90 +} while (0)
    4.91  
    4.92  #endif
    4.93  
     5.1 --- a/xen/include/asm-x86/system.h	Wed Nov 21 09:52:04 2007 +0000
     5.2 +++ b/xen/include/asm-x86/system.h	Wed Nov 21 11:38:51 2007 +0000
     5.3 @@ -256,6 +256,17 @@ static always_inline unsigned long long 
     5.4  })
     5.5  #endif
     5.6  
     5.7 +static inline void atomic_write64(uint64_t *p, uint64_t v)
     5.8 +{
     5.9 +#ifdef __i386__
    5.10 +    uint64_t w = *p, x;
    5.11 +    while ( (x = __cmpxchg8b(p, w, v)) != w )
    5.12 +        w = x;
    5.13 +#else
    5.14 +    *p = v;
    5.15 +#endif
    5.16 +}
    5.17 +
    5.18  #if defined(__i386__)
    5.19  #define mb() 	__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
    5.20  #define rmb()	__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")