ia64/xen-unstable

changeset 2345:bd470dc06d31

bitkeeper revision 1.1159.1.104 (412cb2ee26F5kEIHPDh0Kj4pQScH6Q)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into labyrinth.cl.cam.ac.uk:/auto/groups/xeno/users/kaf24/xeno
author kaf24@labyrinth.cl.cam.ac.uk
date Wed Aug 25 15:40:30 2004 +0000 (2004-08-25)
parents 30bc4c5fe838 22afa9c3be28
children f2b75edc9ce1
files linux-2.4.27-xen-sparse/include/asm-xen/queues.h linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c linux-2.6.7-xen-sparse/drivers/xen/netback/common.h linux-2.6.7-xen-sparse/drivers/xen/netback/interface.c xen/arch/x86/domain.c xen/arch/x86/memory.c xen/arch/x86/setup.c xen/common/domain.c xen/common/grant_table.c xen/common/kernel.c xen/common/page_alloc.c xen/include/asm-x86/atomic.h xen/include/asm-x86/mm.h xen/include/asm-x86/smp.h xen/include/asm-x86/system.h xen/include/xen/grant_table.h xen/include/xen/sched.h
line diff
     1.1 --- a/linux-2.4.27-xen-sparse/include/asm-xen/queues.h	Tue Aug 24 22:32:56 2004 +0000
     1.2 +++ b/linux-2.4.27-xen-sparse/include/asm-xen/queues.h	Wed Aug 25 15:40:30 2004 +0000
     1.3 @@ -10,8 +10,11 @@
     1.4  
     1.5  #define DECLARE_TQUEUE(_name, _fn, _arg) \
     1.6      struct tq_struct _name = { LIST_HEAD_INIT((_name).list), 0, _fn, _arg }
     1.7 +#define DECLARE_WORK(_name, _fn, _arg) DECLARE_TQUEUE(_name, _fn, _arg)
     1.8  
     1.9 -#define DECLARE_WORK(_name, _fn, _arg) DECLARE_TQUEUE(_name, _fn, _arg)
    1.10 +#define work_struct tq_struct
    1.11 +#define INIT_WORK(_work, _fn, _arg) INIT_TQUEUE(_work, _fn, _arg)
    1.12 +
    1.13  #define schedule_work(_w) schedule_task(_w)
    1.14  
    1.15  #endif /* __QUEUES_H__ */
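
The reshuffled queues.h keeps the 2.4 tree source-compatible with drivers written against the 2.6 workqueue API: work_struct, INIT_WORK() and schedule_work() all resolve to their task-queue equivalents. A minimal usage sketch, with illustrative names, assuming the usual 2.4 INIT_TQUEUE()/schedule_task() semantics:

    #include <asm-xen/queues.h>

    static void teardown_handler(void *arg)
    {
        /* Runs later in process context via the 2.4 task queue. */
    }

    static struct work_struct teardown_work;      /* really a tq_struct */

    static void request_teardown(void *ctx)
    {
        INIT_WORK(&teardown_work, teardown_handler, ctx); /* INIT_TQUEUE  */
        schedule_work(&teardown_work);                    /* schedule_task */
    }
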
     2.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h	Tue Aug 24 22:32:56 2004 +0000
     2.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h	Wed Aug 25 15:40:30 2004 +0000
     2.3 @@ -60,19 +60,21 @@ typedef struct blkif_st {
     2.4      struct list_head blkdev_list;
     2.5      spinlock_t       blk_ring_lock;
     2.6      atomic_t         refcnt;
     2.7 +
     2.8 +    struct work_struct work;
     2.9  } blkif_t;
    2.10  
    2.11  void blkif_create(blkif_be_create_t *create);
    2.12  void blkif_destroy(blkif_be_destroy_t *destroy);
    2.13  void blkif_connect(blkif_be_connect_t *connect);
    2.14  int  blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
    2.15 -void __blkif_disconnect_complete(blkif_t *blkif);
    2.16 +void blkif_disconnect_complete(blkif_t *blkif);
    2.17  blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
    2.18  #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
    2.19  #define blkif_put(_b)                             \
    2.20      do {                                          \
    2.21          if ( atomic_dec_and_test(&(_b)->refcnt) ) \
    2.22 -            __blkif_disconnect_complete(_b);      \
    2.23 +            blkif_disconnect_complete(_b);        \
    2.24      } while (0)
    2.25  
    2.26  /* An entry in a list of xen_extents. */
     3.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c	Tue Aug 24 22:32:56 2004 +0000
     3.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c	Wed Aug 25 15:40:30 2004 +0000
     3.3 @@ -27,13 +27,14 @@ blkif_t *blkif_find_by_handle(domid_t do
     3.4      return blkif;
     3.5  }
     3.6  
     3.7 -void __blkif_disconnect_complete(blkif_t *blkif)
     3.8 +static void __blkif_disconnect_complete(void *arg)
     3.9  {
    3.10 +    blkif_t              *blkif = (blkif_t *)arg;
    3.11      ctrl_msg_t            cmsg;
    3.12      blkif_be_disconnect_t disc;
    3.13  
    3.14      /*
    3.15 -     * These can't be done in __blkif_disconnect() because at that point there
    3.16 +     * These can't be done in blkif_disconnect() because at that point there
    3.17       * may be outstanding requests at the disc whose asynchronous responses
    3.18       * must still be notified to the remote driver.
    3.19       */
    3.20 @@ -67,6 +68,12 @@ void __blkif_disconnect_complete(blkif_t
    3.21      ctrl_if_send_response(&cmsg);
    3.22  }
    3.23  
    3.24 +void blkif_disconnect_complete(blkif_t *blkif)
    3.25 +{
    3.26 +    INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
    3.27 +    schedule_work(&blkif->work);
    3.28 +}
    3.29 +
    3.30  void blkif_create(blkif_be_create_t *create)
    3.31  {
    3.32      domid_t       domid  = create->domid;
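
The point of the new blkif_disconnect_complete() wrapper: blkif_put() may drop the final reference from interrupt context, where building and sending the control-interface response is unsafe, so the real work is deferred to a work item. A hedged sketch of the general pattern, with an illustrative object type:

    /* Dropping the last reference may happen in IRQ context, so the
     * release path must not block; it queues the blocking part instead. */
    typedef struct obj { atomic_t refcnt; struct work_struct work; } obj_t;

    static void obj_release_deferred(void *arg)
    {
        obj_t *obj = (obj_t *)arg;
        /* ... may sleep here: send control messages, free resources ... */
    }

    static void obj_put(obj_t *obj)
    {
        if ( atomic_dec_and_test(&obj->refcnt) )  /* perhaps in IRQ context */
        {
            INIT_WORK(&obj->work, obj_release_deferred, (void *)obj);
            schedule_work(&obj->work);            /* safe in IRQ context */
        }
    }
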
     4.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/netback/common.h	Tue Aug 24 22:32:56 2004 +0000
     4.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/common.h	Wed Aug 25 15:40:30 2004 +0000
     4.3 @@ -70,19 +70,21 @@ typedef struct netif_st {
     4.4      spinlock_t       rx_lock, tx_lock;
     4.5      struct net_device *dev;
     4.6      struct net_device_stats stats;
     4.7 +
     4.8 +    struct work_struct work;
     4.9  } netif_t;
    4.10  
    4.11  void netif_create(netif_be_create_t *create);
    4.12  void netif_destroy(netif_be_destroy_t *destroy);
    4.13  void netif_connect(netif_be_connect_t *connect);
    4.14  int  netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id);
    4.15 -void __netif_disconnect_complete(netif_t *netif);
    4.16 +void netif_disconnect_complete(netif_t *netif);
    4.17  netif_t *netif_find_by_handle(domid_t domid, unsigned int handle);
    4.18  #define netif_get(_b) (atomic_inc(&(_b)->refcnt))
    4.19  #define netif_put(_b)                             \
    4.20      do {                                          \
    4.21          if ( atomic_dec_and_test(&(_b)->refcnt) ) \
    4.22 -            __netif_disconnect_complete(_b);      \
    4.23 +            netif_disconnect_complete(_b);        \
    4.24      } while (0)
    4.25  
    4.26  void netif_interface_init(void);
     5.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/netback/interface.c	Tue Aug 24 22:32:56 2004 +0000
     5.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/interface.c	Wed Aug 25 15:40:30 2004 +0000
     5.3 @@ -27,13 +27,14 @@ netif_t *netif_find_by_handle(domid_t do
     5.4      return netif;
     5.5  }
     5.6  
     5.7 -void __netif_disconnect_complete(netif_t *netif)
     5.8 +static void __netif_disconnect_complete(void *arg)
     5.9  {
    5.10 +    netif_t              *netif = (netif_t *)arg;
    5.11      ctrl_msg_t            cmsg;
    5.12      netif_be_disconnect_t disc;
    5.13  
    5.14      /*
    5.15 -     * These can't be done in __netif_disconnect() because at that point there
    5.16 +     * These can't be done in netif_disconnect() because at that point there
    5.17       * may be outstanding requests at the disc whose asynchronous responses
    5.18       * must still be notified to the remote driver.
    5.19       */
    5.20 @@ -70,6 +71,12 @@ void __netif_disconnect_complete(netif_t
    5.21      ctrl_if_send_response(&cmsg);
    5.22  }
    5.23  
    5.24 +void netif_disconnect_complete(netif_t *netif)
    5.25 +{
    5.26 +    INIT_WORK(&netif->work, __netif_disconnect_complete, (void *)netif);
    5.27 +    schedule_work(&netif->work);
    5.28 +}
    5.29 +
    5.30  void netif_create(netif_be_create_t *create)
    5.31  {
    5.32      int                err = 0;
     6.1 --- a/xen/arch/x86/domain.c	Tue Aug 24 22:32:56 2004 +0000
     6.2 +++ b/xen/arch/x86/domain.c	Wed Aug 25 15:40:30 2004 +0000
     6.3 @@ -668,9 +668,9 @@ int construct_dom0(struct domain *p,
     6.4            mfn++ )
     6.5      {
     6.6          page = &frame_table[mfn];
     6.7 -        page->u.inuse.domain        = p;
     6.8 +        page->u.inuse.domain     = p;
     6.9          page->u.inuse.type_info  = 0;
    6.10 -        page->u.inuse.count_info = PGC_allocated | 1;
    6.11 +        page->u.inuse.count_info = PGC_always_set | PGC_allocated | 1;
    6.12          list_add_tail(&page->list, &p->page_list);
    6.13          p->tot_pages++; p->max_pages++;
    6.14      }
     7.1 --- a/xen/arch/x86/memory.c	Tue Aug 24 22:32:56 2004 +0000
     7.2 +++ b/xen/arch/x86/memory.c	Wed Aug 25 15:40:30 2004 +0000
     7.3 @@ -153,6 +153,9 @@ void arch_init_memory(void)
     7.4      vm_assist_info[VMASST_TYPE_writable_pagetables].disable =
     7.5          ptwr_disable;
     7.6  
     7.7 +    for ( mfn = 0; mfn < max_page; mfn++ )
     7.8 +        frame_table[mfn].u.inuse.count_info |= PGC_always_set;
     7.9 +
    7.10      /* Initialise to a magic of 0x55555555 so easier to spot bugs later. */
    7.11      memset(machine_to_phys_mapping, 0x55, 4<<20);
    7.12  
    7.13 @@ -179,9 +182,9 @@ void arch_init_memory(void)
    7.14            mfn < virt_to_phys(&machine_to_phys_mapping[1<<20])>>PAGE_SHIFT;
    7.15            mfn++ )
    7.16      {
    7.17 -        frame_table[mfn].u.inuse.count_info = 1 | PGC_allocated;
    7.18 -        frame_table[mfn].u.inuse.type_info  = 1 | PGT_gdt_page; /* non-RW */
    7.19 -        frame_table[mfn].u.inuse.domain     = dom_xen;
    7.20 +        frame_table[mfn].u.inuse.count_info |= PGC_allocated | 1;
    7.21 +        frame_table[mfn].u.inuse.type_info   = PGT_gdt_page | 1; /* non-RW */
    7.22 +        frame_table[mfn].u.inuse.domain      = dom_xen;
    7.23      }
    7.24  }
    7.25  
    7.26 @@ -370,6 +373,7 @@ get_page_from_l1e(
    7.27  {
    7.28      unsigned long l1v = l1_pgentry_val(l1e);
    7.29      unsigned long pfn = l1_pgentry_to_pagenr(l1e);
    7.30 +    struct pfn_info *page = &frame_table[pfn];
    7.31      extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
    7.32  
    7.33      if ( !(l1v & _PAGE_PRESENT) )
    7.34 @@ -383,6 +387,8 @@ get_page_from_l1e(
    7.35  
    7.36      if ( unlikely(!pfn_is_ram(pfn)) )
    7.37      {
    7.38 +        /* SPECIAL CASE 1. Mapping an I/O page. */
    7.39 +
    7.40          /* Revert to caller privileges if FD == DOMID_IO. */
    7.41          if ( d == dom_io )
    7.42              d = current;
    7.43 @@ -397,17 +403,41 @@ get_page_from_l1e(
    7.44          return 0;
    7.45      }
    7.46  
    7.47 +    if ( unlikely(!get_page_from_pagenr(pfn, d)) )
    7.48 +    {
    7.49 +        /* SPECIAL CASE 2. Mapping a foreign page via a grant table. */
    7.50 +        
    7.51 +        int rc;
    7.52 +        struct domain *e;
    7.53 +        u32 count_info;
    7.54 +        /*
    7.55 +         * Yuk! Amazingly this is the simplest way to get a guaranteed atomic
    7.56 +         * snapshot of a 64-bit value on IA32. x86/64 solves this of course!
    7.57 +         * Basically it's a no-op CMPXCHG, to get us the current contents.
    7.58 +         * No need for LOCK prefix -- we know that count_info is never zero
    7.59 +         * because it contains PGC_always_set.
    7.60 +         */
    7.61 +        __asm__ __volatile__(
    7.62 +            "cmpxchg8b %2"
    7.63 +            : "=a" (e), "=d" (count_info),
    7.64 +              "=m" (*(volatile u64 *)(&page->u.inuse.domain))
    7.65 +            : "0" (0), "1" (0), "b" (0), "c" (0) );
    7.66 +        if ( unlikely((count_info & PGC_count_mask) == 0) ||
    7.67 +             unlikely(e == NULL) || unlikely(!get_domain(e)) )
    7.68 +             return 0;
    7.69 +        rc = gnttab_try_map(e, d, page, l1v & _PAGE_RW);
    7.70 +        put_domain(e);
    7.71 +        return rc;
    7.72 +    }
    7.73 +
    7.74      if ( l1v & _PAGE_RW )
    7.75      {
    7.76 -        if ( unlikely(!get_page_and_type_from_pagenr(
    7.77 -            pfn, PGT_writable_page, d)) )
    7.78 +        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
    7.79              return 0;
    7.80 -        set_bit(_PGC_tlb_flush_on_type_change, 
    7.81 -                &frame_table[pfn].u.inuse.count_info);
    7.82 -        return 1;
    7.83 +        set_bit(_PGC_tlb_flush_on_type_change, &page->u.inuse.count_info);
    7.84      }
    7.85  
    7.86 -    return get_page_from_pagenr(pfn, d);
    7.87 +    return 1;
    7.88  }
    7.89  
    7.90  
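
The CMPXCHG8B read deserves unpacking: EDX:EAX holds the comparand (zero) and ECX:EBX the replacement (zero); because count_info always contains PGC_always_set, the 64-bit {domain, count_info} word is never zero, so the compare always fails and the instruction simply deposits the current contents in EDX:EAX. A standalone sketch of the same trick, with an illustrative wrapper name:

    /* Hedged sketch: atomically snapshot an aligned 64-bit word on IA32.
     * The exchange can never succeed when *p is known non-zero, so this
     * is a pure (atomic) read, per the reasoning in the hunk above. */
    static inline void snapshot64(volatile unsigned long long *p,
                                  unsigned long *lo, unsigned long *hi)
    {
        __asm__ __volatile__(
            "cmpxchg8b %2"
            : "=a" (*lo), "=d" (*hi), "+m" (*p)
            : "0" (0), "1" (0), "b" (0), "c" (0) );
    }
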
    7.91 @@ -434,14 +464,33 @@ get_page_from_l2e(
    7.92  }
    7.93  
    7.94  
    7.95 -static void put_page_from_l1e(l1_pgentry_t l1e)
    7.96 +static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
    7.97  {
    7.98      struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
    7.99      unsigned long    l1v  = l1_pgentry_val(l1e);
   7.100 +    struct domain   *e = page->u.inuse.domain;
   7.101  
   7.102      if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) )
   7.103          return;
   7.104  
   7.105 +    if ( unlikely(e != d) )
   7.106 +    {
   7.107 +        /*
   7.108 +         * Unmap a foreign page that may have been mapped via a grant table.
   7.109 +         * Note that this can fail for a privileged domain that can map foreign
   7.110 +         * pages via MMUEXT_SET_FOREIGNDOM. Such domains can have some mappings
   7.111 +         * counted via a grant entry and some counted directly in the page
   7.112 +         * structure's reference count. Note that reference counts won't get
   7.113 +         * dangerously confused as long as we always try to decrement the
   7.114 +         * grant entry first. We may end up with a mismatch between which
   7.115 +         * mappings and which unmappings are counted via the grant entry, but
   7.116 +         * really it doesn't matter as privileged domains have carte blanche.
   7.117 +         */
   7.118 +        if ( likely(gnttab_try_unmap(e, d, page, l1v & _PAGE_RW)) )
   7.119 +            return;
   7.120 +        /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
   7.121 +    }
   7.122 +
   7.123      if ( l1v & _PAGE_RW )
   7.124      {
   7.125          put_page_and_type(page);
   7.126 @@ -452,7 +501,7 @@ static void put_page_from_l1e(l1_pgentry
   7.127          if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) == 
   7.128                         PGT_ldt_page)) &&
   7.129               unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) )
   7.130 -            invalidate_shadow_ldt(page->u.inuse.domain);
   7.131 +            invalidate_shadow_ldt(e);
   7.132          put_page(page);
   7.133      }
   7.134  }
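
The control flow added to put_page_from_l1e() encodes the rule from the comment: for a foreign page, try to account the unmap against a grant entry first, and fall back to the plain reference counts only if no grant mapping is recorded. In outline (condensed from the hunk above):

    if ( e != d )                                    /* foreign page */
    {
        if ( gnttab_try_unmap(e, d, page, l1v & _PAGE_RW) )
            return;              /* accounted via the grant entry */
        /* else assume a MMUEXT_SET_FOREIGNDOM mapping: fall through
         * and decrement the ordinary reference counts instead. */
    }
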
   7.135 @@ -527,7 +576,7 @@ static int alloc_l1_table(struct pfn_inf
   7.136  
   7.137   fail:
   7.138      while ( i-- > 0 )
   7.139 -        put_page_from_l1e(pl1e[i]);
   7.140 +        put_page_from_l1e(pl1e[i], d);
   7.141  
   7.142      unmap_domain_mem(pl1e);
   7.143      return 0;
   7.144 @@ -551,6 +600,7 @@ static void free_l2_table(struct pfn_inf
   7.145  
   7.146  static void free_l1_table(struct pfn_info *page)
   7.147  {
   7.148 +    struct domain *d = page->u.inuse.domain;
   7.149      unsigned long page_nr = page - frame_table;
   7.150      l1_pgentry_t *pl1e;
   7.151      int i;
   7.152 @@ -558,7 +608,7 @@ static void free_l1_table(struct pfn_inf
   7.153      pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
   7.154  
   7.155      for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   7.156 -        put_page_from_l1e(pl1e[i]);
   7.157 +        put_page_from_l1e(pl1e[i], d);
   7.158  
   7.159      unmap_domain_mem(pl1e);
   7.160  }
   7.161 @@ -651,6 +701,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
   7.162  {
   7.163      l1_pgentry_t ol1e;
   7.164      unsigned long _ol1e;
   7.165 +    struct domain *d = current;
   7.166  
   7.167      if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
   7.168      {
   7.169 @@ -671,18 +722,18 @@ static int mod_l1_entry(l1_pgentry_t *pl
   7.170          
   7.171          if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   7.172          {
   7.173 -            put_page_from_l1e(nl1e);
   7.174 +            put_page_from_l1e(nl1e, d);
   7.175              return 0;
   7.176          }
   7.177          
   7.178 -        put_page_from_l1e(ol1e);
   7.179 +        put_page_from_l1e(ol1e, d);
   7.180          return 1;
   7.181      }
   7.182  
   7.183      if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   7.184          return 0;
   7.185      
   7.186 -    put_page_from_l1e(ol1e);
   7.187 +    put_page_from_l1e(ol1e, d);
   7.188      return 1;
   7.189  }
   7.190  
   7.191 @@ -1289,20 +1340,10 @@ int do_update_va_mapping_otherdomain(uns
   7.192  }
   7.193  
   7.194  
   7.195 -static inline int readonly_page_from_l1e(l1_pgentry_t l1e)
   7.196 -{
   7.197 -    struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
   7.198 -    unsigned long    l1v  = l1_pgentry_val(l1e);
   7.199  
   7.200 -    if ( (l1v & _PAGE_RW) || !(l1v & _PAGE_PRESENT) ||
   7.201 -         !pfn_is_ram(l1v >> PAGE_SHIFT) )
   7.202 -        return 0;
   7.203 -    put_page_type(page);
   7.204 -    return 1;
   7.205 -}
   7.206 -
   7.207 -
   7.208 -/* Writable Pagetables */
   7.209 +/*************************
   7.210 + * Writable Pagetables
   7.211 + */
   7.212  
   7.213  ptwr_info_t ptwr_info[NR_CPUS] =
   7.214      { [ 0 ... NR_CPUS-1 ] =
   7.215 @@ -1365,13 +1406,8 @@ void ptwr_reconnect_disconnected(unsigne
   7.216          nl1e = pl1e[i];
   7.217          if (likely(l1_pgentry_val(nl1e) == l1_pgentry_val(ol1e)))
   7.218              continue;
   7.219 -        if (likely((l1_pgentry_val(nl1e) ^ l1_pgentry_val(ol1e)) ==
   7.220 -                   _PAGE_RW)) {
   7.221 -            if (likely(readonly_page_from_l1e(nl1e)))
   7.222 -                continue;
   7.223 -        }
   7.224          if (unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT))
   7.225 -            put_page_from_l1e(ol1e);
   7.226 +            put_page_from_l1e(ol1e, current);
   7.227          if (unlikely(!get_page_from_l1e(nl1e, current)))
   7.228              BUG();
   7.229      }
   7.230 @@ -1438,7 +1474,7 @@ void ptwr_flush_inactive(void)
   7.231              if (likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)))
   7.232                  continue;
   7.233              if (unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT))
   7.234 -                put_page_from_l1e(ol1e);
   7.235 +                put_page_from_l1e(ol1e, current);
   7.236              if (unlikely(!get_page_from_l1e(nl1e, current)))
   7.237                  BUG();
   7.238          }
     8.1 --- a/xen/arch/x86/setup.c	Tue Aug 24 22:32:56 2004 +0000
     8.2 +++ b/xen/arch/x86/setup.c	Wed Aug 25 15:40:30 2004 +0000
     8.3 @@ -411,7 +411,7 @@ void __init start_of_day(void)
     8.4      clear_bit(smp_processor_id(), &wait_init_idle);
     8.5      smp_threads_ready = 1;
     8.6      smp_commence(); /* Tell other CPUs that state of the world is stable. */
     8.7 -    while (wait_init_idle) 
     8.8 +    while ( wait_init_idle != 0 )
     8.9      {
    8.10          cpu_relax();
    8.11          barrier();
     9.1 --- a/xen/common/domain.c	Tue Aug 24 22:32:56 2004 +0000
     9.2 +++ b/xen/common/domain.c	Wed Aug 25 15:40:30 2004 +0000
     9.3 @@ -232,12 +232,16 @@ void domain_destruct(struct domain *d)
     9.4  {
     9.5      struct domain **pd;
     9.6      unsigned long flags;
     9.7 +    atomic_t      old, new;
     9.8  
     9.9      if ( !test_bit(DF_DYING, &d->flags) )
    9.10          BUG();
    9.11  
    9.12      /* May be already destructed, or get_domain() can race us. */
    9.13 -    if ( cmpxchg(&d->refcnt.counter, 0, DOMAIN_DESTRUCTED) != 0 )
    9.14 +    _atomic_set(old, 0);
    9.15 +    _atomic_set(new, DOMAIN_DESTRUCTED);
    9.16 +    old = atomic_compareandswap(old, new, &d->refcnt);
    9.17 +    if ( _atomic_read(old) != 0 )
    9.18          return;
    9.19  
    9.20      DPRINTK("Releasing task %u\n", d->domain);
    10.1 --- a/xen/common/grant_table.c	Tue Aug 24 22:32:56 2004 +0000
    10.2 +++ b/xen/common/grant_table.c	Wed Aug 25 15:40:30 2004 +0000
    10.3 @@ -24,6 +24,13 @@
    10.4  #include <xen/config.h>
    10.5  #include <xen/sched.h>
    10.6  
    10.7 +#define PIN_FAIL(_rc, _f, _a...)   \
    10.8 +    do {                           \
    10.9 +        DPRINTK( _f, ## _a );      \
   10.10 +        rc = -(_rc);               \
   10.11 +        goto out;                  \
   10.12 +    } while ( 0 )
   10.13 +
   10.14  static inline void
   10.15  check_tlb_flush(
   10.16      active_grant_entry_t *a)
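
PIN_FAIL() folds the repeated DPRINTK-then-set-rc-then-goto triple into one statement; each conversion below shrinks four lines to one or two. A hedged sketch of what a call site expands to (function and variable names illustrative):

    static long example_pin(int flags)
    {
        long rc = 0;                 /* PIN_FAIL assumes 'rc' in scope */

        if ( flags == 0 )
            PIN_FAIL(EINVAL, "Bad flags (%x)\n", flags);
        /* ... success path ... */

     out:                            /* ... and an 'out:' label */
        return rc;                   /* -EINVAL on the failure path */
    }
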
   10.17 @@ -70,6 +77,7 @@ gnttab_update_pin_status(
   10.18      active_grant_entry_t *act;
   10.19      grant_entry_t *sha;
   10.20      long           rc = 0;
   10.21 +    unsigned long  frame;
   10.22  
   10.23      ld = current;
   10.24  
   10.25 @@ -93,8 +101,11 @@ gnttab_update_pin_status(
   10.26          return -EINVAL;
   10.27      }
   10.28  
   10.29 -    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) )
   10.30 +    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
   10.31 +         unlikely(ld == rd) )
   10.32      {
   10.33 +        if ( rd != NULL )
   10.34 +            put_domain(rd);
   10.35          DPRINTK("Could not find domain %d\n", dom);
   10.36          return -ESRCH;
   10.37      }
   10.38 @@ -102,6 +113,8 @@ gnttab_update_pin_status(
   10.39      act = &rd->grant_table->active[ref];
   10.40      sha = &rd->grant_table->shared[ref];
   10.41  
   10.42 +    spin_lock(&rd->grant_table->lock);
   10.43 +
   10.44      if ( act->status == 0 )
   10.45      {
   10.46          if ( unlikely(pin_flags == 0) )
   10.47 @@ -118,23 +131,17 @@ gnttab_update_pin_status(
   10.48  
   10.49              if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
   10.50                   unlikely(sdom != ld->domain) )
   10.51 -            {
   10.52 -                DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
   10.53 +                PIN_FAIL(EINVAL,
   10.54 +                         "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
   10.55                          sflags, sdom, ld->domain);
   10.56 -                rc = -EINVAL;
   10.57 -                goto out;
   10.58 -            }
   10.59  
   10.60              sflags |= GTF_reading;
   10.61              if ( !(pin_flags & GNTPIN_readonly) )
   10.62              {
   10.63                  sflags |= GTF_writing;
   10.64                  if ( unlikely(sflags & GTF_readonly) )
   10.65 -                {
   10.66 -                    DPRINTK("Attempt to write-pin a read-only grant entry.\n");
   10.67 -                    rc = -EINVAL;
   10.68 -                    goto out;
   10.69 -                }
   10.70 +                    PIN_FAIL(EINVAL,
   10.71 +                             "Attempt to write-pin a r/o grant entry.\n");
   10.72              }
   10.73  
   10.74              /* Merge two 16-bit values into a 32-bit combined update. */
   10.75 @@ -144,11 +151,8 @@ gnttab_update_pin_status(
   10.76              /* NB. prev_sflags is updated in place to seen value. */
   10.77              if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, 
   10.78                                         prev_scombo | GTF_writing)) )
   10.79 -            {
   10.80 -                DPRINTK("Fault while modifying shared flags and domid.\n");
   10.81 -                rc = -EINVAL;
   10.82 -                goto out;
   10.83 -            }
   10.84 +                PIN_FAIL(EINVAL,
   10.85 +                         "Fault while modifying shared flags and domid.\n");
   10.86  
   10.87              /* Did the combined update work (did we see what we expected?). */
   10.88              if ( prev_scombo == scombo )
   10.89 @@ -161,10 +165,22 @@ gnttab_update_pin_status(
   10.90          }
   10.91  
   10.92          /* rmb(); */ /* not on x86 */
   10.93 +        frame = sha->frame;
   10.94 +        if ( unlikely(!pfn_is_ram(frame)) || 
   10.95 +             unlikely(!((pin_flags & GNTPIN_readonly) ? 
   10.96 +                        get_page(&frame_table[frame], rd) : 
   10.97 +                        get_page_and_type(&frame_table[frame], rd, 
   10.98 +                                          PGT_writable_page))) )
   10.99 +        {
  10.100 +            clear_bit(_GTF_writing, &sha->flags);
  10.101 +            clear_bit(_GTF_reading, &sha->flags);
  10.102 +            PIN_FAIL(EINVAL, 
  10.103 +                     "Could not pin the granted frame!\n");
  10.104 +        }
  10.105  
  10.106          act->status = pin_flags;
  10.107          act->domid  = sdom;
  10.108 -        act->frame  = sha->frame;
  10.109 +        act->frame  = frame;
  10.110  
  10.111          make_entry_mappable(rd->grant_table, act);
  10.112      }
  10.113 @@ -174,11 +190,13 @@ gnttab_update_pin_status(
  10.114  
  10.115          if ( unlikely((act->status & 
  10.116                         (GNTPIN_wmap_mask|GNTPIN_rmap_mask)) != 0) )
  10.117 -        {
  10.118 -            DPRINTK("Attempt to deactivate a mapped g.e. (%x)\n", act->status);
  10.119 -            rc = -EINVAL;
  10.120 -            goto out;
  10.121 -        }
  10.122 +            PIN_FAIL(EINVAL,
  10.123 +                     "Attempt to deactiv a mapped g.e. (%x)\n", act->status);
  10.124 +
  10.125 +        frame = act->frame;
  10.126 +        if ( !(act->status & GNTPIN_readonly) )
  10.127 +            put_page_type(&frame_table[frame]);
  10.128 +        put_page(&frame_table[frame]);
  10.129  
  10.130          act->status = 0;
  10.131          make_entry_unmappable(rd->grant_table, act);
  10.132 @@ -199,12 +217,9 @@ gnttab_update_pin_status(
  10.133               (unlikely((act->status & GNTPIN_wmap_mask) != 0) ||
  10.134                (((pin_flags & GNTPIN_host_accessible) == 0) &&
  10.135                 unlikely((act->status & GNTPIN_rmap_mask) != 0))) )
  10.136 -        {
  10.137 -            DPRINTK("Attempt to reduce pinning of a mapped g.e. (%x,%x)\n",
  10.138 +            PIN_FAIL(EINVAL,
  10.139 +                     "Attempt to reduce pinning of a mapped g.e. (%x,%x)\n",
  10.140                      pin_flags, act->status);
  10.141 -            rc = -EINVAL;
  10.142 -            goto out;
  10.143 -        }
  10.144  
  10.145          /* Check for changes to host accessibility. */
  10.146          if ( pin_flags & GNTPIN_host_accessible )
  10.147 @@ -220,6 +235,7 @@ gnttab_update_pin_status(
  10.148          {
  10.149              if ( !(act->status & GNTPIN_readonly) )
  10.150              {
  10.151 +                put_page_type(&frame_table[act->frame]);
  10.152                  check_tlb_flush(act);
  10.153                  clear_bit(_GTF_writing, &sha->flags);
  10.154              }
  10.155 @@ -231,20 +247,19 @@ gnttab_update_pin_status(
  10.156                  prev_sflags = sflags;
  10.157  
  10.158                  if ( unlikely(prev_sflags & GTF_readonly) )
  10.159 -                {
  10.160 -                    DPRINTK("Attempt to write-pin a read-only grant entry.\n");
  10.161 -                    rc = -EINVAL;
  10.162 -                    goto out;
  10.163 -                }
  10.164 -                
  10.165 +                    PIN_FAIL(EINVAL,
  10.166 +                             "Attempt to write-pin a r/o grant entry.\n");
  10.167 +
  10.168 +                if ( unlikely(!get_page_type(&frame_table[act->frame],
  10.169 +                                             PGT_writable_page)) )
  10.170 +                    PIN_FAIL(EINVAL,
   10.171 +                             "Attempt to write-pin an unwritable page.\n");
  10.172 +
  10.173                  /* NB. prev_sflags is updated in place to seen value. */
  10.174                  if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, 
  10.175                                             prev_sflags | GTF_writing)) )
  10.176 -                {
  10.177 -                    DPRINTK("Fault while modifying shared flags.\n");
  10.178 -                    rc = -EINVAL;
  10.179 -                    goto out;
  10.180 -                }
  10.181 +                    PIN_FAIL(EINVAL,
  10.182 +                             "Fault while modifying shared flags.\n");
  10.183              }
  10.184              while ( prev_sflags != sflags );
  10.185          }
  10.186 @@ -261,6 +276,7 @@ gnttab_update_pin_status(
  10.187      (void)__put_user(act->frame, &uop->host_phys_addr);
  10.188  
  10.189   out:
  10.190 +    spin_unlock(&rd->grant_table->lock);
  10.191      put_domain(rd);
  10.192      return rc;
  10.193  }
  10.194 @@ -289,6 +305,20 @@ do_grant_table_op(
  10.195      return rc;
  10.196  }
  10.197  
  10.198 +int
  10.199 +gnttab_try_map(
  10.200 +    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
  10.201 +{
  10.202 +    return 0;
  10.203 +}
  10.204 +
  10.205 +int
  10.206 +gnttab_try_unmap(
  10.207 +    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
  10.208 +{
  10.209 +    return 0;
  10.210 +}
  10.211 +
  10.212  int 
  10.213  grant_table_create(
  10.214      struct domain *d)
  10.215 @@ -318,6 +348,7 @@ grant_table_create(
  10.216      SHARE_PFN_WITH_DOMAIN(virt_to_page(t->shared), d);
  10.217  
  10.218      /* Okay, install the structure. */
  10.219 +    wmb(); /* avoid races with lock-free access to d->grant_table */
  10.220      d->grant_table = t;
  10.221      return 0;
  10.222  
    11.1 --- a/xen/common/kernel.c	Tue Aug 24 22:32:56 2004 +0000
    11.2 +++ b/xen/common/kernel.c	Wed Aug 25 15:40:30 2004 +0000
    11.3 @@ -296,9 +296,19 @@ void cmain(multiboot_info_t *mbi)
    11.4      xmem_cache_init();
    11.5      xmem_cache_sizes_init(max_page);
    11.6  
    11.7 +    /*
    11.8 +     * Create a domain-structure allocator. The SLAB_NO_REAP flag is essential!
    11.9 +     * This is because in some situations a domain's reference count will be
   11.10 +     * incremented by someone with no other handle on the structure -- this is 
    11.11 +     * inherently racy because the struct could be freed by the time that the
   11.12 +     * count is incremented. By specifying 'no-reap' we ensure that, worst
   11.13 +     * case, they increment some other domain's count, rather than corrupting
   11.14 +     * a random field in a random structure!
   11.15 +     * See, for example, arch/x86/memory.c:get_page_from_l1e().
   11.16 +     */
   11.17      domain_struct_cachep = xmem_cache_create(
   11.18          "domain_cache", sizeof(struct domain),
   11.19 -        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
   11.20 +        0, SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL);
   11.21      if ( domain_struct_cachep == NULL )
   11.22          panic("No slab cache for task structs.");
   11.23  
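
The racy reader the comment alludes to looks like the snippet below (cf. the cmpxchg8b snapshot in get_page_from_l1e() above): the domain pointer read out of a page may already be dangling, and only the no-reap guarantee makes the subsequent reference-count probe safe. A hedged sketch:

    /* Speculative domain reference with no prior handle. 'e' may point
     * at a freed struct domain; SLAB_NO_REAP ensures that memory stays
     * inside the domain cache, so in the worst case the increment below
     * lands on some other domain's refcnt rather than on a recycled
     * page of a different type. */
    struct domain *e = page->u.inuse.domain;
    if ( (e != NULL) && get_domain(e) )   /* fails if DOMAIN_DESTRUCTED */
    {
        /* ... e is pinned: safe to use ... */
        put_domain(e);
    }
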
    12.1 --- a/xen/common/page_alloc.c	Tue Aug 24 22:32:56 2004 +0000
    12.2 +++ b/xen/common/page_alloc.c	Wed Aug 25 15:40:30 2004 +0000
    12.3 @@ -300,12 +300,21 @@ void init_xenheap_pages(unsigned long ps
    12.4  unsigned long alloc_xenheap_pages(int order)
    12.5  {
    12.6      struct pfn_info *pg;
    12.7 -    int attempts = 0;
    12.8 +    int i, attempts = 0;
    12.9  
   12.10   retry:
   12.11      if ( unlikely((pg = alloc_heap_pages(MEMZONE_XEN, order)) == NULL) )
   12.12          goto no_memory;
   12.13 +
   12.14      memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
   12.15 +
   12.16 +    for ( i = 0; i < (1 << order); i++ )
   12.17 +    {
   12.18 +        pg[i].u.inuse.count_info = PGC_always_set;
   12.19 +        pg[i].u.inuse.domain     = NULL;
   12.20 +        pg[i].u.inuse.type_info  = 0;
   12.21 +    }
   12.22 +
   12.23      return (unsigned long)page_to_virt(pg);
   12.24  
   12.25   no_memory:
   12.26 @@ -343,7 +352,7 @@ struct pfn_info *alloc_domheap_pages(str
   12.27  {
   12.28      struct pfn_info *pg;
   12.29      unsigned long mask, flushed_mask, pfn_stamp, cpu_stamp;
   12.30 -    int i;
   12.31 +    int i, j;
   12.32  
   12.33      ASSERT(!in_irq());
   12.34  
   12.35 @@ -353,19 +362,16 @@ struct pfn_info *alloc_domheap_pages(str
   12.36      flushed_mask = 0;
   12.37      for ( i = 0; i < (1 << order); i++ )
   12.38      {
   12.39 -        pg[i].u.inuse.domain    = NULL;
   12.40 -        pg[i].u.inuse.type_info = 0;
   12.41 -
   12.42          if ( (mask = (pg[i].u.free.cpu_mask & ~flushed_mask)) != 0 )
   12.43          {
   12.44              pfn_stamp = pg[i].tlbflush_timestamp;
   12.45 -            for ( i = 0; (mask != 0) && (i < smp_num_cpus); i++ )
   12.46 +            for ( j = 0; (mask != 0) && (j < smp_num_cpus); j++ )
   12.47              {
   12.48 -                if ( mask & (1<<i) )
   12.49 +                if ( mask & (1<<j) )
   12.50                  {
   12.51 -                    cpu_stamp = tlbflush_time[i];
   12.52 +                    cpu_stamp = tlbflush_time[j];
   12.53                      if ( !NEED_FLUSH(cpu_stamp, pfn_stamp) )
   12.54 -                        mask &= ~(1<<i);
   12.55 +                        mask &= ~(1<<j);
   12.56                  }
   12.57              }
   12.58              
   12.59 @@ -376,6 +382,10 @@ struct pfn_info *alloc_domheap_pages(str
   12.60                  flushed_mask |= mask;
   12.61              }
   12.62          }
   12.63 +
   12.64 +        pg[i].u.inuse.count_info = PGC_always_set;
   12.65 +        pg[i].u.inuse.domain     = NULL;
   12.66 +        pg[i].u.inuse.type_info  = 0;
   12.67      }
   12.68  
   12.69      if ( d == NULL )
   12.70 @@ -401,7 +411,7 @@ struct pfn_info *alloc_domheap_pages(str
   12.71      {
   12.72          pg[i].u.inuse.domain = d;
   12.73          wmb(); /* Domain pointer must be visible before updating refcnt. */
   12.74 -        pg[i].u.inuse.count_info = PGC_allocated | 1;
   12.75 +        pg[i].u.inuse.count_info |= PGC_allocated | 1;
   12.76          list_add_tail(&pg[i].list, &d->page_list);
   12.77      }
   12.78  
   12.79 @@ -418,10 +428,13 @@ void free_domheap_pages(struct pfn_info 
   12.80      if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
   12.81      {
   12.82          spin_lock_recursive(&d->page_alloc_lock);
   12.83 +
   12.84          for ( i = 0; i < (1 << order); i++ )
   12.85              list_del(&pg[i].list);
   12.86 +
   12.87          d->xenheap_pages -= 1 << order;
   12.88          drop_dom_ref = (d->xenheap_pages == 0);
   12.89 +
   12.90          spin_unlock_recursive(&d->page_alloc_lock);
   12.91      }
   12.92      else if ( likely(d != NULL) )
   12.93 @@ -431,9 +444,8 @@ void free_domheap_pages(struct pfn_info 
   12.94  
   12.95          for ( i = 0; i < (1 << order); i++ )
   12.96          {
   12.97 -            pg[i].tlbflush_timestamp = tlbflush_clock;
   12.98 -            pg[i].u.inuse.count_info = 0;
   12.99 -            pg[i].u.free.cpu_mask    = 1 << d->processor;
  12.100 +            pg[i].tlbflush_timestamp  = tlbflush_clock;
  12.101 +            pg[i].u.free.cpu_mask     = 1 << d->processor;
  12.102              list_del(&pg[i].list);
  12.103          }
  12.104  
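
Besides initialising count_info with PGC_always_set, the alloc_domheap_pages() hunk fixes a loop-variable clash: the inner CPU scan previously reused i, corrupting the outer per-page loop. The shape of the bug and the fix:

    /* Before: the inner loop clobbers the outer index 'i', so the
     * outer loop skips pages or terminates early. */
    for ( i = 0; i < (1 << order); i++ )
        for ( i = 0; (mask != 0) && (i < smp_num_cpus); i++ )
            ;

    /* After: a distinct index 'j' for the per-CPU scan. */
    for ( i = 0; i < (1 << order); i++ )
        for ( j = 0; (mask != 0) && (j < smp_num_cpus); j++ )
            ;
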
    13.1 --- a/xen/include/asm-x86/atomic.h	Tue Aug 24 22:32:56 2004 +0000
    13.2 +++ b/xen/include/asm-x86/atomic.h	Wed Aug 25 15:40:30 2004 +0000
    13.3 @@ -2,11 +2,7 @@
    13.4  #define __ARCH_X86_ATOMIC__
    13.5  
    13.6  #include <xen/config.h>
    13.7 -
    13.8 -/*
    13.9 - * Atomic operations that C can't guarantee us.  Useful for
   13.10 - * resource counting etc..
   13.11 - */
   13.12 +#include <asm/system.h>
   13.13  
   13.14  #ifdef CONFIG_SMP
   13.15  #define LOCK "lock ; "
   13.16 @@ -15,11 +11,11 @@
   13.17  #endif
   13.18  
   13.19  /*
   13.20 - * Make sure gcc doesn't try to be clever and move things around
   13.21 - * on us. We need to use _exactly_ the address the user gave us,
   13.22 - * not some alias that contains the same information.
   13.23 + * NB. I've pushed the volatile qualifier into the operations. This allows
   13.24 + * fast accessors such as _atomic_read() and _atomic_set() which don't give
   13.25 + * the compiler a fit.
   13.26   */
   13.27 -typedef struct { volatile int counter; } atomic_t;
   13.28 +typedef struct { int counter; } atomic_t;
   13.29  
   13.30  #define ATOMIC_INIT(i)	{ (i) }
   13.31  
   13.32 @@ -29,8 +25,9 @@ typedef struct { volatile int counter; }
   13.33   * 
   13.34   * Atomically reads the value of @v.  Note that the guaranteed
   13.35   * useful range of an atomic_t is only 24 bits.
   13.36 - */ 
   13.37 -#define atomic_read(v)		((v)->counter)
   13.38 + */
   13.39 +#define _atomic_read(v)		((v).counter)
   13.40 +#define atomic_read(v)		(*(volatile int *)&((v)->counter))
   13.41  
   13.42  /**
   13.43   * atomic_set - set atomic variable
   13.44 @@ -40,7 +37,8 @@ typedef struct { volatile int counter; }
   13.45   * Atomically sets the value of @v to @i.  Note that the guaranteed
   13.46   * useful range of an atomic_t is only 24 bits.
   13.47   */ 
   13.48 -#define atomic_set(v,i)		(((v)->counter) = (i))
   13.49 +#define _atomic_set(v,i)	(((v).counter) = (i))
   13.50 +#define atomic_set(v,i)		(*(volatile int *)&((v)->counter) = (i))
   13.51  
   13.52  /**
   13.53   * atomic_add - add integer to atomic variable
   13.54 @@ -54,8 +52,8 @@ static __inline__ void atomic_add(int i,
   13.55  {
   13.56  	__asm__ __volatile__(
   13.57  		LOCK "addl %1,%0"
   13.58 -		:"=m" (v->counter)
   13.59 -		:"ir" (i), "m" (v->counter));
   13.60 +		:"=m" (*(volatile int *)&v->counter)
   13.61 +		:"ir" (i), "m" (*(volatile int *)&v->counter));
   13.62  }
   13.63  
   13.64  /**
   13.65 @@ -70,8 +68,8 @@ static __inline__ void atomic_sub(int i,
   13.66  {
   13.67  	__asm__ __volatile__(
   13.68  		LOCK "subl %1,%0"
   13.69 -		:"=m" (v->counter)
   13.70 -		:"ir" (i), "m" (v->counter));
   13.71 +		:"=m" (*(volatile int *)&v->counter)
   13.72 +		:"ir" (i), "m" (*(volatile int *)&v->counter));
   13.73  }
   13.74  
   13.75  /**
   13.76 @@ -90,8 +88,8 @@ static __inline__ int atomic_sub_and_tes
   13.77  
   13.78  	__asm__ __volatile__(
   13.79  		LOCK "subl %2,%0; sete %1"
   13.80 -		:"=m" (v->counter), "=qm" (c)
   13.81 -		:"ir" (i), "m" (v->counter) : "memory");
   13.82 +		:"=m" (*(volatile int *)&v->counter), "=qm" (c)
   13.83 +		:"ir" (i), "m" (*(volatile int *)&v->counter) : "memory");
   13.84  	return c;
   13.85  }
   13.86  
   13.87 @@ -106,8 +104,8 @@ static __inline__ void atomic_inc(atomic
   13.88  {
   13.89  	__asm__ __volatile__(
   13.90  		LOCK "incl %0"
   13.91 -		:"=m" (v->counter)
   13.92 -		:"m" (v->counter));
   13.93 +		:"=m" (*(volatile int *)&v->counter)
   13.94 +		:"m" (*(volatile int *)&v->counter));
   13.95  }
   13.96  
   13.97  /**
   13.98 @@ -121,8 +119,8 @@ static __inline__ void atomic_dec(atomic
   13.99  {
  13.100  	__asm__ __volatile__(
  13.101  		LOCK "decl %0"
  13.102 -		:"=m" (v->counter)
  13.103 -		:"m" (v->counter));
  13.104 +		:"=m" (*(volatile int *)&v->counter)
  13.105 +		:"m" (*(volatile int *)&v->counter));
  13.106  }
  13.107  
  13.108  /**
  13.109 @@ -140,8 +138,8 @@ static __inline__ int atomic_dec_and_tes
  13.110  
  13.111  	__asm__ __volatile__(
  13.112  		LOCK "decl %0; sete %1"
  13.113 -		:"=m" (v->counter), "=qm" (c)
  13.114 -		:"m" (v->counter) : "memory");
  13.115 +		:"=m" (*(volatile int *)&v->counter), "=qm" (c)
  13.116 +		:"m" (*(volatile int *)&v->counter) : "memory");
  13.117  	return c != 0;
  13.118  }
  13.119  
  13.120 @@ -160,8 +158,8 @@ static __inline__ int atomic_inc_and_tes
  13.121  
  13.122  	__asm__ __volatile__(
  13.123  		LOCK "incl %0; sete %1"
  13.124 -		:"=m" (v->counter), "=qm" (c)
  13.125 -		:"m" (v->counter) : "memory");
  13.126 +		:"=m" (*(volatile int *)&v->counter), "=qm" (c)
  13.127 +		:"m" (*(volatile int *)&v->counter) : "memory");
  13.128  	return c != 0;
  13.129  }
  13.130  
  13.131 @@ -181,11 +179,20 @@ static __inline__ int atomic_add_negativ
  13.132  
  13.133  	__asm__ __volatile__(
  13.134  		LOCK "addl %2,%0; sets %1"
  13.135 -		:"=m" (v->counter), "=qm" (c)
  13.136 -		:"ir" (i), "m" (v->counter) : "memory");
  13.137 +		:"=m" (*(volatile int *)&v->counter), "=qm" (c)
  13.138 +		:"ir" (i), "m" (*(volatile int *)&v->counter) : "memory");
  13.139  	return c;
  13.140  }
  13.141  
  13.142 +static __inline__ atomic_t atomic_compareandswap(
  13.143 +	atomic_t old, atomic_t new, atomic_t *v)
  13.144 +{
  13.145 +	atomic_t rc;
  13.146 +	rc.counter = 
  13.147 +		__cmpxchg(&v->counter, old.counter, new.counter, sizeof(int));
  13.148 +	return rc;
  13.149 +}
  13.150 +
  13.151  /* Atomic operations are already serializing on x86 */
  13.152  #define smp_mb__before_atomic_dec()	barrier()
  13.153  #define smp_mb__after_atomic_dec()	barrier()
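
Dropping volatile from the counter field and reintroducing it inside each operation enables the two-tier accessors used in common/domain.c and sched.h: atomic_read()/atomic_set() still force a memory access, while _atomic_read()/_atomic_set() work on local atomic_t copies without redundant loads. A sketch of the intended split, assuming a struct domain *d in scope:

    atomic_t snap = d->refcnt;            /* plain struct copy          */
    int seen = _atomic_read(snap);        /* no volatile access: 'snap'
                                             is a local                 */
    int live = atomic_read(&d->refcnt);   /* forced load from shared
                                             memory                     */
    _atomic_set(snap, seen + 1);          /* local update; no store to
                                             shared state               */
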
    14.1 --- a/xen/include/asm-x86/mm.h	Tue Aug 24 22:32:56 2004 +0000
    14.2 +++ b/xen/include/asm-x86/mm.h	Wed Aug 25 15:40:30 2004 +0000
    14.3 @@ -87,9 +87,11 @@ struct pfn_info
    14.4   /* Cleared when the owning guest 'frees' this page. */
    14.5  #define _PGC_allocated                29
    14.6  #define PGC_allocated                 (1<<_PGC_allocated)
    14.7 - /* 28-bit count of references to this frame. */
    14.8 -#define PGC_count_mask                ((1<<29)-1)
    14.9 -
   14.10 + /* This bit is always set, guaranteeing that the count word is never zero. */
   14.11 +#define _PGC_always_set               28
   14.12 +#define PGC_always_set                (1<<_PGC_always_set)
   14.13 + /* 27-bit count of references to this frame. */
   14.14 +#define PGC_count_mask                ((1<<28)-1)
   14.15  
   14.16  /* We trust the slab allocator in slab.c, and our use of it. */
   14.17  #define PageSlab(page)		(1)
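
The resulting count-word layout, read straight off the definitions above:

    /*  bits 31-30 : other PGC_* flags (not shown in this hunk)
     *  bit  29    : PGC_allocated  -- cleared when the guest frees the page
     *  bit  28    : PGC_always_set -- never cleared; the word is never zero
     *  bits 27-0  : reference count (PGC_count_mask)
     */
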
   14.18 @@ -106,7 +108,8 @@ struct pfn_info
   14.19          wmb(); /* install valid domain ptr before updating refcnt. */       \
   14.20          spin_lock(&(_dom)->page_alloc_lock);                                \
   14.21          /* _dom holds an allocation reference */                            \
   14.22 -        (_pfn)->u.inuse.count_info = PGC_allocated | 1;                     \
   14.23 +        ASSERT((_pfn)->u.inuse.count_info == PGC_always_set);               \
   14.24 +        (_pfn)->u.inuse.count_info |= PGC_allocated | 1;                    \
   14.25          if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
   14.26              get_knownalive_domain(_dom);                                    \
   14.27          list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list);                \
   14.28 @@ -150,10 +153,8 @@ static inline int get_page(struct pfn_in
   14.29               unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
   14.30               unlikely(p != domain) )                 /* Wrong owner? */
   14.31          {
   14.32 -            DPRINTK("Error pfn %08lx: ed=%p(%u), sd=%p(%u),"
   14.33 -                    " caf=%08x, taf=%08x\n",
   14.34 -                    page_to_pfn(page), domain, domain->domain,
   14.35 -                    p, (p && !((x & PGC_count_mask) == 0))?p->domain:999, 
   14.36 +            DPRINTK("Error pfn %08lx: ed=%p, sd=%p, caf=%08x, taf=%08x\n",
   14.37 +                    page_to_pfn(page), domain, p,
   14.38                      x, page->u.inuse.type_info);
   14.39              return 0;
   14.40          }
   14.41 @@ -364,26 +365,21 @@ void ptwr_reconnect_disconnected(unsigne
   14.42  void ptwr_flush_inactive(void);
   14.43  int ptwr_do_page_fault(unsigned long);
   14.44  
   14.45 -static always_inline void 
   14.46 -__cleanup_writable_pagetable(
   14.47 -    const int what)
   14.48 -{
   14.49 -    int cpu = smp_processor_id();
   14.50 +#define __cleanup_writable_pagetable(_what)                               \
   14.51 +do {                                                                      \
   14.52 +    int cpu = smp_processor_id();                                         \
   14.53 +    if ((_what) & PTWR_CLEANUP_ACTIVE)                                    \
   14.54 +        if (ptwr_info[cpu].disconnected != ENTRIES_PER_L2_PAGETABLE)      \
   14.55 +            ptwr_reconnect_disconnected(0L);                              \
   14.56 +    if ((_what) & PTWR_CLEANUP_INACTIVE)                                  \
   14.57 +        if (ptwr_info[cpu].writable_idx)                                  \
   14.58 +            ptwr_flush_inactive();                                        \
   14.59 +} while ( 0 )
   14.60  
   14.61 -    if (what & PTWR_CLEANUP_ACTIVE)
   14.62 -        if (ptwr_info[cpu].disconnected != ENTRIES_PER_L2_PAGETABLE)
   14.63 -            ptwr_reconnect_disconnected(0L);
   14.64 -    if (what & PTWR_CLEANUP_INACTIVE)
   14.65 -        if (ptwr_info[cpu].writable_idx)
   14.66 -            ptwr_flush_inactive();
   14.67 -}
   14.68 -
   14.69 -static always_inline void
   14.70 -cleanup_writable_pagetable(
   14.71 -    struct domain *d, const int what)
   14.72 -{
   14.73 -    if ( unlikely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
   14.74 -        __cleanup_writable_pagetable(what);
   14.75 -}
   14.76 +#define cleanup_writable_pagetable(_d, _w)                                \
   14.77 +    do {                                                                  \
   14.78 +        if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) \
   14.79 +        __cleanup_writable_pagetable(_w);                                 \
   14.80 +    } while ( 0 )
   14.81  
   14.82  #endif /* __ASM_X86_MM_H__ */
    15.1 --- a/xen/include/asm-x86/smp.h	Tue Aug 24 22:32:56 2004 +0000
    15.2 +++ b/xen/include/asm-x86/smp.h	Wed Aug 25 15:40:30 2004 +0000
    15.3 @@ -1,26 +1,13 @@
    15.4  #ifndef __ASM_SMP_H
    15.5  #define __ASM_SMP_H
    15.6  
    15.7 -/*
    15.8 - * We need the APIC definitions automatically as part of 'smp.h'
    15.9 - */
   15.10  #ifndef __ASSEMBLY__
   15.11  #include <xen/config.h>
   15.12 -/*#include <xen/threads.h>*/
   15.13 -#include <asm/ptrace.h>
   15.14 -#endif
   15.15 -
   15.16 -#ifdef CONFIG_X86_LOCAL_APIC
   15.17 -#ifndef __ASSEMBLY__
   15.18  #include <asm/fixmap.h>
   15.19 -#include <asm/bitops.h>
   15.20  #include <asm/mpspec.h>
   15.21 -#ifdef CONFIG_X86_IO_APIC
   15.22  #include <asm/io_apic.h>
   15.23 -#endif
   15.24  #include <asm/apic.h>
   15.25  #endif
   15.26 -#endif
   15.27  
   15.28  #ifdef CONFIG_SMP
   15.29  #ifndef __ASSEMBLY__
   15.30 @@ -37,12 +24,6 @@ extern int pic_mode;
   15.31  extern int smp_num_siblings;
   15.32  extern int cpu_sibling_map[];
   15.33  
   15.34 -extern void smp_flush_tlb(void);
   15.35 -extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
   15.36 -extern void smp_send_reschedule(int cpu);
   15.37 -extern void smp_invalidate_rcv(void);		/* Process an NMI */
   15.38 -extern void (*mtrr_hook) (void);
   15.39 -
   15.40  /*
   15.41   * On x86 all CPUs are mapped 1:1 to the APIC space.
   15.42   * This simplifies scheduling and IPI sending and
    16.1 --- a/xen/include/asm-x86/system.h	Tue Aug 24 22:32:56 2004 +0000
    16.2 +++ b/xen/include/asm-x86/system.h	Wed Aug 25 15:40:30 2004 +0000
    16.3 @@ -30,33 +30,33 @@ static always_inline unsigned long __xch
    16.4  		case 1:
    16.5  			__asm__ __volatile__("xchgb %b0,%1"
    16.6  				:"=q" (x)
    16.7 -				:"m" (*__xg(ptr)), "0" (x)
    16.8 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
    16.9  				:"memory");
   16.10  			break;
   16.11  		case 2:
   16.12  			__asm__ __volatile__("xchgw %w0,%1"
   16.13  				:"=r" (x)
   16.14 -				:"m" (*__xg(ptr)), "0" (x)
   16.15 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
   16.16  				:"memory");
   16.17  			break;
   16.18  #if defined(__i386__)
   16.19  		case 4:
   16.20  			__asm__ __volatile__("xchgl %0,%1"
   16.21  				:"=r" (x)
   16.22 -				:"m" (*__xg(ptr)), "0" (x)
   16.23 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
   16.24  				:"memory");
   16.25  			break;
   16.26  #elif defined(__x86_64__)
   16.27  		case 4:
   16.28  			__asm__ __volatile__("xchgl %k0,%1"
   16.29  				:"=r" (x)
   16.30 -				:"m" (*__xg(ptr)), "0" (x)
   16.31 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
   16.32  				:"memory");
   16.33  			break;
   16.34  		case 8:
   16.35  			__asm__ __volatile__("xchgq %0,%1"
   16.36  				:"=r" (x)
   16.37 -				:"m" (*__xg(ptr)), "0" (x)
   16.38 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
   16.39  				:"memory");
   16.40  			break;
   16.41  #endif
   16.42 @@ -78,33 +78,33 @@ static always_inline unsigned long __cmp
   16.43  	case 1:
   16.44  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
   16.45  				     : "=a"(prev)
   16.46 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   16.47 +				     : "q"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   16.48  				     : "memory");
   16.49  		return prev;
   16.50  	case 2:
   16.51  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
   16.52  				     : "=a"(prev)
   16.53 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   16.54 +				     : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   16.55  				     : "memory");
   16.56  		return prev;
   16.57  #if defined(__i386__)
   16.58  	case 4:
   16.59  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
   16.60  				     : "=a"(prev)
   16.61 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   16.62 +				     : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   16.63  				     : "memory");
   16.64  		return prev;
   16.65  #elif defined(__x86_64__)
   16.66  	case 4:
   16.67  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
   16.68  				     : "=a"(prev)
   16.69 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   16.70 +				     : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   16.71  				     : "memory");
   16.72  		return prev;
   16.73  	case 8:
   16.74  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
   16.75  				     : "=a"(prev)
   16.76 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   16.77 +				     : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   16.78  				     : "memory");
   16.79  		return prev;
   16.80  #endif
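
Two fixes land in __xchg()/__cmpxchg() here: the casts silence qualifier warnings when callers pass volatile pointers, and the input constraint for 16-, 32- and 64-bit operands changes from "q" to "r" -- "q" confines the operand to %eax..%edx on i386, which is only required for the byte-wide cmpxchgb. A usage sketch against the signature shown above (local variable names illustrative):

    /* __cmpxchg(ptr, old, new, size) returns the value actually seen;
     * this is exactly how atomic_compareandswap() in asm-x86/atomic.h
     * drives it. */
    int counter = 0;
    unsigned long prev = __cmpxchg(&counter, 0, 1, sizeof(int));
    if ( prev == 0 )
        /* we won the race: counter is now 1 */;
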
    17.1 --- a/xen/include/xen/grant_table.h	Tue Aug 24 22:32:56 2004 +0000
    17.2 +++ b/xen/include/xen/grant_table.h	Wed Aug 25 15:40:30 2004 +0000
    17.3 @@ -24,6 +24,8 @@
    17.4  #ifndef __XEN_GRANT_H__
    17.5  #define __XEN_GRANT_H__
    17.6  
    17.7 +#include <xen/config.h>
    17.8 +#include <xen/mm.h>
    17.9  #include <hypervisor-ifs/grant_table.h>
   17.10  
   17.11  /* Active grant entry - used for shadowing GTF_permit_access grants. */
   17.12 @@ -65,10 +67,19 @@ typedef struct {
   17.13  } grant_table_t;
   17.14  
   17.15  /* Start-of-day system initialisation. */
   17.16 -void grant_table_init(void);
   17.17 +void grant_table_init(
   17.18 +    void);
   17.19  
   17.20  /* Create/destroy per-domain grant table context. */
   17.21 -int  grant_table_create(struct domain *d);
   17.22 -void grant_table_destroy(struct domain *d);
   17.23 +int grant_table_create(
   17.24 +    struct domain *d);
   17.25 +void grant_table_destroy(
   17.26 +    struct domain *d);
   17.27 +
   17.28 +/* Create/destroy host-CPU mappings via a grant-table entry. */
   17.29 +int gnttab_try_map(
   17.30 +    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
   17.31 +int gnttab_try_unmap(
   17.32 +    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
   17.33  
   17.34  #endif /* __XEN_GRANT_H__ */
    18.1 --- a/xen/include/xen/sched.h	Tue Aug 24 22:32:56 2004 +0000
    18.2 +++ b/xen/include/xen/sched.h	Wed Aug 25 15:40:30 2004 +0000
    18.3 @@ -1,6 +1,9 @@
    18.4  #ifndef __SCHED_H__
    18.5  #define __SCHED_H__
    18.6  
    18.7 +#define STACK_SIZE (2*PAGE_SIZE)
    18.8 +#define MAX_DOMAIN_NAME 16
    18.9 +
   18.10  #include <xen/config.h>
   18.11  #include <xen/types.h>
   18.12  #include <xen/spinlock.h>
   18.13 @@ -10,23 +13,18 @@
   18.14  #include <asm/processor.h>
   18.15  #include <hypervisor-ifs/hypervisor-if.h>
   18.16  #include <hypervisor-ifs/dom0_ops.h>
   18.17 -#include <xen/grant_table.h>
   18.18  #include <xen/list.h>
   18.19  #include <xen/time.h>
   18.20  #include <xen/ac_timer.h>
   18.21  #include <xen/delay.h>
   18.22  #include <asm/atomic.h>
   18.23 -
   18.24 -#define STACK_SIZE (2*PAGE_SIZE)
   18.25  #include <asm/current.h>
   18.26 -
   18.27 -#define MAX_DOMAIN_NAME 16
   18.28 +#include <xen/spinlock.h>
   18.29 +#include <xen/grant_table.h>
   18.30  
   18.31  extern unsigned long volatile jiffies;
   18.32  extern rwlock_t tasklist_lock;
   18.33  
   18.34 -#include <xen/spinlock.h>
   18.35 -
   18.36  struct domain;
   18.37  
   18.38  typedef struct event_channel_st
   18.39 @@ -167,10 +165,19 @@ struct domain *alloc_domain_struct();
   18.40   * Use this when you don't have an existing reference to @d. It returns
   18.41   * FALSE if @d is being destructed.
   18.42   */
   18.43 -static inline int get_domain(struct domain *d)
   18.44 +static always_inline int get_domain(struct domain *d)
   18.45  {
   18.46 -    atomic_inc(&d->refcnt);
   18.47 -    return !(atomic_read(&d->refcnt) & DOMAIN_DESTRUCTED);
   18.48 +    atomic_t old, new, seen = d->refcnt;
   18.49 +    do
   18.50 +    {
   18.51 +        old = seen;
   18.52 +        if ( unlikely(_atomic_read(old) & DOMAIN_DESTRUCTED) )
   18.53 +            return 0;
   18.54 +        _atomic_set(new, _atomic_read(old) + 1);
   18.55 +        seen = atomic_compareandswap(old, new, &d->refcnt);
   18.56 +    }
   18.57 +    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
   18.58 +    return 1;
   18.59  }
   18.60  
   18.61  /*
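
With get_domain() now a CAS loop, a failed increment on a DOMAIN_DESTRUCTED refcount can no longer leave the count perturbed, which is what makes the speculative uses above sound. A hedged sketch of the typical caller shape, as suggested by the find_domain_by_id()/put_domain() pairing in the grant-table hunks:

    struct domain *d = find_domain_by_id(dom);  /* NULL, or with a ref */
    if ( d != NULL )
    {
        /* ... d cannot be destructed while the reference is held ... */
        put_domain(d);
    }
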