ia64/xen-unstable

changeset 2617:215824d97bfc

bitkeeper revision 1.1159.1.216 (41656f2ek7HkbBXpAt8AAbtJEyjlTg)

Grant-table interface redone.
author kaf24@freefall.cl.cam.ac.uk
date Thu Oct 07 16:30:38 2004 +0000 (2004-10-07)
parents c3493b09c749
children 0d28c3c8cef5 1594b2cbd611
files linux-2.6.8.1-xen-sparse/arch/xen/kernel/gnttab.c linux-2.6.8.1-xen-sparse/include/asm-xen/gnttab.h linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h xen/arch/x86/domain.c xen/arch/x86/memory.c xen/common/grant_table.c xen/common/kernel.c xen/common/page_alloc.c xen/include/asm-x86/mm.h xen/include/hypervisor-ifs/grant_table.h xen/include/xen/grant_table.h
line diff
     1.1 --- a/linux-2.6.8.1-xen-sparse/arch/xen/kernel/gnttab.c	Thu Oct 07 15:25:52 2004 +0000
     1.2 +++ b/linux-2.6.8.1-xen-sparse/arch/xen/kernel/gnttab.c	Thu Oct 07 16:30:38 2004 +0000
     1.3 @@ -33,22 +33,9 @@ EXPORT_SYMBOL(gnttab_end_foreign_access)
     1.4  EXPORT_SYMBOL(gnttab_grant_foreign_transfer);
     1.5  EXPORT_SYMBOL(gnttab_end_foreign_transfer);
     1.6  
     1.7 -struct gntent_auxinfo {
     1.8 -    u16         write_pin, read_pin; /* reference counts */
     1.9 -    u16         inuse;
    1.10 -    grant_ref_t next;                /* hash chain       */
    1.11 -};
    1.12 -
    1.13  #define NR_GRANT_REFS 512
    1.14 -
    1.15 -static struct gntent_auxinfo auxtab[NR_GRANT_REFS];
    1.16 +static grant_ref_t gnttab_free_list[NR_GRANT_REFS];
    1.17  static grant_ref_t gnttab_free_head;
    1.18 -static spinlock_t gnttab_lock;
    1.19 -
    1.20 -#define HASH_INVALID (0xFFFFU)
    1.21 -#define GNTTAB_HASH_SZ 512
    1.22 -#define GNTTAB_HASH(_f) ((_f) & (GNTTAB_HASH_SZ-1))
    1.23 -static grant_ref_t gnttab_hash[GNTTAB_HASH_SZ];
    1.24  
    1.25  static grant_entry_t *shared;
    1.26  
    1.27 @@ -56,14 +43,14 @@ static grant_entry_t *shared;
    1.28   * Lock-free grant-entry allocator
    1.29   */
    1.30  
    1.31 -static inline grant_ref_t
    1.32 +static inline int
    1.33  get_free_entry(
    1.34      void)
    1.35  {
    1.36      grant_ref_t fh, nfh = gnttab_free_head;
    1.37 -    do { fh = nfh; }
    1.38 +    do { if ( unlikely((fh = nfh) == NR_GRANT_REFS) ) return -1; }
    1.39      while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
    1.40 -                                    auxtab[fh].next)) != fh) );
    1.41 +                                    gnttab_free_list[fh])) != fh) );
    1.42      return fh;
    1.43  }
    1.44  
    1.45 @@ -72,109 +59,55 @@ put_free_entry(
    1.46      grant_ref_t ref)
    1.47  {
    1.48      grant_ref_t fh, nfh = gnttab_free_head;
    1.49 -    do { auxtab[ref].next = fh = nfh; wmb(); }
    1.50 +    do { gnttab_free_list[ref] = fh = nfh; wmb(); }
    1.51      while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
    1.52  }
    1.53  
    1.54  /*
    1.55 - * Public interface functions
    1.56 + * Public grant-issuing interface functions
    1.57   */
    1.58  
    1.59 -grant_ref_t
    1.60 +int
    1.61  gnttab_grant_foreign_access(
    1.62      domid_t domid, unsigned long frame, int readonly)
    1.63  {
    1.64 -    unsigned long flags;
    1.65 -    grant_ref_t   ref;
    1.66 -
    1.67 -    spin_lock_irqsave(&gnttab_lock, flags);
    1.68 -
    1.69 -    for ( ref  = gnttab_hash[GNTTAB_HASH(frame)];
    1.70 -          ref != HASH_INVALID;
    1.71 -          ref  = auxtab[ref].next )
    1.72 -    {
    1.73 -        if ( auxtab[ref].inuse && (shared[ref].frame == frame) )
    1.74 -        {
    1.75 -            if ( readonly )
    1.76 -                auxtab[ref].read_pin++;
    1.77 -            else if ( auxtab[ref].write_pin++ == 0 )
    1.78 -                clear_bit(_GTF_readonly, (unsigned long *)&shared[ref].flags);
    1.79 -            goto done;
    1.80 -        }
    1.81 -    }
    1.82 -
    1.83 -    ref = get_free_entry();
    1.84 -    auxtab[ref].inuse     = 1;
    1.85 -    auxtab[ref].read_pin  = !!readonly;
    1.86 -    auxtab[ref].write_pin =  !readonly;
    1.87 -    auxtab[ref].next = gnttab_hash[GNTTAB_HASH(frame)];
    1.88 -    gnttab_hash[GNTTAB_HASH(frame)] = ref;
    1.89 +    int ref;
    1.90 +    
    1.91 +    if ( unlikely((ref = get_free_entry()) == -1) )
    1.92 +        return -ENOSPC;
    1.93  
    1.94      shared[ref].frame = frame;
    1.95      shared[ref].domid = domid;
    1.96      wmb();
    1.97      shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
    1.98  
    1.99 - done:
   1.100 -    spin_unlock_irqrestore(&gnttab_lock, flags);
   1.101 -    return 0;
   1.102 +    return ref;
   1.103  }
   1.104  
   1.105  void
   1.106  gnttab_end_foreign_access(
   1.107      grant_ref_t ref, int readonly)
   1.108  {
   1.109 -    unsigned long flags, frame = shared[ref].frame;
   1.110 -    grant_ref_t  *pref;
   1.111 -    u16           sflags, nsflags;
   1.112 -
   1.113 -    spin_lock_irqsave(&gnttab_lock, flags);
   1.114 +    u16 flags, nflags;
   1.115  
   1.116 -    if ( readonly )
   1.117 -    {
   1.118 -        if ( (auxtab[ref].read_pin-- == 0) && (auxtab[ref].write_pin == 0) )
   1.119 -            goto delete;
   1.120 -    }
   1.121 -    else if ( auxtab[ref].write_pin-- == 0 )
   1.122 -    {
   1.123 -        if ( auxtab[ref].read_pin == 0 )
   1.124 -            goto delete;
   1.125 -        nsflags = shared[ref].flags;
   1.126 -        do {
   1.127 -            if ( (sflags = nsflags) & GTF_writing )
   1.128 -                printk(KERN_ALERT "WARNING: g.e. still in use for writing!\n");
   1.129 -        }
   1.130 -        while ( (nsflags = cmpxchg(&shared[ref].flags, sflags, 
   1.131 -                                   sflags | GTF_readonly)) != sflags );
   1.132 -    }
   1.133 -
   1.134 -    goto out;
   1.135 -
   1.136 - delete:
   1.137 -    nsflags = shared[ref].flags;
   1.138 +    nflags = shared[ref].flags;
   1.139      do {
   1.140 -        if ( (sflags = nsflags) & (GTF_reading|GTF_writing) )
   1.141 +        if ( (flags = nflags) & (GTF_reading|GTF_writing) )
   1.142              printk(KERN_ALERT "WARNING: g.e. still in use!\n");
   1.143      }
   1.144 -    while ( (nsflags = cmpxchg(&shared[ref].flags, sflags, 0)) != sflags );
   1.145 +    while ( (nflags = cmpxchg(&shared[ref].flags, flags, 0)) != flags );
   1.146  
   1.147 -    pref = &gnttab_hash[GNTTAB_HASH(frame)];
   1.148 -    while ( *pref != ref )
   1.149 -        pref = &auxtab[*pref].next;
   1.150 -    *pref = auxtab[ref].next;
   1.151 -
   1.152 -    auxtab[ref].inuse = 0;
   1.153      put_free_entry(ref);
   1.154 -
   1.155 - out:
   1.156 -    spin_unlock_irqrestore(&gnttab_lock, flags);
   1.157  }
   1.158  
   1.159 -grant_ref_t
   1.160 +int
   1.161  gnttab_grant_foreign_transfer(
   1.162      domid_t domid)
   1.163  {
   1.164 -    grant_ref_t ref = get_free_entry();
   1.165 +    int ref;
   1.166 +
   1.167 +    if ( unlikely((ref = get_free_entry()) == -1) )
   1.168 +        return -ENOSPC;
   1.169  
   1.170      shared[ref].frame = 0;
   1.171      shared[ref].domid = domid;
   1.172 @@ -210,23 +143,19 @@ gnttab_end_foreign_transfer(
   1.173  
   1.174  void __init gnttab_init(void)
   1.175  {
   1.176 -    int               i;
   1.177 -    gnttab_op_t       gntop;
   1.178 -    unsigned long     frame;
   1.179 +    gnttab_setup_table_t setup;
   1.180 +    unsigned long        frame;
   1.181 +    int                  i;
   1.182  
   1.183 -    spin_lock_init(&gnttab_lock);
   1.184 +    for ( i = 0; i < NR_GRANT_REFS; i++ )
   1.185 +        gnttab_free_list[i] = i + 1;
   1.186  
   1.187 -    for ( i = 0; i < GNTTAB_HASH_SZ; i++ )
   1.188 -    {
   1.189 -        gnttab_hash[i] = HASH_INVALID;
   1.190 -        auxtab[i].next = i+1;
   1.191 -    }
   1.192 -
   1.193 -    gntop.cmd = GNTTABOP_setup_table;
   1.194 -    gntop.u.setup_table.dom        = DOMID_SELF;
   1.195 -    gntop.u.setup_table.nr_frames  = 1;
   1.196 -    gntop.u.setup_table.frame_list = &frame;
   1.197 -    if ( HYPERVISOR_grant_table_op(&gntop) != 0 )
   1.198 +    setup.dom        = DOMID_SELF;
   1.199 +    setup.nr_frames  = 1;
   1.200 +    setup.frame_list = &frame;
   1.201 +    if ( HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 )
   1.202 +        BUG();
   1.203 +    if ( setup.status != 0 )
   1.204          BUG();
   1.205  
   1.206      set_fixmap_ma(FIX_GNTTAB, frame << PAGE_SHIFT);
     2.1 --- a/linux-2.6.8.1-xen-sparse/include/asm-xen/gnttab.h	Thu Oct 07 15:25:52 2004 +0000
     2.2 +++ b/linux-2.6.8.1-xen-sparse/include/asm-xen/gnttab.h	Thu Oct 07 16:30:38 2004 +0000
     2.3 @@ -16,7 +16,7 @@
     2.4  #include <asm-xen/hypervisor.h>
     2.5  #include <asm-xen/hypervisor-ifs/grant_table.h>
     2.6  
     2.7 -grant_ref_t
     2.8 +int
     2.9  gnttab_grant_foreign_access(
    2.10      domid_t domid, unsigned long frame, int readonly);
    2.11  
    2.12 @@ -24,7 +24,7 @@ void
    2.13  gnttab_end_foreign_access(
    2.14      grant_ref_t ref, int readonly);
    2.15  
    2.16 -grant_ref_t
    2.17 +int
    2.18  gnttab_grant_foreign_transfer(
    2.19      domid_t domid);
    2.20  
     3.1 --- a/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h	Thu Oct 07 15:25:52 2004 +0000
     3.2 +++ b/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h	Thu Oct 07 16:30:38 2004 +0000
     3.3 @@ -181,7 +181,9 @@ void deallocate_lowmem_region(unsigned l
     3.4   * Assembler stubs for hyper-calls.
     3.5   */
     3.6  
     3.7 -static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
     3.8 +static inline int
     3.9 +HYPERVISOR_set_trap_table(
    3.10 +    trap_info_t *table)
    3.11  {
    3.12      int ret;
    3.13      __asm__ __volatile__ (
    3.14 @@ -192,8 +194,9 @@ static inline int HYPERVISOR_set_trap_ta
    3.15      return ret;
    3.16  }
    3.17  
    3.18 -static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count,
    3.19 -					int *success_count)
    3.20 +static inline int
    3.21 +HYPERVISOR_mmu_update(
    3.22 +    mmu_update_t *req, int count, int *success_count)
    3.23  {
    3.24      int ret;
    3.25      __asm__ __volatile__ (
    3.26 @@ -204,7 +207,9 @@ static inline int HYPERVISOR_mmu_update(
    3.27      return ret;
    3.28  }
    3.29  
    3.30 -static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
    3.31 +static inline int
    3.32 +HYPERVISOR_set_gdt(
    3.33 +    unsigned long *frame_list, int entries)
    3.34  {
    3.35      int ret;
    3.36      __asm__ __volatile__ (
    3.37 @@ -216,7 +221,9 @@ static inline int HYPERVISOR_set_gdt(uns
    3.38      return ret;
    3.39  }
    3.40  
    3.41 -static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
    3.42 +static inline int
    3.43 +HYPERVISOR_stack_switch(
    3.44 +    unsigned long ss, unsigned long esp)
    3.45  {
    3.46      int ret;
    3.47      __asm__ __volatile__ (
    3.48 @@ -227,7 +234,8 @@ static inline int HYPERVISOR_stack_switc
    3.49      return ret;
    3.50  }
    3.51  
    3.52 -static inline int HYPERVISOR_set_callbacks(
    3.53 +static inline int
    3.54 +HYPERVISOR_set_callbacks(
    3.55      unsigned long event_selector, unsigned long event_address,
    3.56      unsigned long failsafe_selector, unsigned long failsafe_address)
    3.57  {
    3.58 @@ -241,7 +249,9 @@ static inline int HYPERVISOR_set_callbac
    3.59      return ret;
    3.60  }
    3.61  
    3.62 -static inline int HYPERVISOR_fpu_taskswitch(void)
    3.63 +static inline int
    3.64 +HYPERVISOR_fpu_taskswitch(
    3.65 +    void)
    3.66  {
    3.67      int ret;
    3.68      __asm__ __volatile__ (
    3.69 @@ -251,7 +261,9 @@ static inline int HYPERVISOR_fpu_taskswi
    3.70      return ret;
    3.71  }
    3.72  
    3.73 -static inline int HYPERVISOR_yield(void)
    3.74 +static inline int
    3.75 +HYPERVISOR_yield(
    3.76 +    void)
    3.77  {
    3.78      int ret;
    3.79      __asm__ __volatile__ (
    3.80 @@ -262,7 +274,9 @@ static inline int HYPERVISOR_yield(void)
    3.81      return ret;
    3.82  }
    3.83  
    3.84 -static inline int HYPERVISOR_block(void)
    3.85 +static inline int
    3.86 +HYPERVISOR_block(
    3.87 +    void)
    3.88  {
    3.89      int ret;
    3.90      __asm__ __volatile__ (
    3.91 @@ -273,7 +287,9 @@ static inline int HYPERVISOR_block(void)
    3.92      return ret;
    3.93  }
    3.94  
    3.95 -static inline int HYPERVISOR_shutdown(void)
    3.96 +static inline int
    3.97 +HYPERVISOR_shutdown(
    3.98 +    void)
    3.99  {
   3.100      int ret;
   3.101      __asm__ __volatile__ (
   3.102 @@ -285,7 +301,9 @@ static inline int HYPERVISOR_shutdown(vo
   3.103      return ret;
   3.104  }
   3.105  
   3.106 -static inline int HYPERVISOR_reboot(void)
   3.107 +static inline int
   3.108 +HYPERVISOR_reboot(
   3.109 +    void)
   3.110  {
   3.111      int ret;
   3.112      __asm__ __volatile__ (
   3.113 @@ -297,7 +315,9 @@ static inline int HYPERVISOR_reboot(void
   3.114      return ret;
   3.115  }
   3.116  
   3.117 -static inline int HYPERVISOR_suspend(unsigned long srec)
   3.118 +static inline int
   3.119 +HYPERVISOR_suspend(
   3.120 +    unsigned long srec)
   3.121  {
   3.122      int ret;
   3.123      /* NB. On suspend, control software expects a suspend record in %esi. */
   3.124 @@ -310,7 +330,9 @@ static inline int HYPERVISOR_suspend(uns
   3.125      return ret;
   3.126  }
   3.127  
   3.128 -static inline long HYPERVISOR_set_timer_op(u64 timeout)
   3.129 +static inline long
   3.130 +HYPERVISOR_set_timer_op(
   3.131 +    u64 timeout)
   3.132  {
   3.133      int ret;
   3.134      unsigned long timeout_hi = (unsigned long)(timeout>>32);
   3.135 @@ -323,7 +345,9 @@ static inline long HYPERVISOR_set_timer_
   3.136      return ret;
   3.137  }
   3.138  
   3.139 -static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
   3.140 +static inline int
   3.141 +HYPERVISOR_dom0_op(
   3.142 +    dom0_op_t *dom0_op)
   3.143  {
   3.144      int ret;
   3.145      dom0_op->interface_version = DOM0_INTERFACE_VERSION;
   3.146 @@ -335,7 +359,9 @@ static inline int HYPERVISOR_dom0_op(dom
   3.147      return ret;
   3.148  }
   3.149  
   3.150 -static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value)
   3.151 +static inline int
   3.152 +HYPERVISOR_set_debugreg(
   3.153 +    int reg, unsigned long value)
   3.154  {
   3.155      int ret;
   3.156      __asm__ __volatile__ (
   3.157 @@ -346,7 +372,9 @@ static inline int HYPERVISOR_set_debugre
   3.158      return ret;
   3.159  }
   3.160  
   3.161 -static inline unsigned long HYPERVISOR_get_debugreg(int reg)
   3.162 +static inline unsigned long
   3.163 +HYPERVISOR_get_debugreg(
   3.164 +    int reg)
   3.165  {
   3.166      unsigned long ret;
   3.167      __asm__ __volatile__ (
   3.168 @@ -357,7 +385,8 @@ static inline unsigned long HYPERVISOR_g
   3.169      return ret;
   3.170  }
   3.171  
   3.172 -static inline int HYPERVISOR_update_descriptor(
   3.173 +static inline int
   3.174 +HYPERVISOR_update_descriptor(
   3.175      unsigned long ma, unsigned long word1, unsigned long word2)
   3.176  {
   3.177      int ret;
   3.178 @@ -369,7 +398,9 @@ static inline int HYPERVISOR_update_desc
   3.179      return ret;
   3.180  }
   3.181  
   3.182 -static inline int HYPERVISOR_set_fast_trap(int idx)
   3.183 +static inline int
   3.184 +HYPERVISOR_set_fast_trap(
   3.185 +    int idx)
   3.186  {
   3.187      int ret;
   3.188      __asm__ __volatile__ (
   3.189 @@ -380,10 +411,10 @@ static inline int HYPERVISOR_set_fast_tr
   3.190      return ret;
   3.191  }
   3.192  
   3.193 -static inline int HYPERVISOR_dom_mem_op(unsigned int   op,
   3.194 -                                        unsigned long *extent_list,
   3.195 -                                        unsigned long  nr_extents,
   3.196 -                                        unsigned int   extent_order)
   3.197 +static inline int
   3.198 +HYPERVISOR_dom_mem_op(
   3.199 +    unsigned int op, unsigned long *extent_list,
   3.200 +    unsigned long nr_extents, unsigned int extent_order)
   3.201  {
   3.202      int ret;
   3.203      __asm__ __volatile__ (
   3.204 @@ -396,7 +427,9 @@ static inline int HYPERVISOR_dom_mem_op(
   3.205      return ret;
   3.206  }
   3.207  
   3.208 -static inline int HYPERVISOR_multicall(void *call_list, int nr_calls)
   3.209 +static inline int
   3.210 +HYPERVISOR_multicall(
   3.211 +    void *call_list, int nr_calls)
   3.212  {
   3.213      int ret;
   3.214      __asm__ __volatile__ (
   3.215 @@ -407,7 +440,8 @@ static inline int HYPERVISOR_multicall(v
   3.216      return ret;
   3.217  }
   3.218  
   3.219 -static inline int HYPERVISOR_update_va_mapping(
   3.220 +static inline int
   3.221 +HYPERVISOR_update_va_mapping(
   3.222      unsigned long page_nr, pte_t new_val, unsigned long flags)
   3.223  {
   3.224      int ret;
   3.225 @@ -426,7 +460,9 @@ static inline int HYPERVISOR_update_va_m
   3.226      return ret;
   3.227  }
   3.228  
   3.229 -static inline int HYPERVISOR_event_channel_op(void *op)
   3.230 +static inline int
   3.231 +HYPERVISOR_event_channel_op(
   3.232 +    void *op)
   3.233  {
   3.234      int ret;
   3.235      __asm__ __volatile__ (
   3.236 @@ -437,7 +473,9 @@ static inline int HYPERVISOR_event_chann
   3.237      return ret;
   3.238  }
   3.239  
   3.240 -static inline int HYPERVISOR_xen_version(int cmd)
   3.241 +static inline int
   3.242 +HYPERVISOR_xen_version(
   3.243 +    int cmd)
   3.244  {
   3.245      int ret;
   3.246      __asm__ __volatile__ (
   3.247 @@ -448,7 +486,9 @@ static inline int HYPERVISOR_xen_version
   3.248      return ret;
   3.249  }
   3.250  
   3.251 -static inline int HYPERVISOR_console_io(int cmd, int count, char *str)
   3.252 +static inline int
   3.253 +HYPERVISOR_console_io(
   3.254 +    int cmd, int count, char *str)
   3.255  {
   3.256      int ret;
   3.257      __asm__ __volatile__ (
   3.258 @@ -459,7 +499,9 @@ static inline int HYPERVISOR_console_io(
   3.259      return ret;
   3.260  }
   3.261  
   3.262 -static inline int HYPERVISOR_physdev_op(void *physdev_op)
   3.263 +static inline int
   3.264 +HYPERVISOR_physdev_op(
   3.265 +    void *physdev_op)
   3.266  {
   3.267      int ret;
   3.268      __asm__ __volatile__ (
   3.269 @@ -470,18 +512,21 @@ static inline int HYPERVISOR_physdev_op(
   3.270      return ret;
   3.271  }
   3.272  
   3.273 -static inline int HYPERVISOR_grant_table_op(void *gnttab_op)
   3.274 +static inline int
   3.275 +HYPERVISOR_grant_table_op(
   3.276 +    unsigned int cmd, void *uop, unsigned int count)
   3.277  {
   3.278      int ret;
   3.279      __asm__ __volatile__ (
   3.280          TRAP_INSTR
   3.281          : "=a" (ret) : "0" (__HYPERVISOR_grant_table_op),
   3.282 -        "b" (gnttab_op) : "memory" );
   3.283 +        "b" (cmd), "c" (count), "d" (uop) : "memory" );
   3.284  
   3.285      return ret;
   3.286  }
   3.287  
   3.288 -static inline int HYPERVISOR_update_va_mapping_otherdomain(
   3.289 +static inline int
   3.290 +HYPERVISOR_update_va_mapping_otherdomain(
   3.291      unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid)
   3.292  {
   3.293      int ret;
   3.294 @@ -494,7 +539,9 @@ static inline int HYPERVISOR_update_va_m
   3.295      return ret;
   3.296  }
   3.297  
   3.298 -static inline int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
   3.299 +static inline int
   3.300 +HYPERVISOR_vm_assist(
   3.301 +    unsigned int cmd, unsigned int type)
   3.302  {
   3.303      int ret;
   3.304      __asm__ __volatile__ (
     4.1 --- a/xen/arch/x86/domain.c	Thu Oct 07 15:25:52 2004 +0000
     4.2 +++ b/xen/arch/x86/domain.c	Thu Oct 07 16:30:38 2004 +0000
     4.3 @@ -708,7 +708,7 @@ int construct_dom0(struct domain *p,
     4.4          page = &frame_table[mfn];
     4.5          page->u.inuse.domain    = p;
     4.6          page->u.inuse.type_info = 0;
     4.7 -        page->count_info        = PGC_always_set | PGC_allocated | 1;
     4.8 +        page->count_info        = PGC_allocated | 1;
     4.9          list_add_tail(&page->list, &p->page_list);
    4.10          p->tot_pages++; p->max_pages++;
    4.11      }
     5.1 --- a/xen/arch/x86/memory.c	Thu Oct 07 15:25:52 2004 +0000
     5.2 +++ b/xen/arch/x86/memory.c	Thu Oct 07 16:30:38 2004 +0000
     5.3 @@ -164,9 +164,6 @@ void arch_init_memory(void)
     5.4  
     5.5      memset(percpu_info, 0, sizeof(percpu_info));
     5.6  
     5.7 -    for ( mfn = 0; mfn < max_page; mfn++ )
     5.8 -        frame_table[mfn].count_info |= PGC_always_set;
     5.9 -
    5.10      /* Initialise to a magic of 0x55555555 so easier to spot bugs later. */
    5.11      memset(machine_to_phys_mapping, 0x55, 4<<20);
    5.12  
    5.13 @@ -193,9 +190,9 @@ void arch_init_memory(void)
    5.14            mfn < virt_to_phys(&machine_to_phys_mapping[1<<20])>>PAGE_SHIFT;
    5.15            mfn++ )
    5.16      {
    5.17 -        frame_table[mfn].count_info        |= PGC_allocated | 1;
    5.18 -        frame_table[mfn].u.inuse.type_info  = PGT_gdt_page | 1; /* non-RW */
    5.19 -        frame_table[mfn].u.inuse.domain     = dom_xen;
    5.20 +        frame_table[mfn].count_info        = PGC_allocated | 1;
    5.21 +        frame_table[mfn].u.inuse.type_info = PGT_gdt_page | 1; /* non-RW */
    5.22 +        frame_table[mfn].u.inuse.domain    = dom_xen;
    5.23      }
    5.24  }
    5.25  
    5.26 @@ -403,8 +400,6 @@ get_page_from_l1e(
    5.27  
    5.28      if ( unlikely(!pfn_is_ram(pfn)) )
    5.29      {
    5.30 -        /* SPECIAL CASE 1. Mapping an I/O page. */
    5.31 -
    5.32          /* Revert to caller privileges if FD == DOMID_IO. */
    5.33          if ( d == dom_io )
    5.34              d = current;
    5.35 @@ -420,33 +415,7 @@ get_page_from_l1e(
    5.36      }
    5.37  
    5.38      if ( unlikely(!get_page_from_pagenr(pfn, d)) )
    5.39 -    {
    5.40 -        /* SPECIAL CASE 2. Mapping a foreign page via a grant table. */
    5.41 -        
    5.42 -        int rc;
    5.43 -        struct domain *e;
    5.44 -        u32 count_info;
    5.45 -        /*
    5.46 -         * Yuk! Amazingly this is the simplest way to get a guaranteed atomic
    5.47 -         * snapshot of a 64-bit value on IA32. x86/64 solves this of course!
    5.48 -         * Basically it's a no-op CMPXCHG, to get us the current contents.
    5.49 -         * No need for LOCK prefix -- we know that count_info is never zero
    5.50 -         * because it contains PGC_always_set.
    5.51 -         */
    5.52 -        ASSERT(test_bit(_PGC_always_set, &page->count_info));
    5.53 -        __asm__ __volatile__(
    5.54 -            "cmpxchg8b %2"
    5.55 -            : "=d" (e), "=a" (count_info),
    5.56 -              "=m" (*(volatile u64 *)(&page->count_info))
    5.57 -            : "0" (0), "1" (0), "c" (0), "b" (0) );
    5.58 -        if ( unlikely((count_info & PGC_count_mask) == 0) ||
    5.59 -             unlikely(e == NULL) || unlikely(!get_domain(e)) )
    5.60 -             return 0;
    5.61 -        rc = gnttab_try_map(
    5.62 -            e, d, pfn, (l1v & _PAGE_RW) ? GNTTAB_MAP_RW : GNTTAB_MAP_RO);
    5.63 -        put_domain(e);
    5.64 -        return rc;
    5.65 -    }
    5.66 +        return 0;
    5.67  
    5.68      if ( l1v & _PAGE_RW )
    5.69      {
    5.70 @@ -510,8 +479,7 @@ static void put_page_from_l1e(l1_pgentry
    5.71           * mappings and which unmappings are counted via the grant entry, but
    5.72           * really it doesn't matter as privileged domains have carte blanche.
    5.73           */
    5.74 -        if ( likely(gnttab_try_map(e, d, pfn, (l1v & _PAGE_RW) ? 
    5.75 -                                   GNTTAB_UNMAP_RW : GNTTAB_UNMAP_RO)) )
    5.76 +        if ( likely(gnttab_check_unmap(e, d, pfn, !(l1v & _PAGE_RW))) )
    5.77              return;
    5.78          /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
    5.79      }
     6.1 --- a/xen/common/grant_table.c	Thu Oct 07 15:25:52 2004 +0000
     6.2 +++ b/xen/common/grant_table.c	Thu Oct 07 16:30:38 2004 +0000
     6.3 @@ -27,65 +27,50 @@
     6.4  #define PIN_FAIL(_rc, _f, _a...)   \
     6.5      do {                           \
     6.6          DPRINTK( _f, ## _a );      \
     6.7 -        rc = -(_rc);               \
     6.8 -        goto out;                  \
     6.9 +        rc = (_rc);                \
    6.10 +        goto fail;                 \
    6.11      } while ( 0 )
    6.12  
    6.13 +static inline int
    6.14 +get_maptrack_handle(
    6.15 +    grant_table_t *t)
    6.16 +{
    6.17 +    unsigned int h;
    6.18 +    if ( unlikely((h = t->maptrack_head) == NR_MAPTRACK_ENTRIES) )
    6.19 +        return -1;
    6.20 +    t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
    6.21 +    return h;
    6.22 +}
    6.23 +
    6.24  static inline void
    6.25 -check_tlb_flush(
    6.26 -    active_grant_entry_t *a)
    6.27 +put_maptrack_handle(
    6.28 +    grant_table_t *t, int handle)
    6.29  {
    6.30 -    if ( unlikely(NEED_FLUSH(tlbflush_time[smp_processor_id()],
    6.31 -                             a->tlbflush_timestamp)) )
    6.32 -    {
    6.33 -        perfc_incr(need_flush_tlb_flush);
    6.34 -        local_flush_tlb();
    6.35 -    }
    6.36 -}
    6.37 -
    6.38 -static void
    6.39 -make_entry_mappable(
    6.40 -    grant_table_t *t, active_grant_entry_t *a)
    6.41 -{
    6.42 -    u16 *ph = &t->maphash[GNT_MAPHASH(a->frame)];
    6.43 -    a->next = *ph;
    6.44 -    *ph = a - t->active;
    6.45 +    t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT;
    6.46 +    t->maptrack_head = handle;
    6.47  }
    6.48  
    6.49  static void
    6.50 -make_entry_unmappable(
    6.51 -    grant_table_t *t, active_grant_entry_t *a)
    6.52 -{
    6.53 -    active_grant_entry_t *p;
    6.54 -    u16 *ph = &t->maphash[GNT_MAPHASH(a->frame)];
    6.55 -    while ( (p = &t->active[*ph]) != a )
    6.56 -        ph = &p->next;
    6.57 -    *ph = a->next;
    6.58 -    a->next = GNT_MAPHASH_INVALID;
    6.59 -    check_tlb_flush(a);
    6.60 -}
    6.61 -
    6.62 -static long
    6.63 -gnttab_update_pin_status(
    6.64 -    gnttab_update_pin_status_t *uop)
    6.65 +__gnttab_map_grant_ref(
    6.66 +    gnttab_map_grant_ref_t *uop)
    6.67  {
    6.68      domid_t        dom, sdom;
    6.69      grant_ref_t    ref;
    6.70 -    u16            pin_flags;
    6.71      struct domain *ld, *rd;
    6.72 -    u16            sflags;
    6.73 +    u16            flags, sflags;
    6.74 +    int            handle;
    6.75      active_grant_entry_t *act;
    6.76      grant_entry_t *sha;
    6.77 -    long           rc = 0;
    6.78 +    s16            rc = 0;
    6.79      unsigned long  frame;
    6.80  
    6.81      /*
    6.82 -     * We bound the number of times we retry CMPXCHG on memory locations
    6.83 -     * that we share with a guest OS. The reason is that the guest can modify
    6.84 -     * that location at a higher rate than we can read-modify-CMPXCHG, so
    6.85 -     * the guest could cause us to livelock. There are a few cases
    6.86 -     * where it is valid for the guest to race our updates (e.g., to change
    6.87 -     * the GTF_readonly flag), so we allow a few retries before failing.
    6.88 +     * We bound the number of times we retry CMPXCHG on memory locations that
    6.89 +     * we share with a guest OS. The reason is that the guest can modify that
    6.90 +     * location at a higher rate than we can read-modify-CMPXCHG, so the guest
    6.91 +     * could cause us to livelock. There are a few cases where it is valid for
    6.92 +     * the guest to race our updates (e.g., to change the GTF_readonly flag),
    6.93 +     * so we allow a few retries before failing.
    6.94       */
    6.95      int            retries = 0;
    6.96  
    6.97 @@ -94,21 +79,18 @@ gnttab_update_pin_status(
    6.98      /* Bitwise-OR avoids short-circuiting which screws control flow. */
    6.99      if ( unlikely(__get_user(dom, &uop->dom) |
   6.100                    __get_user(ref, &uop->ref) |
   6.101 -                  __get_user(pin_flags, &uop->pin_flags)) )
   6.102 +                  __get_user(flags, &uop->flags)) )
   6.103      {
   6.104 -        DPRINTK("Fault while reading gnttab_update_pin_status_t.\n");
   6.105 -        return -EFAULT;
   6.106 +        DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
   6.107 +        return; /* don't set status */
   6.108      }
   6.109  
   6.110 -    pin_flags &= (GNTPIN_dev_accessible | 
   6.111 -                  GNTPIN_host_accessible |
   6.112 -                  GNTPIN_readonly);
   6.113 -
   6.114      if ( unlikely(ref >= NR_GRANT_ENTRIES) || 
   6.115 -         unlikely(pin_flags == GNTPIN_readonly) )
   6.116 +         unlikely((flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
   6.117      {
   6.118 -        DPRINTK("Bad ref (%d) or flags (%x).\n", ref, pin_flags);
   6.119 -        return -EINVAL;
   6.120 +        DPRINTK("Bad ref (%d) or flags (%x).\n", ref, flags);
   6.121 +        (void)__put_user(GNTST_bad_gntref, &uop->handle);
   6.122 +        return;
   6.123      }
   6.124  
   6.125      if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
   6.126 @@ -117,19 +99,25 @@ gnttab_update_pin_status(
   6.127          if ( rd != NULL )
   6.128              put_domain(rd);
   6.129          DPRINTK("Could not find domain %d\n", dom);
   6.130 -        return -ESRCH;
   6.131 +        (void)__put_user(GNTST_bad_domain, &uop->handle);
   6.132 +        return;
   6.133 +    }
   6.134 +
   6.135 +    if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
   6.136 +    {
   6.137 +        put_domain(rd);
   6.138 +        DPRINTK("No more map handles available\n");
   6.139 +        (void)__put_user(GNTST_no_device_space, &uop->handle);
   6.140 +        return;
   6.141      }
   6.142  
   6.143      act = &rd->grant_table->active[ref];
   6.144      sha = &rd->grant_table->shared[ref];
   6.145  
   6.146      spin_lock(&rd->grant_table->lock);
   6.147 -
   6.148 -    if ( act->status == 0 )
   6.149 +    
   6.150 +    if ( act->pin == 0 )
   6.151      {
   6.152 -        if ( unlikely(pin_flags == 0) )
   6.153 -            goto out;
   6.154 -
   6.155          /* CASE 1: Activating a previously inactive entry. */
   6.156  
   6.157          sflags = sha->flags;
   6.158 @@ -141,7 +129,7 @@ gnttab_update_pin_status(
   6.159  
   6.160              if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
   6.161                   unlikely(sdom != ld->domain) )
   6.162 -                PIN_FAIL(EINVAL,
   6.163 +                PIN_FAIL(GNTST_general_error,
   6.164                           "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
   6.165                          sflags, sdom, ld->domain);
   6.166  
   6.167 @@ -150,11 +138,11 @@ gnttab_update_pin_status(
   6.168              prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
   6.169  
   6.170              new_scombo = scombo | GTF_reading;
   6.171 -            if ( !(pin_flags & GNTPIN_readonly) )
   6.172 +            if ( !(flags & GNTMAP_readonly) )
   6.173              {
   6.174                  new_scombo |= GTF_writing;
   6.175                  if ( unlikely(sflags & GTF_readonly) )
   6.176 -                    PIN_FAIL(EINVAL,
   6.177 +                    PIN_FAIL(GNTST_general_error,
   6.178                               "Attempt to write-pin a r/o grant entry.\n");
   6.179              }
   6.180  
   6.181 @@ -162,7 +150,7 @@ gnttab_update_pin_status(
   6.182              if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
   6.183                                         prev_scombo, 
   6.184                                         new_scombo)) )
   6.185 -                PIN_FAIL(EINVAL,
   6.186 +                PIN_FAIL(GNTST_general_error,
   6.187                           "Fault while modifying shared flags and domid.\n");
   6.188  
   6.189              /* Did the combined update work (did we see what we expected?). */
   6.190 @@ -170,7 +158,7 @@ gnttab_update_pin_status(
   6.191                  break;
   6.192  
   6.193              if ( retries++ == 4 )
   6.194 -                PIN_FAIL(EINVAL,
   6.195 +                PIN_FAIL(GNTST_general_error,
   6.196                           "Shared grant entry is unstable.\n");
   6.197  
   6.198              /* Didn't see what we expected. Split out the seen flags & dom. */
   6.199 @@ -182,140 +170,226 @@ gnttab_update_pin_status(
   6.200          /* rmb(); */ /* not on x86 */
   6.201          frame = sha->frame;
   6.202          if ( unlikely(!pfn_is_ram(frame)) || 
   6.203 -             unlikely(!((pin_flags & GNTPIN_readonly) ? 
   6.204 +             unlikely(!((flags & GNTMAP_readonly) ? 
   6.205                          get_page(&frame_table[frame], rd) : 
   6.206                          get_page_and_type(&frame_table[frame], rd, 
   6.207                                            PGT_writable_page))) )
   6.208          {
   6.209              clear_bit(_GTF_writing, &sha->flags);
   6.210              clear_bit(_GTF_reading, &sha->flags);
   6.211 -            PIN_FAIL(EINVAL, 
   6.212 +            PIN_FAIL(GNTST_general_error, 
   6.213                       "Could not pin the granted frame!\n");
   6.214          }
   6.215  
   6.216 -        act->status = pin_flags;
   6.217 -        act->domid  = sdom;
   6.218 -        act->frame  = frame;
   6.219 -
   6.220 -        make_entry_mappable(rd->grant_table, act);
   6.221 -    }
   6.222 -    else if ( pin_flags == 0 )
   6.223 -    {
   6.224 -        /* CASE 2: Deactivating a previously active entry. */
   6.225 -
   6.226 -        if ( unlikely((act->status & 
   6.227 -                       (GNTPIN_wmap_mask|GNTPIN_rmap_mask)) != 0) )
   6.228 -            PIN_FAIL(EINVAL,
   6.229 -                     "Attempt to deactiv a mapped g.e. (%x)\n", act->status);
   6.230 -
   6.231 -        frame = act->frame;
   6.232 -        if ( !(act->status & GNTPIN_readonly) )
   6.233 -            put_page_type(&frame_table[frame]);
   6.234 -        put_page(&frame_table[frame]);
   6.235 -
   6.236 -        act->status = 0;
   6.237 -        make_entry_unmappable(rd->grant_table, act);
   6.238 -
   6.239 -        clear_bit(_GTF_writing, &sha->flags);
   6.240 -        clear_bit(_GTF_reading, &sha->flags);
   6.241 +        if ( flags & GNTMAP_device_map )
   6.242 +            act->pin += (flags & GNTMAP_readonly) ? 
   6.243 +                GNTPIN_devr_inc : GNTPIN_devw_inc;
   6.244 +        if ( flags & GNTMAP_host_map )
   6.245 +            act->pin += (flags & GNTMAP_readonly) ?
   6.246 +                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
   6.247 +        act->domid = sdom;
   6.248 +        act->frame = frame;
   6.249      }
   6.250      else 
   6.251      {
   6.252 -        /* CASE 3: Active modications to an already active entry. */
   6.253 +        /* CASE 2: Active modifications to an already active entry. */
   6.254  
   6.255          /*
   6.256 -         * Check mapping counts up front, as necessary.
   6.257 -         * After this compound check, the operation cannot fail.
   6.258 +         * A cheesy check for possible pin-count overflow.
   6.259 +         * A more accurate check cannot be done with a single comparison.
   6.260           */
   6.261 -        if ( ((pin_flags & (GNTPIN_readonly|GNTPIN_host_accessible)) !=
   6.262 -              GNTPIN_host_accessible) &&
   6.263 -             (unlikely((act->status & GNTPIN_wmap_mask) != 0) ||
   6.264 -              (((pin_flags & GNTPIN_host_accessible) == 0) &&
   6.265 -               unlikely((act->status & GNTPIN_rmap_mask) != 0))) )
   6.266 -            PIN_FAIL(EINVAL,
   6.267 -                     "Attempt to reduce pinning of a mapped g.e. (%x,%x)\n",
   6.268 -                    pin_flags, act->status);
   6.269 +        if ( (act->pin & 0x80808080U) != 0 )
   6.270 +            PIN_FAIL(ENOSPC, "Risk of counter overflow %08x\n", act->pin);
   6.271  
   6.272 -        /* Check for changes to host accessibility. */
   6.273 -        if ( pin_flags & GNTPIN_host_accessible )
   6.274 +        if ( !(flags & GNTMAP_readonly) && 
   6.275 +             !((sflags = sha->flags) & GTF_writing) )
   6.276          {
   6.277 -            if ( !(act->status & GNTPIN_host_accessible) )
   6.278 -                make_entry_mappable(rd->grant_table, act);
   6.279 -        }
   6.280 -        else if ( act->status & GNTPIN_host_accessible )
   6.281 -            make_entry_unmappable(rd->grant_table, act);
   6.282 -
   6.283 -        /* Check for changes to write accessibility. */
   6.284 -        if ( pin_flags & GNTPIN_readonly )
   6.285 -        {
   6.286 -            if ( !(act->status & GNTPIN_readonly) )
   6.287 -            {
   6.288 -                put_page_type(&frame_table[act->frame]);
   6.289 -                check_tlb_flush(act);
   6.290 -                clear_bit(_GTF_writing, &sha->flags);
   6.291 -            }
   6.292 -        }
   6.293 -        else if ( act->status & GNTPIN_readonly )
   6.294 -        {
   6.295 -            sflags = sha->flags;
   6.296 -
   6.297              for ( ; ; )
   6.298              {
   6.299                  u16 prev_sflags;
   6.300                  
   6.301                  if ( unlikely(sflags & GTF_readonly) )
   6.302 -                    PIN_FAIL(EINVAL,
   6.303 +                    PIN_FAIL(GNTST_general_error,
   6.304                               "Attempt to write-pin a r/o grant entry.\n");
   6.305  
   6.306 -                if ( unlikely(!get_page_type(&frame_table[act->frame],
   6.307 -                                             PGT_writable_page)) )
   6.308 -                    PIN_FAIL(EINVAL,
   6.309 -                             "Attempt to write-pin a unwritable page.\n");
   6.310 -
   6.311                  prev_sflags = sflags;
   6.312  
   6.313                  /* NB. prev_sflags is updated in place to seen value. */
   6.314                  if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, 
   6.315                                             prev_sflags | GTF_writing)) )
   6.316 -                    PIN_FAIL(EINVAL,
   6.317 +                    PIN_FAIL(GNTST_general_error,
   6.318                               "Fault while modifying shared flags.\n");
   6.319  
   6.320                  if ( likely(prev_sflags == sflags) )
   6.321                      break;
   6.322  
   6.323                  if ( retries++ == 4 )
   6.324 -                    PIN_FAIL(EINVAL,
   6.325 +                    PIN_FAIL(GNTST_general_error,
   6.326                               "Shared grant entry is unstable.\n");
   6.327  
   6.328                  sflags = prev_sflags;
   6.329              }
   6.330 +
   6.331 +            if ( unlikely(!get_page_type(&frame_table[act->frame],
   6.332 +                                         PGT_writable_page)) )
   6.333 +            {
   6.334 +                clear_bit(_GTF_writing, &sha->flags);
   6.335 +                PIN_FAIL(GNTST_general_error,
   6.336 +                         "Attempt to write-pin a unwritable page.\n");
   6.337 +            }
   6.338          }
   6.339  
   6.340 -        /* Update status word -- this includes device accessibility. */
   6.341 -        act->status &= ~(GNTPIN_dev_accessible |
   6.342 -                         GNTPIN_host_accessible |
   6.343 -                         GNTPIN_readonly);
   6.344 -        act->status |= pin_flags;
   6.345 +        if ( flags & GNTMAP_device_map )
   6.346 +            act->pin += (flags & GNTMAP_readonly) ? 
   6.347 +                GNTPIN_devr_inc : GNTPIN_devw_inc;
   6.348 +        if ( flags & GNTMAP_host_map )
   6.349 +            act->pin += (flags & GNTMAP_readonly) ?
   6.350 +                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
   6.351 +    }
   6.352 +
   6.353 +    ld->grant_table->maptrack[handle].domid         = dom;
   6.354 +    ld->grant_table->maptrack[handle].ref_and_flags =
   6.355 +        (ref << MAPTRACK_REF_SHIFT) | (flags & MAPTRACK_GNTMAP_MASK);
   6.356 +
   6.357 +    /* Unchecked and unconditional. */
   6.358 +    (void)__put_user(handle, &uop->handle);
   6.359 +    (void)__put_user(act->frame,  &uop->dev_bus_addr);
   6.360 +
   6.361 +    spin_unlock(&rd->grant_table->lock);
   6.362 +    put_domain(rd);
   6.363 +    return;
   6.364 +
   6.365 + fail:
   6.366 +    (void)__put_user(rc, &uop->handle);
   6.367 +    spin_unlock(&rd->grant_table->lock);
   6.368 +    put_domain(rd);
   6.369 +    put_maptrack_handle(ld->grant_table, handle);
   6.370 +}
   6.371 +
   6.372 +static long
   6.373 +gnttab_map_grant_ref(
   6.374 +    gnttab_map_grant_ref_t *uop, unsigned int count)
   6.375 +{
   6.376 +    int i;
   6.377 +    for ( i = 0; i < count; i++ )
   6.378 +        __gnttab_map_grant_ref(&uop[i]);
   6.379 +    return 0;
   6.380 +}
   6.381 +
   6.382 +static void
   6.383 +__gnttab_unmap_grant_ref(
   6.384 +    gnttab_unmap_grant_ref_t *uop)
   6.385 +{
   6.386 +    domid_t        dom;
   6.387 +    grant_ref_t    ref;
   6.388 +    u16            handle;
   6.389 +    struct domain *ld, *rd;
   6.390 +
   6.391 +    active_grant_entry_t *act;
   6.392 +    grant_entry_t *sha;
   6.393 +    grant_mapping_t *map;
   6.394 +    s16            rc = 0;
   6.395 +    unsigned long  frame, virt;
   6.396 +
   6.397 +    ld = current;
   6.398 +
   6.399 +    /* Bitwise-OR avoids short-circuiting which screws control flow. */
   6.400 +    if ( unlikely(__get_user(virt, &uop->host_virt_addr) |
   6.401 +                  __get_user(frame, &uop->dev_bus_addr) |
   6.402 +                  __get_user(handle, &uop->handle)) )
   6.403 +    {
   6.404 +        DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
   6.405 +        return; /* don't set status */
   6.406      }
   6.407  
   6.408 -    /* Unchecked and unconditional. */
   6.409 -    (void)__put_user(act->frame, &uop->dev_bus_addr);
   6.410 -    (void)__put_user(act->frame, &uop->host_phys_addr);
   6.411 +    map = &ld->grant_table->maptrack[handle];
   6.412 +
   6.413 +    if ( unlikely(handle >= NR_MAPTRACK_ENTRIES) ||
   6.414 +         unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) )
   6.415 +    {
   6.416 +        DPRINTK("Bad handle (%d).\n", handle);
   6.417 +        (void)__put_user(GNTST_bad_handle, &uop->status);
   6.418 +        return;
   6.419 +    }
   6.420 +
   6.421 +    dom = map->domid;
   6.422 +    ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
   6.423 +
   6.424 +    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
   6.425 +         unlikely(ld == rd) )
   6.426 +    {
   6.427 +        if ( rd != NULL )
   6.428 +            put_domain(rd);
   6.429 +        DPRINTK("Could not find domain %d\n", dom);
   6.430 +        (void)__put_user(GNTST_bad_domain, &uop->status);
   6.431 +        return;
   6.432 +    }
   6.433 +
   6.434 +    act = &rd->grant_table->active[ref];
   6.435 +    sha = &rd->grant_table->shared[ref];
   6.436 +
   6.437 +    spin_lock(&rd->grant_table->lock);
   6.438  
   6.439 - out:
   6.440 +    if ( frame != 0 )
   6.441 +    {
   6.442 +        if ( unlikely(frame != act->frame) )
   6.443 +            PIN_FAIL(GNTST_general_error,
   6.444 +                     "Bad frame number doesn't match gntref.\n");
   6.445 +        if ( map->ref_and_flags & GNTMAP_device_map )
   6.446 +            act->pin -= (map->ref_and_flags & GNTMAP_readonly) ? 
   6.447 +                GNTPIN_devr_inc : GNTPIN_devw_inc;
   6.448 +    }
   6.449 +    else
   6.450 +    {
   6.451 +        frame = act->frame;
   6.452 +    }
   6.453 +
   6.454 +    if ( (virt != 0) && (map->ref_and_flags & GNTMAP_host_map) )
   6.455 +    {
   6.456 +        act->pin -= (map->ref_and_flags & GNTMAP_readonly) ?
   6.457 +            GNTPIN_hstr_inc : GNTPIN_hstw_inc;
   6.458 +    }
   6.459 +
   6.460 +    if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) &&
   6.461 +              !(map->ref_and_flags & GNTMAP_readonly) )
   6.462 +    {
   6.463 +        put_page_type(&frame_table[frame]);
   6.464 +        clear_bit(_GTF_writing, &sha->flags);
   6.465 +    }
   6.466 +
   6.467 +    if ( act->pin == 0 )
   6.468 +    {
   6.469 +        put_page(&frame_table[frame]);
   6.470 +        clear_bit(_GTF_reading, &sha->flags);
   6.471 +    }
   6.472 +
   6.473 + fail:
   6.474 +    (void)__put_user(rc, &uop->status);
   6.475      spin_unlock(&rd->grant_table->lock);
   6.476      put_domain(rd);
   6.477 -    return rc;
   6.478 +}
   6.479 +
   6.480 +static long
   6.481 +gnttab_unmap_grant_ref(
   6.482 +    gnttab_unmap_grant_ref_t *uop, unsigned int count)
   6.483 +{
   6.484 +    int i;
   6.485 +    for ( i = 0; i < count; i++ )
   6.486 +        __gnttab_unmap_grant_ref(&uop[i]);
   6.487 +    return 0;
   6.488  }
   6.489  
   6.490  static long 
   6.491  gnttab_setup_table(
   6.492 -    gnttab_setup_table_t *uop)
   6.493 +    gnttab_setup_table_t *uop, unsigned int count)
   6.494  {
   6.495      gnttab_setup_table_t  op;
   6.496      struct domain        *d;
   6.497  
   6.498 -    if ( unlikely(__copy_from_user(&op, uop, sizeof(op)) != 0) )
   6.499 +    if ( count != 1 )
   6.500 +        return -EINVAL;
   6.501 +
   6.502 +    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
   6.503      {
   6.504          DPRINTK("Fault while reading gnttab_setup_table_t.\n");
   6.505          return -EFAULT;
   6.506 @@ -324,29 +398,33 @@ gnttab_setup_table(
   6.507      if ( unlikely(op.nr_frames > 1) )
   6.508      {
   6.509          DPRINTK("Xen only supports one grant-table frame per domain.\n");
   6.510 -        return -EINVAL;
   6.511 +        (void)put_user(GNTST_general_error, &uop->status);
   6.512 +        return 0;
   6.513      }
   6.514  
   6.515      if ( op.dom == DOMID_SELF )
   6.516 +    {
   6.517          op.dom = current->domain;
   6.518 +    }
   6.519 +    else if ( unlikely(!IS_PRIV(current)) )
   6.520 +    {
   6.521 +        (void)put_user(GNTST_permission_denied, &uop->status);
   6.522 +        return 0;
   6.523 +    }
   6.524  
   6.525      if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
   6.526      {
   6.527          DPRINTK("Bad domid %d.\n", op.dom);
   6.528 -        return -ESRCH;
   6.529 +        (void)put_user(GNTST_bad_domain, &uop->status);
   6.530 +        return 0;
   6.531      }
   6.532  
   6.533      if ( op.nr_frames == 1 )
   6.534      {
   6.535          ASSERT(d->grant_table != NULL);
   6.536 -
   6.537 -        if ( unlikely(put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT,
   6.538 -                               &op.frame_list[0])) )
   6.539 -        {
   6.540 -            DPRINTK("Fault while writing frame list.\n");
   6.541 -            put_domain(d);
   6.542 -            return -EFAULT;
   6.543 -        }
   6.544 +        (void)put_user(GNTST_okay, &uop->status);
   6.545 +        (void)put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT,
   6.546 +                       &uop->frame_list[0]);
   6.547      }
   6.548  
   6.549      put_domain(d);
   6.550 @@ -355,22 +433,29 @@ gnttab_setup_table(
   6.551  
   6.552  long 
   6.553  do_grant_table_op(
   6.554 -    gnttab_op_t *uop)
   6.555 +    unsigned int cmd, void *uop, unsigned int count)
   6.556  {
   6.557      long rc;
   6.558 -    u32  cmd;
   6.559  
   6.560 -    if ( unlikely(!access_ok(VERIFY_WRITE, uop, sizeof(*uop))) ||
   6.561 -         unlikely(__get_user(cmd, &uop->cmd)) )
   6.562 -        return -EFAULT;
   6.563 +    if ( count > 512 )
   6.564 +        return -EINVAL;
   6.565  
   6.566      switch ( cmd )
   6.567      {
   6.568 -    case GNTTABOP_update_pin_status:
   6.569 -        rc = gnttab_update_pin_status(&uop->u.update_pin_status);
   6.570 +    case GNTTABOP_map_grant_ref:
   6.571 +        if ( unlikely(!access_ok(VERIFY_WRITE, uop,
   6.572 +                                 count * sizeof(gnttab_map_grant_ref_t))) )
   6.573 +            return -EFAULT;
   6.574 +        rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
   6.575 +        break;
   6.576 +    case GNTTABOP_unmap_grant_ref:
   6.577 +        if ( unlikely(!access_ok(VERIFY_WRITE, uop,
   6.578 +                                 count * sizeof(gnttab_unmap_grant_ref_t))) )
   6.579 +            return -EFAULT;
   6.580 +        rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count);
   6.581          break;
   6.582      case GNTTABOP_setup_table:
   6.583 -        rc = gnttab_setup_table(&uop->u.setup_table);
   6.584 +        rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
   6.585          break;
   6.586      default:
   6.587          rc = -ENOSYS;
   6.588 @@ -381,66 +466,10 @@ do_grant_table_op(
   6.589  }
   6.590  
   6.591  int
   6.592 -gnttab_try_map(
   6.593 -    struct domain *rd, struct domain *ld, unsigned long frame, int op)
   6.594 +gnttab_check_unmap(
   6.595 +    struct domain *rd, struct domain *ld, unsigned long frame, int readonly)
   6.596  {
   6.597 -    grant_table_t        *t;
   6.598 -    active_grant_entry_t *a;
   6.599 -    u16                  *ph, h;
   6.600 -
   6.601 -    if ( unlikely((t = rd->grant_table) == NULL) )
   6.602 -        return 0;
   6.603 -
   6.604 -    spin_lock(&t->lock);
   6.605 -
   6.606 -    ph = &t->maphash[GNT_MAPHASH(frame)];
   6.607 -    while ( (h = *ph) != GNT_MAPHASH_INVALID )
   6.608 -    {
   6.609 -        if ( (a = &t->active[*ph])->frame != frame )
   6.610 -            goto found;
   6.611 -        ph = &a->next;
   6.612 -    }
   6.613 -    
   6.614 - fail:
   6.615 -    spin_unlock(&t->lock);
   6.616      return 0;
   6.617 -
   6.618 - found:
   6.619 -    if ( !(a->status & GNTPIN_host_accessible) )
   6.620 -        goto fail;
   6.621 -
   6.622 -    switch ( op )
   6.623 -    {
   6.624 -    case GNTTAB_MAP_RO:
   6.625 -        if ( (a->status & GNTPIN_rmap_mask) == GNTPIN_rmap_mask )
   6.626 -            goto fail;
   6.627 -        a->status += 1 << GNTPIN_rmap_shift;
   6.628 -        break;
   6.629 -
   6.630 -    case GNTTAB_MAP_RW:
   6.631 -        if ( (a->status & GNTPIN_wmap_mask) == GNTPIN_wmap_mask )
   6.632 -            goto fail;
   6.633 -        a->status += 1 << GNTPIN_wmap_shift;
   6.634 -        break;
   6.635 -
   6.636 -    case GNTTAB_UNMAP_RO:
   6.637 -        if ( (a->status & GNTPIN_rmap_mask) == 0 )
   6.638 -            goto fail;
   6.639 -        a->status -= 1 << GNTPIN_rmap_shift;
   6.640 -        break;
   6.641 -
   6.642 -    case GNTTAB_UNMAP_RW:
   6.643 -        if ( (a->status & GNTPIN_wmap_mask) == 0 )
   6.644 -            goto fail;
   6.645 -        a->status -= 1 << GNTPIN_wmap_shift;
   6.646 -        break;
   6.647 -
   6.648 -    default:
   6.649 -        BUG();
   6.650 -    }
   6.651 -
   6.652 -    spin_unlock(&t->lock);
   6.653 -    return 1;
   6.654  }
   6.655  
   6.656  int 
   6.657 @@ -529,21 +558,24 @@ grant_table_create(
   6.658      grant_table_t *t;
   6.659      int            i;
   6.660  
   6.661 -    if ( (t = xmalloc(sizeof(grant_table_t))) == NULL )
   6.662 +    if ( (t = xmalloc(sizeof(*t))) == NULL )
   6.663          goto no_mem;
   6.664  
   6.665      /* Simple stuff. */
   6.666 -    t->shared = NULL;
   6.667 -    t->active = NULL;
   6.668 +    memset(t, 0, sizeof(*t));
   6.669      spin_lock_init(&t->lock);
   6.670 -    for ( i = 0; i < GNT_MAPHASH_SZ; i++ )
   6.671 -        t->maphash[i] = GNT_MAPHASH_INVALID;
   6.672  
   6.673      /* Active grant-table page. */
   6.674      if ( (t->active = xmalloc(sizeof(active_grant_entry_t) * 
   6.675                                NR_GRANT_ENTRIES)) == NULL )
   6.676          goto no_mem;
   6.677  
   6.678 +    if ( (t->maptrack = (void *)alloc_xenheap_page()) == NULL )
   6.679 +        goto no_mem;
   6.680 +    memset(t->maptrack, 0, PAGE_SIZE);
   6.681 +    for ( i = 0; i < NR_MAPTRACK_ENTRIES; i++ )
   6.682 +        t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
   6.683 +
   6.684      /* Set up shared grant-table page. */
   6.685      if ( (t->shared = (void *)alloc_xenheap_page()) == NULL )
   6.686          goto no_mem;
   6.687 @@ -560,6 +592,8 @@ grant_table_create(
   6.688      {
   6.689          if ( t->active != NULL )
   6.690              xfree(t->active);
   6.691 +        if ( t->maptrack != NULL )
   6.692 +            free_xenheap_page((unsigned long)t->maptrack);
   6.693          xfree(t);
   6.694      }
   6.695      return -ENOMEM;
   6.696 @@ -576,6 +610,7 @@ grant_table_destroy(
   6.697          /* Free memory relating to this grant table. */
   6.698          d->grant_table = NULL;
   6.699          free_xenheap_page((unsigned long)t->shared);
   6.700 +        free_xenheap_page((unsigned long)t->maptrack);
   6.701          xfree(t->active);
   6.702          xfree(t);
   6.703      }
     7.1 --- a/xen/common/kernel.c	Thu Oct 07 15:25:52 2004 +0000
     7.2 +++ b/xen/common/kernel.c	Thu Oct 07 16:30:38 2004 +0000
     7.3 @@ -297,19 +297,9 @@ void cmain(multiboot_info_t *mbi)
     7.4      xmem_cache_init();
     7.5      xmem_cache_sizes_init(max_page);
     7.6  
     7.7 -    /*
     7.8 -     * Create a domain-structure allocator. The SLAB_NO_REAP flag is essential!
     7.9 -     * This is because in some situations a domain's reference count will be
    7.10 -     * incremented by someone with no other handle on the structure -- this is 
    7.11 -     * inherently racey because the struct could be freed by the time that the
    7.12 -     * count is incremented. By specifying 'no-reap' we ensure that, worst
    7.13 -     * case, they increment some other domain's count, rather than corrupting
    7.14 -     * a random field in a random structure!
    7.15 -     * See, for example, arch/x86/memory.c:get_page_from_l1e().
    7.16 -     */
    7.17      domain_struct_cachep = xmem_cache_create(
    7.18          "domain_cache", sizeof(struct domain),
    7.19 -        0, SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL);
    7.20 +        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
    7.21      if ( domain_struct_cachep == NULL )
    7.22          panic("No slab cache for task structs.");
    7.23  
     8.1 --- a/xen/common/page_alloc.c	Thu Oct 07 15:25:52 2004 +0000
     8.2 +++ b/xen/common/page_alloc.c	Thu Oct 07 16:30:38 2004 +0000
     8.3 @@ -311,7 +311,7 @@ unsigned long alloc_xenheap_pages(int or
     8.4  
     8.5      for ( i = 0; i < (1 << order); i++ )
     8.6      {
     8.7 -        pg[i].count_info        = PGC_always_set;
     8.8 +        pg[i].count_info        = 0;
     8.9          pg[i].u.inuse.domain    = NULL;
    8.10          pg[i].u.inuse.type_info = 0;
    8.11      }
    8.12 @@ -384,7 +384,7 @@ struct pfn_info *alloc_domheap_pages(str
    8.13              }
    8.14          }
    8.15  
    8.16 -        pg[i].count_info        = PGC_always_set;
    8.17 +        pg[i].count_info        = 0;
    8.18          pg[i].u.inuse.domain    = NULL;
    8.19          pg[i].u.inuse.type_info = 0;
    8.20      }
     9.1 --- a/xen/include/asm-x86/mm.h	Thu Oct 07 15:25:52 2004 +0000
     9.2 +++ b/xen/include/asm-x86/mm.h	Thu Oct 07 16:30:38 2004 +0000
     9.3 @@ -70,33 +70,30 @@ struct pfn_info
     9.4  #define PGT_type_mask       (7<<29) /* Bits 29-31. */
     9.5   /* Has this page been validated for use as its current type? */
     9.6  #define _PGT_validated      28
     9.7 -#define PGT_validated       (1<<_PGT_validated)
     9.8 +#define PGT_validated       (1U<<_PGT_validated)
     9.9   /* Owning guest has pinned this page to its current type? */
    9.10  #define _PGT_pinned         27
    9.11 -#define PGT_pinned          (1<<_PGT_pinned)
    9.12 +#define PGT_pinned          (1U<<_PGT_pinned)
    9.13   /* The 10 most significant bits of virt address if this is a page table. */
    9.14  #define PGT_va_shift        17
    9.15 -#define PGT_va_mask         (((1<<10)-1)<<PGT_va_shift)
    9.16 +#define PGT_va_mask         (((1U<<10)-1)<<PGT_va_shift)
    9.17   /* Is the back pointer still mutable (i.e. not fixed yet)? */
    9.18 -#define PGT_va_mutable      (((1<<10)-1)<<PGT_va_shift)
    9.19 +#define PGT_va_mutable      (((1U<<10)-1)<<PGT_va_shift)
    9.20   /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
    9.21 -#define PGT_va_unknown      (((1<<10)-2)<<PGT_va_shift)
    9.22 +#define PGT_va_unknown      (((1U<<10)-2)<<PGT_va_shift)
    9.23   /* 17-bit count of uses of this frame as its current type. */
    9.24 -#define PGT_count_mask      ((1<<17)-1)
    9.25 +#define PGT_count_mask      ((1U<<17)-1)
    9.26  
    9.27   /* Cleared when the owning guest 'frees' this page. */
    9.28 -#define _PGC_allocated                31
    9.29 -#define PGC_allocated                 (1<<_PGC_allocated)
    9.30 - /* This bit is always set, guaranteeing that the count word is never zero. */
    9.31 -#define _PGC_always_set               30
    9.32 -#define PGC_always_set                (1<<_PGC_always_set)
    9.33 - /* 30-bit count of references to this frame. */
    9.34 -#define PGC_count_mask                ((1<<30)-1)
    9.35 +#define _PGC_allocated      31
    9.36 +#define PGC_allocated       (1U<<_PGC_allocated)
    9.37 + /* 31-bit count of references to this frame. */
    9.38 +#define PGC_count_mask      ((1U<<31)-1)
    9.39  
    9.40  /* We trust the slab allocator in slab.c, and our use of it. */
    9.41 -#define PageSlab(page)		(1)
    9.42 -#define PageSetSlab(page)	((void)0)
    9.43 -#define PageClearSlab(page)	((void)0)
    9.44 +#define PageSlab(page)	    (1)
    9.45 +#define PageSetSlab(page)   ((void)0)
    9.46 +#define PageClearSlab(page) ((void)0)
    9.47  
    9.48  #define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)
    9.49  
    9.50 @@ -108,7 +105,7 @@ struct pfn_info
    9.51          wmb(); /* install valid domain ptr before updating refcnt. */       \
    9.52          spin_lock(&(_dom)->page_alloc_lock);                                \
    9.53          /* _dom holds an allocation reference */                            \
    9.54 -        ASSERT((_pfn)->count_info == PGC_always_set);                       \
    9.55 +        ASSERT((_pfn)->count_info == 0);                                    \
    9.56          (_pfn)->count_info |= PGC_allocated | 1;                            \
    9.57          if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
    9.58              get_knownalive_domain(_dom);                                    \
    10.1 --- a/xen/include/hypervisor-ifs/grant_table.h	Thu Oct 07 15:25:52 2004 +0000
    10.2 +++ b/xen/include/hypervisor-ifs/grant_table.h	Thu Oct 07 16:30:38 2004 +0000
    10.3 @@ -24,7 +24,10 @@
    10.4   * 
    10.5   * Introducing a valid entry into the grant table:
    10.6   *  1. Write ent->domid.
    10.7 - *  2. Write ent->frame (to zero if installing GTF_accept_transfer).
    10.8 + *  2. Write ent->frame:
    10.9 + *      GTF_permit_access:   Frame to which access is permitted.
   10.10 + *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
   10.11 + *                           frame, or zero if none.
   10.12   *  3. Write memory barrier (WMB).
   10.13   *  4. Write ent->flags, inc. valid type.
   10.14   * 
   10.15 @@ -49,10 +52,11 @@
   10.16   *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
   10.17   *      The guest must /not/ modify the grant entry until the address of the
   10.18   *      transferred frame is written. It is safe for the guest to spin waiting
   10.19 - *      for this to occur (detect by observing non-zero value in ent->frame).
   10.20 + *      for this to occur (detect by observing GTF_transfer_completed in
   10.21 + *      ent->flags).
   10.22   *
   10.23   * Invalidating a committed GTF_accept_transfer entry:
   10.24 - *  1. Wait for ent->frame != 0.
   10.25 + *  1. Wait for (ent->flags & GTF_transfer_completed).
   10.26   *
   10.27   * Changing a GTF_permit_access from writable to read-only:
   10.28   *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
   10.29 @@ -86,10 +90,10 @@ typedef struct {
   10.30   *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
   10.31   *                       to this guest. Xen writes the page number to @frame.
   10.32   */
   10.33 -#define GTF_invalid         (0<<0)
   10.34 -#define GTF_permit_access   (1<<0)
   10.35 -#define GTF_accept_transfer (2<<0)
   10.36 -#define GTF_type_mask       (3<<0)
   10.37 +#define GTF_invalid         (0U<<0)
   10.38 +#define GTF_permit_access   (1U<<0)
   10.39 +#define GTF_accept_transfer (2U<<0)
   10.40 +#define GTF_type_mask       (3U<<0)
   10.41  
   10.42  /*
   10.43   * Subflags for GTF_permit_access.
   10.44 @@ -98,23 +102,26 @@ typedef struct {
   10.45   *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
   10.46   */
   10.47  #define _GTF_readonly       (2)
   10.48 -#define GTF_readonly        (1<<_GTF_readonly)
   10.49 +#define GTF_readonly        (1U<<_GTF_readonly)
   10.50  #define _GTF_reading        (3)
   10.51 -#define GTF_reading         (1<<_GTF_reading)
   10.52 +#define GTF_reading         (1U<<_GTF_reading)
   10.53  #define _GTF_writing        (4)
   10.54 -#define GTF_writing         (1<<_GTF_writing)
   10.55 +#define GTF_writing         (1U<<_GTF_writing)
   10.56  
   10.57  /*
   10.58   * Subflags for GTF_accept_transfer:
   10.59   *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
   10.60   *      to transferring ownership of a page frame. When a guest sees this flag
   10.61 - *      it must /not/ modify the grant entry until the address of the
   10.62 - *      transferred frame is written into the entry.
   10.63 - *      NB. It is safe for the guest to spin-wait on the frame address:
   10.64 - *          Xen will always write the frame address in a timely manner.
   10.65 + *      it must /not/ modify the grant entry until GTF_transfer_completed is
   10.66 + *      set by Xen.
   10.67 + *  GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
   10.68 + *      after reading GTF_transfer_committed. Xen will always write the frame
   10.69 + *      address, followed by ORing this flag, in a timely manner.
   10.70   */
   10.71  #define _GTF_transfer_committed (2)
   10.72 -#define GTF_transfer_committed  (1<<_GTF_transfer_committed)
   10.73 +#define GTF_transfer_committed  (1U<<_GTF_transfer_committed)
   10.74 +#define _GTF_transfer_completed (3)
   10.75 +#define GTF_transfer_completed  (1U<<_GTF_transfer_completed)
   10.76  
   10.77  
   10.78  /***********************************
   10.79 @@ -127,28 +134,56 @@ typedef struct {
   10.80  typedef u16 grant_ref_t;
   10.81  
   10.82  /*
   10.83 - * GNTTABOP_update_pin_status: Change the pin status of of <dom>'s grant entry
   10.84 - * with reference <ref>.
   10.85 + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
   10.86 + * by devices and/or host CPUs. If successful, <handle> is a tracking number
   10.87 + * that must be presented later to destroy the mapping(s). On error, <handle>
   10.88 + * is a negative status code.
   10.89   * NOTES:
   10.90 - *  1. If GNTPIN_dev_accessible is specified then <dev_bus_addr> is the address
   10.91 + *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
   10.92   *     via which I/O devices may access the granted frame.
   10.93 - *  2. If GNTPIN_host_accessible is specified then <host_phys_addr> is the
   10.94 - *     physical address of the frame, which may be mapped into the caller's
   10.95 - *     page tables.
   10.96 + *  2. If GNTMAP_host_map is specified then a mapping will be added at
   10.97 + *     virtual address <host_virt_addr> in the current address space.
   10.98 + *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
   10.99 + *     host mapping is destroyed by other means then it is *NOT* guaranteed
  10.100 + *     to be accounted to the correct grant reference!
  10.101   */
  10.102 -#define GNTTABOP_update_pin_status    0
  10.103 +#define GNTTABOP_map_grant_ref        0
  10.104  typedef struct {
  10.105      /* IN parameters. */
  10.106 -    domid_t     dom;                  /*  0 */
  10.107 -    grant_ref_t ref;                  /*  2 */
  10.108 -    u16         pin_flags;            /*  4 */
  10.109 -    u16         __pad;                /*  6 */
  10.110 +    memory_t    host_virt_addr;       /*  0 */
  10.111 +    MEMORY_PADDING;
  10.112 +    domid_t     dom;                  /*  8 */
  10.113 +    grant_ref_t ref;                  /* 10 */
  10.114 +    u16         flags;                /* 12: GNTMAP_* */
  10.115      /* OUT parameters. */
  10.116 +    s16         handle;               /* 14: +ve: handle; -ve: GNTST_* */
  10.117 +    memory_t    dev_bus_addr;         /* 16 */
  10.118 +    MEMORY_PADDING;
  10.119 +} PACKED gnttab_map_grant_ref_t; /* 24 bytes */
  10.120 +
  10.121 +/*
  10.122 + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
  10.123 + * tracked by <handle>. If <host_virt_addr> or <dev_bus_addr> is zero, that
  10.124 + * field is ignored. If non-zero, they must refer to a device/host mapping
  10.125 + * that is tracked by <handle>.
  10.126 + * NOTES:
  10.127 + *  1. The call may fail in an undefined manner if either mapping is not
  10.128 + *     tracked by <handle>.
  10.129 + *  2. After executing a batch of unmaps, it is guaranteed that no stale
  10.130 + *     mappings will remain in the device or host TLBs.
  10.131 + */
  10.132 +#define GNTTABOP_unmap_grant_ref      1
  10.133 +typedef struct {
  10.134 +    /* IN parameters. */
  10.135 +    memory_t    host_virt_addr;       /*  0 */
  10.136 +    MEMORY_PADDING;
  10.137      memory_t    dev_bus_addr;         /*  8 */
  10.138      MEMORY_PADDING;
  10.139 -    memory_t    host_phys_addr;       /* 12 */
  10.140 -    MEMORY_PADDING;
  10.141 -} PACKED gnttab_update_pin_status_t; /* 16 bytes */
  10.142 +    u16         handle;               /* 16 */
  10.143 +    /* OUT parameters. */
  10.144 +    s16         status;               /* 18: GNTST_* */
  10.145 +    u32         __pad;
  10.146 +} PACKED gnttab_unmap_grant_ref_t; /* 24 bytes */
  10.147  
  10.148  /*
  10.149   * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
  10.150 @@ -159,38 +194,58 @@ typedef struct {
  10.151   *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
  10.152   *  3. Xen may not support more than a single grant-table page per domain.
  10.153   */
  10.154 -#define GNTTABOP_setup_table          1
  10.155 +#define GNTTABOP_setup_table          2
  10.156  typedef struct {
  10.157      /* IN parameters. */
  10.158      domid_t     dom;                  /*  0 */
  10.159      u16         nr_frames;            /*  2 */
  10.160 -    u32         __pad;
  10.161 +    u16         __pad;
  10.162      /* OUT parameters. */
  10.163 +    s16         status;               /*  6: GNTST_* */
  10.164      unsigned long *frame_list;        /*  8 */
  10.165      MEMORY_PADDING;
  10.166  } PACKED gnttab_setup_table_t; /* 16 bytes */
  10.167  
  10.168 -typedef struct {
  10.169 -    u32 cmd; /* GNTTABOP_* */         /*  0 */
  10.170 -    u32 __reserved;                   /*  4 */
  10.171 -    union {                           /*  8 */
  10.172 -        gnttab_update_pin_status_t update_pin_status;
  10.173 -        gnttab_setup_table_t       setup_table;
  10.174 -        u8                         __dummy[16];
  10.175 -    } PACKED u;
  10.176 -} PACKED gnttab_op_t; /* 24 bytes */
  10.177 +/*
  10.178 + * Bitfield values for gnttab_map_grant_ref_t.flags (GNTMAP_*).
  10.179 + */
  10.180 + /* Map the grant entry for access by I/O devices. */
  10.181 +#define _GNTMAP_device_map      (0)
  10.182 +#define GNTMAP_device_map       (1<<_GNTMAP_device_map)
  10.183 + /* Map the grant entry for access by host CPUs. */
  10.184 +#define _GNTMAP_host_map        (1)
  10.185 +#define GNTMAP_host_map         (1<<_GNTMAP_host_map)
  10.186 + /* Accesses to the granted frame will be restricted to read-only access. */
  10.187 +#define _GNTMAP_readonly        (2)
  10.188 +#define GNTMAP_readonly         (1<<_GNTMAP_readonly)
  10.189 + /*
  10.190 +  * GNTMAP_host_map subflag:
  10.191 +  *  0 => The host mapping is usable only by the guest OS.
  10.192 +  *  1 => The host mapping is usable by guest OS + current application.
  10.193 +  */
  10.194 +#define _GNTMAP_application_map (3)
  10.195 +#define GNTMAP_application_map  (1<<_GNTMAP_application_map)
  10.196  
  10.197  /*
  10.198 - * Bitfield values for <pin_flags>.
  10.199 + * Values for error status returns. All errors are -ve.
  10.200   */
  10.201 - /* Pin the grant entry for access by I/O devices. */
  10.202 -#define _GNTPIN_dev_accessible  (0)
  10.203 -#define GNTPIN_dev_accessible   (1<<_GNTPIN_dev_accessible)
  10.204 - /* Pin the grant entry for access by host CPUs. */
  10.205 -#define _GNTPIN_host_accessible (1)
  10.206 -#define GNTPIN_host_accessible  (1<<_GNTPIN_host_accessible)
  10.207 - /* Accesses to the granted frame will be restricted to read-only access. */
  10.208 -#define _GNTPIN_readonly        (2)
  10.209 -#define GNTPIN_readonly         (1<<_GNTPIN_readonly)
  10.210 +#define GNTST_okay             (0)
  10.211 +#define GNTST_general_error    (-1) /* General undefined error.              */
  10.212 +#define GNTST_bad_domain       (-2) /* Unrecognised domain id.               */
  10.213 +#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
  10.214 +#define GNTST_bad_handle       (-3) /* Unrecognised or inappropriate handle. */
  10.215 +#define GNTST_no_device_space  (-4) /* Out of space in I/O MMU.              */
  10.216 +#define GNTST_permission_denied (-5) /* Not enough privilege for operation.  */
  10.217 +
  10.218 +#define GNTTABOP_error_msgs {                   \
  10.219 +    "okay",                                     \
  10.220 +    "undefined error",                          \
  10.221 +    "unrecognised domain id",                   \
  10.222 +    "invalid grant reference",                  \
  10.223 +    "invalid mapping handle",                   \
  10.224 +    "no spare translation slot in the I/O MMU", \
  10.225 +    "permission denied"                         \
  10.226 +}
  10.227 +        
  10.228  
  10.229  #endif /* __HYPERVISOR_IFS_GRANT_TABLE_H__ */
    11.1 --- a/xen/include/xen/grant_table.h	Thu Oct 07 15:25:52 2004 +0000
    11.2 +++ b/xen/include/xen/grant_table.h	Thu Oct 07 16:30:38 2004 +0000
    11.3 @@ -30,40 +30,53 @@
    11.4  
    11.5  /* Active grant entry - used for shadowing GTF_permit_access grants. */
    11.6  typedef struct {
    11.7 -    u32           status; /* Reference count information.  */
    11.8 -    u32           tlbflush_timestamp; /* Flush avoidance.  */
    11.9 -    u16           next;   /* Mapping hash chain.           */
   11.10 +    u32           pin;    /* Reference count information.  */
   11.11      domid_t       domid;  /* Domain being granted access.  */
   11.12      unsigned long frame;  /* Frame being granted.          */
   11.13  } active_grant_entry_t;
   11.14  
   11.15 -/*
   11.16 - * Bitfields in active_grant_entry_t:counts.
   11.17 - * NB. Some other GNTPIN_xxx definitions are in hypervisor-ifs/grant_table.h.
   11.18 - */
   11.19   /* Count of writable host-CPU mappings. */
   11.20 -#define GNTPIN_wmap_shift    (4)
   11.21 -#define GNTPIN_wmap_mask     (0x3FFFU << GNTPIN_wmap_shift)
   11.22 +#define GNTPIN_hstw_shift    (0)
   11.23 +#define GNTPIN_hstw_inc      (1 << GNTPIN_hstw_shift)
   11.24 +#define GNTPIN_hstw_mask     (0xFFU << GNTPIN_hstw_shift)
   11.25   /* Count of read-only host-CPU mappings. */
   11.26 -#define GNTPIN_rmap_shift    (18)
   11.27 -#define GNTPIN_rmap_mask     (0x3FFFU << GNTPIN_rmap_shift)
   11.28 -
   11.29 -#define GNT_MAPHASH_SZ       (256)
   11.30 -#define GNT_MAPHASH(_k)      ((_k) & (GNT_MAPHASH_SZ-1))
   11.31 -#define GNT_MAPHASH_INVALID  (0xFFFFU)
   11.32 +#define GNTPIN_hstr_shift    (8)
   11.33 +#define GNTPIN_hstr_inc      (1 << GNTPIN_hstr_shift)
   11.34 +#define GNTPIN_hstr_mask     (0xFFU << GNTPIN_hstr_shift)
   11.35 + /* Count of writable device-bus mappings. */
   11.36 +#define GNTPIN_devw_shift    (16)
   11.37 +#define GNTPIN_devw_inc      (1 << GNTPIN_devw_shift)
   11.38 +#define GNTPIN_devw_mask     (0xFFU << GNTPIN_devw_shift)
   11.39 + /* Count of read-only device-bus mappings. */
   11.40 +#define GNTPIN_devr_shift    (24)
   11.41 +#define GNTPIN_devr_inc      (1 << GNTPIN_devr_shift)
   11.42 +#define GNTPIN_devr_mask     (0xFFU << GNTPIN_devr_shift)
   11.43  
   11.44  #define NR_GRANT_ENTRIES     (PAGE_SIZE / sizeof(grant_entry_t))
   11.45  
   11.46 +/*
   11.47 + * Tracks a mapping of another domain's grant reference. Each domain has a
   11.48 + * table of these, indexes into which are returned as a 'mapping handle'.
   11.49 + */
   11.50 +typedef struct {
   11.51 +    u16      ref_and_flags; /* 0-2: GNTMAP_* ; 3-15: grant ref */
   11.52 +    domid_t  domid;         /* granting domain */
   11.53 +} grant_mapping_t;
   11.54 +#define MAPTRACK_GNTMAP_MASK 7
   11.55 +#define MAPTRACK_REF_SHIFT   3
   11.56 +#define NR_MAPTRACK_ENTRIES  (PAGE_SIZE / sizeof(grant_mapping_t))
   11.57 +
   11.58  /* Per-domain grant information. */
   11.59  typedef struct {
   11.60      /* Shared grant table (see include/hypervisor-ifs/grant_table.h). */
   11.61      grant_entry_t        *shared;
   11.62      /* Active grant table. */
   11.63      active_grant_entry_t *active;
   11.64 -    /* Lock protecting updates to maphash and shared grant table. */
   11.65 +    /* Mapping tracking table. */
   11.66 +    grant_mapping_t      *maptrack;
   11.67 +    unsigned int          maptrack_head;
   11.68 +    /* Lock protecting updates to active and shared grant tables. */
   11.69      spinlock_t            lock;
   11.70 -    /* Hash table: frame -> active grant entry. */
   11.71 -    u16                   maphash[GNT_MAPHASH_SZ];
   11.72  } grant_table_t;
   11.73  
   11.74  /* Start-of-day system initialisation. */
   11.75 @@ -76,13 +89,9 @@ int grant_table_create(
   11.76  void grant_table_destroy(
   11.77      struct domain *d);
   11.78  
   11.79 -/* Create/destroy host-CPU mappings via a grant-table entry. */
   11.80 -#define GNTTAB_MAP_RO   0
   11.81 -#define GNTTAB_MAP_RW   1
   11.82 -#define GNTTAB_UNMAP_RO 2
   11.83 -#define GNTTAB_UNMAP_RW 3
   11.84 -int gnttab_try_map(
   11.85 -    struct domain *rd, struct domain *ld, unsigned long frame, int op);
   11.86 +/* Destroy host-CPU mappings via a grant-table entry. */
   11.87 +int gnttab_check_unmap(
   11.88 +    struct domain *rd, struct domain *ld, unsigned long frame, int readonly);
   11.89  
   11.90  /*
   11.91   * Check that the given grant reference (rd,ref) allows 'ld' to transfer