ia64/xen-unstable

changeset 274:207ec86133bb

bitkeeper revision 1.120 (3e6ca44cvMxdBQkw-MjBh0_JyTebvw)

hypervisor.c:
Better synchronisation in page-table update code -- removed locking and replaced with cmpxchg (CAS)
author kaf24@labyrinth.cl.cam.ac.uk
date Mon Mar 10 14:42:20 2003 +0000 (2003-03-10)
parents 666728c2dad4
children 99f5d61af2be
files xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c
line diff
     1.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c	Mon Mar 10 13:44:34 2003 +0000
     1.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c	Mon Mar 10 14:42:20 2003 +0000
     1.3 @@ -13,17 +13,26 @@
     1.4  #include <asm/pgtable.h>
     1.5  
     1.6  /*
     1.7 - * This suffices to protect us if we ever move to SMP domains.
     1.8 - * Further, it protects us against interrupts. At the very least, this is
     1.9 - * required for the network driver which flushes the update queue before
    1.10 - * pushing new receive buffers.
    1.11 + * A note on atomicity of these operations. We assume that queue_xxx
    1.12 + * operations never occur in an asynchronous (e.g. interrupt) context.
    1.13 + * Therefore they do not need to be synchronised w.r.t. each other.
    1.14 + * However, flush_update_queue may be called from an interrupt context
    1.15 + * (e.g. this is done in the network driver).
    1.16 + * 
    1.17 + * We use lock-free techniques to synchronise on the queue index. If a
    1.18 + * queue_xxx operation finds this index changes while it runs, it will
    1.19 + * fail and retry.
    1.20 + * 
    1.21 + * Flush operations must synchronise with themselves. They do this by
    1.22 + * atomically updating the index to zero on entry. This effectively locks
    1.23 + * out any other asynchronous calls to a flush operation.
    1.24 + * 
    1.25 + * Debug routines synchronise by disabling interrupts. It's easier that way.
    1.26   */
    1.27 -static spinlock_t update_lock = SPIN_LOCK_UNLOCKED;
    1.28  
    1.29  #define QUEUE_SIZE 2048
    1.30  static page_update_request_t update_queue[QUEUE_SIZE];
    1.31 -unsigned int pt_update_queue_idx = 0;
    1.32 -#define idx pt_update_queue_idx
    1.33 +volatile unsigned int pt_update_queue_idx = 0;
    1.34  
    1.35  #if PT_UPDATE_DEBUG > 0
    1.36  page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}};
    1.37 @@ -33,7 +42,11 @@ static void DEBUG_allow_pt_reads(void)
    1.38  {
    1.39      pte_t *pte;
    1.40      page_update_request_t update;
    1.41 +    unsigned int idx;
    1.42 +    unsigned long flags;
    1.43      int i;
    1.44 +    local_irq_save(flags);
    1.45 +    idx = pt_update_queue_idx;
    1.46      for ( i = idx-1; i >= 0; i-- )
    1.47      {
    1.48          pte = update_debug_queue[i].ptep;
    1.49 @@ -43,13 +56,17 @@ static void DEBUG_allow_pt_reads(void)
    1.50          update.val = update_debug_queue[i].pteval;
    1.51          HYPERVISOR_pt_update(&update, 1);
    1.52      }
    1.53 +    local_irq_restore(flags);
    1.54  }
    1.55  static void DEBUG_disallow_pt_read(unsigned long pa)
    1.56  {
    1.57      pte_t *pte;
    1.58      pmd_t *pmd;
    1.59      pgd_t *pgd;
    1.60 -    unsigned long pteval;
    1.61 +    unsigned long pteval, flags;
    1.62 +    unsigned int idx;
    1.63 +    local_irq_save(flags);
    1.64 +    idx = pt_update_queue_idx;
    1.65      /*
    1.66       * We may fault because of an already outstanding update.
    1.67       * That's okay -- it'll get fixed up in the fault handler.
    1.68 @@ -65,6 +82,7 @@ static void DEBUG_disallow_pt_read(unsig
    1.69      HYPERVISOR_pt_update(&update, 1);
    1.70      update_debug_queue[idx].ptep = pte;
    1.71      update_debug_queue[idx].pteval = pteval;
    1.72 +    local_irq_restore(flags);
    1.73  }
    1.74  #endif
    1.75  
    1.76 @@ -87,9 +105,8 @@ unsigned long pt_baseptr;
    1.77  
    1.78  void _flush_page_update_queue(void)
    1.79  {
    1.80 -    unsigned long flags;
    1.81 -    spin_lock_irqsave(&update_lock, flags);
    1.82 -    if ( idx == 0 ) goto out;
    1.83 +    unsigned int idx = xchg(&pt_update_queue_idx, 0);
    1.84 +    if ( idx == 0 ) return;
    1.85  #if PT_UPDATE_DEBUG > 1
    1.86      printk("Flushing %d entries from pt update queue\n", idx);
    1.87  #endif
    1.88 @@ -97,111 +114,112 @@ void _flush_page_update_queue(void)
    1.89      DEBUG_allow_pt_reads();
    1.90  #endif
    1.91      HYPERVISOR_pt_update(update_queue, idx);
    1.92 -    idx = 0;
    1.93 - out:
    1.94 -    spin_unlock_irqrestore(&update_lock, flags);
    1.95 -}
    1.96 -
    1.97 -static void increment_index(void)
    1.98 -{
    1.99 -    if ( ++idx == QUEUE_SIZE ) _flush_page_update_queue();
   1.100  }
   1.101  
   1.102  void queue_l1_entry_update(unsigned long ptr, unsigned long val)
   1.103  {
   1.104 -    unsigned long flags;
   1.105 -    spin_lock_irqsave(&update_lock, flags);
   1.106 +    unsigned int idx;
   1.107  #if PT_UPDATE_DEBUG > 0
   1.108      DEBUG_disallow_pt_read(ptr);
   1.109  #endif
   1.110 -    update_queue[idx].ptr = phys_to_machine(ptr);
   1.111 -    update_queue[idx].val = val;
   1.112 -    increment_index();
   1.113 -    spin_unlock_irqrestore(&update_lock, flags);
   1.114 +    do {
   1.115 +        idx = pt_update_queue_idx;
   1.116 +        update_queue[idx].ptr = phys_to_machine(ptr);
   1.117 +        update_queue[idx].val = val;
   1.118 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );    
   1.119 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.120  }
   1.121  
   1.122  void queue_l2_entry_update(unsigned long ptr, unsigned long val)
   1.123  {
   1.124 -    unsigned long flags;
   1.125 -    spin_lock_irqsave(&update_lock, flags);
   1.126 -    update_queue[idx].ptr = phys_to_machine(ptr);
   1.127 -    update_queue[idx].val = val;
   1.128 -    increment_index();
   1.129 -    spin_unlock_irqrestore(&update_lock, flags);
   1.130 +    unsigned int idx;
   1.131 +    do {
   1.132 +        idx = pt_update_queue_idx;
   1.133 +        update_queue[idx].ptr = phys_to_machine(ptr);
   1.134 +        update_queue[idx].val = val;
   1.135 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );
   1.136 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.137  }
   1.138  
   1.139  void queue_pt_switch(unsigned long ptr)
   1.140  {
   1.141 -    unsigned long flags;
   1.142 -    spin_lock_irqsave(&update_lock, flags);
   1.143 -    update_queue[idx].ptr  = phys_to_machine(ptr);
   1.144 -    update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.145 -    update_queue[idx].val  = PGEXT_NEW_BASEPTR;
   1.146 -    increment_index();
   1.147 -    spin_unlock_irqrestore(&update_lock, flags);
   1.148 +    unsigned int idx;
   1.149 +    do {
   1.150 +        idx = pt_update_queue_idx;
   1.151 +        update_queue[idx].ptr  = phys_to_machine(ptr);
   1.152 +        update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.153 +        update_queue[idx].val  = PGEXT_NEW_BASEPTR;
   1.154 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );
   1.155 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.156  }
   1.157  
   1.158  void queue_tlb_flush(void)
   1.159  {
   1.160 -    unsigned long flags;
   1.161 -    spin_lock_irqsave(&update_lock, flags);
   1.162 -    update_queue[idx].ptr  = PGREQ_EXTENDED_COMMAND;
   1.163 -    update_queue[idx].val  = PGEXT_TLB_FLUSH;
   1.164 -    increment_index();
   1.165 -    spin_unlock_irqrestore(&update_lock, flags);
   1.166 +    unsigned int idx;
   1.167 +    do {
   1.168 +        idx = pt_update_queue_idx;
   1.169 +        update_queue[idx].ptr  = PGREQ_EXTENDED_COMMAND;
   1.170 +        update_queue[idx].val  = PGEXT_TLB_FLUSH;
   1.171 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );
   1.172 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.173  }
   1.174  
   1.175  void queue_invlpg(unsigned long ptr)
   1.176  {
   1.177 -    unsigned long flags;
   1.178 -    spin_lock_irqsave(&update_lock, flags);
   1.179 -    update_queue[idx].ptr  = PGREQ_EXTENDED_COMMAND;
   1.180 -    update_queue[idx].val  = ptr & PAGE_MASK;
   1.181 -    update_queue[idx].val |= PGEXT_INVLPG;
   1.182 -    increment_index();
   1.183 -    spin_unlock_irqrestore(&update_lock, flags);
   1.184 +    unsigned int idx;
   1.185 +    do {
   1.186 +        idx = pt_update_queue_idx;
   1.187 +        update_queue[idx].ptr  = PGREQ_EXTENDED_COMMAND;
   1.188 +        update_queue[idx].val  = ptr & PAGE_MASK;
   1.189 +        update_queue[idx].val |= PGEXT_INVLPG;
   1.190 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );
   1.191 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.192  }
   1.193  
   1.194  void queue_pgd_pin(unsigned long ptr)
   1.195  {
   1.196 -    unsigned long flags;
   1.197 -    spin_lock_irqsave(&update_lock, flags);
   1.198 -    update_queue[idx].ptr  = phys_to_machine(ptr);
   1.199 -    update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.200 -    update_queue[idx].val  = PGEXT_PIN_L2_TABLE;
   1.201 -    increment_index();
   1.202 -    spin_unlock_irqrestore(&update_lock, flags);
   1.203 +    unsigned int idx;
   1.204 +    do {
   1.205 +        idx = pt_update_queue_idx;
   1.206 +        update_queue[idx].ptr  = phys_to_machine(ptr);
   1.207 +        update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.208 +        update_queue[idx].val  = PGEXT_PIN_L2_TABLE;
   1.209 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );
   1.210 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.211  }
   1.212  
   1.213  void queue_pgd_unpin(unsigned long ptr)
   1.214  {
   1.215 -    unsigned long flags;
   1.216 -    spin_lock_irqsave(&update_lock, flags);
   1.217 -    update_queue[idx].ptr  = phys_to_machine(ptr);
   1.218 -    update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.219 -    update_queue[idx].val  = PGEXT_UNPIN_TABLE;
   1.220 -    increment_index();
   1.221 -    spin_unlock_irqrestore(&update_lock, flags);
   1.222 +    unsigned int idx;
   1.223 +    do {
   1.224 +        idx = pt_update_queue_idx;
   1.225 +        update_queue[idx].ptr  = phys_to_machine(ptr);
   1.226 +        update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.227 +        update_queue[idx].val  = PGEXT_UNPIN_TABLE;
   1.228 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );
   1.229 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.230  }
   1.231  
   1.232  void queue_pte_pin(unsigned long ptr)
   1.233  {
   1.234 -    unsigned long flags;
   1.235 -    spin_lock_irqsave(&update_lock, flags);
   1.236 -    update_queue[idx].ptr  = phys_to_machine(ptr);
   1.237 -    update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.238 -    update_queue[idx].val  = PGEXT_PIN_L1_TABLE;
   1.239 -    increment_index();
   1.240 -    spin_unlock_irqrestore(&update_lock, flags);
   1.241 +    unsigned int idx;
   1.242 +    do {
   1.243 +        idx = pt_update_queue_idx;
   1.244 +        update_queue[idx].ptr  = phys_to_machine(ptr);
   1.245 +        update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.246 +        update_queue[idx].val  = PGEXT_PIN_L1_TABLE;
   1.247 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );
   1.248 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.249  }
   1.250  
   1.251  void queue_pte_unpin(unsigned long ptr)
   1.252  {
   1.253 -    unsigned long flags;
   1.254 -    spin_lock_irqsave(&update_lock, flags);
   1.255 -    update_queue[idx].ptr  = phys_to_machine(ptr);
   1.256 -    update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.257 -    update_queue[idx].val  = PGEXT_UNPIN_TABLE;
   1.258 -    increment_index();
   1.259 -    spin_unlock_irqrestore(&update_lock, flags);
   1.260 +    unsigned int idx;
   1.261 +    do {
   1.262 +        idx = pt_update_queue_idx;
   1.263 +        update_queue[idx].ptr  = phys_to_machine(ptr);
   1.264 +        update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND;
   1.265 +        update_queue[idx].val  = PGEXT_UNPIN_TABLE;
   1.266 +    } while ( cmpxchg(&pt_update_queue_idx, idx, idx+1) != idx );
   1.267 +    if ( idx == (QUEUE_SIZE-1) ) _flush_page_update_queue();
   1.268  }