direct-io.hg

changeset 804:a55c876d6d2e

bitkeeper revision 1.493 (3f84098eCbgC6OdGchmKMJlm84nGJw)

memory.c, hypervisor.h, hypervisor-if.h, domain.c, entry.S:
Faster page fault handling --- fast path in Xen if mapping to be updated is part of current address space.
author kaf24@scramble.cl.cam.ac.uk
date Wed Oct 08 12:56:46 2003 +0000 (2003-10-08)
parents 54d82b047eb4
children fb248d1df870
files xen/arch/i386/entry.S xen/common/domain.c xen/common/memory.c xen/include/hypervisor-ifs/hypervisor-if.h xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h xenolinux-2.4.22-sparse/mm/memory.c
line diff
--- a/xen/arch/i386/entry.S	Tue Oct 07 21:52:00 2003 +0000
+++ b/xen/arch/i386/entry.S	Wed Oct 08 12:56:46 2003 +0000
@@ -647,6 +647,7 @@ ENTRY(hypervisor_call_table)
         .long SYMBOL_NAME(do_dom_mem_op)
         .long SYMBOL_NAME(do_multicall)
         .long SYMBOL_NAME(do_kbd_op)
+        .long SYMBOL_NAME(do_update_va_mapping)
         .rept NR_syscalls-((.-hypervisor_call_table)/4)
         .long SYMBOL_NAME(sys_ni_syscall)
 	.endr
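
Note: the slot added above must line up with the hypercall number defined in
hypervisor-if.h below (__HYPERVISOR_update_va_mapping is 20; the table is
indexed from 0 and padded with sys_ni_syscall up to NR_syscalls). A minimal
sketch of the dispatch in C, assuming the historical convention that the
hypercall number arrives in EAX via "int $0x82" (dispatch_hypercall and
hypercall_fn_t are illustrative names, not part of the patch):

    /* Illustrative rendering of the entry.S dispatch; not actual Xen code. */
    typedef long (*hypercall_fn_t)(unsigned long, unsigned long, unsigned long);
    extern hypercall_fn_t hypervisor_call_table[];  /* padded to NR_syscalls */

    static long dispatch_hypercall(unsigned long eax, unsigned long ebx,
                                   unsigned long ecx, unsigned long edx)
    {
        /* The real assembly presumably bounds-checks EAX against NR_syscalls;
         * unimplemented slots hit the sys_ni_syscall padding. */
        return hypervisor_call_table[eax](ebx, ecx, edx);
    }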
--- a/xen/common/domain.c	Tue Oct 07 21:52:00 2003 +0000
+++ b/xen/common/domain.c	Wed Oct 08 12:56:46 2003 +0000
@@ -163,6 +163,8 @@ void __kill_domain(struct task_struct *p
 
 void kill_domain(void)
 {
+    /* May have been in middle of a p.t. update with WP bit cleared. */
+    write_cr0(read_cr0()|X86_CR0_WP);
     __kill_domain(current);
 }
 
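
Note: with CR0.WP set, even ring-0 writes to a read-only mapping fault; Xen
clears WP so it can write through read-only page-table mappings, and now does
so lazily (see the memory.c changes below), so a domain killed mid-update may
still have WP clear. The hunk above re-sets the bit on the kill path. A
minimal sketch of the lazy toggle, using the accessors that appear in the
patch (needs_pt_write is an illustrative placeholder):

    /* Lazy CR0.WP toggling, as in do_process_page_updates below:
     * cr0 == 0 means "WP not yet cleared"; otherwise it holds the
     * original value to restore on exit. */
    unsigned long cr0 = 0;
    for ( i = 0; i < count; i++ )
    {
        if ( needs_pt_write && (cr0 == 0) )
        {
            cr0 = read_cr0();
            write_cr0(cr0 & ~X86_CR0_WP);  /* allow writes to r/o mappings */
        }
        /* ... page-table writes ... */
    }
    if ( cr0 != 0 )
        write_cr0(cr0);                    /* restore the original setting */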
--- a/xen/common/memory.c	Tue Oct 07 21:52:00 2003 +0000
+++ b/xen/common/memory.c	Wed Oct 08 12:56:46 2003 +0000
@@ -550,9 +550,7 @@ static int mod_l2_entry(l2_pgentry_t *p_
                l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
         {
             if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) ) 
-            {
                 put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
-            }
             
             /* Assume we're mapping an L1 table, falling back to twisted L2. */
             if ( unlikely(get_l1_table(l2_pgentry_to_pagenr(new_l2_entry))) )
@@ -601,15 +599,12 @@ static int mod_l1_entry(l1_pgentry_t *p_
                l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
         {
             if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) ) 
-            {
                 put_page(l1_pgentry_to_pagenr(old_l1_entry),
                          l1_pgentry_val(old_l1_entry) & _PAGE_RW);
-            }
-            
+
             if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
-                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) ){
+                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) )
                 goto fail;
-            }
         } 
     }
     else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
@@ -753,17 +748,12 @@ int do_process_page_updates(page_update_
     struct pfn_info *page;
     int err = 0, i;
     unsigned int cmd;
-    unsigned long cr0 = read_cr0();
-
-    /* Clear the WP bit so that we can write even read-only page mappings. */
-    write_cr0(cr0 & ~X86_CR0_WP);
+    unsigned long cr0 = 0;
 
     for ( i = 0; i < count; i++ )
     {
         if ( copy_from_user(&req, ureqs, sizeof(req)) )
-        {
             kill_domain_with_errmsg("Cannot read page update request");
-        } 
 
         cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
         pfn = req.ptr >> PAGE_SHIFT;
@@ -773,26 +763,23 @@ int do_process_page_updates(page_update_
         spin_lock_irq(&current->page_lock);
 
         /* Get the page-frame number that a non-extended command references. */
-        if ( likely(cmd != PGREQ_EXTENDED_COMMAND) )
+        if ( (cmd == PGREQ_NORMAL_UPDATE) || (cmd == PGREQ_UNCHECKED_UPDATE) )
         {
-            if ( likely(cmd != PGREQ_MPT_UPDATE) )
+            if ( cr0 == 0 )
             {
-                /* Need to use 'get_user' since the VA's PGD may be absent. */
-                __get_user(l1e, (unsigned long *)(linear_pg_table+pfn));
-                /* Now check that the VA's PTE isn't absent. */
-                if ( !(l1e & _PAGE_PRESENT) )
-                {
-                    MEM_LOG("L1E n.p. at VA %08lx (%08lx)", req.ptr&~3, l1e);
-                    goto unlock;
-                }
-                /* Finally, get the underlying machine address. */
-                pfn = l1e >> PAGE_SHIFT;
+                cr0 = read_cr0();
+                write_cr0(cr0 & ~X86_CR0_WP);
             }
-            else if ( pfn >= max_page )
+            /* Need to use 'get_user' since the VA's PGD may be absent. */
+            __get_user(l1e, (unsigned long *)(linear_pg_table+pfn));
+            /* Now check that the VA's PTE isn't absent. */
+            if ( !(l1e & _PAGE_PRESENT) )
             {
-                MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
+                MEM_LOG("L1E n.p. at VA %08lx (%08lx)", req.ptr&~3, l1e);
                 goto unlock;
             }
+            /* Finally, get the underlying machine address. */
+            pfn = l1e >> PAGE_SHIFT;
         }
 
         /* Least significant bits of 'ptr' demux the operation type. */
@@ -850,7 +837,11 @@ int do_process_page_updates(page_update_
             
         case PGREQ_MPT_UPDATE:
             page = frame_table + pfn;
-            if ( DOMAIN_OKAY(page->flags) )
+            if ( pfn >= max_page )
+            {
+                MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
+            }
+            else if ( DOMAIN_OKAY(page->flags) )
             {
                 machine_to_phys_mapping[pfn] = req.val;
                 err = 0;
@@ -892,9 +883,77 @@ int do_process_page_updates(page_update_
 
     }
 
-    /* Restore the WP bit before returning to guest. */
-    write_cr0(cr0);
+    if ( cr0 != 0 )
+        write_cr0(cr0);
 
     return 0;
 }
 
+
+/*
+ * Note: This function is structured this way so that the common path is very 
+ * fast. Tests that are unlikely to be TRUE branch to out-of-line code. 
+ * Unfortunately GCC's 'unlikely()' macro doesn't do the right thing :-(
+ */
+int do_update_va_mapping(unsigned long page_nr, 
+                         unsigned long val, 
+                         unsigned long flags)
+{
+    unsigned long _x, cr0 = 0;
+    struct task_struct *p = current;
+    int err = -EINVAL;
+
+    if ( page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT) )
+        goto out;
+
+    spin_lock_irq(&p->page_lock);
+
+    /* Check that the VA's page-directory entry is present.. */
+    if ( (err = __get_user(_x, (unsigned long *)
+                           (&linear_pg_table[page_nr]))) != 0 )
+        goto unlock_and_out;
+
+    /* If the VA's page-directory entry is read-only, we frob the WP bit. */
+    if ( __put_user(_x, (unsigned long *)(&linear_pg_table[page_nr])) )
+        goto clear_wp; return_from_clear_wp:
+
+    if ( (err = mod_l1_entry(&linear_pg_table[page_nr], 
+                             mk_l1_pgentry(val))) != 0 )
+        goto bad;
+
+    if ( (flags & UVMF_INVLPG) )
+        goto invlpg; return_from_invlpg:
+
+    if ( (flags & UVMF_FLUSH_TLB) )
+        goto flush; return_from_flush:
+
+    if ( cr0 != 0 )
+        goto write_cr0; return_from_write_cr0:
+
+ unlock_and_out:
+    spin_unlock_irq(&p->page_lock);
+ out:
+    return err;
+
+ clear_wp:
+    cr0 = read_cr0();
+    write_cr0(cr0 & ~X86_CR0_WP);        
+    goto return_from_clear_wp;
+
+ bad:
+    spin_unlock_irq(&p->page_lock);
+    kill_domain_with_errmsg("Illegal VA-mapping update request");
+    return 0;
+
+ invlpg:
+    flush_tlb[p->processor] = 1;
+    goto return_from_invlpg;
+    
+ flush:
+    __write_cr3_counted(pagetable_val(p->mm.pagetable));
+    goto return_from_flush;
+
+ write_cr0:
+    write_cr0(cr0);
+    goto return_from_write_cr0;
+}
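
Note: the gotos above keep the common path free of taken branches, as the
comment explains. A structurally equivalent straight-line sketch of the same
function (same helpers and semantics, reordered only for readability; not a
proposed replacement):

    int do_update_va_mapping_sketch(unsigned long page_nr,
                                    unsigned long val, unsigned long flags)
    {
        unsigned long _x, cr0 = 0;
        struct task_struct *p = current;
        int err = -EINVAL;

        /* Guests may only touch VAs below the hypervisor area. */
        if ( page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT) )
            return err;

        spin_lock_irq(&p->page_lock);

        /* Is the VA's page-directory entry present? */
        if ( (err = __get_user(_x, (unsigned long *)
                               (&linear_pg_table[page_nr]))) == 0 )
        {
            /* PTE page mapped read-only? Then frob the WP bit. */
            if ( __put_user(_x, (unsigned long *)(&linear_pg_table[page_nr])) )
            {
                cr0 = read_cr0();
                write_cr0(cr0 & ~X86_CR0_WP);
            }

            if ( (err = mod_l1_entry(&linear_pg_table[page_nr],
                                     mk_l1_pgentry(val))) != 0 )
            {
                spin_unlock_irq(&p->page_lock);
                kill_domain_with_errmsg("Illegal VA-mapping update request");
                return 0;
            }

            if ( flags & UVMF_INVLPG )
                flush_tlb[p->processor] = 1;

            if ( flags & UVMF_FLUSH_TLB )
                __write_cr3_counted(pagetable_val(p->mm.pagetable));

            if ( cr0 != 0 )
                write_cr0(cr0);
        }

        spin_unlock_irq(&p->page_lock);
        return err;
    }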
--- a/xen/include/hypervisor-ifs/hypervisor-if.h	Tue Oct 07 21:52:00 2003 +0000
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h	Wed Oct 08 12:56:46 2003 +0000
@@ -60,6 +60,7 @@
 #define __HYPERVISOR_dom_mem_op		  17
 #define __HYPERVISOR_multicall		  18
 #define __HYPERVISOR_kbd_op               19
+#define __HYPERVISOR_update_va_mapping    20
 
 /* And the trap vector is... */
 #define TRAP_INSTR "int $0x82"
@@ -142,6 +143,10 @@
 #define PGEXT_CMD_MASK        255
 #define PGEXT_CMD_SHIFT         8
 
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+#define UVMF_FLUSH_TLB          1 /* Flush entire TLB. */
+#define UVMF_INVLPG             2 /* Flush the VA mapping being updated. */
+
 /*
  * Master "switch" for enabling/disabling event delivery.
  */
--- a/xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h	Tue Oct 07 21:52:00 2003 +0000
+++ b/xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h	Wed Oct 08 12:56:46 2003 +0000
@@ -369,4 +369,16 @@ static inline long HYPERVISOR_kbd_op(uns
     return ret;
 }
 
+static inline int HYPERVISOR_update_va_mapping(
+    unsigned long page_nr, pte_t new_val, unsigned long flags)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), 
+        "b" (page_nr), "c" ((new_val).pte_low), "d" (flags) );
+
+    return ret;
+}
+
 #endif /* __HYPERVISOR_H__ */
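
A minimal guest-side usage sketch (hypothetical call site; it mirrors the
xenolinux hunks below). UVMF_INVLPG is wanted when a present mapping is being
replaced; 0 suffices when the slot was previously not present, since no stale
translation can be cached:

    pte_t entry = mk_pte(page, vma->vm_page_prot);   /* illustrative setup */

    /* Replacing a present PTE: the TLB may hold the old translation. */
    HYPERVISOR_update_va_mapping(address >> PAGE_SHIFT, entry, UVMF_INVLPG);

    /* Filling a previously non-present PTE: no invalidation needed. */
    HYPERVISOR_update_va_mapping(address >> PAGE_SHIFT, entry, 0);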
--- a/xenolinux-2.4.22-sparse/mm/memory.c	Tue Oct 07 21:52:00 2003 +0000
+++ b/xenolinux-2.4.22-sparse/mm/memory.c	Wed Oct 08 12:56:46 2003 +0000
@@ -918,8 +918,18 @@ int remap_page_range(unsigned long from,
  */
 static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
 {
+#ifdef CONFIG_XENO
+	if ( likely(vma->vm_mm == current->mm) ) {
+		XENO_flush_page_update_queue();
+		HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, UVMF_INVLPG);
+	} else {
+		set_pte(page_table, entry);
+		flush_tlb_page(vma, address);
+	}
+#else
 	set_pte(page_table, entry);
 	flush_tlb_page(vma, address);
+#endif
 	update_mmu_cache(vma, address, entry);
 }
 
@@ -1183,11 +1193,20 @@ static int do_swap_page(struct mm_struct
 
 	flush_page_to_ram(page);
 	flush_icache_page(vma, page);
+#ifdef CONFIG_XENO
+	if ( likely(vma->vm_mm == current->mm) ) {
+		XENO_flush_page_update_queue();
+		HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, pte, 0);
+	} else {
+		set_pte(page_table, pte);
+		XENO_flush_page_update_queue();
+	}
+#else
 	set_pte(page_table, pte);
+#endif
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
-	XENO_flush_page_update_queue();
 	spin_unlock(&mm->page_table_lock);
 	return ret;
 }
@@ -1229,11 +1248,20 @@ static int do_anonymous_page(struct mm_s
 		mark_page_accessed(page);
 	}
 
+#ifdef CONFIG_XENO
+	if ( likely(vma->vm_mm == current->mm) ) {
+		XENO_flush_page_update_queue();
+		HYPERVISOR_update_va_mapping(addr>>PAGE_SHIFT, entry, 0);
+	} else {
+		set_pte(page_table, entry);
+		XENO_flush_page_update_queue();
+	}
+#else
 	set_pte(page_table, entry);
+#endif
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, entry);
-	XENO_flush_page_update_queue();
 	spin_unlock(&mm->page_table_lock);
 	return 1;	/* Minor fault */
 
@@ -1304,7 +1332,17 @@ static int do_no_page(struct mm_struct *
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
 			entry = pte_mkwrite(pte_mkdirty(entry));
+#ifdef CONFIG_XENO
+		if ( likely(vma->vm_mm == current->mm) ) {
+			XENO_flush_page_update_queue();
+			HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, 0);
+		} else {
+			set_pte(page_table, entry);
+			XENO_flush_page_update_queue();
+		}
+#else
 		set_pte(page_table, entry);
+#endif
 	} else {
 		/* One of our sibling threads was faster, back out. */
 		page_cache_release(new_page);
@@ -1314,7 +1352,6 @@ static int do_no_page(struct mm_struct *
 
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
-	XENO_flush_page_update_queue();
 	spin_unlock(&mm->page_table_lock);
 	return 2;	/* Major fault */
 }
@@ -1366,7 +1403,6 @@ static inline int handle_pte_fault(struc
 	}
 	entry = pte_mkyoung(entry);
 	establish_pte(vma, address, pte, entry);
-	XENO_flush_page_update_queue();
 	spin_unlock(&mm->page_table_lock);
 	return 1;
 }
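
Note: the four xenolinux hunks above repeat one pattern: when the faulting mm
is the current address space, flush the update queue and take the new fast
hypercall; otherwise fall back to set_pte (which, in the sparse tree, queues
the update for Xen to validate). A hypothetical helper capturing the pattern,
not part of this patch (the conditional flush in the #else branch is likewise
illustrative):

    static inline void xeno_establish_pte(struct vm_area_struct *vma,
                                          unsigned long address,
                                          pte_t *page_table, pte_t entry,
                                          unsigned long uvmf_flags)
    {
    #ifdef CONFIG_XENO
        if ( likely(vma->vm_mm == current->mm) ) {
            /* Our own address space: Xen can update the PTE directly. */
            XENO_flush_page_update_queue();
            HYPERVISOR_update_va_mapping(address >> PAGE_SHIFT, entry,
                                         uvmf_flags);
            return;
        }
        /* Foreign mm: queue the update and flush the queue. */
        set_pte(page_table, entry);
        XENO_flush_page_update_queue();
    #else
        set_pte(page_table, entry);
        if ( uvmf_flags & UVMF_INVLPG )
            flush_tlb_page(vma, address);  /* cf. establish_pte above */
    #endif
    }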