ia64/xen-unstable
changeset 804:a55c876d6d2e
bitkeeper revision 1.493 (3f84098eCbgC6OdGchmKMJlm84nGJw)
memory.c, hypervisor.h, hypervisor-if.h, domain.c, entry.S:
Faster page fault handling --- fast path in Xen if mapping to be updated is part of current address space.
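In outline: before this changeset, every guest PTE write went through the batched page-update queue; now a guest updating a mapping in its own, currently-loaded address space can issue a single update_va_mapping hypercall, which Xen validates and applies through its linear page table. A minimal guest-side sketch of that dichotomy, not part of the commit, using names from the xenolinux diffs below ('vma', 'address', 'entry', 'page_table'):

    if ( vma->vm_mm == current->mm ) {
        /* Mapping is in the current address space: Xen can reach the
         * PTE via its linear page table, so a single hypercall
         * validates and installs the new entry. */
        XENO_flush_page_update_queue();  /* drain queued updates first */
        HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry,
                                     UVMF_INVLPG);
    } else {
        /* Foreign address space: use the existing batched path. */
        set_pte(page_table, entry);
        flush_tlb_page(vma, address);
    }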
author      kaf24@scramble.cl.cam.ac.uk
date        Wed Oct 08 12:56:46 2003 +0000 (2003-10-08)
parents     54d82b047eb4
children    fb248d1df870
files       xen/arch/i386/entry.S
            xen/common/domain.c
            xen/common/memory.c
            xen/include/hypervisor-ifs/hypervisor-if.h
            xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h
            xenolinux-2.4.22-sparse/mm/memory.c
line diff
--- a/xen/arch/i386/entry.S	Tue Oct 07 21:52:00 2003 +0000
+++ b/xen/arch/i386/entry.S	Wed Oct 08 12:56:46 2003 +0000
@@ -647,6 +647,7 @@ ENTRY(hypervisor_call_table)
         .long SYMBOL_NAME(do_dom_mem_op)
         .long SYMBOL_NAME(do_multicall)
         .long SYMBOL_NAME(do_kbd_op)
+        .long SYMBOL_NAME(do_update_va_mapping)
         .rept NR_syscalls-((.-hypervisor_call_table)/4)
         .long SYMBOL_NAME(sys_ni_syscall)
         .endr
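The position of the new entry matters: the guest traps with the hypercall number in EAX, and Xen uses that number to index hypervisor_call_table, so the slot added here must line up with __HYPERVISOR_update_va_mapping (20) defined in hypervisor-if.h below. A C-level view of the invariant, illustrative only (the table is really built in assembly as above, and hypercall arities vary):

    typedef long (*hypercall_t)(unsigned long, unsigned long, unsigned long);
    extern hypercall_t hypervisor_call_table[NR_syscalls];
    /* After this patch:
     *   hypervisor_call_table[__HYPERVISOR_update_va_mapping]
     *       == (hypercall_t)do_update_va_mapping
     * Unused slots are padded with sys_ni_syscall by the .rept. */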
--- a/xen/common/domain.c	Tue Oct 07 21:52:00 2003 +0000
+++ b/xen/common/domain.c	Wed Oct 08 12:56:46 2003 +0000
@@ -163,6 +163,8 @@ void __kill_domain(struct task_struct *p
 
 void kill_domain(void)
 {
+    /* May have been in middle of a p.t. update with WP bit cleared. */
+    write_cr0(read_cr0()|X86_CR0_WP);
     __kill_domain(current);
 }
 
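Both do_process_page_updates() and the new do_update_va_mapping() may temporarily clear CR0.WP so the hypervisor can write through read-only mappings of guest page tables; if a domain dies mid-update, the bit must be re-asserted here, or later supervisor writes on this CPU would bypass write protection. For reference (the constant is the standard x86 definition, not part of the diff):

    #define X86_CR0_WP 0x00010000   /* CR0 bit 16: when set, supervisor-mode
                                     * writes honour read-only PTEs */
    write_cr0(read_cr0() | X86_CR0_WP);   /* re-assert write protection */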
--- a/xen/common/memory.c	Tue Oct 07 21:52:00 2003 +0000
+++ b/xen/common/memory.c	Wed Oct 08 12:56:46 2003 +0000
@@ -550,9 +550,7 @@ static int mod_l2_entry(l2_pgentry_t *p_
               l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
     {
         if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
-        {
             put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
-        }
 
         /* Assume we're mapping an L1 table, falling back to twisted L2. */
         if ( unlikely(get_l1_table(l2_pgentry_to_pagenr(new_l2_entry))) )
@@ -601,15 +599,12 @@ static int mod_l1_entry(l1_pgentry_t *p_
               l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
         {
             if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
-            {
                 put_page(l1_pgentry_to_pagenr(old_l1_entry),
                          l1_pgentry_val(old_l1_entry) & _PAGE_RW);
-            }
-
+
             if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
-                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) ){
+                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) )
                 goto fail;
-            }
         }
     }
     else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
@@ -753,17 +748,12 @@ int do_process_page_updates(page_update_
     struct pfn_info *page;
     int err = 0, i;
     unsigned int cmd;
-    unsigned long cr0 = read_cr0();
-
-    /* Clear the WP bit so that we can write even read-only page mappings. */
-    write_cr0(cr0 & ~X86_CR0_WP);
+    unsigned long cr0 = 0;
 
     for ( i = 0; i < count; i++ )
     {
         if ( copy_from_user(&req, ureqs, sizeof(req)) )
-        {
             kill_domain_with_errmsg("Cannot read page update request");
-        }
 
         cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
         pfn = req.ptr >> PAGE_SHIFT;
@@ -773,26 +763,23 @@ int do_process_page_updates(page_update_
         spin_lock_irq(&current->page_lock);
 
         /* Get the page-frame number that a non-extended command references. */
-        if ( likely(cmd != PGREQ_EXTENDED_COMMAND) )
+        if ( (cmd == PGREQ_NORMAL_UPDATE) || (cmd == PGREQ_UNCHECKED_UPDATE) )
         {
-            if ( likely(cmd != PGREQ_MPT_UPDATE) )
+            if ( cr0 == 0 )
             {
-                /* Need to use 'get_user' since the VA's PGD may be absent. */
-                __get_user(l1e, (unsigned long *)(linear_pg_table+pfn));
-                /* Now check that the VA's PTE isn't absent. */
-                if ( !(l1e & _PAGE_PRESENT) )
-                {
-                    MEM_LOG("L1E n.p. at VA %08lx (%08lx)", req.ptr&~3, l1e);
-                    goto unlock;
-                }
-                /* Finally, get the underlying machine address. */
-                pfn = l1e >> PAGE_SHIFT;
+                cr0 = read_cr0();
+                write_cr0(cr0 & ~X86_CR0_WP);
             }
-            else if ( pfn >= max_page )
+            /* Need to use 'get_user' since the VA's PGD may be absent. */
+            __get_user(l1e, (unsigned long *)(linear_pg_table+pfn));
+            /* Now check that the VA's PTE isn't absent. */
+            if ( !(l1e & _PAGE_PRESENT) )
             {
-                MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
+                MEM_LOG("L1E n.p. at VA %08lx (%08lx)", req.ptr&~3, l1e);
                 goto unlock;
             }
+            /* Finally, get the underlying machine address. */
+            pfn = l1e >> PAGE_SHIFT;
         }
 
         /* Least significant bits of 'ptr' demux the operation type. */
@@ -850,7 +837,11 @@ int do_process_page_updates(page_update_
 
         case PGREQ_MPT_UPDATE:
             page = frame_table + pfn;
-            if ( DOMAIN_OKAY(page->flags) )
+            if ( pfn >= max_page )
+            {
+                MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
+            }
+            else if ( DOMAIN_OKAY(page->flags) )
             {
                 machine_to_phys_mapping[pfn] = req.val;
                 err = 0;
@@ -892,9 +883,77 @@ int do_process_page_updates(page_update_
 
     }
 
-    /* Restore the WP bit before returning to guest. */
-    write_cr0(cr0);
+    if ( cr0 != 0 )
+        write_cr0(cr0);
 
     return 0;
 }
 
+
+/*
+ * Note: This function is structured this way so that the common path is very
+ * fast. Tests that are unlikely to be TRUE branch to out-of-line code.
+ * Unfortunately GCC's 'unlikely()' macro doesn't do the right thing :-(
+ */
+int do_update_va_mapping(unsigned long page_nr,
+                         unsigned long val,
+                         unsigned long flags)
+{
+    unsigned long _x, cr0 = 0;
+    struct task_struct *p = current;
+    int err = -EINVAL;
+
+    if ( page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT) )
+        goto out;
+
+    spin_lock_irq(&p->page_lock);
+
+    /* Check that the VA's page-directory entry is present.. */
+    if ( (err = __get_user(_x, (unsigned long *)
+                           (&linear_pg_table[page_nr]))) != 0 )
+        goto unlock_and_out;
+
+    /* If the VA's page-directory entry is read-only, we frob the WP bit. */
+    if ( __put_user(_x, (unsigned long *)(&linear_pg_table[page_nr])) )
+        goto clear_wp; return_from_clear_wp:
+
+    if ( (err = mod_l1_entry(&linear_pg_table[page_nr],
+                             mk_l1_pgentry(val))) != 0 )
+        goto bad;
+
+    if ( (flags & UVMF_INVLPG) )
+        goto invlpg; return_from_invlpg:
+
+    if ( (flags & UVMF_FLUSH_TLB) )
+        goto flush; return_from_flush:
+
+    if ( cr0 != 0 )
+        goto write_cr0; return_from_write_cr0:
+
+ unlock_and_out:
+    spin_unlock_irq(&p->page_lock);
+ out:
+    return err;
+
+ clear_wp:
+    cr0 = read_cr0();
+    write_cr0(cr0 & ~X86_CR0_WP);
+    goto return_from_clear_wp;
+
+ bad:
+    spin_unlock_irq(&p->page_lock);
+    kill_domain_with_errmsg("Illegal VA-mapping update request");
+    return 0;
+
+ invlpg:
+    flush_tlb[p->processor] = 1;
+    goto return_from_invlpg;
+
+ flush:
+    __write_cr3_counted(pagetable_val(p->mm.pagetable));
+    goto return_from_flush;
+
+ write_cr0:
+    write_cr0(cr0);
+    goto return_from_write_cr0;
+}
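Two points about the restructuring above. First, CR0.WP is now cleared lazily: cr0 == 0 acts as a "never cleared" sentinel (a genuinely saved CR0 can never be zero, since PE and PG are always set while Xen runs), so batches containing no PTE writes skip both CR0 accesses. A minimal sketch of the pattern under those assumptions; update_touches_pte() and apply_update() are hypothetical helpers standing in for the per-request work:

    /* Hypothetical helpers, for illustration only. */
    extern int  update_touches_pte(int i);
    extern void apply_update(int i);

    static void process_updates(int count)
    {
        unsigned long cr0 = 0;   /* sentinel: WP not yet cleared */
        int i;
        for ( i = 0; i < count; i++ )
        {
            if ( update_touches_pte(i) && (cr0 == 0) )
            {
                cr0 = read_cr0();
                write_cr0(cr0 & ~X86_CR0_WP); /* open WP on first PTE write */
            }
            apply_update(i);
        }
        if ( cr0 != 0 )
            write_cr0(cr0);  /* restore WP only if we actually cleared it */
    }

Second, do_update_va_mapping() explains its unusual shape in its leading comment: the common path falls straight through, and every unlikely condition branches forward to out-of-line code and back.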
--- a/xen/include/hypervisor-ifs/hypervisor-if.h	Tue Oct 07 21:52:00 2003 +0000
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h	Wed Oct 08 12:56:46 2003 +0000
@@ -60,6 +60,7 @@
 #define __HYPERVISOR_dom_mem_op        17
 #define __HYPERVISOR_multicall         18
 #define __HYPERVISOR_kbd_op            19
+#define __HYPERVISOR_update_va_mapping 20
 
 /* And the trap vector is... */
 #define TRAP_INSTR "int $0x82"
@@ -142,6 +143,10 @@
 #define PGEXT_CMD_MASK  255
 #define PGEXT_CMD_SHIFT 8
 
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+#define UVMF_FLUSH_TLB 1 /* Flush entire TLB. */
+#define UVMF_INVLPG    2 /* Flush the VA mapping being updated. */
+
 /*
  * Master "switch" for enabling/disabling event delivery.
  */
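The flags argument selects the TLB maintenance Xen performs after installing the entry; a few illustrative calls, assuming guest locals 'va' and 'new_pte':

    HYPERVISOR_update_va_mapping(va >> PAGE_SHIFT, new_pte, 0);
        /* no flush: safe when the old mapping was not present */
    HYPERVISOR_update_va_mapping(va >> PAGE_SHIFT, new_pte, UVMF_INVLPG);
        /* flush only the VA being updated */
    HYPERVISOR_update_va_mapping(va >> PAGE_SHIFT, new_pte, UVMF_FLUSH_TLB);
        /* flush the entire TLB */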
--- a/xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h	Tue Oct 07 21:52:00 2003 +0000
+++ b/xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h	Wed Oct 08 12:56:46 2003 +0000
@@ -369,4 +369,16 @@ static inline long HYPERVISOR_kbd_op(uns
     return ret;
 }
 
+static inline int HYPERVISOR_update_va_mapping(
+    unsigned long page_nr, pte_t new_val, unsigned long flags)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping),
+        "b" (page_nr), "c" ((new_val).pte_low), "d" (flags) );
+
+    return ret;
+}
+
 #endif /* __HYPERVISOR_H__ */
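The wrapper follows the existing hypercall convention visible in the asm constraints: EAX carries the hypercall number in and the status out, with arguments in EBX, ECX and EDX. Only pte_low is passed because these are 32-bit non-PAE page tables, so a PTE fits in one register. An illustrative call site, assuming 'page', 'vma' and 'address' as in the fault handlers below:

    pte_t entry = mk_pte(page, vma->vm_page_prot);
    if ( HYPERVISOR_update_va_mapping(address >> PAGE_SHIFT,
                                      entry, UVMF_INVLPG) != 0 )
        printk(KERN_ALERT "update_va_mapping failed\n");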
--- a/xenolinux-2.4.22-sparse/mm/memory.c	Tue Oct 07 21:52:00 2003 +0000
+++ b/xenolinux-2.4.22-sparse/mm/memory.c	Wed Oct 08 12:56:46 2003 +0000
@@ -918,8 +918,18 @@ int remap_page_range(unsigned long from,
  */
 static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
 {
+#ifdef CONFIG_XENO
+	if ( likely(vma->vm_mm == current->mm) ) {
+		XENO_flush_page_update_queue();
+		HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, UVMF_INVLPG);
+	} else {
+		set_pte(page_table, entry);
+		flush_tlb_page(vma, address);
+	}
+#else
 	set_pte(page_table, entry);
 	flush_tlb_page(vma, address);
+#endif
 	update_mmu_cache(vma, address, entry);
 }
 
@@ -1183,11 +1193,20 @@ static int do_swap_page(struct mm_struct
 
 	flush_page_to_ram(page);
 	flush_icache_page(vma, page);
+#ifdef CONFIG_XENO
+	if ( likely(vma->vm_mm == current->mm) ) {
+		XENO_flush_page_update_queue();
+		HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, pte, 0);
+	} else {
+		set_pte(page_table, pte);
+		XENO_flush_page_update_queue();
+	}
+#else
 	set_pte(page_table, pte);
+#endif
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
-	XENO_flush_page_update_queue();
 	spin_unlock(&mm->page_table_lock);
 	return ret;
 }
@@ -1229,11 +1248,20 @@ static int do_anonymous_page(struct mm_s
 		mark_page_accessed(page);
 	}
 
+#ifdef CONFIG_XENO
+	if ( likely(vma->vm_mm == current->mm) ) {
+		XENO_flush_page_update_queue();
+		HYPERVISOR_update_va_mapping(addr>>PAGE_SHIFT, entry, 0);
+	} else {
+		set_pte(page_table, entry);
+		XENO_flush_page_update_queue();
+	}
+#else
 	set_pte(page_table, entry);
+#endif
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, entry);
-	XENO_flush_page_update_queue();
 	spin_unlock(&mm->page_table_lock);
 	return 1;	/* Minor fault */
 
@@ -1304,7 +1332,17 @@ static int do_no_page(struct mm_struct *
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
 			entry = pte_mkwrite(pte_mkdirty(entry));
+#ifdef CONFIG_XENO
+		if ( likely(vma->vm_mm == current->mm) ) {
+			XENO_flush_page_update_queue();
+			HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, 0);
+		} else {
+			set_pte(page_table, entry);
+			XENO_flush_page_update_queue();
+		}
+#else
 		set_pte(page_table, entry);
+#endif
 	} else {
 		/* One of our sibling threads was faster, back out. */
 		page_cache_release(new_page);
@@ -1314,7 +1352,6 @@ static int do_no_page(struct mm_struct *
 
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
-	XENO_flush_page_update_queue();
 	spin_unlock(&mm->page_table_lock);
 	return 2;	/* Major fault */
 }
@@ -1366,7 +1403,6 @@ static inline int handle_pte_fault(struc
 	}
 	entry = pte_mkyoung(entry);
 	establish_pte(vma, address, pte, entry);
-	XENO_flush_page_update_queue();
 	spin_unlock(&mm->page_table_lock);
 	return 1;
 }
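A subtlety in these hunks: in the fast-path branch XENO_flush_page_update_queue() runs before the hypercall, while in the fallback branch it runs after set_pte(). In this XenoLinux tree set_pte() appears to queue the write rather than perform it, so the fallback flush pushes the new entry out, whereas the fast-path flush drains any earlier queued writes so Xen applies them before the direct update, keeping the two mechanisms ordered. In comment form (illustrative, names as in the diff):

    XENO_flush_page_update_queue();   /* apply earlier batched PTE writes... */
    HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry,
                                 UVMF_INVLPG);   /* ...then the direct update */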