ia64/xen-unstable
changeset 14439:3a186e94f613
linux: Various cleanup and locking clarification (and fixing!)
of mm pinning/unpinning logic.
Signed-off-by: Keir Fraser <keir@xensource.com>
| author | kfraser@localhost.localdomain |
|---|---|
| date | Fri Mar 16 15:19:08 2007 +0000 (2007-03-16) |
| parents | ba83d33c961b |
| children | 90d6fe6de04d |
| files | linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c |
line diff
```diff
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Fri Mar 16 15:18:33 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Fri Mar 16 15:19:08 2007 +0000
@@ -256,8 +256,9 @@ void pte_free(struct page *pte)
 		unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT);
 
 		if (!pte_write(*virt_to_ptep(va)))
-			BUG_ON(HYPERVISOR_update_va_mapping(
-				va, pfn_pte(pfn, PAGE_KERNEL), 0));
+			if (HYPERVISOR_update_va_mapping(
+				va, pfn_pte(pfn, PAGE_KERNEL), 0))
+				BUG();
 	} else
 		clear_bit(PG_pinned, &pte->flags);
 
@@ -672,14 +673,23 @@ void mm_unpin(struct mm_struct *mm)
 void mm_pin_all(void)
 {
 	struct page *page;
+	unsigned long flags;
 
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
 
+	/*
+	 * Allow uninterrupted access to the pgd_list. Also protects
+	 * __pgd_pin() by disabling preemption.
+	 * All other CPUs must be at a safe point (e.g., in stop_machine
+	 * or offlined entirely).
+	 */
+	spin_lock_irqsave(&pgd_lock, flags);
 	for (page = pgd_list; page; page = (struct page *)page->index) {
 		if (!test_bit(PG_pinned, &page->flags))
 			__pgd_pin((pgd_t *)page_address(page));
 	}
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 void _arch_dup_mmap(struct mm_struct *mm)
```
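The change repeated throughout both files replaces `BUG_ON(hypercall)` with `if (hypercall) BUG();`. The changeset itself does not spell out the motivation, but the usual reason for this pattern is that `BUG_ON()` here wraps an expression with a required side effect: in configurations where the assertion macros compile to nothing (e.g. `CONFIG_BUG=n` in some kernel versions), the wrapped hypercall would silently never be issued. A minimal sketch of the hazard, with a hypothetical `must_succeed()` standing in for `HYPERVISOR_update_va_mapping()`:

```c
/*
 * Sketch only, not from the changeset: why asserting on an expression
 * with side effects is fragile. must_succeed() is a hypothetical
 * stand-in for a hypercall such as HYPERVISOR_update_va_mapping().
 *
 * With CONFIG_BUG=n, some kernel versions defined the assertion macros
 * roughly as below -- the wrapped condition is never evaluated.
 */
#define BUG_ON(cond)	do { } while (0)
#define BUG()		do { } while (0)

static int must_succeed(void)	/* pretend hypercall; 0 on success */
{
	return 0;
}

static void fragile(void)
{
	/* If BUG_ON() is a no-op, the hypercall disappears with it. */
	BUG_ON(must_succeed());
}

static void robust(void)
{
	/* The hypercall is always issued; only the assertion can vanish. */
	if (must_succeed())
		BUG();
}
```

With the `if (...) BUG();` form, the mapping update is made unconditionally no matter how the assertion macros are configured.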
```diff
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Fri Mar 16 15:18:33 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Fri Mar 16 15:19:08 2007 +0000
@@ -79,14 +79,17 @@ void mm_pin(struct mm_struct *mm)
 	spin_lock(&mm->page_table_lock);
 
 	mm_walk(mm, PAGE_KERNEL_RO);
-	BUG_ON(HYPERVISOR_update_va_mapping(
-		(unsigned long)mm->pgd,
-		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
-		UVMF_TLB_FLUSH));
-	BUG_ON(HYPERVISOR_update_va_mapping(
-		(unsigned long)__user_pgd(mm->pgd),
-		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL_RO),
-		UVMF_TLB_FLUSH));
+	if (HYPERVISOR_update_va_mapping(
+		(unsigned long)mm->pgd,
+		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
+		UVMF_TLB_FLUSH))
+		BUG();
+	if (HYPERVISOR_update_va_mapping(
+		(unsigned long)__user_pgd(mm->pgd),
+		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT,
+			PAGE_KERNEL_RO),
+		UVMF_TLB_FLUSH))
+		BUG();
 	xen_pgd_pin(__pa(mm->pgd)); /* kernel */
 	xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */
 	mm->context.pinned = 1;
@@ -106,12 +109,15 @@ void mm_unpin(struct mm_struct *mm)
 
 	xen_pgd_unpin(__pa(mm->pgd));
 	xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
-	BUG_ON(HYPERVISOR_update_va_mapping(
-		(unsigned long)mm->pgd,
-		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0));
-	BUG_ON(HYPERVISOR_update_va_mapping(
-		(unsigned long)__user_pgd(mm->pgd),
-		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL), 0));
+	if (HYPERVISOR_update_va_mapping(
+		(unsigned long)mm->pgd,
+		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0))
+		BUG();
+	if (HYPERVISOR_update_va_mapping(
+		(unsigned long)__user_pgd(mm->pgd),
+		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT,
+			PAGE_KERNEL), 0))
+		BUG();
 	mm_walk(mm, PAGE_KERNEL);
 	xen_tlb_flush();
 	mm->context.pinned = 0;
@@ -127,43 +133,50 @@ void mm_pin_all(void)
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
 
+	/*
+	 * Allow uninterrupted access to the mm_unpinned list. We don't
+	 * actually take the mm_unpinned_lock as it is taken inside mm_pin().
+	 * All other CPUs must be at a safe point (e.g., in stop_machine
+	 * or offlined entirely).
+	 */
+	preempt_disable();
 	while (!list_empty(&mm_unpinned))
 		mm_pin(list_entry(mm_unpinned.next, struct mm_struct,
 				  context.unpinned));
+	preempt_enable();
 }
 
 void _arch_dup_mmap(struct mm_struct *mm)
 {
-        if (!mm->context.pinned)
-                mm_pin(mm);
+	if (!mm->context.pinned)
+		mm_pin(mm);
 }
 
 void _arch_exit_mmap(struct mm_struct *mm)
 {
-        struct task_struct *tsk = current;
+	struct task_struct *tsk = current;
+
+	task_lock(tsk);
 
-        task_lock(tsk);
+	/*
+	 * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+	 * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+	 */
+	if (tsk->active_mm == mm) {
+		tsk->active_mm = &init_mm;
+		atomic_inc(&init_mm.mm_count);
 
-        /*
-         * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
-         * *much* faster this way, as no tlb flushes means bigger wrpt batches.
-         */
-        if ( tsk->active_mm == mm )
-        {
-            tsk->active_mm = &init_mm;
-            atomic_inc(&init_mm.mm_count);
+		switch_mm(mm, &init_mm, tsk);
+
+		atomic_dec(&mm->mm_count);
+		BUG_ON(atomic_read(&mm->mm_count) == 0);
+	}
 
-            switch_mm(mm, &init_mm, tsk);
-
-            atomic_dec(&mm->mm_count);
-            BUG_ON(atomic_read(&mm->mm_count) == 0);
-        }
+	task_unlock(tsk);
 
-        task_unlock(tsk);
-
-        if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) &&
-             !mm->context.has_foreign_mappings )
-            mm_unpin(mm);
+	if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) &&
+	     !mm->context.has_foreign_mappings )
+		mm_unpin(mm);
 }
 
 struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -183,8 +196,9 @@ void pte_free(struct page *pte)
 		unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
 
 		if (!pte_write(*virt_to_ptep(va)))
-			BUG_ON(HYPERVISOR_update_va_mapping(
-				va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
+			if (HYPERVISOR_update_va_mapping(
+				va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0))
+				BUG();
 
 		ClearPageForeign(pte);
 		init_page_count(pte);
```
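The two `mm_pin_all()` implementations now serialize their walks differently. The i386 version can take `pgd_lock` directly (with `spin_lock_irqsave()`, which also keeps `__pgd_pin()` preemption-safe), but the x86_64 version cannot lock the list it is draining, because `mm_pin()` acquires `mm_unpinned_lock` internally; it settles for `preempt_disable()` and relies on every other CPU being quiesced, per the comment in the diff. A simplified sketch of that drain pattern, with hypothetical `struct item` and `unpinned_list`/`unpinned_lock` names standing in for `mm_struct` and `mm_unpinned`/`mm_unpinned_lock`:

```c
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>

/* Hypothetical stand-ins for mm_unpinned and mm_unpinned_lock. */
static LIST_HEAD(unpinned_list);
static DEFINE_SPINLOCK(unpinned_lock);

struct item {
	struct list_head node;
};

/*
 * Mirrors mm_pin(): the per-item operation takes the list lock itself,
 * and unlinking the item is what makes the outer drain loop terminate.
 */
static void pin_one(struct item *it)
{
	spin_lock(&unpinned_lock);
	list_del(&it->node);
	spin_unlock(&unpinned_lock);
}

static void pin_all(void)
{
	/*
	 * The outer loop must not take unpinned_lock (pin_one() would
	 * deadlock on it), so it only disables preemption. As the
	 * changeset's comment notes, correctness also relies on all
	 * other CPUs being at a safe point, e.g. inside stop_machine.
	 */
	preempt_disable();
	while (!list_empty(&unpinned_list))
		pin_one(list_entry(unpinned_list.next, struct item, node));
	preempt_enable();
}
```

The `_arch_exit_mmap()` rewrite in the same hunk is mostly a reindent (spaces to tabs, kernel brace style), preserving the existing behavior of switching `cr3` to `init_mm` early so that `unmap_vmas()` runs against an inactive page table.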