direct-io.hg

changeset 14408:3a186e94f613

linux: Various cleanups and locking clarifications (and fixes!)
of the mm pinning/unpinning logic.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Fri Mar 16 15:19:08 2007 +0000 (2007-03-16)
parents ba83d33c961b
children 90d6fe6de04d
files	linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
	linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
line diff
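
Most hunks below replace BUG_ON(HYPERVISOR_update_va_mapping(...)) with an
explicit if/BUG() pair, so the hypercall that remaps the page sits in ordinary
code and only the failure check lives in the macro. A minimal stand-alone
sketch of that shape follows; BUG() and fake_update_va_mapping() are simplified
stand-ins invented here, not the kernel's real definitions.

#include <stdio.h>
#include <stdlib.h>

/* Toy BUG(): report and abort.  The kernel's real macro is more involved. */
#define BUG() do { \
		fprintf(stderr, "BUG at %s:%d\n", __FILE__, __LINE__); \
		abort(); \
	} while (0)

/* Stand-in for HYPERVISOR_update_va_mapping(): returns 0 on success. */
static int fake_update_va_mapping(unsigned long va)
{
	printf("remapping va %#lx\n", va);	/* the side effect we must not lose */
	return 0;
}

int main(void)
{
	/*
	 * Old style buried the call inside the assertion:
	 *	BUG_ON(fake_update_va_mapping(va));
	 * New style keeps the call in plain code and asserts on its result:
	 */
	if (fake_update_va_mapping(0xc0000000UL))
		BUG();
	return 0;
}
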
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Fri Mar 16 15:18:33 2007 +0000
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Fri Mar 16 15:19:08 2007 +0000
     1.3 @@ -256,8 +256,9 @@ void pte_free(struct page *pte)
     1.4  		unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT);
     1.5  
     1.6  		if (!pte_write(*virt_to_ptep(va)))
     1.7 -			BUG_ON(HYPERVISOR_update_va_mapping(
     1.8 -			       va, pfn_pte(pfn, PAGE_KERNEL), 0));
     1.9 +			if (HYPERVISOR_update_va_mapping(
    1.10 +				va, pfn_pte(pfn, PAGE_KERNEL), 0))
    1.11 +				BUG();
    1.12  	} else
    1.13  		clear_bit(PG_pinned, &pte->flags);
    1.14  
    1.15 @@ -672,14 +673,23 @@ void mm_unpin(struct mm_struct *mm)
    1.16  void mm_pin_all(void)
    1.17  {
    1.18  	struct page *page;
    1.19 +	unsigned long flags;
    1.20  
    1.21  	if (xen_feature(XENFEAT_writable_page_tables))
    1.22  		return;
    1.23  
    1.24 +	/*
    1.25 +	 * Allow uninterrupted access to the pgd_list. Also protects
    1.26 +	 * __pgd_pin() by disabling preemption.
    1.27 +	 * All other CPUs must be at a safe point (e.g., in stop_machine
    1.28 +	 * or offlined entirely).
    1.29 +	 */
    1.30 +	spin_lock_irqsave(&pgd_lock, flags);
    1.31  	for (page = pgd_list; page; page = (struct page *)page->index) {
    1.32  		if (!test_bit(PG_pinned, &page->flags))
    1.33  			__pgd_pin((pgd_t *)page_address(page));
    1.34  	}
    1.35 +	spin_unlock_irqrestore(&pgd_lock, flags);
    1.36  }
    1.37  
    1.38  void _arch_dup_mmap(struct mm_struct *mm)
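
In the i386 hunk above, mm_pin_all() now holds pgd_lock (taken with
spin_lock_irqsave(), which also disables preemption) for the entire pgd_list
walk, so no pgd can be added or removed mid-traversal and __pgd_pin() cannot be
preempted. A rough user-space sketch of that "one lock around the whole walk"
pattern; toy_pgd, toy_pgd_lock and toy_pin() are invented names, and a pthread
mutex stands in for the IRQ-disabling spinlock:

#include <pthread.h>
#include <stdio.h>

struct toy_pgd {
	int pinned;
	struct toy_pgd *next;	/* stands in for chaining via page->index */
};

static pthread_mutex_t toy_pgd_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_pgd *toy_pgd_list;

static void toy_pin(struct toy_pgd *p)
{
	p->pinned = 1;		/* stands in for __pgd_pin() */
}

static void toy_pin_all(void)
{
	struct toy_pgd *p;

	pthread_mutex_lock(&toy_pgd_lock);	/* spin_lock_irqsave(&pgd_lock, flags) */
	for (p = toy_pgd_list; p; p = p->next)
		if (!p->pinned)
			toy_pin(p);
	pthread_mutex_unlock(&toy_pgd_lock);	/* spin_unlock_irqrestore(&pgd_lock, flags) */
}

int main(void)
{
	struct toy_pgd a = { 0, NULL }, b = { 0, &a };

	toy_pgd_list = &b;
	toy_pin_all();
	printf("a pinned=%d, b pinned=%d\n", a.pinned, b.pinned);
	return 0;
}
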
     2.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Fri Mar 16 15:18:33 2007 +0000
     2.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Fri Mar 16 15:19:08 2007 +0000
     2.3 @@ -79,14 +79,17 @@ void mm_pin(struct mm_struct *mm)
     2.4  	spin_lock(&mm->page_table_lock);
     2.5  
     2.6  	mm_walk(mm, PAGE_KERNEL_RO);
     2.7 -	BUG_ON(HYPERVISOR_update_va_mapping(
     2.8 -		       (unsigned long)mm->pgd,
     2.9 -		       pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
    2.10 -		       UVMF_TLB_FLUSH));
    2.11 -	BUG_ON(HYPERVISOR_update_va_mapping(
    2.12 -		       (unsigned long)__user_pgd(mm->pgd),
    2.13 -		       pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL_RO),
    2.14 -		       UVMF_TLB_FLUSH));
    2.15 +	if (HYPERVISOR_update_va_mapping(
    2.16 +		(unsigned long)mm->pgd,
    2.17 +		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
    2.18 +		UVMF_TLB_FLUSH))
    2.19 +		BUG();
    2.20 +	if (HYPERVISOR_update_va_mapping(
    2.21 +		(unsigned long)__user_pgd(mm->pgd),
    2.22 +		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT,
    2.23 +			PAGE_KERNEL_RO),
    2.24 +		UVMF_TLB_FLUSH))
    2.25 +		BUG();
    2.26  	xen_pgd_pin(__pa(mm->pgd)); /* kernel */
    2.27  	xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */
    2.28  	mm->context.pinned = 1;
    2.29 @@ -106,12 +109,15 @@ void mm_unpin(struct mm_struct *mm)
    2.30  
    2.31  	xen_pgd_unpin(__pa(mm->pgd));
    2.32  	xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
    2.33 -	BUG_ON(HYPERVISOR_update_va_mapping(
    2.34 -		       (unsigned long)mm->pgd,
    2.35 -		       pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0));
    2.36 -	BUG_ON(HYPERVISOR_update_va_mapping(
    2.37 -		       (unsigned long)__user_pgd(mm->pgd),
    2.38 -		       pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL), 0));
    2.39 +	if (HYPERVISOR_update_va_mapping(
    2.40 +		(unsigned long)mm->pgd,
    2.41 +		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0))
    2.42 +		BUG();
    2.43 +	if (HYPERVISOR_update_va_mapping(
    2.44 +		(unsigned long)__user_pgd(mm->pgd),
    2.45 +		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT,
    2.46 +			PAGE_KERNEL), 0))
    2.47 +		BUG();
    2.48  	mm_walk(mm, PAGE_KERNEL);
    2.49  	xen_tlb_flush();
    2.50  	mm->context.pinned = 0;
    2.51 @@ -127,43 +133,50 @@ void mm_pin_all(void)
    2.52  	if (xen_feature(XENFEAT_writable_page_tables))
    2.53  		return;
    2.54  
    2.55 +	/*
    2.56 +	 * Allow uninterrupted access to the mm_unpinned list. We don't
    2.57 +	 * actually take the mm_unpinned_lock as it is taken inside mm_pin().
    2.58 +	 * All other CPUs must be at a safe point (e.g., in stop_machine
    2.59 +	 * or offlined entirely).
    2.60 +	 */
    2.61 +	preempt_disable();
    2.62  	while (!list_empty(&mm_unpinned))	
    2.63  		mm_pin(list_entry(mm_unpinned.next, struct mm_struct,
    2.64  				  context.unpinned));
    2.65 +	preempt_enable();
    2.66  }
    2.67  
    2.68  void _arch_dup_mmap(struct mm_struct *mm)
    2.69  {
    2.70 -    if (!mm->context.pinned)
    2.71 -        mm_pin(mm);
    2.72 +	if (!mm->context.pinned)
    2.73 +		mm_pin(mm);
    2.74  }
    2.75  
    2.76  void _arch_exit_mmap(struct mm_struct *mm)
    2.77  {
    2.78 -    struct task_struct *tsk = current;
    2.79 -
    2.80 -    task_lock(tsk);
    2.81 +	struct task_struct *tsk = current;
    2.82  
    2.83 -    /*
    2.84 -     * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
    2.85 -     * *much* faster this way, as no tlb flushes means bigger wrpt batches.
    2.86 -     */
    2.87 -    if ( tsk->active_mm == mm )
    2.88 -    {
    2.89 -        tsk->active_mm = &init_mm;
    2.90 -        atomic_inc(&init_mm.mm_count);
    2.91 +	task_lock(tsk);
    2.92  
    2.93 -        switch_mm(mm, &init_mm, tsk);
    2.94 +	/*
    2.95 +	 * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
    2.96 +	 * *much* faster this way, as no tlb flushes means bigger wrpt batches.
    2.97 +	 */
    2.98 +	if (tsk->active_mm == mm) {
    2.99 +		tsk->active_mm = &init_mm;
   2.100 +		atomic_inc(&init_mm.mm_count);
   2.101  
   2.102 -        atomic_dec(&mm->mm_count);
   2.103 -        BUG_ON(atomic_read(&mm->mm_count) == 0);
   2.104 -    }
   2.105 +		switch_mm(mm, &init_mm, tsk);
   2.106  
   2.107 -    task_unlock(tsk);
   2.108 +		atomic_dec(&mm->mm_count);
   2.109 +		BUG_ON(atomic_read(&mm->mm_count) == 0);
   2.110 +	}
   2.111  
   2.112 -    if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) &&
   2.113 -         !mm->context.has_foreign_mappings )
   2.114 -        mm_unpin(mm);
   2.115 +	task_unlock(tsk);
   2.116 +
   2.117 +	if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) &&
   2.118 +	     !mm->context.has_foreign_mappings )
   2.119 +		mm_unpin(mm);
   2.120  }
   2.121  
   2.122  struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
   2.123 @@ -183,8 +196,9 @@ void pte_free(struct page *pte)
   2.124  	unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
   2.125  
   2.126  	if (!pte_write(*virt_to_ptep(va)))
   2.127 -		BUG_ON(HYPERVISOR_update_va_mapping(
   2.128 -			va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
   2.129 +		if (HYPERVISOR_update_va_mapping(
   2.130 +			va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0))
   2.131 +			BUG();
   2.132  
   2.133  	ClearPageForeign(pte);
   2.134  	init_page_count(pte);
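
The x86_64 mm_pin_all() above takes a different shape: the drain loop itself
only disables preemption, because mm_pin() acquires mm_unpinned_lock
internally while the new comment requires every other CPU to be at a safe
point. A small user-space sketch of that "lock lives in the per-item helper,
not in the loop" structure; all names below are invented for illustration and
a pthread mutex again stands in for the kernel spinlock:

#include <pthread.h>
#include <stdio.h>

struct toy_mm {
	const char *name;
	struct toy_mm *next;
};

static pthread_mutex_t toy_unpinned_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_mm *toy_unpinned;	/* list of mms still to pin */

static void toy_pin_one(struct toy_mm *mm)
{
	printf("pinning %s\n", mm->name);
	pthread_mutex_lock(&toy_unpinned_lock);		/* the helper takes the list lock... */
	toy_unpinned = mm->next;			/* ...and unlinks the entry itself */
	pthread_mutex_unlock(&toy_unpinned_lock);
}

static void toy_pin_all(void)
{
	/* the kernel brackets this loop with preempt_disable()/preempt_enable() */
	while (toy_unpinned)
		toy_pin_one(toy_unpinned);
}

int main(void)
{
	struct toy_mm a = { "init", NULL }, b = { "shell", &a };

	toy_unpinned = &b;
	toy_pin_all();
	return 0;
}

The loop terminates because each helper call unlinks the entry it just
processed, just as mm_pin() removes the mm from the mm_unpinned list.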