xenbits.xensource.com Git - legacy/linux-2.6.18-xen.git/commitdiff
xen/x86: synchronize vmalloc_sync_all() with mm_{,un}pin()
author Keir Fraser <keir.fraser@citrix.com>
Wed, 22 Sep 2010 07:57:21 +0000 (08:57 +0100)
committer Keir Fraser <keir.fraser@citrix.com>
Wed, 22 Sep 2010 07:57:21 +0000 (08:57 +0100)
As recently diagnosed by Citrix engineers, mm_{,un}pin() and
vmalloc_sync_all() aren't properly synchronized. So we add a back link
from the pgd's struct page to the owning struct mm_struct, and use this
to lock the page table updates done in vmalloc_sync_all().
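
In code, the scheme is roughly this (a sketch only, not the patch
itself; the hunks below reuse the pgd page's otherwise unused
->mapping field to hold the owner):

	/* pgd_alloc()/pgd_list_add(): record the owning mm in the pgd's
	 * struct page so vmalloc_sync_all() can find its page_table_lock. */
	virt_to_page(pgd)->mapping = (void *)mm;

	/* pgd_list_del()/pgd_free(): clear the back link again before the
	 * pgd leaves the global pgd_list. */
	virt_to_page(pgd)->mapping = NULL;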

Due to the way pgd-s get allocated and put on the global list on i386,
we have to account for the back link not being set yet (in which case
the pgd cannot be subject to (un)pinning).
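
vmalloc_sync_all() then takes the owner's page_table_lock around each
sync, but only when the back link is already set (again just a sketch
of the i386 loop; the x86-64 hunk wraps the same pattern in a
pgd_page_table() helper):

	for (page = pgd_list; page; page = (struct page *)page->index) {
		struct mm_struct *mm = (struct mm_struct *)page->mapping;
		pmd_t *pmd;

		/* A pgd without a back link cannot be (un)pinned yet, so
		 * there is nothing to synchronize against. */
		if (mm)
			spin_lock(&mm->page_table_lock);
		pmd = vmalloc_sync_one(page_address(page), address);
		if (mm)
			spin_unlock(&mm->page_table_lock);

		if (!pmd) {
			BUG_ON(page != pgd_list);
			break;
		}
	}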

Along the way, I found it necessary/desirable to also fix
- a potential NULL dereference in i386's pgd_alloc(),
- x86-64 adding not-yet-cleaned-out pgd-s to the global list, and
- x86-64 removing pgd-s from the global list rather late.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
arch/i386/mm/fault-xen.c
arch/i386/mm/pgtable-xen.c
arch/x86_64/mm/fault-xen.c
include/asm-x86_64/mach-xen/asm/pgalloc.h

diff --git a/arch/i386/mm/fault-xen.c b/arch/i386/mm/fault-xen.c
index 6c036c7c63ccbd78caf7019ae11a878efda56465..22e7a208f080721dcfc704cdcc6b7f9c296a1d54 100644
--- a/arch/i386/mm/fault-xen.c
+++ b/arch/i386/mm/fault-xen.c
@@ -762,12 +762,25 @@ void vmalloc_sync_all(void)
                                return;
                        }
                        for (page = pgd_list; page; page =
-                                       (struct page *)page->index)
-                               if (!vmalloc_sync_one(page_address(page),
-                                                               address)) {
+                                       (struct page *)page->index) {
+                               spinlock_t *lock = page->mapping
+                                       ? &((struct mm_struct *)page->mapping)
+                                               ->page_table_lock
+                                       : NULL;
+                               pmd_t *pmd;
+
+                               if (lock)
+                                       spin_lock(lock);
+                               pmd = vmalloc_sync_one(page_address(page),
+                                                      address);
+                               if (lock)
+                                       spin_unlock(lock);
+
+                               if (!pmd) {
                                        BUG_ON(page != pgd_list);
                                        break;
                                }
+                       }
                        spin_unlock_irqrestore(&pgd_lock, flags);
                        if (!page)
                                set_bit(sync_index(address), insync);
diff --git a/arch/i386/mm/pgtable-xen.c b/arch/i386/mm/pgtable-xen.c
index fa783a432bf6a7925daa3ba98257eb6b2d1d1e60..9311d4781092e490ddfdb97c4829f800d362d6f6 100644
--- a/arch/i386/mm/pgtable-xen.c
+++ b/arch/i386/mm/pgtable-xen.c
@@ -232,6 +232,7 @@ static inline void pgd_list_del(pgd_t *pgd)
        *pprev = next;
        if (next)
                set_page_private(next, (unsigned long)pprev);
+       page->mapping = NULL;
 }
 
 void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
@@ -273,9 +274,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
        pmd_t **pmd;
        unsigned long flags;
 
+       if (!pgd)
+               return NULL;
+
        pgd_test_and_unpin(pgd);
 
-       if (PTRS_PER_PMD == 1 || !pgd)
+       /* Store a back link for vmalloc_sync_all(). */
+       virt_to_page(pgd)->mapping = (void *)mm;
+
+       if (PTRS_PER_PMD == 1)
                return pgd;
 
        if (HAVE_SHARED_KERNEL_PMD) {
diff --git a/arch/x86_64/mm/fault-xen.c b/arch/x86_64/mm/fault-xen.c
index 7a12f4a50cc0d631bfb506527d5db225f7fc5bab..7a1e74ed5f3fe6413981ec552426cfa0dea1ebc2 100644
--- a/arch/x86_64/mm/fault-xen.c
+++ b/arch/x86_64/mm/fault-xen.c
@@ -677,6 +677,9 @@ do_sigbus:
 DEFINE_SPINLOCK(pgd_lock);
 struct page *pgd_list;
 
+#define pgd_page_table(what, pg) \
+       spin_##what(&((struct mm_struct *)(pg)->mapping)->page_table_lock)
+
 void vmalloc_sync_all(void)
 {
        /* Note that races in the updates of insync and start aren't 
@@ -699,10 +702,13 @@ void vmalloc_sync_all(void)
                             page = (struct page *)page->index) {
                                pgd_t *pgd;
                                pgd = (pgd_t *)page_address(page) + pgd_index(address);
+
+                               pgd_page_table(lock, page);
                                if (pgd_none(*pgd))
                                        set_pgd(pgd, *pgd_ref);
                                else
                                        BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
+                               pgd_page_table(unlock, page);
                        }
                        spin_unlock(&pgd_lock);
                        set_bit(pgd_index(address), insync);
diff --git a/include/asm-x86_64/mach-xen/asm/pgalloc.h b/include/asm-x86_64/mach-xen/asm/pgalloc.h
index cff0fcf1c59b30ae0de36a33e3ce0e648c012e65..7f9757fd380b7e3e3ca728510fd7572b5ede10af 100644
--- a/include/asm-x86_64/mach-xen/asm/pgalloc.h
+++ b/include/asm-x86_64/mach-xen/asm/pgalloc.h
@@ -95,10 +95,13 @@ static inline void pud_free(pud_t *pud)
        pte_free(virt_to_page(pud));
 }
 
-static inline void pgd_list_add(pgd_t *pgd)
+static inline void pgd_list_add(pgd_t *pgd, void *mm)
 {
        struct page *page = virt_to_page(pgd);
 
+       /* Store a back link for vmalloc_sync_all(). */
+       page->mapping = mm;
+
        spin_lock(&pgd_lock);
        page->index = (pgoff_t)pgd_list;
        if (pgd_list)
@@ -119,6 +122,8 @@ static inline void pgd_list_del(pgd_t *pgd)
        if (next)
                next->private = (unsigned long)pprev;
        spin_unlock(&pgd_lock);
+
+       page->mapping = NULL;
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
@@ -127,22 +132,22 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
         * We allocate two contiguous pages for kernel and user.
         */
        unsigned boundary;
-       pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
+       pgd_t *pgd;
+
+       pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 1);
        if (!pgd)
                return NULL;
-       pgd_list_add(pgd);
+       pgd_list_add(pgd, mm);
        /*
         * Copy kernel pointers in from init.
         * Could keep a freelist or slab cache of those because the kernel
         * part never changes.
         */
        boundary = pgd_index(__PAGE_OFFSET);
-       memset(pgd, 0, boundary * sizeof(pgd_t));
        memcpy(pgd + boundary,
               init_level4_pgt + boundary,
               (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
 
-       memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
        /*
         * Set level3_user_pgt for vsyscall area
         */
@@ -155,6 +160,8 @@ static inline void pgd_free(pgd_t *pgd)
 {
        pte_t *ptep = virt_to_ptep(pgd);
 
+       pgd_list_del(pgd);
+
        if (!pte_write(*ptep)) {
                xen_pgd_unpin(__pa(pgd));
                BUG_ON(HYPERVISOR_update_va_mapping(
@@ -174,7 +181,6 @@ static inline void pgd_free(pgd_t *pgd)
                               0));
        }
 
-       pgd_list_del(pgd);
        free_pages((unsigned long)pgd, 1);
 }