xenbits.xensource.com Git - xen.git/commitdiff
p2m: Allow non-leaf entries to be replaced by leaf entries
author George Dunlap <george.dunlap@eu.citrix.com>
Fri, 21 Jan 2011 15:37:36 +0000 (15:37 +0000)
committer George Dunlap <george.dunlap@eu.citrix.com>
Fri, 21 Jan 2011 15:37:36 +0000 (15:37 +0000)
Allow l2 and l3 p2m tables to be replaced with 2MB and 1GB pages
respectively, freeing the p2m table page properly.  This allows, for example,
a sequence of 512 singleton zero pages to be replaced with a superpage
populate-on-demand entry.

Changes:
* Add a p2m_free_ptp() corresponding to p2m_alloc_ptp(), which will
handle everything related to the freeing properly.
* Add p2m_free_entry(), based on ept_free_entry(), which will free
intermediate tables recursively.
* For both ept and p2m, when replacing non-leaf entries with leaf
entries, keep the old entry and call *_free_entry() after the new entry
has been written and the proper flushes have been done.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
xen/arch/x86/mm/hap/hap.c
xen/arch/x86/mm/hap/p2m-ept.c
xen/arch/x86/mm/p2m.c
xen/include/asm-x86/p2m.h

index 42bc674962d3eed11eae01023a0e4a4bd69a03a4..8d1bc70efabaadf469b74ea535cc8d4c411f2c2e 100644 (file)
@@ -333,9 +333,11 @@ static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
 
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
-    if ( (pg->count_info & PGC_count_mask) != 1 )
-        HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
-                  pg->count_info, pg->u.inuse.type_info);
+    if ( (pg->count_info & PGC_count_mask) != 1 ) {
+        HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n",
+                     pg, pg->count_info, pg->u.inuse.type_info);
+        WARN();
+    }
     pg->count_info &= ~PGC_count_mask;
     /* Free should not decrement domain's total allocation, since
      * these pages were allocated without an owner. */
index fe1d5cd1682818e8e867d0aa38554bcc9948eea8..49f379a2efc4858e7b0e5a041f356b51b2fbadf2 100644 (file)
@@ -166,8 +166,6 @@ static int ept_set_middle_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry)
 /* free ept sub tree behind an entry */
 void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
 {
-    struct domain *d = p2m->domain;
-
     /* End if the entry is a leaf entry. */
     if ( level == 0 || !is_epte_present(ept_entry) ||
          is_epte_superpage(ept_entry) )
@@ -180,8 +178,8 @@ void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
             ept_free_entry(p2m, epte + i, level - 1);
         unmap_domain_page(epte);
     }
-
-    d->arch.paging.free_page(d, mfn_to_page(ept_entry->mfn));
+    
+    p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
 }
 
 static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
@@ -317,6 +315,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     int vtd_pte_present = 0;
     int needs_sync = 1;
     struct domain *d = p2m->domain;
+    ept_entry_t old_entry = { .epte = 0 };
 
     /*
      * the caller must make sure:
@@ -357,8 +356,12 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     vtd_pte_present = is_epte_present(ept_entry) ? 1 : 0;
 
     /*
-     * When we are here, we must be on a leaf ept entry
-     * with i == target or i > target.
+     * If we're here with i > target, we must be at a leaf node, and
+     * we need to break up the superpage.
+     *
+     * If we're here with i == target and i > 0, we need to check to see
+     * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table)
+     * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
      */
 
     if ( i == target )
@@ -370,6 +373,10 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         if ( !is_epte_present(ept_entry) )
             needs_sync = 0;
 
+        /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
+         * the intermediate tables will be freed below after the ept flush */
+        old_entry = *ept_entry;
+
         if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
              (p2mt == p2m_ram_paging_in_start) )
         {
@@ -487,6 +494,13 @@ out:
         }
     }
 
+    /* Release the old intermediate tables, if any.  This has to be the
+       last thing we do, after the ept_sync_domain() and removal
+       from the iommu tables, so as to avoid a potential
+       use-after-free. */
+    if ( is_epte_present(&old_entry) )
+        ept_free_entry(p2m, &old_entry, target);
+
     return rv;
 }
 
index 3d54beb14d85884cd5bf5595b588becd7a5465e8..6d9e8ac52aadef53751a3a099b093898f7fd3f31 100644 (file)
@@ -153,11 +153,45 @@ p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
 
     page_list_add_tail(pg, &p2m->pages);
     pg->u.inuse.type_info = type | 1 | PGT_validated;
-    pg->count_info |= 1;
 
     return pg;
 }
 
+void
+p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
+{
+    ASSERT(pg);
+    ASSERT(p2m);
+    ASSERT(p2m->domain);
+    ASSERT(p2m->domain->arch.paging.free_page);
+
+    page_list_del(pg, &p2m->pages);
+    p2m->domain->arch.paging.free_page(p2m->domain, pg);
+
+    return;
+}
+
+/* Free intermediate tables from a p2m sub-tree */
+void
+p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
+{
+    /* End if the entry is a leaf entry. */
+    if ( page_order == 0
+         || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
+         || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+        return;
+
+    if ( page_order > 9 )
+    {
+        l1_pgentry_t *l3_table = map_domain_page(l1e_get_pfn(*p2m_entry));
+        for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+            p2m_free_entry(p2m, l3_table + i, page_order - 9);
+        unmap_domain_page(l3_table);
+    }
+
+    p2m_free_ptp(p2m, mfn_to_page(_mfn(l1e_get_pfn(*p2m_entry))));
+}
+
 // Walk one level of the P2M table, allocating a new table if required.
 // Returns 0 on error.
 //
@@ -1316,6 +1350,7 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
      */
     if ( page_order == 18 )
     {
+        l1_pgentry_t old_entry = l1e_empty();
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                    L3_PAGETABLE_ENTRIES);
@@ -1323,10 +1358,11 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
              !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
         {
-            P2M_ERROR("configure P2M table L3 entry with large page\n");
-            domain_crash(p2m->domain);
-            goto out;
+            /* We're replacing a non-SP page with a superpage.  Make sure to
+             * handle freeing the table properly. */
+            old_entry = *p2m_entry;
         }
+
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
         l3e_content = mfn_valid(mfn) 
             ? l3e_from_pfn(mfn_x(mfn),
@@ -1335,7 +1371,11 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         entry_content.l1 = l3e_content.l3;
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 3);
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
 
+        /* Free old intermediate tables if necessary */
+        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
+            p2m_free_entry(p2m, &old_entry, page_order);
     }
     /*
      * When using PAE Xen, we only allow 33 bits of pseudo-physical
@@ -1372,9 +1412,11 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         /* level 1 entry */
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 1);
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
     }
     else if ( page_order == 9 )
     {
+        l1_pgentry_t old_entry = l1e_empty();
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                    L2_PAGETABLE_ENTRIES);
@@ -1384,9 +1426,9 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
              !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
         {
-            P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
-            domain_crash(p2m->domain);
-            goto out;
+            /* We're replacing a non-SP page with a superpage.  Make sure to
+             * handle freeing the table properly. */
+            old_entry = *p2m_entry;
         }
         
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
@@ -1400,6 +1442,11 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         entry_content.l1 = l2e_content.l2;
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 2);
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+
+        /* Free old intermediate tables if necessary */
+        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
+            p2m_free_entry(p2m, &old_entry, page_order);
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
index dbd1e27d5a8eef27e95abce41203c9cda7b28ff7..2e6941e13e66ef3bb55ffa9f61d59a61e986b8f3 100644 (file)
@@ -541,6 +541,7 @@ static inline void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid,
 #endif
 
 struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type);
+void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);
 
 #endif /* _XEN_P2M_H */