The internal Xen x86 emulator is fixed to handle shared/sharable pages correctly.
author     Keir Fraser <keir.fraser@citrix.com>
           Thu, 17 Dec 2009 06:27:56 +0000 (06:27 +0000)
committer  Keir Fraser <keir.fraser@citrix.com>
           Thu, 17 Dec 2009 06:27:56 +0000 (06:27 +0000)
If pages cannot be unshared immediately (due to a lack of free memory needed to
create private copies), the VCPU under emulation is paused and the emulator
returns X86EMUL_RETRY; the retry is resolved once memory is freed back to Xen
(possibly through host paging).

Signed-off-by: Grzegorz Milos <Grzegorz.Milos@citrix.com>
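
The control flow this describes is sketched below. This is an illustrative
summary only, not code from the patch: emulate_access() is a hypothetical
wrapper, but gfn_to_mfn_unshare(), p2m_mem_paging_populate(), the p2m_is_*()
predicates and X86EMUL_RETRY are the interfaces the diff itself uses.

    /* Hypothetical sketch of the back-off pattern used throughout this
     * patch; not part of the commit. */
    static int emulate_access(struct vcpu *v, unsigned long gfn)
    {
        p2m_type_t p2mt;

        /* Ask for a private (unshared) mapping of the gfn. */
        (void)gfn_to_mfn_unshare(v->domain, gfn, &p2mt, 0);

        if ( p2m_is_paging(p2mt) )
        {
            /* Paged out to the host: start populating, then retry. */
            p2m_mem_paging_populate(v->domain, gfn);
            return X86EMUL_RETRY;
        }

        if ( p2m_is_shared(p2mt) )
            /* Unsharing failed for now (no free memory for a private
             * copy): the VCPU stays paused and the access is retried. */
            return X86EMUL_RETRY;

        /* ... perform the actual access ... */
        return X86EMUL_OKAY;
    }

Because X86EMUL_RETRY re-executes the instruction from scratch once the VCPU
is unpaused, no partial emulation state has to survive the wait.
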
xen/arch/x86/hvm/emulate.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/intercept.c
xen/arch/x86/hvm/io.c
xen/arch/x86/mm/guest_walk.c
xen/arch/x86/mm/hap/guest_walk.c
xen/include/asm-x86/hvm/support.h
xen/include/asm-x86/page.h
xen/include/asm-x86/processor.h

index a636c9bef65ad2a7d6d97d8dca11fcacb2f41e61..88b1eb19469fccae4a7f76ae06024a9c94bdeb7c 100644 (file)
@@ -62,12 +62,14 @@ static int hvmemul_do_io(
     int rc;
 
     /* Check for paged out page */
-    ram_mfn = gfn_to_mfn(current->domain, ram_gfn, &p2mt);
+    ram_mfn = gfn_to_mfn_unshare(current->domain, ram_gfn, &p2mt, 0);
     if ( p2m_is_paging(p2mt) )
     {
         p2m_mem_paging_populate(curr->domain, ram_gfn);
         return X86EMUL_RETRY;
     }
+    if ( p2m_is_shared(p2mt) )
+        return X86EMUL_RETRY;
 
     /*
      * Weird-sized accesses have undefined behaviour: we discard writes
@@ -282,7 +284,7 @@ static int hvmemul_linear_to_phys(
     }
     else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
     {
-        if ( pfec == PFEC_page_paged )
+        if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
             return X86EMUL_RETRY;
         hvm_inject_exception(TRAP_page_fault, pfec, addr);
         return X86EMUL_EXCEPTION;
@@ -299,7 +301,7 @@ static int hvmemul_linear_to_phys(
         /* Is it contiguous with the preceding PFNs? If not then we're done. */
         if ( (npfn == INVALID_GFN) || (npfn != (pfn + (reverse ? -i : i))) )
         {
-            if ( pfec == PFEC_page_paged )
+            if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
                 return X86EMUL_RETRY;
             done /= bytes_per_rep;
             if ( done == 0 )
@@ -441,6 +443,8 @@ static int __hvmemul_read(
         return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, p_data);
     case HVMCOPY_gfn_paged_out:
         return X86EMUL_RETRY;
+    case HVMCOPY_gfn_shared:
+        return X86EMUL_RETRY;
     default:
         break;
     }
@@ -533,6 +537,8 @@ static int hvmemul_write(
                                IOREQ_WRITE, 0, p_data);
     case HVMCOPY_gfn_paged_out:
         return X86EMUL_RETRY;
+    case HVMCOPY_gfn_shared:
+        return X86EMUL_RETRY;
     default:
         break;
     }
@@ -708,6 +714,8 @@ static int hvmemul_rep_movs(
 
     if ( rc == HVMCOPY_gfn_paged_out )
         return X86EMUL_RETRY;
+    if ( rc == HVMCOPY_gfn_shared )
+        return X86EMUL_RETRY;
     if ( rc != HVMCOPY_okay )
     {
         gdprintk(XENLOG_WARNING, "Failed memory-to-memory REP MOVS: sgpa=%"
index 4fc1b934883defd01eaf1a216791af65b9dbe7cf..e18e94f34947c3451f176d925760964696f5f6cd 100644 (file)
@@ -311,7 +311,7 @@ static int hvm_set_ioreq_page(
     unsigned long mfn;
     void *va;
 
-    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+    mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn, &p2mt, 0));
     if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
     if ( p2m_is_paging(p2mt) )
@@ -319,6 +319,8 @@ static int hvm_set_ioreq_page(
         p2m_mem_paging_populate(d, gmfn);
         return -ENOENT;
     }
+    if ( p2m_is_shared(p2mt) )
+        return -ENOENT;
     ASSERT(mfn_valid(mfn));
 
     page = mfn_to_page(mfn);
@@ -1323,7 +1325,7 @@ static void *hvm_map_entry(unsigned long va)
      * we still treat it as a kernel-mode read (i.e. no access checks). */
     pfec = PFEC_page_present;
     gfn = paging_gva_to_gfn(current, va, &pfec);
-    if ( pfec == PFEC_page_paged )
+    if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
         return NULL;
     mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0));
     if ( p2m_is_paging(p2mt) )
@@ -1557,6 +1559,8 @@ void hvm_task_switch(
         goto out;
     if ( rc == HVMCOPY_gfn_paged_out )
         goto out;
+    if ( rc == HVMCOPY_gfn_shared )
+        goto out;
 
     eflags = regs->eflags;
     if ( taskswitch_reason == TSW_iret )
@@ -1595,6 +1599,8 @@ void hvm_task_switch(
         goto out;
     if ( rc == HVMCOPY_gfn_paged_out )
         goto out;
+    if ( rc == HVMCOPY_gfn_shared )
+        goto out;
 
     rc = hvm_copy_from_guest_virt(
         &tss, tr.base, sizeof(tss), PFEC_page_present);
@@ -1602,6 +1608,11 @@ void hvm_task_switch(
         goto out;
     if ( rc == HVMCOPY_gfn_paged_out )
         goto out;
+    /* Note: this could be optimised if the callee functions knew we only
+     * want RO access. */
+    if ( rc == HVMCOPY_gfn_shared )
+        goto out;
+
 
     if ( hvm_set_cr3(tss.cr3) )
         goto out;
@@ -1639,6 +1650,8 @@ void hvm_task_switch(
         exn_raised = 1;
     if ( rc == HVMCOPY_gfn_paged_out )
         goto out;
+    if ( rc == HVMCOPY_gfn_shared )
+        goto out;
 
     if ( (tss.trace & 1) && !exn_raised )
         hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
@@ -1700,6 +1713,8 @@ static enum hvm_copy_result __hvm_copy(
             {
                 if ( pfec == PFEC_page_paged )
                     return HVMCOPY_gfn_paged_out;
+                if ( pfec == PFEC_page_shared )
+                    return HVMCOPY_gfn_shared;
                 if ( flags & HVMCOPY_fault )
                     hvm_inject_exception(TRAP_page_fault, pfec, addr);
                 return HVMCOPY_bad_gva_to_gfn;
@@ -1710,13 +1725,15 @@ static enum hvm_copy_result __hvm_copy(
             gfn = addr >> PAGE_SHIFT;
         }
 
-        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+        mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0));
 
         if ( p2m_is_paging(p2mt) )
         {
             p2m_mem_paging_populate(curr->domain, gfn);
             return HVMCOPY_gfn_paged_out;
         }
+        if ( p2m_is_shared(p2mt) )
+            return HVMCOPY_gfn_shared;
         if ( p2m_is_grant(p2mt) )
             return HVMCOPY_unhandleable;
         if ( !p2m_is_ram(p2mt) )
index 841dec89216b1a69797bd116d284d5fb0c1b12cf..4af9e3d112776cb7b3fd074dc24e603837e00914 100644 (file)
@@ -72,12 +72,17 @@ static int hvm_mmio_access(struct vcpu *v,
     {
         for ( i = 0; i < p->count; i++ )
         {
+            int ret;
+
             rc = read_handler(v, p->addr + (sign * i * p->size), p->size,
                               &data);
             if ( rc != X86EMUL_OKAY )
                 break;
-            if ( hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
-                                        p->size) == HVMCOPY_gfn_paged_out )
+            ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size),
+                                         &data,
+                                         p->size);
+            if ( (ret == HVMCOPY_gfn_paged_out) || 
+                 (ret == HVMCOPY_gfn_shared) )
             {
                 rc = X86EMUL_RETRY;
                 break;
@@ -88,9 +93,13 @@ static int hvm_mmio_access(struct vcpu *v,
     {
         for ( i = 0; i < p->count; i++ )
         {
-            if ( hvm_copy_from_guest_phys(&data,
-                                          p->data + (sign * i * p->size),
-                                          p->size) == HVMCOPY_gfn_paged_out )
+            int ret;
+
+            ret = hvm_copy_from_guest_phys(&data,
+                                           p->data + (sign * i * p->size),
+                                           p->size);
+            if ( (ret == HVMCOPY_gfn_paged_out) || 
+                 (ret == HVMCOPY_gfn_shared) )
             {
                 rc = X86EMUL_RETRY;
                 break;
index 29079f296d27067f83618c8f87a3eae92f282d7b..3c31a6d2e46c1042f496798dd9819db1f17b5de5 100644 (file)
@@ -263,8 +263,11 @@ static int dpci_ioport_read(uint32_t mport, ioreq_t *p)
 
         if ( p->data_is_ptr )
         {
-            if ( hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
-                                        p->size) ==  HVMCOPY_gfn_paged_out )
+            int ret;
+            ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
+                                         p->size);
+            if ( (ret == HVMCOPY_gfn_paged_out) ||
+                 (ret == HVMCOPY_gfn_shared) )
                 return X86EMUL_RETRY;
         }
         else
@@ -284,8 +287,13 @@ static int dpci_ioport_write(uint32_t mport, ioreq_t *p)
         data = p->data;
         if ( p->data_is_ptr )
         {
-            if ( hvm_copy_from_guest_phys(&data, p->data + (sign * i * p->size),
-                                          p->size) ==  HVMCOPY_gfn_paged_out )
+            int ret;
+
+            ret = hvm_copy_from_guest_phys(&data,
+                                           p->data + (sign * i * p->size),
+                                           p->size);
+            if ( (ret == HVMCOPY_gfn_paged_out) ||
+                 (ret == HVMCOPY_gfn_shared) )
                 return X86EMUL_RETRY;
         }
 
index b0f42f328adfcf2ca6b1c884e7e32e5b2437053e..5a4f491e2b2ab11b806dfaf64f4fa1e53834af50 100644 (file)
@@ -86,6 +86,36 @@ static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
     return 0;
 }
 
+static inline void *map_domain_gfn(struct domain *d,
+                                   gfn_t gfn,
+                                   mfn_t *mfn,
+                                   p2m_type_t *p2mt,
+                                   uint32_t *rc)
+{
+    /* Translate the gfn, unsharing if shared */
+    *mfn = gfn_to_mfn_unshare(d, gfn_x(gfn), p2mt, 0);
+    if ( p2m_is_paging(*p2mt) )
+    {
+        p2m_mem_paging_populate(d, gfn_x(gfn));
+
+        *rc = _PAGE_PAGED;
+        return NULL;
+    }
+    if ( p2m_is_shared(*p2mt) )
+    {
+        *rc = _PAGE_SHARED;
+        return NULL;
+    }
+    if ( !p2m_is_ram(*p2mt) ) 
+    {
+        *rc |= _PAGE_PRESENT;
+        return NULL;
+    }
+    ASSERT(mfn_valid(mfn_x(*mfn)));
+    
+    return map_domain_page(mfn_x(*mfn));
+}
+
 
 /* Walk the guest pagetables, after the manner of a hardware walker. */
 uint32_t
@@ -124,23 +154,14 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
     if ( rc & _PAGE_PRESENT ) goto out;
 
     /* Map the l3 table */
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
-    if ( p2m_is_paging(p2mt) )
-    {
-        p2m_mem_paging_populate(d, gfn_x(guest_l4e_get_gfn(gw->l4e)));
-
-        rc = _PAGE_PAGED;
-        goto out;
-    }
-    if ( !p2m_is_ram(p2mt) ) 
-    {
-        rc |= _PAGE_PRESENT;
+    l3p = map_domain_gfn(d,
+                         guest_l4e_get_gfn(gw->l4e),
+                         &gw->l3mfn,
+                         &p2mt,
+                         &rc);
+    if ( l3p == NULL )
         goto out;
-    }
-    ASSERT(mfn_valid(mfn_x(gw->l3mfn)));
-
     /* Get the l3e and check its flags*/
-    l3p = map_domain_page(mfn_x(gw->l3mfn));
     gw->l3e = l3p[guest_l3_table_offset(va)];
     gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
     rc |= ((gflags & mflags) ^ mflags);
@@ -160,23 +181,14 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
 #endif /* PAE or 64... */
 
     /* Map the l2 table */
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
-    if ( p2m_is_paging(p2mt) )
-    {
-        p2m_mem_paging_populate(d, gfn_x(guest_l3e_get_gfn(gw->l3e)));
-
-        rc = _PAGE_PAGED;
-        goto out;
-    }
-    if ( !p2m_is_ram(p2mt) )
-    {
-        rc |= _PAGE_PRESENT;
+    l2p = map_domain_gfn(d,
+                         guest_l3e_get_gfn(gw->l3e),
+                         &gw->l2mfn,
+                         &p2mt,
+                         &rc);
+    if ( l2p == NULL )
         goto out;
-    }
-    ASSERT(mfn_valid(mfn_x(gw->l2mfn)));
-
     /* Get the l2e */
-    l2p = map_domain_page(mfn_x(gw->l2mfn));
     gw->l2e = l2p[guest_l2_table_offset(va)];
 
 #else /* 32-bit only... */
@@ -225,21 +237,13 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
     else 
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
-        if ( p2m_is_paging(p2mt) )
-        {
-            p2m_mem_paging_populate(d, gfn_x(guest_l2e_get_gfn(gw->l2e)));
-
-            rc = _PAGE_PAGED;
+        l1p = map_domain_gfn(d,
+                             guest_l2e_get_gfn(gw->l2e),
+                             &gw->l1mfn,
+                             &p2mt,
+                             &rc);
+        if ( l1p == NULL )
             goto out;
-        }
-        if ( !p2m_is_ram(p2mt) )
-        {
-            rc |= _PAGE_PRESENT;
-            goto out;
-        }
-        ASSERT(mfn_valid(mfn_x(gw->l1mfn)));
-        l1p = map_domain_page(mfn_x(gw->l1mfn));
         gw->l1e = l1p[guest_l1_table_offset(va)];
         gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
         rc |= ((gflags & mflags) ^ mflags);
index be8a85e071301d5396200035c2c2481636b522c5..ac9b975f0233a324bec5ba47f733a8f0569c1f1b 100644 (file)
@@ -32,6 +32,7 @@
 #if GUEST_PAGING_LEVELS <= CONFIG_PAGING_LEVELS
 
 #include <asm/guest_pt.h>
+#include <asm/p2m.h>
 
 unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
     struct vcpu *v, unsigned long gva, uint32_t *pfec)
@@ -45,7 +46,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
 
     /* Get the top-level table's MFN */
     cr3 = v->arch.hvm_vcpu.guest_cr[3];
-    top_mfn = gfn_to_mfn(v->domain, _gfn(cr3 >> PAGE_SHIFT), &p2mt);
+    top_mfn = gfn_to_mfn_unshare(v->domain, cr3 >> PAGE_SHIFT, &p2mt, 0);
     if ( p2m_is_paging(p2mt) )
     {
 //        if ( p2m_is_paged(p2mt) )
@@ -54,6 +55,11 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
         pfec[0] = PFEC_page_paged;
         return INVALID_GFN;
     }
+    if ( p2m_is_shared(p2mt) )
+    {
+        pfec[0] = PFEC_page_shared;
+        return INVALID_GFN;
+    }
     if ( !p2m_is_ram(p2mt) )
     {
         pfec[0] &= ~PFEC_page_present;
@@ -73,7 +79,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
     if ( missing == 0 )
     {
         gfn_t gfn = guest_l1e_get_gfn(gw.l1e);
-        gfn_to_mfn(v->domain, gfn, &p2mt);
+        gfn_to_mfn_unshare(v->domain, gfn_x(gfn), &p2mt, 0);
         if ( p2m_is_paging(p2mt) )
         {
 //            if ( p2m_is_paged(p2mt) )
@@ -82,6 +88,11 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
             pfec[0] = PFEC_page_paged;
             return INVALID_GFN;
         }
+        if ( p2m_is_shared(p2mt) )
+        {
+            pfec[0] = PFEC_page_shared;
+            return INVALID_GFN;
+        }
 
         return gfn_x(gfn);
     }
@@ -92,6 +103,9 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
     if ( missing & _PAGE_PAGED )
         pfec[0] = PFEC_page_paged;
 
+    if ( missing & _PAGE_SHARED )
+        pfec[0] = PFEC_page_shared;
+
     return INVALID_GFN;
 }
 
index e98daca9372e3846d070fb9d4cb328ad49e9c4f5..6d64927868caf4aee4671b13384558e1d8335d39 100644 (file)
@@ -74,6 +74,7 @@ enum hvm_copy_result {
     HVMCOPY_bad_gfn_to_mfn,
     HVMCOPY_unhandleable,
     HVMCOPY_gfn_paged_out,
+    HVMCOPY_gfn_shared,
 };
 
 /*
index 9802e2d4959e80e788a1d063ada6fff434fd6ff1..a94feac127198b12bb60bf13b05bada8111aef56 100644 (file)
@@ -312,6 +312,7 @@ void setup_idle_pagetable(void);
 #define _PAGE_AVAIL    0xE00U
 #define _PAGE_PSE_PAT 0x1000U
 #define _PAGE_PAGED   0x2000U
+#define _PAGE_SHARED  0x4000U
 
 /*
  * Debug option: Ensure that granted mappings are not implicitly unmapped.
index fc4b531022a7d9a01ac91b9000587afa0a7ed73b..90d8806b6572520bb8cedacd10d681dbb71548b7 100644 (file)
 #define PFEC_reserved_bit   (1U<<3)
 #define PFEC_insn_fetch     (1U<<4)
 #define PFEC_page_paged     (1U<<5)
+#define PFEC_page_shared    (1U<<6)
 
 #ifndef __ASSEMBLY__