x86/ioreq server: add DMOP to map guest ram with p2m_ioreq_server to an ioreq server
author     Paul Durrant <paul.durrant@citrix.com>
           Fri, 7 Apr 2017 15:38:11 +0000 (17:38 +0200)
committer  Jan Beulich <jbeulich@suse.com>
           Fri, 7 Apr 2017 15:38:11 +0000 (17:38 +0200)
Previously, p2m_ioreq_server was used to write-protect guest ram
pages, which were tracked in an ioreq server's rangeset. However,
the number of ram pages to be tracked may exceed the upper limit
of the rangeset.

Now a new DMOP, XEN_DMOP_map_mem_type_to_ioreq_server, is added to
let an ioreq server claim or disclaim responsibility for handling
guest pages with p2m type p2m_ioreq_server. Users of this DMOP
specify which kinds of operation are to be emulated in a parameter
named flags. Currently the DMOP only supports the emulation of
write operations; it can be extended to cover read operations
should an ioreq server require that in the future.

For now only one ioreq server is supported for this p2m type, so
once an ioreq server has claimed ownership, subsequent calls to
XEN_DMOP_map_mem_type_to_ioreq_server will fail. Ownership of guest
ram pages with p2m_ioreq_server can be disclaimed by issuing the
same DMOP with the ioreq server id set to the current owner's and
the flags parameter set to 0, as in the sketch below.
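
As a rough caller-side illustration (a sketch only, not part of this
patch): a device model might populate the DMOP buffer as follows. The
helper name, the include paths and the surrounding hypercall plumbing
(struct xen_dm_op_buf, the dm_op hypercall itself, the ioservid_t from
an earlier create-ioreq-server call) are assumptions; the structure,
type and flag names come from the public headers changed below.

    #include <string.h>
    #include <stdbool.h>
    #include <xen/hvm/dm_op.h>   /* assumed install path */
    #include <xen/hvm/hvm_op.h>  /* HVMMEM_ioreq_server */

    /* Claim (map) or disclaim (flags == 0) the handling of write
     * accesses to p2m_ioreq_server pages for ioreq server <id>. */
    static void build_map_mem_type_op(struct xen_dm_op *op,
                                      ioservid_t id, bool map_writes)
    {
        memset(op, 0, sizeof(*op));   /* also zeroes the opaque field */
        op->op = XEN_DMOP_map_mem_type_to_ioreq_server;
        op->u.map_mem_type_to_ioreq_server.id = id;
        op->u.map_mem_type_to_ioreq_server.type = HVMMEM_ioreq_server;
        /* Only XEN_DMOP_IOREQ_MEM_ACCESS_WRITE is accepted for now. */
        op->u.map_mem_type_to_ioreq_server.flags =
            map_writes ? XEN_DMOP_IOREQ_MEM_ACCESS_WRITE : 0;
    }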

Note:
a> Both XEN_DMOP_map_mem_type_to_ioreq_server and p2m_ioreq_server
are only supported for HVM guests with HAP enabled.

b> Only after an ioreq server has claimed ownership of
p2m_ioreq_server is a p2m type change to p2m_ioreq_server allowed
(see the sketch after these notes).
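
To illustrate note b> (again only a sketch, continuing the includes of
the previous one; XEN_DMOP_set_mem_type and the field types are
assumptions, though the field names match the set_mem_type() handler
in the dm.c hunk below): once ownership has been claimed, guest ram
pages can be converted with the existing set-mem-type op.

    /* Convert nr guest frames starting at first_pfn to
     * HVMMEM_ioreq_server. Per note b>, this fails with -EINVAL until
     * an ioreq server has claimed the type. */
    static void build_set_mem_type_op(struct xen_dm_op *op,
                                      uint64_t first_pfn, uint32_t nr)
    {
        memset(op, 0, sizeof(*op));
        op->op = XEN_DMOP_set_mem_type;
        op->u.set_mem_type.mem_type = HVMMEM_ioreq_server;
        op->u.set_mem_type.first_pfn = first_pfn;
        op->u.set_mem_type.nr = nr;
    }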

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Acked-by: Tim Deegan <tim@xen.org>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
xen/arch/x86/hvm/dm.c
xen/arch/x86/hvm/emulate.c
xen/arch/x86/hvm/ioreq.c
xen/arch/x86/mm/p2m-ept.c
xen/arch/x86/mm/p2m-pt.c
xen/arch/x86/mm/p2m.c
xen/arch/x86/mm/shadow/multi.c
xen/include/asm-x86/hvm/ioreq.h
xen/include/asm-x86/p2m.h
xen/include/public/hvm/dm_op.h
xen/include/public/hvm/hvm_op.h

diff --git a/xen/arch/x86/hvm/dm.c b/xen/arch/x86/hvm/dm.c
index 333c884dc834920c8de78f071050b69a4defd3f0..7e0da8103353d7a686ee302b60db32f57b7a8562 100644
@@ -173,9 +173,14 @@ static int modified_memory(struct domain *d,
 
 static bool allow_p2m_type_change(p2m_type_t old, p2m_type_t new)
 {
+    if ( new == p2m_ioreq_server )
+        return old == p2m_ram_rw;
+
+    if ( old == p2m_ioreq_server )
+        return new == p2m_ram_rw;
+
     return p2m_is_ram(old) ||
-           (p2m_is_hole(old) && new == p2m_mmio_dm) ||
-           (old == p2m_ioreq_server && new == p2m_ram_rw);
+           (p2m_is_hole(old) && new == p2m_mmio_dm);
 }
 
 static int set_mem_type(struct domain *d,
@@ -202,6 +207,18 @@ static int set_mem_type(struct domain *d,
          unlikely(data->mem_type == HVMMEM_unused) )
         return -EINVAL;
 
+    if ( data->mem_type == HVMMEM_ioreq_server )
+    {
+        unsigned int flags;
+
+        if ( !hap_enabled(d) )
+            return -EOPNOTSUPP;
+
+        /* Do not change to HVMMEM_ioreq_server if no ioreq server mapped. */
+        if ( !p2m_get_ioreq_server(d, &flags) )
+            return -EINVAL;
+    }
+
     while ( iter < data->nr )
     {
         unsigned long pfn = data->first_pfn + iter;
@@ -365,6 +382,20 @@ static int dm_op(domid_t domid,
         break;
     }
 
+    case XEN_DMOP_map_mem_type_to_ioreq_server:
+    {
+        const struct xen_dm_op_map_mem_type_to_ioreq_server *data =
+            &op.u.map_mem_type_to_ioreq_server;
+
+        rc = -EOPNOTSUPP;
+        if ( !hap_enabled(d) )
+            break;
+
+        rc = hvm_map_mem_type_to_ioreq_server(d, data->id,
+                                              data->type, data->flags);
+        break;
+    }
+
     case XEN_DMOP_set_ioreq_server_state:
     {
         const struct xen_dm_op_set_ioreq_server_state *data =
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index a4918e1c16ae723ca2ef097db2ad2eedad1b4b0d..09bef9d3e48aad74a9c28a26f9e1a88c511cc9ec 100644
@@ -100,6 +100,7 @@ static int hvmemul_do_io(
     uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
 {
     struct vcpu *curr = current;
+    struct domain *currd = curr->domain;
     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
     ioreq_t p = {
         .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
@@ -141,7 +142,7 @@ static int hvmemul_do_io(
              (p.dir != dir) ||
              (p.df != df) ||
              (p.data_is_ptr != data_is_addr) )
-            domain_crash(curr->domain);
+            domain_crash(currd);
 
         if ( data_is_addr )
             return X86EMUL_UNHANDLEABLE;
@@ -178,8 +179,60 @@ static int hvmemul_do_io(
         break;
     case X86EMUL_UNHANDLEABLE:
     {
-        struct hvm_ioreq_server *s =
-            hvm_select_ioreq_server(curr->domain, &p);
+        /*
+         * Xen isn't emulating the instruction internally, so see if there's
+         * an ioreq server that can handle it.
+         *
+         * Rules:
+         * A> PIO or MMIO accesses run through hvm_select_ioreq_server() to
+         * choose the ioreq server by range. If no server is found, the access
+         * is ignored.
+         *
+         * B> p2m_ioreq_server accesses are handled by the designated
+         * ioreq server for the domain, but there are some corner cases:
+         *
+         *   - If the domain ioreq server is NULL, it's likely we suffer from
+         *   a race with an unmap operation on the ioreq server, so re-try the
+         *   instruction.
+         *
+         * Note: Even when an ioreq server is found, its value could become
+         * stale later, because it is possible that
+         *
+         *   - the PIO or MMIO address is removed from the rangeset of the
+         *   ioreq server, before the event is delivered to the device model.
+         *
+         *   - the p2m_ioreq_server type is unmapped from the ioreq server,
+         *   before the event is delivered to the device model.
+         *
+         * However, there's no cheap way to avoid the above situations in Xen,
+         * so the device model side needs to check the incoming ioreq event.
+         */
+        struct hvm_ioreq_server *s = NULL;
+        p2m_type_t p2mt = p2m_invalid;
+
+        if ( is_mmio )
+        {
+            unsigned long gmfn = paddr_to_pfn(addr);
+
+            get_gfn_query_unlocked(currd, gmfn, &p2mt);
+
+            if ( p2mt == p2m_ioreq_server )
+            {
+                unsigned int flags;
+
+                s = p2m_get_ioreq_server(currd, &flags);
+
+                if ( s == NULL )
+                {
+                    rc = X86EMUL_RETRY;
+                    vio->io_req.state = STATE_IOREQ_NONE;
+                    break;
+                }
+            }
+        }
+
+        if ( !s )
+            s = hvm_select_ioreq_server(currd, &p);
 
         /* If there is no suitable backing DM, just ignore accesses */
         if ( !s )
@@ -190,7 +243,7 @@ static int hvmemul_do_io(
         else
         {
             rc = hvm_send_ioreq(s, &p, 0);
-            if ( rc != X86EMUL_RETRY || curr->domain->is_shutting_down )
+            if ( rc != X86EMUL_RETRY || currd->is_shutting_down )
                 vio->io_req.state = STATE_IOREQ_NONE;
             else if ( data_is_addr )
                 rc = X86EMUL_OKAY;
diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index ad2edad16e658be462d32db34cc66f334f5546cc..5bf3b6dc7a3f5e5a3063acbda98f7d3f39c33305 100644
@@ -753,6 +753,8 @@ int hvm_destroy_ioreq_server(struct domain *d, ioservid_t id)
 
         domain_pause(d);
 
+        p2m_set_ioreq_server(d, 0, s);
+
         hvm_ioreq_server_disable(s, 0);
 
         list_del(&s->list_entry);
@@ -914,6 +916,48 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
     return rc;
 }
 
+/*
+ * Map or unmap an ioreq server to a specific memory type. For now, only
+ * HVMMEM_ioreq_server is supported, and in the future new types can be
+ * introduced, e.g. HVMMEM_ioreq_serverX mapped to ioreq server X. Currently
+ * only write operations are forwarded to an ioreq server. Support for the
+ * emulation of read operations can be added when an ioreq server has such
+ * a requirement in the future.
+ */
+int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id,
+                                     uint32_t type, uint32_t flags)
+{
+    struct hvm_ioreq_server *s;
+    int rc;
+
+    if ( type != HVMMEM_ioreq_server )
+        return -EINVAL;
+
+    if ( flags & ~XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
+        return -EINVAL;
+
+    spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
+
+    rc = -ENOENT;
+    list_for_each_entry ( s,
+                          &d->arch.hvm_domain.ioreq_server.list,
+                          list_entry )
+    {
+        if ( s == d->arch.hvm_domain.default_ioreq_server )
+            continue;
+
+        if ( s->id == id )
+        {
+            rc = p2m_set_ioreq_server(d, flags, s);
+            break;
+        }
+    }
+
+    spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
+
+    return rc;
+}
+
 int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
                                bool_t enabled)
 {
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 568944f309761eba1ac90cb81d2bca41e5113384..cc1eb217baf5c0b0f117ab5ccb8f5009aae82cdf 100644
@@ -131,6 +131,13 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
             entry->r = entry->w = entry->x = 1;
             entry->a = entry->d = !!cpu_has_vmx_ept_ad;
             break;
+        case p2m_ioreq_server:
+            entry->r = 1;
+            entry->w = !(p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE);
+            entry->x = 0;
+            entry->a = !!cpu_has_vmx_ept_ad;
+            entry->d = entry->w && entry->a;
+            break;
         case p2m_mmio_direct:
             entry->r = entry->x = 1;
             entry->w = !rangeset_contains_singleton(mmio_ro_ranges,
@@ -170,7 +177,6 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
             entry->a = entry->d = !!cpu_has_vmx_ept_ad;
             break;
         case p2m_grant_map_ro:
-        case p2m_ioreq_server:
             entry->r = 1;
             entry->w = entry->x = 0;
             entry->a = !!cpu_has_vmx_ept_ad;
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 268b2320a1b44098de8ffd51a320460982fde62b..c0055f3ab5e55b395fcbdd73c32cd05b2f9af2e9 100644
@@ -70,7 +70,9 @@ static const unsigned long pgt[] = {
     PGT_l3_page_table
 };
 
-static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn,
+static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
+                                       p2m_type_t t,
+                                       mfn_t mfn,
                                        unsigned int level)
 {
     unsigned long flags;
@@ -92,8 +94,12 @@ static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn,
     default:
         return flags | _PAGE_NX_BIT;
     case p2m_grant_map_ro:
-    case p2m_ioreq_server:
         return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
+    case p2m_ioreq_server:
+        flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
+        if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
+            return flags & ~_PAGE_RW;
+        return flags;
     case p2m_ram_ro:
     case p2m_ram_logdirty:
     case p2m_ram_shared:
@@ -440,7 +446,8 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
             p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask)
                               ? p2m_ram_logdirty : p2m_ram_rw;
             unsigned long mfn = l1e_get_pfn(e);
-            unsigned long flags = p2m_type_to_flags(p2mt, _mfn(mfn), level);
+            unsigned long flags = p2m_type_to_flags(p2m, p2mt,
+                                                    _mfn(mfn), level);
 
             if ( level )
             {
@@ -578,7 +585,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
         l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
             ? l3e_from_pfn(mfn_x(mfn),
-                           p2m_type_to_flags(p2mt, mfn, 2) | _PAGE_PSE)
+                           p2m_type_to_flags(p2m, p2mt, mfn, 2) | _PAGE_PSE)
             : l3e_empty();
         entry_content.l1 = l3e_content.l3;
 
@@ -615,7 +622,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
 
         if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
             entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
-                                             p2m_type_to_flags(p2mt, mfn, 0));
+                                         p2m_type_to_flags(p2m, p2mt, mfn, 0));
         else
             entry_content = l1e_empty();
 
@@ -652,7 +659,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
         if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
             l2e_content = l2e_from_pfn(mfn_x(mfn),
-                                       p2m_type_to_flags(p2mt, mfn, 1) |
+                                       p2m_type_to_flags(p2m, p2mt, mfn, 1) |
                                        _PAGE_PSE);
         else
             l2e_content = l2e_empty();
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index d38004c3e0bd390e12621a3ca795aa0c092cdd6a..b84add035eab7c3e364765877df93262be579bc1 100644
@@ -82,6 +82,8 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
     else
         p2m_pt_init(p2m);
 
+    spin_lock_init(&p2m->ioreq.lock);
+
     return ret;
 }
 
@@ -286,6 +288,62 @@ void p2m_memory_type_changed(struct domain *d)
     }
 }
 
+int p2m_set_ioreq_server(struct domain *d,
+                         unsigned int flags,
+                         struct hvm_ioreq_server *s)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    int rc;
+
+    /*
+     * Use lock to prevent concurrent setting attempts
+     * from multiple ioreq servers.
+     */
+    spin_lock(&p2m->ioreq.lock);
+
+    /* A flags value of 0 unmaps the ioreq server from the p2m type. */
+    if ( flags == 0 )
+    {
+        rc = -EINVAL;
+        if ( p2m->ioreq.server != s )
+            goto out;
+
+        p2m->ioreq.server = NULL;
+        p2m->ioreq.flags = 0;
+    }
+    else
+    {
+        rc = -EBUSY;
+        if ( p2m->ioreq.server != NULL )
+            goto out;
+
+        p2m->ioreq.server = s;
+        p2m->ioreq.flags = flags;
+    }
+
+    rc = 0;
+
+ out:
+    spin_unlock(&p2m->ioreq.lock);
+
+    return rc;
+}
+
+struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
+                                              unsigned int *flags)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    struct hvm_ioreq_server *s;
+
+    spin_lock(&p2m->ioreq.lock);
+
+    s = p2m->ioreq.server;
+    *flags = p2m->ioreq.flags;
+
+    spin_unlock(&p2m->ioreq.lock);
+    return s;
+}
+
 void p2m_enable_hardware_log_dirty(struct domain *d)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 4798c93bc6a87e4fb016b5e3afb892836a2c7d79..5195d614d129e930799c0b8dfbca3d4b1fb68418 100644
@@ -3306,8 +3306,7 @@ static int sh_page_fault(struct vcpu *v,
     }
 
     /* Need to hand off device-model MMIO to the device model */
-    if ( p2mt == p2m_mmio_dm
-         || (p2mt == p2m_ioreq_server && ft == ft_demand_write) )
+    if ( p2mt == p2m_mmio_dm )
     {
         gpa = guest_walk_to_gpa(&gw);
         goto mmio;
diff --git a/xen/include/asm-x86/hvm/ioreq.h b/xen/include/asm-x86/hvm/ioreq.h
index fbf2c749289af1651a9ce4bc6af17a0269658242..b43667a36794c091a5fc08bf875aeee8d81b707d 100644
@@ -37,6 +37,8 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
 int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
                                          uint32_t type, uint64_t start,
                                          uint64_t end);
+int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id,
+                                     uint32_t type, uint32_t flags);
 int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
                                bool_t enabled);
 
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index bc189d1f79e33b6abd0b421b5eb079bb2dc265bc..452162072dcf15241c8b373fc89d10b7c0803a38 100644
@@ -89,7 +89,8 @@ typedef unsigned int p2m_query_t;
                        | p2m_to_mask(p2m_ram_paging_out)      \
                        | p2m_to_mask(p2m_ram_paged)           \
                        | p2m_to_mask(p2m_ram_paging_in)       \
-                       | p2m_to_mask(p2m_ram_shared))
+                       | p2m_to_mask(p2m_ram_shared)          \
+                       | p2m_to_mask(p2m_ioreq_server))
 
 /* Types that represent a physmap hole that is ok to replace with a shared
  * entry */
@@ -111,8 +112,7 @@ typedef unsigned int p2m_query_t;
 #define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
                       | p2m_to_mask(p2m_ram_ro)         \
                       | p2m_to_mask(p2m_grant_map_ro)   \
-                      | p2m_to_mask(p2m_ram_shared)     \
-                      | p2m_to_mask(p2m_ioreq_server))
+                      | p2m_to_mask(p2m_ram_shared))
 
 /* Write-discard types, which should discard the write operations */
 #define P2M_DISCARD_WRITE_TYPES (p2m_to_mask(p2m_ram_ro)     \
@@ -336,6 +336,20 @@ struct p2m_domain {
         struct ept_data ept;
         /* NPT-equivalent structure could be added here. */
     };
+
+    struct {
+        spinlock_t lock;
+        /*
+         * ioreq server that is responsible for the emulation of
+         * gfns with a specific p2m type (for now, p2m_ioreq_server).
+         */
+        struct hvm_ioreq_server *server;
+        /*
+         * flags specify whether read, write or both operations
+         * are to be emulated by an ioreq server.
+         */
+        unsigned int flags;
+    } ioreq;
 };
 
 /* get host p2m table */
@@ -827,6 +841,11 @@ static inline unsigned int p2m_get_iommu_flags(p2m_type_t p2mt, mfn_t mfn)
     return flags;
 }
 
+int p2m_set_ioreq_server(struct domain *d, unsigned int flags,
+                         struct hvm_ioreq_server *s);
+struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
+                                              unsigned int *flags);
+
 #endif /* _XEN_ASM_X86_P2M_H */
 
 /*
diff --git a/xen/include/public/hvm/dm_op.h b/xen/include/public/hvm/dm_op.h
index f54cece6fa72d148c3ff162c734c11b717b62f61..5ea79ef0e6eb81e8c3fe009abda22fdab2b84519 100644
@@ -318,6 +318,32 @@ struct xen_dm_op_inject_msi {
     uint64_aligned_t addr;
 };
 
+/*
+ * XEN_DMOP_map_mem_type_to_ioreq_server : map or unmap the ioreq server <id>
+ *                                         to a specific memory type <type>
+ *                                         for specific accesses <flags>.
+ *
+ * For now, flags only accepts the value XEN_DMOP_IOREQ_MEM_ACCESS_WRITE,
+ * which means only write operations are to be forwarded to an ioreq server.
+ * Support for the emulation of read operations can be added when an ioreq
+ * server has such a requirement in the future.
+ */
+#define XEN_DMOP_map_mem_type_to_ioreq_server 15
+
+struct xen_dm_op_map_mem_type_to_ioreq_server {
+    ioservid_t id;      /* IN - ioreq server id */
+    uint16_t type;      /* IN - memory type */
+    uint32_t flags;     /* IN - types of accesses to be forwarded to the
+                           ioreq server. A flags value of 0 unmaps the
+                           ioreq server. */
+
+#define XEN_DMOP_IOREQ_MEM_ACCESS_READ (1u << 0)
+#define XEN_DMOP_IOREQ_MEM_ACCESS_WRITE (1u << 1)
+
+    uint64_t opaque;    /* IN/OUT - only used for hypercall continuation,
+                           has to be set to zero by the caller */
+};
+
 struct xen_dm_op {
     uint32_t op;
     uint32_t pad;
@@ -336,6 +362,8 @@ struct xen_dm_op {
         struct xen_dm_op_set_mem_type set_mem_type;
         struct xen_dm_op_inject_event inject_event;
         struct xen_dm_op_inject_msi inject_msi;
+        struct xen_dm_op_map_mem_type_to_ioreq_server
+                map_mem_type_to_ioreq_server;
     } u;
 };
 
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
index bc00ef0e652a5f23f5a61d34ca6c346e9adafcad..0bdafdf59adcfdccb46688ebe0dcd77b5253e6dc 100644
@@ -93,7 +93,13 @@ typedef enum {
     HVMMEM_unused,             /* Placeholder; setting memory to this type
                                   will fail for code after 4.7.0 */
 #endif
-    HVMMEM_ioreq_server
+    HVMMEM_ioreq_server        /* Memory type claimed by an ioreq server; type
+                                  changes to this value are only allowed after
+                                  an ioreq server has claimed its ownership.
+                                  Only pages with HVMMEM_ram_rw are allowed to
+                                  change to this type; conversely, pages with
+                                  this type are only allowed to be changed back
+                                  to HVMMEM_ram_rw. */
 } hvmmem_type_t;
 
 /* Hint from PV drivers for pagetable destruction. */