#include <xen/errno.h>
#include <xen/domain_page.h>
#include <xen/bitops.h>
+#include <xen/vm_event.h>
+#include <xen/mem_access.h>
+#include <public/vm_event.h>
#include <asm/flushtlb.h>
#include <asm/gic.h>
#include <asm/event.h>
return 0;
}
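+/*
+ * Look up the xenmem_access_t setting for a gfn.  gpfn == ~0ul returns the
+ * default access; a gfn that is mapped but has no entry in the radix tree
+ * is reported as rwx.  Unlocked worker for p2m_get_mem_access().
+ */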
+static int __p2m_get_mem_access(struct domain *d, unsigned long gpfn,
+ xenmem_access_t *access)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ void *i;
+ unsigned int index;
+
+ static const xenmem_access_t memaccess[] = {
+#define ACCESS(ac) [p2m_access_##ac] = XENMEM_access_##ac
+ ACCESS(n),
+ ACCESS(r),
+ ACCESS(w),
+ ACCESS(rw),
+ ACCESS(x),
+ ACCESS(rx),
+ ACCESS(wx),
+ ACCESS(rwx),
+ ACCESS(rx2rw),
+ ACCESS(n2rwx),
+#undef ACCESS
+ };
+
+ /* If no setting was ever set, just return rwx. */
+ if ( !p2m->mem_access_enabled )
+ {
+ *access = XENMEM_access_rwx;
+ return 0;
+ }
+
+ /* If the request is for the default access. */
+ if ( gpfn == ~0ul )
+ {
+ *access = memaccess[p2m->default_access];
+ return 0;
+ }
+
+ i = radix_tree_lookup(&p2m->mem_access_settings, gpfn);
+
+ if ( !i )
+ {
+ /*
+ * No setting was found in the Radix tree. Check if the
+ * entry exists in the page-tables.
+ */
+ paddr_t maddr = p2m_lookup(d, gpfn << PAGE_SHIFT, NULL);
+ if ( INVALID_PADDR == maddr )
+ return -ESRCH;
+
+ /* If the entry exists then it's rwx. */
+ *access = XENMEM_access_rwx;
+ }
+ else
+ {
+ /* Setting was found in the Radix tree. */
+ index = radix_tree_ptr_to_int(i);
+ if ( index >= ARRAY_SIZE(memaccess) )
+ return -ERANGE;
+
+ *access = memaccess[index];
+ }
+
+ return 0;
+}
+
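+/*
+ * Record the access type for a pfn in the radix tree.  Storing the default
+ * (rwx) removes any existing entry; nothing is stored while mem_access is
+ * disabled.
+ */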
+static int p2m_mem_access_radix_set(struct p2m_domain *p2m, unsigned long pfn,
+ p2m_access_t a)
+{
+ int rc;
+
+ if ( !p2m->mem_access_enabled )
+ return 0;
+
+ if ( p2m_access_rwx == a )
+ {
+ radix_tree_delete(&p2m->mem_access_settings, pfn);
+ return 0;
+ }
+
+ rc = radix_tree_insert(&p2m->mem_access_settings, pfn,
+ radix_tree_int_to_ptr(a));
+ if ( rc == -EEXIST )
+ {
+ /* If a setting already exists, change it to the new one */
+ radix_tree_replace_slot(
+ radix_tree_lookup_slot(
+ &p2m->mem_access_settings, pfn),
+ radix_tree_int_to_ptr(a));
+ rc = 0;
+ }
+
+ return rc;
+}
+
enum p2m_operation {
INSERT,
ALLOCATE,
REMOVE,
RELINQUISH,
CACHEFLUSH,
+ MEMACCESS,
};
/* Put any references on the single 4K page referenced by pte. TODO:
if ( p2m_valid(orig_pte) )
return P2M_ONE_DESCEND;
- if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) )
+ if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) &&
+ /* We only create superpages when mem_access is not in use. */
+ (level == 3 || (level < 3 && !p2m->mem_access_enabled)) )
{
struct page_info *page;
page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0);
if ( page )
{
+ rc = p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a);
+ if ( rc < 0 )
+ {
+ free_domheap_page(page);
+ return rc;
+ }
+
pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t, a);
if ( level < 3 )
pte.p2m.table = 0;
/*
* If we get here then we failed to allocate a sufficiently
* large contiguous region for this level (which can't be
- * L3). Create a page table and continue to descend so we try
- * smaller allocations.
+ * L3) or mem_access is in use. Create a page table and
+ * continue to descend so we try smaller allocations.
*/
rc = p2m_create_table(d, entry, 0, flush_cache);
if ( rc < 0 )
case INSERT:
if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) &&
- /* We do not handle replacing an existing table with a superpage */
- (level == 3 || !p2m_table(orig_pte)) )
+ /*
+ * We do not handle replacing an existing table with a superpage
+ * or when mem_access is in use.
+ */
+ (level == 3 || (!p2m_table(orig_pte) && !p2m->mem_access_enabled)) )
{
+ rc = p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a);
+ if ( rc < 0 )
+ return rc;
+
/* New mapping is superpage aligned, make it */
pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t, a);
if ( level < 3 )
memset(&pte, 0x00, sizeof(pte));
p2m_write_pte(entry, pte, flush_cache);
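+ /* The mapping is gone; setting rwx drops any mem_access radix entry. */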
+ p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), p2m_access_rwx);
*addr += level_size;
*maddr += level_size;
*addr += PAGE_SIZE;
return P2M_ONE_PROGRESS_NOP;
}
+
+ case MEMACCESS:
+ if ( level < 3 )
+ {
+ if ( !p2m_valid(orig_pte) )
+ {
+ *addr += level_size;
+ return P2M_ONE_PROGRESS_NOP;
+ }
+
+ /* Shatter large pages as we descend */
+ if ( p2m_mapping(orig_pte) )
+ {
+ rc = p2m_shatter_page(d, entry, level, flush_cache);
+ if ( rc < 0 )
+ return rc;
+ } /* else: an existing table mapping -> descend */
+
+ return P2M_ONE_DESCEND;
+ }
+ else
+ {
+ pte = orig_pte;
+
+ if ( p2m_valid(pte) )
+ {
+ rc = p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a);
+ if ( rc < 0 )
+ return rc;
+
+ p2m_set_permission(&pte, pte.p2m.type, a);
+ p2m_write_pte(entry, pte, flush_cache);
+ }
+
+ *addr += level_size;
+ *flush = true;
+ return P2M_ONE_PROGRESS;
+ }
}
BUG(); /* Should never get here */
unsigned int cur_root_table = ~0;
unsigned int cur_offset[4] = { ~0, ~0, ~0, ~0 };
unsigned int count = 0;
+ const unsigned long sgfn = paddr_to_pfn(start_gpaddr),
+ egfn = paddr_to_pfn(end_gpaddr);
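+ /*
+ * MEMACCESS checks for preemption on every iteration (see XSA-89);
+ * the other operations keep the historical 0x2000 progress threshold.
+ */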
+ const unsigned int preempt_count_limit = (op == MEMACCESS) ? 1 : 0x2000;
bool_t flush = false;
bool_t flush_pt;
};
/*
- * Arbitrarily, preempt every 512 operations or 8192 nops.
- * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000
- *
- * count is initialised to 0 above, so we are guaranteed to
- * always make at least one pass.
+ * Check whether the current iteration should be preempted.
+ * Since count is initialised to 0 above, we are guaranteed to
+ * always make at least one pass as long as preempt_count_limit
+ * is at least 1.
*/
-
- if ( op == RELINQUISH && count >= 0x2000 )
+ if ( count >= preempt_count_limit && hypercall_preempt_check() )
{
- if ( hypercall_preempt_check() )
+ switch ( op )
{
+ case RELINQUISH:
+ /*
+ * Arbitrarily, preempt every 512 operations or 8192 nops.
+ * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000
+ * This is set in preempt_count_limit.
+ */
p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
rc = -ERESTART;
goto out;
+
+ case MEMACCESS:
+ {
+ /*
+ * Preempt setting mem_access permissions as required by XSA-89,
+ * if it's not the last iteration.
+ */
+ uint32_t progress = paddr_to_pfn(addr) - sgfn + 1;
+
+ if ( (egfn - sgfn) > progress && !(progress & mask) )
+ {
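+ /* A positive rc reports the number of pfns handled so far. */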
+ rc = progress;
+ goto tlbflush;
+ }
+ break;
}
+
+ default:
+ break;
+ };
+
+ /* Reset the current iteration counter. */
count = 0;
}
}
}
- if ( flush )
- {
- unsigned long sgfn = paddr_to_pfn(start_gpaddr);
- unsigned long egfn = paddr_to_pfn(end_gpaddr);
-
- flush_tlb_domain(d);
- iommu_iotlb_flush(d, sgfn, egfn - sgfn);
- }
-
if ( op == ALLOCATE || op == INSERT )
{
- unsigned long sgfn = paddr_to_pfn(start_gpaddr);
- unsigned long egfn = paddr_to_pfn(end_gpaddr);
-
p2m->max_mapped_gfn = max(p2m->max_mapped_gfn, egfn);
p2m->lowest_mapped_gfn = min(p2m->lowest_mapped_gfn, sgfn);
}
rc = 0;
+tlbflush:
+ if ( flush )
+ {
+ flush_tlb_domain(d);
+ iommu_iotlb_flush(d, sgfn, egfn - sgfn);
+ }
+
out:
+
if ( rc < 0 && ( op == INSERT || op == ALLOCATE ) &&
addr != start_gpaddr )
{
* We do this first as this is faster in the default case when no
* permission is set on the page.
*/
- rc = p2m_get_mem_access(current->domain, paddr_to_pfn(ipa), &xma);
+ rc = __p2m_get_mem_access(current->domain, paddr_to_pfn(ipa), &xma);
if ( rc < 0 )
goto err;
smp_call_function(setup_virt_paging_one, (void *)val, 1);
}
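+/*
+ * Check a permission fault against the mem_access settings.  Returns false
+ * when the fault was caused by a mem_access restriction and has been handled
+ * here (event sent to the monitor, vCPU paused, or permissions relaxed);
+ * returns true otherwise.
+ */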
+bool_t p2m_mem_access_check(paddr_t gpa, vaddr_t gla, const struct npfec npfec)
+{
+ int rc;
+ bool_t violation;
+ xenmem_access_t xma;
+ vm_event_request_t *req;
+ struct vcpu *v = current;
+ struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
+
+ /* Mem_access is not in use. */
+ if ( !p2m->mem_access_enabled )
+ return true;
+
+ rc = p2m_get_mem_access(v->domain, paddr_to_pfn(gpa), &xma);
+ if ( rc )
+ return true;
+
+ /* Now check for mem_access violation. */
+ switch ( xma )
+ {
+ case XENMEM_access_rwx:
+ violation = false;
+ break;
+ case XENMEM_access_rw:
+ violation = npfec.insn_fetch;
+ break;
+ case XENMEM_access_wx:
+ violation = npfec.read_access;
+ break;
+ case XENMEM_access_rx:
+ case XENMEM_access_rx2rw:
+ violation = npfec.write_access;
+ break;
+ case XENMEM_access_x:
+ violation = npfec.read_access || npfec.write_access;
+ break;
+ case XENMEM_access_w:
+ violation = npfec.read_access || npfec.insn_fetch;
+ break;
+ case XENMEM_access_r:
+ violation = npfec.write_access || npfec.insn_fetch;
+ break;
+ default:
+ case XENMEM_access_n:
+ case XENMEM_access_n2rwx:
+ violation = true;
+ break;
+ }
+
+ if ( !violation )
+ return true;
+
+ /* First, handle rx2rw and n2rwx conversion automatically. */
+ if ( npfec.write_access && xma == XENMEM_access_rx2rw )
+ {
+ rc = p2m_set_mem_access(v->domain, paddr_to_pfn(gpa), 1,
+ 0, ~0, XENMEM_access_rw);
+ return false;
+ }
+ else if ( xma == XENMEM_access_n2rwx )
+ {
+ rc = p2m_set_mem_access(v->domain, paddr_to_pfn(gpa), 1,
+ 0, ~0, XENMEM_access_rwx);
+ }
+
+ /* Otherwise, check if there is a vm_event monitor subscriber */
+ if ( !vm_event_check_ring(&v->domain->vm_event->monitor) )
+ {
+ /* No listener */
+ if ( p2m->access_required )
+ {
+ gdprintk(XENLOG_INFO, "Memory access permissions failure, "
+ "no vm_event listener VCPU %d, dom %d\n",
+ v->vcpu_id, v->domain->domain_id);
+ domain_crash(v->domain);
+ }
+ else
+ {
+ /* n2rwx was already handled */
+ if ( xma != XENMEM_access_n2rwx )
+ {
+ /* A listener is not required, so clear the access
+ * restrictions. */
+ rc = p2m_set_mem_access(v->domain, paddr_to_pfn(gpa), 1,
+ 0, ~0, XENMEM_access_rwx);
+ }
+ }
+
+ /* No need to reinject */
+ return false;
+ }
+
+ req = xzalloc(vm_event_request_t);
+ if ( req )
+ {
+ req->reason = VM_EVENT_REASON_MEM_ACCESS;
+
+ /* The vCPU is paused below; flag it in the request for the listener. */
+ if ( xma != XENMEM_access_n2rwx )
+ req->flags |= VM_EVENT_FLAG_VCPU_PAUSED;
+
+ /* Send request to mem access subscriber */
+ req->u.mem_access.gfn = gpa >> PAGE_SHIFT;
+ req->u.mem_access.offset = gpa & ((1 << PAGE_SHIFT) - 1);
+ if ( npfec.gla_valid )
+ {
+ req->u.mem_access.flags |= MEM_ACCESS_GLA_VALID;
+ req->u.mem_access.gla = gla;
+
+ if ( npfec.kind == npfec_kind_with_gla )
+ req->u.mem_access.flags |= MEM_ACCESS_FAULT_WITH_GLA;
+ else if ( npfec.kind == npfec_kind_in_gpt )
+ req->u.mem_access.flags |= MEM_ACCESS_FAULT_IN_GPT;
+ }
+ req->u.mem_access.flags |= npfec.read_access ? MEM_ACCESS_R : 0;
+ req->u.mem_access.flags |= npfec.write_access ? MEM_ACCESS_W : 0;
+ req->u.mem_access.flags |= npfec.insn_fetch ? MEM_ACCESS_X : 0;
+ req->vcpu_id = v->vcpu_id;
+
+ mem_access_send_req(v->domain, req);
+ xfree(req);
+ }
+
+ /* Pause the current VCPU */
+ if ( xma != XENMEM_access_n2rwx )
+ vm_event_vcpu_pause(v);
+
+ return false;
+}
+
+/*
+ * Set access type for a region of pfns.
+ * If pfn == ~0ul, sets the default access type.
+ */
+long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
+ uint32_t start, uint32_t mask, xenmem_access_t access)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ p2m_access_t a;
+ long rc = 0;
+
+ static const p2m_access_t memaccess[] = {
+#define ACCESS(ac) [XENMEM_access_##ac] = p2m_access_##ac
+ ACCESS(n),
+ ACCESS(r),
+ ACCESS(w),
+ ACCESS(rw),
+ ACCESS(x),
+ ACCESS(rx),
+ ACCESS(wx),
+ ACCESS(rwx),
+ ACCESS(rx2rw),
+ ACCESS(n2rwx),
+#undef ACCESS
+ };
+
+ switch ( access )
+ {
+ case 0 ... ARRAY_SIZE(memaccess) - 1:
+ a = memaccess[access];
+ break;
+ case XENMEM_access_default:
+ a = p2m->default_access;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /*
+ * Flip mem_access_enabled to true when a permission is set, so as to prevent
+ * allocating or inserting super-pages.
+ */
+ p2m->mem_access_enabled = true;
+
+ /* If request to set default access. */
+ if ( pfn == ~0ul )
+ {
+ p2m->default_access = a;
+ return 0;
+ }
+
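+ /*
+ * apply_p2m_changes() returns a positive count of handled pfns when it
+ * is preempted; report it as start + rc so the caller can continue the
+ * operation where it stopped.
+ */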
+ rc = apply_p2m_changes(d, MEMACCESS,
+ pfn_to_paddr(pfn+start), pfn_to_paddr(pfn+nr),
+ 0, MATTR_MEM, mask, 0, a);
+ if ( rc < 0 )
+ return rc;
+ else if ( rc > 0 )
+ return start + rc;
+
+ return 0;
+}
+
+int p2m_get_mem_access(struct domain *d, unsigned long gpfn,
+ xenmem_access_t *access)
+{
+ int ret;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+ spin_lock(&p2m->lock);
+ ret = __p2m_get_mem_access(d, gpfn, access);
+ spin_unlock(&p2m->lock);
+
+ return ret;
+}
+
/*
* Local variables:
* mode: C