*
* Update ref counts to shadow tables appropriately.
*/
-void update_cr3(struct vcpu *v)
+pagetable_t update_cr3(struct vcpu *v)
{
mfn_t cr3_mfn;
if ( paging_mode_enabled(v->domain) )
- {
- paging_update_cr3(v, false);
- return;
- }
+ return paging_update_cr3(v, false);
if ( !(v->arch.flags & TF_kernel_mode) )
cr3_mfn = pagetable_get_mfn(v->arch.guest_table_user);
else
cr3_mfn = pagetable_get_mfn(v->arch.guest_table);
make_cr3(v, cr3_mfn);
+
+ return pagetable_null();
}
static inline void set_tlbflush_timestamp(struct page_info *page)
struct domain *d = curr->domain;
int rc;
mfn_t old_base_mfn;
+ pagetable_t old_shadow;
if ( is_pv_32bit_domain(d) )
{
if ( !VM_ASSIST(d, m2p_strict) )
fill_ro_mpt(mfn);
curr->arch.guest_table = pagetable_from_mfn(mfn);
- update_cr3(curr);
+ old_shadow = update_cr3(curr);
+
+ /*
+ * In shadow mode update_cr3() can fail, in which case here we're still
+ * running on the prior top-level shadow (which we're about to release).
+ * Switch to the idle page tables in such an event; the guest will have
+ * been crashed already.
+ */
+ if ( likely(!mfn_eq(pagetable_get_mfn(old_shadow),
+ maddr_to_mfn(curr->arch.cr3 & ~X86_CR3_NOFLUSH))) )
+ write_ptbase(curr);
+ else
+ write_ptbase(idle_vcpu[curr->processor]);
- write_ptbase(curr);
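+ /*
+ * We're no longer running on the old top-level shadow here (or we've
+ * switched to the idle page tables), so its deferred reference can be
+ * dropped now.
+ */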
+ if ( !pagetable_is_null(old_shadow) )
+ shadow_put_top_level(d, old_shadow);
if ( likely(mfn_x(old_base_mfn) != 0) )
{
return 1;
}
-static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+static pagetable_t hap_update_cr3(struct vcpu *v, bool do_locking, bool noflush)
{
v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
hvm_update_guest_cr3(v, noflush);
+
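+ /*
+ * HAP doesn't use top-level shadows, so there's never a deferred release
+ * to hand back to the caller.
+ */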
+ return pagetable_null();
}
/*
}
/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
-void sh_set_toplevel_shadow(struct vcpu *v,
- unsigned int slot,
- mfn_t gmfn,
- unsigned int root_type,
- mfn_t (*make_shadow)(struct vcpu *v,
- mfn_t gmfn,
- uint32_t shadow_type))
+pagetable_t sh_set_toplevel_shadow(struct vcpu *v,
+ unsigned int slot,
+ mfn_t gmfn,
+ unsigned int root_type,
+ mfn_t (*make_shadow)(struct vcpu *v,
+ mfn_t gmfn,
+ uint32_t shadow_type))
{
mfn_t smfn;
pagetable_t old_entry, new_entry;
mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry)));
v->arch.paging.shadow.shadow_table[slot] = new_entry;
- /* Decrement the refcount of the old contents of this slot */
- if ( !pagetable_is_null(old_entry) )
+ /*
+ * Decrement the refcount of the old contents of this slot, unless
+ * we're still running on that shadow - in that case it needs to be held
+ * on to until the actual page table switch has occurred.
+ */
+ if ( !pagetable_is_null(old_entry) && (v != current || !is_pv_domain(d)) )
{
- mfn_t old_smfn = pagetable_get_mfn(old_entry);
- /* Need to repin the old toplevel shadow if it's been unpinned
- * by shadow_prealloc(): in PV mode we're still running on this
- * shadow and it's not safe to free it yet. */
- if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(d, old_smfn) )
- {
- printk(XENLOG_G_ERR "can't re-pin %"PRI_mfn"\n", mfn_x(old_smfn));
- domain_crash(d);
- }
- sh_put_ref(d, old_smfn, 0);
+ sh_put_ref(d, pagetable_get_mfn(old_entry), 0);
+ old_entry = pagetable_null();
}
+
+ /*
+ * 2- and 3-level shadow mode is used for HVM only. Therefore we never run
+ * on such a shadow, so only call sites requesting an L4 shadow need to pay
+ * attention to the returned value.
+ */
+ ASSERT(pagetable_is_null(old_entry) || root_type == SH_type_l4_64_shadow);
+
+ return old_entry;
+}
+
+/*
+ * Helper invoked when the release of a top-level shadow's reference was
+ * deferred in sh_set_toplevel_shadow() above.
+ */
+void shadow_put_top_level(struct domain *d, pagetable_t old_entry)
+{
+ ASSERT(!pagetable_is_null(old_entry));
+ paging_lock(d);
+ sh_put_ref(d, pagetable_get_mfn(old_entry), 0);
+ paging_unlock(d);
}
/**************************************************************************/
}
}
-static void
-sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+static pagetable_t
+sh_update_cr3(struct vcpu *v, bool do_locking, bool noflush)
/* Updates vcpu->arch.cr3 after the guest has changed CR3.
* Paravirtual guests should set v->arch.guest_table (and guest_table_user,
* if appropriate).
{
struct domain *d = v->domain;
mfn_t gmfn;
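+ /* Old top-level shadow whose release may get deferred to our caller. */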
+ pagetable_t old_entry = pagetable_null();
#if GUEST_PAGING_LEVELS == 3 && defined(CONFIG_HVM)
const guest_l3e_t *gl3e;
unsigned int i, guest_idx;
if ( is_pv_domain(d) && !v->is_initialised )
{
ASSERT(v->arch.cr3 == 0);
- return;
+ return old_entry;
}
if ( do_locking ) paging_lock(v->domain);
#if GUEST_PAGING_LEVELS == 4
if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
guest_flush_tlb_mask(d, d->dirty_cpumask);
- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow);
+ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow,
+ sh_make_shadow);
if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
{
ASSERT(d->is_dying || d->is_shutting_down);
- return;
+ return old_entry;
}
if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) )
{
gl2gfn = guest_l3e_get_gfn(gl3e[i]);
gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt);
if ( p2m_is_ram(p2mt) )
- sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
- ? SH_type_l2h_shadow
- : SH_type_l2_shadow,
- sh_make_shadow);
+ old_entry = sh_set_toplevel_shadow(v, i, gl2mfn,
+ (i == 3
+ ? SH_type_l2h_shadow
+ : SH_type_l2_shadow),
+ sh_make_shadow);
else
- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
- sh_make_shadow);
+ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
+ sh_make_shadow);
}
else
- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, sh_make_shadow);
+ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
+ sh_make_shadow);
+
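+ /* 3-level shadows are HVM-only, so releasing can never have been deferred. */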
+ ASSERT(pagetable_is_null(old_entry));
}
}
#elif GUEST_PAGING_LEVELS == 2
if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 )
guest_flush_tlb_mask(d, d->dirty_cpumask);
- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow);
+ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow,
+ sh_make_shadow);
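+ /* 2-level shadows are HVM-only as well, so again no deferral can occur. */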
+ ASSERT(pagetable_is_null(old_entry));
if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
{
ASSERT(d->is_dying || d->is_shutting_down);
- return;
+ return old_entry;
}
#else
#error This should never happen
/* Release the lock, if we took it (otherwise it's the caller's problem) */
if ( do_locking ) paging_unlock(v->domain);
+
+ return old_entry;
}
return gfn_x(INVALID_GFN);
}
-static void _update_cr3(struct vcpu *v, int do_locking, bool noflush)
+static pagetable_t _update_cr3(struct vcpu *v, bool do_locking, bool noflush)
{
ASSERT_UNREACHABLE();
+ return pagetable_null();
}
static void _update_paging_modes(struct vcpu *v)
void shadow_free(struct domain *d, mfn_t smfn);
/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
-void sh_set_toplevel_shadow(struct vcpu *v,
- unsigned int slot,
- mfn_t gmfn,
- unsigned int root_type,
- mfn_t (*make_shadow)(struct vcpu *v,
- mfn_t gmfn,
- uint32_t shadow_type));
+pagetable_t sh_set_toplevel_shadow(struct vcpu *v,
+ unsigned int slot,
+ mfn_t gmfn,
+ unsigned int root_type,
+ mfn_t (*make_shadow)(struct vcpu *v,
+ mfn_t gmfn,
+ uint32_t shadow_type));
/* Update the shadows in response to a pagetable write from Xen */
int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size);
static void _toggle_guest_pt(struct vcpu *v)
{
+ bool guest_update;
+ pagetable_t old_shadow;
unsigned long cr3;
v->arch.flags ^= TF_kernel_mode;
- update_cr3(v);
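+ /*
+ * Remember whether we're switching into kernel mode: the runstate/time
+ * area updates further down are only performed in that case.
+ */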
+ guest_update = v->arch.flags & TF_kernel_mode;
+ old_shadow = update_cr3(v);
/*
* Don't flush user global mappings from the TLB. Don't tick TLB clock.
* TLB flush (for just the incoming PCID), as the top level page table may
* have changed behind our backs. To be on the safe side, suppress the
* no-flush unconditionally in this case.
+ *
+ * Furthermore in shadow mode update_cr3() can fail, in which case here
+ * we're still running on the prior top-level shadow (which we're about
+ * to release). Switch to the idle page tables in such an event; the
+ * guest will have been crashed already.
*/
cr3 = v->arch.cr3;
if ( shadow_mode_enabled(v->domain) )
+ {
cr3 &= ~X86_CR3_NOFLUSH;
+
+ if ( unlikely(mfn_eq(pagetable_get_mfn(old_shadow),
+ maddr_to_mfn(cr3))) )
+ {
+ cr3 = idle_vcpu[v->processor]->arch.cr3;
+ /* Also suppress runstate/time area updates below. */
+ guest_update = false;
+ }
+ }
write_cr3(cr3);
- if ( !(v->arch.flags & TF_kernel_mode) )
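+ /* With CR3 switched, the deferred reference (if any) can be dropped. */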
+ if ( !pagetable_is_null(old_shadow) )
+ shadow_put_top_level(v->domain, old_shadow);
+
+ if ( !guest_update )
return;
if ( v->arch.pv.need_update_runstate_area && update_runstate_area(v) )
#endif
void make_cr3(struct vcpu *v, mfn_t mfn);
-void update_cr3(struct vcpu *v);
+pagetable_t update_cr3(struct vcpu *v);
int vcpu_destroy_pagetables(struct vcpu *);
void *do_page_walk(struct vcpu *v, unsigned long addr);
unsigned long cr3,
paddr_t ga, uint32_t *pfec,
unsigned int *page_order);
- void (*update_cr3 )(struct vcpu *v, int do_locking,
+ pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking,
bool noflush);
void (*update_paging_modes )(struct vcpu *v);
bool (*flush_tlb )(bool (*flush_vcpu)(void *ctxt,
/* Update all the things that are derived from the guest's CR3.
* Called when the guest changes CR3; the caller can then use v->arch.cr3
* as the value to load into the host CR3 to schedule this vcpu */
-static inline void paging_update_cr3(struct vcpu *v, bool noflush)
+static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
{
- paging_get_hostmode(v)->update_cr3(v, 1, noflush);
+ return paging_get_hostmode(v)->update_cr3(v, 1, noflush);
}
/* Update all the things that are derived from the guest's CR0/CR3/CR4.
int shadow_set_allocation(struct domain *d, unsigned int pages,
bool *preempted);
+/* Helper to invoke for the deferred release of a top-level shadow's reference. */
+void shadow_put_top_level(struct domain *d, pagetable_t old);
+
#else /* !CONFIG_SHADOW_PAGING */
#define shadow_vcpu_teardown(v) ASSERT(is_pv_vcpu(v))
static inline void shadow_blow_tables_per_domain(struct domain *d) {}
+static inline void shadow_put_top_level(struct domain *d, pagetable_t old)
+{
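+ /* Without CONFIG_SHADOW_PAGING no top-level shadow reference can ever exist. */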
+ ASSERT_UNREACHABLE();
+}
+
static inline int shadow_domctl(struct domain *d,
struct xen_domctl_shadow_op *sc,
XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)