Set/Way operations are used to perform maintenance on a given cache.
At the moment, Set/Way operations are not trapped and therefore a guest
OS will directly act on the local cache. However, a vCPU may migrate to
another pCPU in the middle of the sequence. This will result in a cache
with stale data (Set/Way operations are not propagated to other pCPUs),
potentially causing a crash. This may be the cause of the heisenbug
noticed in Osstest [1].
Furthermore, Set/Way operations are not available on system caches. This
means that an OS relying on those operations to fully clean the caches
before disabling the MMU, such as 32-bit Linux, may break because data
may sit in system caches and not in RAM.
For more details about Set/Way, see the talk "The Art of Virtualizing
Cache Maintenance" given at Xen Summit 2018 [2].
In the context of Xen, we need to trap Set/Way operations and emulate
them. Per the Arm ARM (B1.14.4 in DDI 0406C.c), Set/Way operations are
difficult to virtualize. So we can assume that a guest OS using them will
suffer the consequences (i.e. slowness) until developers remove all usage
of Set/Way.
As software is not allowed to infer the Set/Way to Physical Address
mapping, Xen will need to go through the guest P2M and clean &
invalidate all the mapped entries.
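Conceptually, the emulation amounts to something like the sketch below
(minimal and non-preemptible; p2m_next_mapped_gfn() is a hypothetical
iterator used only for illustration, whereas the actual implementation
relies on the preemptible p2m_cache_flush_range() added below):

    /* Minimal sketch: clean & invalidate every page mapped in the P2M. */
    static void flush_all_guest_ram(struct domain *d)
    {
        gfn_t gfn = _gfn(0);
        mfn_t mfn;

        /* p2m_next_mapped_gfn() is hypothetical: find the next mapping. */
        while ( p2m_next_mapped_gfn(d, &gfn, &mfn) )
        {
            void *va = map_domain_page(mfn);

            clean_and_invalidate_dcache_va_range(va, PAGE_SIZE);
            unmap_domain_page(va);
            gfn = gfn_add(gfn, 1);
        }
    }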
Because Set/Way operations happen in batches (a loop over all the
Sets/Ways of a cache, as illustrated below), Xen would need to go
through the P2M for every instruction. This is quite expensive and would
severely impact the guest OS. The implementation re-uses the KVM policy
to limit the number of flushes:
- If we trap a Set/Way operation, we enable VM trapping (i.e.
HCR_EL2.TVM) to detect the cache being turned on/off, and do a full
clean.
- We clean the caches when they are turned on and off.
- Once the caches are enabled, we stop trapping VM instructions.
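For illustration, a guest cleaning a whole cache level by Set/Way looks
roughly like this (a hypothetical, simplified version of the ARMv7
sequence; in real code nr_sets/nr_ways and the shifts are derived from
CCSIDR). Without the policy above, every MCR in the loop would trap and
require a full P2M walk:

    static void clean_inval_level_by_sw(unsigned int level,
                                        unsigned int nr_sets,
                                        unsigned int nr_ways,
                                        unsigned int set_shift,
                                        unsigned int way_shift)
    {
        unsigned int set, way;

        for ( set = 0; set < nr_sets; set++ )
            for ( way = 0; way < nr_ways; way++ )
            {
                uint32_t sw = (way << way_shift) | (set << set_shift) |
                              (level << 1);

                /* DCCISW: clean & invalidate data cache line by Set/Way. */
                asm volatile("mcr p15, 0, %0, c7, c14, 2"
                             :: "r" (sw) : "memory");
            }
    }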
[1] https://lists.xenproject.org/archives/html/xen-devel/2017-09/msg03191.html
[2] https://fr.slideshare.net/xen_com_mgr/virtualizing-cache
Signed-off-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
static bool vreg_emulate_##reg(struct cpu_user_regs *regs, \
uint64_t *r, bool read) \
{ \
+ struct vcpu *v = current; \
+ bool cache_enabled = vcpu_has_cache_enabled(v); \
+ \
GUEST_BUG_ON(read); \
WRITE_SYSREG64(*r, reg); \
\
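+    /* Set up a deferred flush if the write toggled SCTLR_EL1.{C,M}. */ \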
+ p2m_toggle_cache(v, cache_enabled); \
+ \
return true; \
}
set_user_reg(regs, regidx, v->arch.actlr);
break;
+ /*
+ * HCR_EL2.TSW
+ *
+ * ARMv8 (DDI 0487B.b): Table D1-42
+ */
+ case HSR_SYSREG_DCISW:
+ case HSR_SYSREG_DCCSW:
+ case HSR_SYSREG_DCCISW:
+ if ( !hsr.sysreg.read )
+ p2m_set_way_flush(current);
+ break;
+
/*
* HCR_EL2.TVM
*
#include <xen/iocap.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/softirq.h>
#include <asm/event.h>
#include <asm/flushtlb.h>
return rc;
}
+/*
+ * Clean & invalidate RAM associated to the guest vCPU.
+ *
+ * The function can only work with the current vCPU and should be called
+ * with IRQs enabled, as the vCPU could get preempted.
+ */
+void p2m_flush_vm(struct vcpu *v)
+{
+ int rc;
+ gfn_t start = _gfn(0);
+
+ ASSERT(v == current);
+ ASSERT(local_irq_is_enabled());
+ ASSERT(v->arch.need_flush_to_ram);
+
+ do
+ {
+ rc = p2m_cache_flush_range(v->domain, &start, _gfn(ULONG_MAX));
+ if ( rc == -ERESTART )
+ do_softirq();
+ } while ( rc == -ERESTART );
+
+ if ( rc != 0 )
+ gprintk(XENLOG_WARNING,
+ "P2M has not been correctly cleaned (rc = %d)\n",
+ rc);
+
+ v->arch.need_flush_to_ram = false;
+}
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (DDI 0406C.c) (TL;DR: S/W ops are not
+ * easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the S/W
+ * to PA mapping, it can only use S/W to nuke the whole cache, which is
+ * rather a good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the powerdown and powerup of caches, if this is
+ * required by the implementation.").
+ *
+ * We use the following policy:
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ * caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches on both caches being turned on and off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void p2m_set_way_flush(struct vcpu *v)
+{
+ /* This function can only work with the current vCPU. */
+ ASSERT(v == current);
+
+ if ( !(v->arch.hcr_el2 & HCR_TVM) )
+ {
+ v->arch.need_flush_to_ram = true;
+ vcpu_hcr_set_flags(v, HCR_TVM);
+ }
+}
+
+void p2m_toggle_cache(struct vcpu *v, bool was_enabled)
+{
+ bool now_enabled = vcpu_has_cache_enabled(v);
+
+ /* This function can only work with the current vCPU. */
+ ASSERT(v == current);
+
+ /*
+ * If switching the MMU+caches on, need to invalidate the caches.
+ * If switching it off, need to clean the caches.
+ * Clean + invalidate does the trick always.
+ */
+ if ( was_enabled != now_enabled )
+ v->arch.need_flush_to_ram = true;
+
+ /* Caches are now on, stop trapping VM ops (until a S/W op) */
+ if ( now_enabled )
+ vcpu_hcr_clear_flags(v, HCR_TVM);
+}
+
mfn_t gfn_to_mfn(struct domain *d, gfn_t gfn)
{
return p2m_lookup(d, gfn, NULL);
{
return (HCR_PTW|HCR_BSU_INNER|HCR_AMO|HCR_IMO|HCR_FMO|HCR_VM|
(vwfi != NATIVE ? (HCR_TWI|HCR_TWE) : 0) |
- HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB);
+ HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB|HCR_TSW);
}
static enum {
}
}
+/*
+ * Process pending work for the vCPU. Any call should be fast or
+ * implement preemption.
+ */
+static void check_for_vcpu_work(void)
+{
+ struct vcpu *v = current;
+
+ if ( likely(!v->arch.need_flush_to_ram) )
+ return;
+
+ /*
+ * Give a chance for the pCPU to process work before handling the vCPU
+ * pending work.
+ */
+ check_for_pcpu_work();
+
+ local_irq_enable();
+ p2m_flush_vm(v);
+ local_irq_disable();
+}
+
void leave_hypervisor_tail(void)
{
local_irq_disable();
+ check_for_vcpu_work();
check_for_pcpu_work();
vgic_sync_to_lrs();
#define TVM_REG(sz, func, reg...) \
static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read) \
{ \
+ struct vcpu *v = current; \
+ bool cache_enabled = vcpu_has_cache_enabled(v); \
+ \
GUEST_BUG_ON(read); \
WRITE_SYSREG##sz(*r, reg); \
\
+ p2m_toggle_cache(v, cache_enabled); \
+ \
return true; \
}
static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r, \
bool read, bool hi) \
{ \
+ struct vcpu *v = current; \
+ bool cache_enabled = vcpu_has_cache_enabled(v); \
register_t reg = READ_SYSREG(xreg); \
\
GUEST_BUG_ON(read); \
} \
WRITE_SYSREG(reg, xreg); \
\
+ p2m_toggle_cache(v, cache_enabled); \
+ \
return true; \
} \
\
set_user_reg(regs, regidx, v->arch.actlr);
break;
+ /*
+ * HCR_EL2.TSW
+ *
+ * ARMv7 (DDI 0406C.b): B1.14.6
+ * ARMv8 (DDI 0487B.b): Table D1-42
+ */
+ case HSR_CPREG32(DCISW):
+ case HSR_CPREG32(DCCSW):
+ case HSR_CPREG32(DCCISW):
+ if ( !cp32.read )
+ p2m_set_way_flush(current);
+ break;
+
/*
* HCR_EL2.TVM
*
struct vtimer phys_timer;
struct vtimer virt_timer;
bool vtimer_initialized;
+
+ /*
+ * The full P2M may require some cleaning (e.g. when emulating
+ * Set/Way). As the action can take a long time, it requires
+ * preemption. It is deferred until we return to the guest, where we
+ * can more easily check for softirqs and preempt the vCPU safely.
+ */
+ bool need_flush_to_ram;
+
} __cacheline_aligned;
void vcpu_show_execution_state(struct vcpu *);
#include <xen/rwlock.h>
#include <xen/mem_access.h>
+#include <asm/current.h>
+
#define paddr_bits PADDR_BITS
/* Holds the bit size of IPAs in p2m tables. */
*/
int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end);
+void p2m_set_way_flush(struct vcpu *v);
+
+void p2m_toggle_cache(struct vcpu *v, bool was_enabled);
+
+void p2m_flush_vm(struct vcpu *v);
+
/*
* Map a region in the guest p2m with a specific p2m type.
* The memory attributes will be derived from the p2m type.
return -EOPNOTSUPP;
}
+/*
+ * A vCPU has the cache enabled only when both the MMU and the data
+ * cache are enabled.
+ */
+static inline bool vcpu_has_cache_enabled(struct vcpu *v)
+{
+ /* Only works with the current vCPU */
+ ASSERT(current == v);
+
+ return (READ_SYSREG32(SCTLR_EL1) & (SCTLR_C|SCTLR_M)) == (SCTLR_C|SCTLR_M);
+}
+
#endif /* _XEN_P2M_H */
/*