#include <asm/hvm/hvm.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/hvm/support.h>
+#include <asm/hvm/svm/svm.h>
#include <asm/hvm/viridian.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
struct cpu_user_regs *uregs = &n->arch.user_regs;
int all_segs_okay = 1;
unsigned int dirty_segment_mask, cpu = smp_processor_id();
+ bool fs_gs_done = false;
/* Load and clear the dirty segment mask. */
dirty_segment_mask = per_cpu(dirty_segment_mask, cpu);
per_cpu(dirty_segment_mask, cpu) = 0;
+#ifdef CONFIG_HVM
+ if ( !is_pv_32bit_vcpu(n) && !cpu_has_fsgsbase && cpu_has_svm &&
+ !((uregs->fs | uregs->gs) & ~3) &&
+ /*
+ * The remaining part is just for optimization: If only shadow GS
+ * needs loading, there's nothing to be gained here.
+ */
+ (n->arch.pv.fs_base | n->arch.pv.gs_base_user | n->arch.pv.ldt_ents) )
+ {
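+ /*
+ * VMLOAD sets both the active GS base (from vmcb->gs.base) and the
+ * inactive, SWAPGS-accessible one (from vmcb->kerngsbase), so pick
+ * which saved base is which depending on whether the vCPU is
+ * currently in kernel mode.
+ */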
+ unsigned long gsb = n->arch.flags & TF_kernel_mode
+ ? n->arch.pv.gs_base_kernel : n->arch.pv.gs_base_user;
+ unsigned long gss = n->arch.flags & TF_kernel_mode
+ ? n->arch.pv.gs_base_user : n->arch.pv.gs_base_kernel;
+
+ fs_gs_done = svm_load_segs(n->arch.pv.ldt_ents, LDT_VIRT_START(n),
+ uregs->fs, n->arch.pv.fs_base,
+ uregs->gs, gsb, gss);
+ }
+#endif
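+ /* Fall back to loading the LDT by hand if the VMLOAD path wasn't used. */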
+ if ( !fs_gs_done )
+ load_LDT(n);
+
/* Either selector != 0 ==> reload. */
if ( unlikely((dirty_segment_mask & DIRTY_DS) | uregs->ds) )
{
}
/* Either selector != 0 ==> reload. */
- if ( unlikely((dirty_segment_mask & DIRTY_FS) | uregs->fs) )
+ if ( unlikely((dirty_segment_mask & DIRTY_FS) | uregs->fs) && !fs_gs_done )
{
all_segs_okay &= loadsegment(fs, uregs->fs);
/* non-nul selector updates fs_base */
}
/* Either selector != 0 ==> reload. */
- if ( unlikely((dirty_segment_mask & DIRTY_GS) | uregs->gs) )
+ if ( unlikely((dirty_segment_mask & DIRTY_GS) | uregs->gs) && !fs_gs_done )
{
all_segs_okay &= loadsegment(gs, uregs->gs);
/* non-nul selector updates gs_base_user */
dirty_segment_mask &= ~DIRTY_GS_BASE;
}
- if ( !is_pv_32bit_vcpu(n) )
+ if ( !fs_gs_done && !is_pv_32bit_vcpu(n) )
{
/* This can only be non-zero if selector is NULL. */
if ( n->arch.pv.fs_base | (dirty_segment_mask & DIRTY_FS_BASE) )
write_ptbase(n);
+#if defined(CONFIG_PV) && defined(CONFIG_HVM)
+ /* Prefetch the VMCB if we expect to use it later in the context switch */
+ if ( is_pv_domain(nd) && !is_pv_32bit_domain(nd) && !is_idle_domain(nd) &&
+ !cpu_has_fsgsbase && cpu_has_svm )
+ svm_load_segs(0, 0, 0, 0, 0, 0, 0);
+#endif
+
if ( need_full_gdt(nd) &&
((p->vcpu_id != n->vcpu_id) || !need_full_gdt(pd)) )
{
local_irq_enable();
if ( is_pv_domain(nextd) )
- {
- load_LDT(next);
load_segments(next);
- }
ctxt_switch_levelling(next);
*/
static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, hsa);
static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, host_vmcb);
+#ifdef CONFIG_PV
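+/* Virtual address of this CPU's host VMCB, for svm_load_segs()'s use. */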
+static DEFINE_PER_CPU(struct vmcb_struct *, host_vmcb_va);
+#endif
static bool_t amd_erratum383_found __read_mostly;
*this_hsa = 0;
}
+#ifdef CONFIG_PV
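+ /* Drop the global mapping of the host VMCB, if one was established. */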
+ if ( per_cpu(host_vmcb_va, cpu) )
+ {
+ unmap_domain_page_global(per_cpu(host_vmcb_va, cpu));
+ per_cpu(host_vmcb_va, cpu) = NULL;
+ }
+#endif
+
if ( *this_vmcb )
{
free_domheap_page(maddr_to_page(*this_vmcb));
if ( !pg )
goto err;
+#ifdef CONFIG_PV
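+ /*
+ * Map the page only if svm_load_segs() can actually be used, i.e. when
+ * the CPU lacks FSGSBASE.
+ */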
+ if ( !cpu_has_fsgsbase )
+ per_cpu(host_vmcb_va, cpu) = __map_domain_page_global(pg);
+#endif
+
clear_domain_page(page_to_mfn(pg));
*this_vmcb = page_to_maddr(pg);
}
}
}
+#ifdef CONFIG_PV
+bool svm_load_segs(unsigned int ldt_ents, unsigned long ldt_base,
+ unsigned int fs_sel, unsigned long fs_base,
+ unsigned int gs_sel, unsigned long gs_base,
+ unsigned long gs_shadow)
+{
+ unsigned int cpu = smp_processor_id();
+ struct vmcb_struct *vmcb = per_cpu(host_vmcb_va, cpu);
+
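+ /* No VA mapping was set up for this CPU: have the caller fall back. */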
+ if ( unlikely(!vmcb) )
+ return false;
+
+ if ( !ldt_base )
+ {
+ /*
+ * The actual structure field used here was arbitrarily chosen.
+ * Empirically it doesn't seem to matter much which element is used,
+ * and a clear explanation of the otherwise poor performance has not
+ * been found/provided so far.
+ */
+ prefetchw(&vmcb->ldtr);
+ return true;
+ }
+
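+ /*
+ * A zero-sized LDT simply clears LDTR; otherwise the descriptor is
+ * entered both into the per-CPU GDT and into the VMCB.
+ */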
+ if ( likely(!ldt_ents) )
+ memset(&vmcb->ldtr, 0, sizeof(vmcb->ldtr));
+ else
+ {
+ /* Keep GDT in sync. */
+ struct desc_struct *desc = this_cpu(gdt_table) + LDT_ENTRY -
+ FIRST_RESERVED_GDT_ENTRY;
+
+ _set_tssldt_desc(desc, ldt_base, ldt_ents * 8 - 1, SYS_DESC_ldt);
+
+ vmcb->ldtr.sel = LDT_ENTRY << 3;
+ vmcb->ldtr.attr = SYS_DESC_ldt | (_SEGMENT_P >> 8);
+ vmcb->ldtr.limit = ldt_ents * 8 - 1;
+ vmcb->ldtr.base = ldt_base;
+ }
+
+ ASSERT(!(fs_sel & ~3));
+ vmcb->fs.sel = fs_sel;
+ vmcb->fs.attr = 0;
+ vmcb->fs.limit = 0;
+ vmcb->fs.base = fs_base;
+
+ ASSERT(!(gs_sel & ~3));
+ vmcb->gs.sel = gs_sel;
+ vmcb->gs.attr = 0;
+ vmcb->gs.limit = 0;
+ vmcb->gs.base = gs_base;
+
+ vmcb->kerngsbase = gs_shadow;
+
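+ /*
+ * A single VMLOAD then loads FS, GS, LDTR and the shadow GS base set up
+ * above; TR and the SYSCALL/SYSENTER MSRs are reloaded with the values
+ * VMSAVE'd at CPU bring-up.
+ */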
+ svm_vmload_pa(per_cpu(host_vmcb, cpu));
+
+ return true;
+}
+#endif
+
static int _svm_cpu_up(bool bsp)
{
uint64_t msr_content;
/* Initialize OSVW bits to be used by guests */
svm_host_osvw_init();
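+ /*
+ * Save this CPU's own segment and MSR state into the host VMCB once, so
+ * that the VMLOADs issued by svm_load_segs() restore Xen's values for
+ * everything that function doesn't explicitly override.
+ */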
+ svm_vmsave_pa(per_cpu(host_vmcb, cpu));
+
return 0;
}
void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len);
void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags);
+/*
+ * PV context switch helper. Calls with zero ldt_base request a prefetch of
+ * the VMCB area to be loaded from, instead of an actual load of state. A
+ * false return means no VMCB mapping is available for this CPU, and the
+ * caller has to load segment state the conventional way.
+ */
+bool svm_load_segs(unsigned int ldt_ents, unsigned long ldt_base,
+ unsigned int fs_sel, unsigned long fs_base,
+ unsigned int gs_sel, unsigned long gs_base,
+ unsigned long gs_shadow);
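+/*
+ * Illustrative use (cf. the load_segments() changes): the context switch
+ * prefetch is svm_load_segs(0, 0, 0, 0, 0, 0, 0), while a real load passes
+ * the vCPU's LDT/FS/GS selectors and bases plus the shadow GS base.
+ */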
+
extern u32 svm_feature_flags;
#define SVM_FEATURE_NPT 0 /* Nested page table support */