
view xen/include/asm-x86/domain.h @ 14445:522a1cd17b6d

[XEN] Implement faster int 0x80 handling for compat mode guests.

Using the GPF handler to spot the software interrupt and pass it back
to the guest increases the base syscall time by a factor of 2.7
compared with 32-on-32 using a direct trap to ring 1 (0.3270->0.8680
microseconds, measured with lmbench lat_syscall).

Since the 64-bit IDT can only contain 64-bit segment selectors, we
cannot trap directly to compat-mode ring 1. However, implementing a
dedicated 64-bit ring 0 trap handler allows us to avoid much of the
GPF handler overhead, reducing the slowdown to a factor of 1.7
(0.3270->0.5497 microseconds).

Signed-off-by: Ian Campbell <ian.campbell@xensource.com>
author Ian Campbell <ian.campbell@xensource.com>
date Tue Mar 20 14:33:15 2007 +0000 (2007-03-20)
parents 3be7f638cba0
children f0f9533b2a23
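
To make the fast path concrete, here is a minimal sketch (not taken from this changeset) of how the guest's registered int 0x80 handler could be cached in the per-vcpu trap_bounce that this header adds as arch_vcpu.int80_bounce under CONFIG_X86_64. The helper name is hypothetical, and the guest_context.trap_ctxt / TBF_EXCEPTION usage is assumed from Xen's guest trap-table interface rather than quoted from the patch.

    #include <xen/sched.h>   /* struct vcpu */

    /* Hypothetical helper: refresh the cached int 0x80 bounce frame whenever
     * the guest (re)registers its trap table, so the dedicated ring-0 stub
     * can bounce straight to the guest handler instead of taking the #GP path. */
    static void cache_int80_bounce(struct vcpu *v)
    {
        const struct trap_info *ti = &v->arch.guest_context.trap_ctxt[0x80];
        struct trap_bounce *tb = &v->arch.int80_bounce;

        tb->flags = TBF_EXCEPTION;  /* software interrupt: no error code to push */
        tb->cs    = ti->cs;         /* guest-registered handler code selector */
        tb->eip   = ti->address;    /* guest-registered handler entry point */
    }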
line source
#ifndef __ASM_DOMAIN_H__
#define __ASM_DOMAIN_H__

#include <xen/config.h>
#include <xen/mm.h>
#include <asm/hvm/vcpu.h>
#include <asm/hvm/domain.h>
#include <asm/e820.h>

struct trap_bounce {
    unsigned long  error_code;
    unsigned short flags; /* TBF_ */
    unsigned short cs;
    unsigned long  eip;
};
#define MAPHASH_ENTRIES 8
#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
#define MAPHASHENT_NOTINUSE ((u16)~0U)
struct vcpu_maphash {
    struct vcpu_maphash_entry {
        unsigned long mfn;
        uint16_t      idx;
        uint16_t      refcnt;
    } hash[MAPHASH_ENTRIES];
} __cacheline_aligned;

#define MAPCACHE_ORDER   10
#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
struct mapcache {
    /* The PTEs that provide the mappings, and a cursor into the array. */
    l1_pgentry_t *l1tab;
    unsigned int cursor;

    /* Protects map_domain_page(). */
    spinlock_t lock;

    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
    unsigned int epoch, shadow_epoch[MAX_VIRT_CPUS];
    u32 tlbflush_timestamp;

    /* Which mappings are in use, and which are garbage to reap next epoch? */
    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];

    /* Lock-free per-VCPU hash of recently-used mappings. */
    struct vcpu_maphash vcpu_maphash[MAX_VIRT_CPUS];
};
extern void mapcache_init(struct domain *);

/* x86/64: toggle guest between kernel and user modes. */
extern void toggle_guest_mode(struct vcpu *);

/*
 * Initialise a hypercall-transfer page. The given pointer must be mapped
 * in Xen virtual address space (accesses are not validated or checked).
 */
extern void hypercall_page_initialise(struct domain *d, void *);

/************************************************/
/*           shadow paging extension            */
/************************************************/
struct shadow_domain {
    spinlock_t        lock;   /* shadow domain lock */
    int               locker; /* processor which holds the lock */
    const char       *locker_function; /* Func that took it */
    unsigned int      opt_flags;       /* runtime tunable optimizations on/off */
    struct list_head  pinned_shadows;

    /* Memory allocation */
    struct list_head  freelists[SHADOW_MAX_ORDER + 1];
    struct list_head  p2m_freelist;
    unsigned int      total_pages;  /* number of pages allocated */
    unsigned int      free_pages;   /* number of pages on freelists */
    unsigned int      p2m_pages;    /* number of pages allocated to p2m */

    /* Shadow hashtable */
    struct shadow_page_info **hash_table;
    int hash_walking;  /* Some function is walking the hash table */

    /* Fast MMIO path heuristic */
    int has_fast_mmio_entries;

    /* Shadow log-dirty bitmap */
    unsigned long *dirty_bitmap;
    unsigned int dirty_bitmap_size;  /* in pages, bit per page */

    /* Shadow log-dirty mode stats */
    unsigned int fault_count;
    unsigned int dirty_count;
};
struct shadow_vcpu {
#if CONFIG_PAGING_LEVELS >= 3
    /* PAE guests: per-vcpu shadow top-level table */
    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
    /* PAE guests: per-vcpu cache of the top-level *guest* entries */
    l3_pgentry_t gl3e[4] __attribute__((__aligned__(32)));
#endif
    /* Non-PAE guests: pointer to guest top-level pagetable */
    void *guest_vtable;
    /* Last MFN that we emulated a write to. */
    unsigned long last_emulated_mfn;
    /* MFN of the last shadow that we shot a writeable mapping in */
    unsigned long last_writeable_pte_smfn;
};
/************************************************/
/*           hardware-assisted paging           */
/************************************************/
struct hap_domain {
    spinlock_t        lock;
    int               locker;
    const char       *locker_function;

    struct list_head  freelists;
    struct list_head  p2m_freelist;
    unsigned int      total_pages;  /* number of pages allocated */
    unsigned int      free_pages;   /* number of pages on freelists */
    unsigned int      p2m_pages;    /* number of pages allocated to p2m */
};
/************************************************/
/*                 p2m handling                 */
/************************************************/
struct p2m_domain {
    /* Lock that protects updates to the p2m */
    spinlock_t         lock;
    int                locker;   /* processor which holds the lock */
    const char        *locker_function; /* Func that took it */

    /* Pages used to construct the p2m */
    struct list_head   pages;

    /* Functions to call to get or free pages for the p2m */
    struct page_info * (*alloc_page)(struct domain *d);
    void               (*free_page)(struct domain *d,
                                    struct page_info *pg);

    /* Highest guest frame that's ever been mapped in the p2m */
    unsigned long max_mapped_pfn;
};
/************************************************/
/*          common paging data structure        */
/************************************************/
struct paging_domain {
    u32               mode;  /* flags to control paging operation */

    /* extension for shadow paging support */
    struct shadow_domain shadow;

    /* Other paging assistance code will have structs here */
    struct hap_domain    hap;
};

struct paging_vcpu {
    /* Pointers to mode-specific entry points. */
    struct paging_mode *mode;
    /* HVM guest: paging enabled (CR0.PG)? */
    unsigned int translate_enabled:1;

    /* paging support extension */
    struct shadow_vcpu shadow;
};
struct arch_domain
{
    l1_pgentry_t *mm_perdomain_pt;
#ifdef CONFIG_X86_64
    l2_pgentry_t *mm_perdomain_l2;
    l3_pgentry_t *mm_perdomain_l3;
#endif

#ifdef CONFIG_X86_32
    /* map_domain_page() mapping cache. */
    struct mapcache mapcache;
#endif

#ifdef CONFIG_COMPAT
    unsigned int hv_compat_vstart;
    l3_pgentry_t *mm_arg_xlat_l3;
#endif

    /* I/O-port admin-specified access capabilities. */
    struct rangeset *ioport_caps;

    struct hvm_domain hvm_domain;

    struct paging_domain paging;
    struct p2m_domain p2m;

    /* Shadow translated domain: P2M mapping */
    pagetable_t phys_table;

    /* Pseudophysical e820 map (XENMEM_memory_map). */
    struct e820entry e820[3];
    unsigned int nr_e820;

    /* Maximum physical-address bitwidth supported by this guest. */
    unsigned int physaddr_bitsize;
} __cacheline_aligned;
#ifdef CONFIG_X86_PAE
struct pae_l3_cache {
    /*
     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
     * an L3 table while we are currently running on it (without using
     * expensive atomic 64-bit operations).
     */
    l3_pgentry_t  table[2][4] __attribute__((__aligned__(32)));
    unsigned long high_mfn;  /* The >=4GB MFN being shadowed. */
    unsigned int  inuse_idx; /* Which of the two cache slots is in use? */
    spinlock_t    lock;
};
#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)
#else /* !CONFIG_X86_PAE */
struct pae_l3_cache { };
#define pae_l3_cache_init(c) ((void)0)
#endif
struct arch_vcpu
{
    /* Needs 16-byte alignment for FXSAVE/FXRSTOR. */
    struct vcpu_guest_context guest_context
    __attribute__((__aligned__(16)));

    struct pae_l3_cache pae_l3_cache;

    unsigned long      flags; /* TF_ */

    void (*schedule_tail) (struct vcpu *);

    void (*ctxt_switch_from) (struct vcpu *);
    void (*ctxt_switch_to) (struct vcpu *);

    /* Bounce information for propagating an exception to guest OS. */
    struct trap_bounce trap_bounce;

    /* I/O-port access bitmap. */
    XEN_GUEST_HANDLE(uint8_t) iobmp; /* Guest kernel virtual address of the bitmap. */
    int iobmp_limit;  /* Number of ports represented in the bitmap. */
    int iopl;         /* Current IOPL for this VCPU. */

#ifdef CONFIG_X86_32
    struct desc_struct int80_desc;
#endif
#ifdef CONFIG_X86_64
    struct trap_bounce int80_bounce;
#endif

    /* Virtual Machine Extensions */
    struct hvm_vcpu hvm_vcpu;

    /*
     * Every domain has a L1 pagetable of its own. Per-domain mappings
     * are put in this table (eg. the current GDT is mapped here).
     */
    l1_pgentry_t *perdomain_ptes;

#ifdef CONFIG_X86_64
    pagetable_t guest_table_user;       /* (MFN) x86/64 user-space pagetable */
#endif
    pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
    /* guest_table holds a ref to the page, and also a type-count unless
     * shadow refcounts are in use */
    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
    pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
    unsigned long cr3;                  /* (MA) value to install in HW CR3 */

    /* Current LDT details. */
    unsigned long shadow_ldt_mapcnt;

    struct paging_vcpu paging;
} __cacheline_aligned;
/* shorthands to improve code legibility */
#define hvm_vmx         hvm_vcpu.u.vmx
#define hvm_svm         hvm_vcpu.u.svm

#endif /* __ASM_DOMAIN_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */
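
The pae_l3_cache comment in this header explains that Xen keeps two low-memory PAE L3 tables because it cannot rebuild the table it is currently running on. Below is a minimal sketch of that double-buffering idea; the helper name and surrounding details are assumptions for illustration, not the actual Xen implementation.

    /* Hypothetical illustration: copy the guest's >=4GB L3 entries into the
     * cache slot we are NOT currently running on, then flip to that slot.
     * The caller would afterwards point CR3 at the low-memory copy. */
    static void pae_l3_cache_refill(struct pae_l3_cache *cache,
                                    unsigned long high_mfn,
                                    const l3_pgentry_t *guest_l3e)
    {
        unsigned int next = cache->inuse_idx ^ 1;  /* the idle slot */
        unsigned int i;

        spin_lock(&cache->lock);
        for ( i = 0; i < 4; i++ )
            cache->table[next][i] = guest_l3e[i];  /* shadow the four PAE L3 slots */
        cache->high_mfn  = high_mfn;               /* remember which >=4GB MFN we shadow */
        cache->inuse_idx = next;
        spin_unlock(&cache->lock);
    }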