xen/include/asm-x86/domain.h @ 19787:cecc76506afc

x86_64: don't allocate L1 per-domain page table pages in a single chunk

Instead, allocate them on demand, and adjust the consumer to no longer
assume the allocated space is contiguous.

This is another prerequisite to extending the number of vCPUs the
hypervisor can support per guest.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 18 10:05:23 2009 +0100
parents 6705898f768d
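
What the change amounts to, as a hedged sketch (perdomain_pt_alloc is an invented helper name and its error handling is simplified; mm_perdomain_pt_pages and alloc_domheap_page() are real names from this tree, but this is not the actual patch): an L1 per-domain page is allocated only when a vCPU slot first needs it, instead of as one contiguous chunk at domain creation.

/* Illustrative sketch only -- not the real changeset. */
static int perdomain_pt_alloc(struct domain *d, unsigned int idx)
{
    struct page_info *pg;

    if ( d->arch.mm_perdomain_pt_pages[idx] != NULL )
        return 0;                               /* already populated */

    pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
    if ( pg == NULL )
        return -ENOMEM;

    clear_page(page_to_virt(pg));
    d->arch.mm_perdomain_pt_pages[idx] = pg;
    return 0;
}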

#ifndef __ASM_DOMAIN_H__
#define __ASM_DOMAIN_H__

#include <xen/config.h>
#include <xen/mm.h>
#include <asm/hvm/vcpu.h>
#include <asm/hvm/domain.h>
#include <asm/e820.h>

#define has_32bit_shinfo(d)    ((d)->arch.has_32bit_shinfo)
#define is_pv_32bit_domain(d)  ((d)->arch.is_32bit_pv)
#define is_pv_32bit_vcpu(v)    (is_pv_32bit_domain((v)->domain))
#ifdef __x86_64__
#define is_pv_32on64_domain(d) (is_pv_32bit_domain(d))
#else
#define is_pv_32on64_domain(d) (0)
#endif
#define is_pv_32on64_vcpu(v)   (is_pv_32on64_domain((v)->domain))

struct trap_bounce {
    uint32_t      error_code;
    uint8_t       flags; /* TBF_ */
    uint16_t      cs;
    unsigned long eip;
};

#define MAPHASH_ENTRIES 8
#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
#define MAPHASHENT_NOTINUSE ((u16)~0U)
struct mapcache_vcpu {
    /* Shadow of mapcache_domain.epoch. */
    unsigned int shadow_epoch;

    /* Lock-free per-VCPU hash of recently-used mappings. */
    struct vcpu_maphash_entry {
        unsigned long mfn;
        uint16_t      idx;
        uint16_t      refcnt;
    } hash[MAPHASH_ENTRIES];
};
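
To illustrate how the lock-free hash is meant to be probed, a sketch under assumptions (maphash_lookup is a hypothetical helper; the real map_domain_page() logic is more involved):

/* Sketch only: probe the per-vCPU hash for an existing mapping of
 * 'mfn' and take a reference on a hit. */
static inline int maphash_lookup(struct mapcache_vcpu *mcv,
                                 unsigned long mfn, uint16_t *idx)
{
    struct vcpu_maphash_entry *e = &mcv->hash[MAPHASH_HASHFN(mfn)];

    if ( e->mfn != mfn || e->idx == MAPHASHENT_NOTINUSE )
        return 0;       /* miss: fall back to the shared mapcache_domain */
    e->refcnt++;
    *idx = e->idx;
    return 1;
}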

#define MAPCACHE_ORDER   10
#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
struct mapcache_domain {
    /* The PTEs that provide the mappings, and a cursor into the array. */
    l1_pgentry_t *l1tab;
    unsigned int cursor;

    /* Protects map_domain_page(). */
    spinlock_t lock;

    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
    unsigned int epoch;
    u32 tlbflush_timestamp;

    /* Which mappings are in use, and which are garbage to reap next epoch? */
    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
};
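
A minimal sketch of the epoch idea (mapcache_unmap is hypothetical; the real code lives in map_domain_page.c and differs in detail): unmapping zaps the PTE and parks the slot in 'garbage', and a later epoch rollover does one batched TLB flush before the slots are reused.

/* Sketch only: defer the TLB flush to the next epoch. */
static inline void mapcache_unmap(struct mapcache_domain *mcd,
                                  unsigned int idx)
{
    l1e_write(&mcd->l1tab[idx], l1e_empty());  /* zap the mapping PTE */
    set_bit(idx, mcd->garbage);   /* reap (and TLB-flush) next epoch */
}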

void mapcache_domain_init(struct domain *);
void mapcache_vcpu_init(struct vcpu *);

/* x86/64: toggle guest between kernel and user modes. */
void toggle_guest_mode(struct vcpu *);

/*
 * Initialise a hypercall-transfer page. The given pointer must be mapped
 * in Xen virtual address space (accesses are not validated or checked).
 */
void hypercall_page_initialise(struct domain *d, void *);

/************************************************/
/*            shadow paging extension           */
/************************************************/
struct shadow_domain {
    spinlock_t        lock;  /* shadow domain lock */
    int               locker; /* processor which holds the lock */
    const char       *locker_function; /* Func that took it */
    unsigned int      opt_flags;    /* runtime tunable optimizations on/off */
    struct page_list_head pinned_shadows;

    /* Memory allocation */
    struct page_list_head freelists[SHADOW_MAX_ORDER + 1];
    struct page_list_head p2m_freelist;
    unsigned int      total_pages;  /* number of pages allocated */
    unsigned int      free_pages;   /* number of pages on freelists */
    unsigned int      p2m_pages;    /* number of pages allocated to p2m */

    /* 1-to-1 map for use when HVM vcpus have paging disabled */
    pagetable_t unpaged_pagetable;

    /* Shadow hashtable */
    struct page_info **hash_table;
    int hash_walking;  /* Some function is walking the hash table */

    /* Fast MMIO path heuristic */
    int has_fast_mmio_entries;

    /* Reflects guest table dirty status: incremented by write
     * emulation and by removal of write permission. */
    atomic_t gtable_dirty_version;

    /* OOS */
    int oos_active;
};

struct shadow_vcpu {
#if CONFIG_PAGING_LEVELS >= 3
    /* PAE guests: per-vcpu shadow top-level table */
    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
    /* PAE guests: per-vcpu cache of the top-level *guest* entries */
    l3_pgentry_t gl3e[4] __attribute__((__aligned__(32)));
#endif
    /* Non-PAE guests: pointer to guest top-level pagetable */
    void *guest_vtable;
    /* Last MFN that we emulated a write to, used by the unshadow heuristics. */
    unsigned long last_emulated_mfn_for_unshadow;
    /* MFN of the last shadow that we shot a writeable mapping in */
    unsigned long last_writeable_pte_smfn;
    /* Last frame number that we emulated a write to. */
    unsigned long last_emulated_frame;
    /* Last MFN that we emulated a write to successfully. */
    unsigned long last_emulated_mfn;

    /* Shadow out-of-sync: pages that this vcpu has let go out of sync */
    mfn_t oos[SHADOW_OOS_PAGES];
    mfn_t oos_snapshot[SHADOW_OOS_PAGES];
    struct oos_fixup {
        int next;
        mfn_t smfn[SHADOW_OOS_FIXUPS];
        unsigned long off[SHADOW_OOS_FIXUPS];
    } oos_fixup[SHADOW_OOS_PAGES];
};

/************************************************/
/*            hardware assisted paging          */
/************************************************/
struct hap_domain {
    spinlock_t        lock;
    int               locker;
    const char       *locker_function;

    struct page_list_head freelist;
    unsigned int      total_pages;  /* number of pages allocated */
    unsigned int      free_pages;   /* number of pages on freelists */
    unsigned int      p2m_pages;    /* number of pages allocated to p2m */
};

/************************************************/
/*          common paging data structure        */
/************************************************/
struct log_dirty_domain {
    /* log-dirty lock */
    spinlock_t     lock;
    int            locker; /* processor that holds the lock */
    const char    *locker_function; /* func that took it */

    /* log-dirty radix tree to record dirty pages */
    mfn_t          top;
    unsigned int   allocs;
    unsigned int   failed_allocs;

    /* log-dirty mode stats */
    unsigned int   fault_count;
    unsigned int   dirty_count;

    /* functions which are paging mode specific */
    int            (*enable_log_dirty   )(struct domain *d);
    int            (*disable_log_dirty  )(struct domain *d);
    void           (*clean_dirty_bitmap )(struct domain *d);
};

struct paging_domain {
    /* flags to control paging operation */
    u32                     mode;
    /* extension for shadow paging support */
    struct shadow_domain    shadow;
    /* extension for hardware-assisted paging */
    struct hap_domain       hap;
    /* log dirty support */
    struct log_dirty_domain log_dirty;
};

struct paging_vcpu {
    /* Pointers to mode-specific entry points. */
    struct paging_mode *mode;
    /* HVM guest: last emulated write was to a pagetable */
    unsigned int last_write_was_pt:1;
    /* HVM guest: last write emulation succeeded */
    unsigned int last_write_emul_ok:1;
    /* Translated guest: virtual TLB */
    struct shadow_vtlb *vtlb;
    spinlock_t          vtlb_lock;

    /* paging support extension */
    struct shadow_vcpu shadow;
};

#define MAX_CPUID_INPUT 40
typedef xen_domctl_cpuid_t cpuid_input_t;

struct p2m_domain;

/* Definitions for guest MCA handling */
#define MAX_NR_BANKS 30

/* This entry records bank nodes for the impacted domain,
 * put on the impact_header list. */
struct bank_entry {
    struct list_head list;
    int32_t cpu;
    uint16_t bank;
    uint64_t mci_status;
    uint64_t mci_addr;
    uint64_t mci_misc;
};

struct domain_mca_msrs
{
    /* The guest should not change the values below after domain boot-up. */
    uint64_t mcg_cap;
    uint64_t mcg_ctl;
    uint64_t mcg_status;
    uint64_t mci_ctl[MAX_NR_BANKS];
    uint16_t nr_injection;
    struct list_head impact_header;
    spinlock_t lock;
};
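
How a bank_entry ends up on the impact_header list, as a sketch (mca_record_bank is a hypothetical helper, not the real vMCE code; xmalloc() and the list/lock primitives are standard Xen calls):

/* Sketch: queue an impacted bank's MCA data for later guest injection. */
static int mca_record_bank(struct domain_mca_msrs *msrs, int cpu,
                           uint16_t bank, uint64_t status,
                           uint64_t addr, uint64_t misc)
{
    struct bank_entry *e = xmalloc(struct bank_entry);

    if ( e == NULL )
        return -ENOMEM;
    e->cpu = cpu;
    e->bank = bank;
    e->mci_status = status;
    e->mci_addr = addr;
    e->mci_misc = misc;

    spin_lock(&msrs->lock);
    list_add_tail(&e->list, &msrs->impact_header);
    msrs->nr_injection++;
    spin_unlock(&msrs->lock);
    return 0;
}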

struct arch_domain
{
#ifdef CONFIG_X86_64
    struct page_info **mm_perdomain_pt_pages;
    l2_pgentry_t *mm_perdomain_l2;
    l3_pgentry_t *mm_perdomain_l3;
#else
    l1_pgentry_t *mm_perdomain_pt;
#endif

#ifdef CONFIG_X86_32
    /* map_domain_page() mapping cache. */
    struct mapcache_domain mapcache;
#endif

#ifdef CONFIG_COMPAT
    unsigned int hv_compat_vstart;
#endif

    bool_t s3_integrity;

    /* I/O-port admin-specified access capabilities. */
    struct rangeset *ioport_caps;
    uint32_t pci_cf8;

    struct list_head pdev_list;
    struct hvm_domain hvm_domain;

    struct paging_domain paging;
    struct p2m_domain *p2m;

    /* Shadow translated domain: P2M mapping */
    pagetable_t phys_table;

    /* NB. protected by d->event_lock and by irq_desc[vector].lock */
    int vector_pirq[NR_VECTORS];
    s16 *pirq_vector;

    /* Shared page for notifying that explicit PIRQ EOI is required. */
    unsigned long *pirq_eoi_map;
    unsigned long pirq_eoi_map_mfn;

    /* Pseudophysical e820 map (XENMEM_memory_map). */
    struct e820entry e820[3];
    unsigned int nr_e820;

    /* Maximum physical-address bitwidth supported by this guest. */
    unsigned int physaddr_bitsize;

    /* Is a 32-bit PV (non-HVM) guest? */
    bool_t is_32bit_pv;
    /* Is shared-info page in 32-bit format? */
    bool_t has_32bit_shinfo;
    /* Domain cannot handle spurious page faults? */
    bool_t suppress_spurious_page_faults;

    /* Continuable domain_relinquish_resources(). */
    enum {
        RELMEM_not_started,
        RELMEM_xen,
        RELMEM_l4,
        RELMEM_l3,
        RELMEM_l2,
        RELMEM_done,
    } relmem;
    struct page_list_head relmem_list;

    cpuid_input_t cpuids[MAX_CPUID_INPUT];

    /* For guest vMCA handling */
    struct domain_mca_msrs vmca_msrs;
} __cacheline_aligned;

#define has_arch_pdevs(d)    (!list_empty(&(d)->arch.pdev_list))

#ifdef CONFIG_X86_64
#define perdomain_pt_pgidx(v) \
      ((v)->vcpu_id >> (PAGETABLE_ORDER - GDT_LDT_VCPU_SHIFT))
#define perdomain_ptes(d, v) \
    ((l1_pgentry_t *)page_to_virt((d)->arch.mm_perdomain_pt_pages \
      [perdomain_pt_pgidx(v)]) + (((v)->vcpu_id << GDT_LDT_VCPU_SHIFT) & \
      (L1_PAGETABLE_ENTRIES - 1)))
#define perdomain_pt_page(d, n) ((d)->arch.mm_perdomain_pt_pages[n])
#else
#define perdomain_ptes(d, v) \
    ((d)->arch.mm_perdomain_pt + ((v)->vcpu_id << GDT_LDT_VCPU_SHIFT))
#define perdomain_pt_page(d, n) \
    (virt_to_page((d)->arch.mm_perdomain_pt) + (n))
#endif
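
A worked example of the 64-bit indexing, assuming PAGETABLE_ORDER = 9 and GDT_LDT_VCPU_SHIFT = 5 (the values used on x86-64 at the time; treat them as assumptions here), so each L1 page covers 512 PTEs, i.e. 16 vCPUs' worth of 32-entry GDT/LDT slots:

/* Worked example (assumed constants as noted above), for vcpu_id = 21:
 *   perdomain_pt_pgidx = 21 >> (9 - 5)   = 1    -> second L1 page
 *   PTE offset         = (21 << 5) & 511 = 160  -> entries 160..191
 * So vCPU 21's 32 per-domain PTEs live in slots 160-191 of page 1,
 * which is why mm_perdomain_pt_pages[] no longer needs to point at one
 * physically contiguous chunk. */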

#ifdef __i386__
struct pae_l3_cache {
    /*
     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
     * an L3 table while we are currently running on it (without using
     * expensive atomic 64-bit operations).
     */
    l3_pgentry_t  table[2][4] __attribute__((__aligned__(32)));
    unsigned long high_mfn;  /* The >=4GB MFN being shadowed. */
    unsigned int  inuse_idx; /* Which of the two cache slots is in use? */
    spinlock_t    lock;
};
#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)
#else /* !defined(__i386__) */
struct pae_l3_cache { };
#define pae_l3_cache_init(c) ((void)0)
#endif
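
To make the two-slot comment concrete, a sketch (pae_l3_cache_fill is hypothetical; the real logic sits in the x86 mm code): the copy always targets the slot we are not currently running on, so the switch is a plain index flip rather than an atomic 64-bit update of live entries.

/* Sketch only: refresh the low-memory shadow of a >=4GB guest L3. */
static l3_pgentry_t *pae_l3_cache_fill(struct pae_l3_cache *cache,
                                       const l3_pgentry_t *guest_l3,
                                       unsigned long mfn)
{
    unsigned int next;

    spin_lock(&cache->lock);
    next = cache->inuse_idx ^ 1;          /* the slot we are NOT running on */
    memcpy(cache->table[next], guest_l3, sizeof(cache->table[next]));
    cache->high_mfn  = mfn;
    cache->inuse_idx = next;              /* switch slots with a plain store */
    spin_unlock(&cache->lock);

    return cache->table[next];
}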

struct arch_vcpu
{
    /* Needs 16-byte alignment for FXSAVE/FXRSTOR. */
    struct vcpu_guest_context guest_context
    __attribute__((__aligned__(16)));

    struct pae_l3_cache pae_l3_cache;

    unsigned long      flags; /* TF_ */

    void (*schedule_tail) (struct vcpu *);

    void (*ctxt_switch_from) (struct vcpu *);
    void (*ctxt_switch_to) (struct vcpu *);

    /* Record information required to continue execution after migration */
    void *continue_info;

    /* Bounce information for propagating an exception to guest OS. */
    struct trap_bounce trap_bounce;

    /* I/O-port access bitmap. */
    XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
    int iobmp_limit;  /* Number of ports represented in the bitmap.  */
    int iopl;         /* Current IOPL for this VCPU. */

#ifdef CONFIG_X86_32
    struct desc_struct int80_desc;
#endif
#ifdef CONFIG_X86_64
    struct trap_bounce int80_bounce;
    unsigned long      syscall32_callback_eip;
    unsigned long      sysenter_callback_eip;
    unsigned short     syscall32_callback_cs;
    unsigned short     sysenter_callback_cs;
    bool_t             syscall32_disables_events;
    bool_t             sysenter_disables_events;
#endif

    /* Virtual Machine Extensions */
    struct hvm_vcpu hvm_vcpu;

    /*
     * Every domain has an L1 pagetable of its own. Per-domain mappings
     * are put in this table (e.g. the current GDT is mapped here).
     */
    l1_pgentry_t *perdomain_ptes;

#ifdef CONFIG_X86_64
    pagetable_t guest_table_user;       /* (MFN) x86/64 user-space pagetable */
#endif
    pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
    /* guest_table holds a ref to the page, and also a type-count unless
     * shadow refcounts are in use */
    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
    pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
    unsigned long cr3;                  /* (MA) value to install in HW CR3 */

    /* Current LDT details. */
    unsigned long shadow_ldt_mapcnt;
    spinlock_t shadow_ldt_lock;

    struct paging_vcpu paging;

    /* Guest-specified relocation of vcpu_info. */
    unsigned long vcpu_info_mfn;

#ifdef CONFIG_X86_32
    /* map_domain_page() mapping cache. */
    struct mapcache_vcpu mapcache;
#endif

} __cacheline_aligned;

/* Shorthands to improve code legibility. */
#define hvm_vmx         hvm_vcpu.u.vmx
#define hvm_svm         hvm_vcpu.u.svm

/* Continue the current hypercall via func(data) on specified cpu. */
int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
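
Usage sketch for the continuation helper (finish_on_cpu0 and the surrounding handler are hypothetical, not existing Xen code):

/* Hypothetical continuation: work that must run on CPU 0. */
static long finish_on_cpu0(void *data)
{
    unsigned long *arg = data;   /* whatever state the caller handed over */
    return (long)*arg;
}

/* ... inside some hypercall handler, instead of blocking:
 *     return continue_hypercall_on_cpu(0, finish_on_cpu0, &state);
 * The current hypercall returns, and func(data) completes it on CPU 0. */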

void vcpu_show_execution_state(struct vcpu *);
void vcpu_show_registers(const struct vcpu *);

/* Clean up CR4 bits that are not under guest control. */
unsigned long pv_guest_cr4_fixup(unsigned long guest_cr4);

/* Convert between guest-visible and real CR4 values. */
#define pv_guest_cr4_to_real_cr4(c) \
    (((c) | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))) & ~X86_CR4_DE)
#define real_cr4_to_pv_guest_cr4(c) \
    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE))
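
A sketch of how the three CR4 helpers compose when a PV guest writes CR4 (new_cr4, the value the guest attempted to load, is assumed; the surrounding handler is not quoted):

/* Sketch: handling a PV guest's attempt to load 'new_cr4'. */
unsigned long guest_cr4   = pv_guest_cr4_fixup(new_cr4);         /* sanitise */
unsigned long real_cr4    = pv_guest_cr4_to_real_cr4(guest_cr4); /* to load  */
unsigned long visible_cr4 = real_cr4_to_pv_guest_cr4(real_cr4);  /* readback */
/* visible_cr4 hides the PGE/PSE bits Xen forced on in real_cr4. */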

void domain_cpuid(struct domain *d,
                  unsigned int  input,
                  unsigned int  sub_input,
                  unsigned int *eax,
                  unsigned int *ebx,
                  unsigned int *ecx,
                  unsigned int *edx);

int construct_dom0(
    struct domain *d,
    unsigned long image_base,
    unsigned long image_start, unsigned long image_len,
    unsigned long initrd_start, unsigned long initrd_len,
    char *cmdline);

#endif /* __ASM_DOMAIN_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */