ia64/xen-unstable: tools/libxc/xc_dom_x86.c @ 19639:205b1badbcfd

Add support for superpages (hugepages) in PV domain

This patch adds the option "superpages" to the domain configuration
file. If it is set, the domain is populated using 2M pages.
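
As an illustration only (the file name and every setting other than
"superpages" below are hypothetical placeholders, not part of this
patch), the option is used from an xm-style config file like this:

    # /etc/xen/pv-example.cfg -- hypothetical PV guest config
    name       = "pv-example"
    kernel     = "/boot/vmlinuz-xen"
    memory     = 1024
    disk       = [ "phy:/dev/vg0/pv-example,xvda,w" ]
    superpages = 1    # populate the domain with 2M pages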

This code does not support fallback to small pages. If the domain
cannot be created with 2M pages, domain creation will fail.

The patch also includes support for saving and restoring domains with
the superpage flag set. However, if a domain has freed small pages
within its physical page array and then extended the array, the
restore will fill in those freed pages. It will then attempt to
allocate more than its memory limit and will fail. This is
significant because Linux apparently does this during boot, so a
freshly booted Linux image cannot be saved and restored successfully.

Signed-off-by: Dave McCracken <dcm@mccr.org>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 09:58:38 2009 +0100 (2009-05-26)
line source
/*
 * Xen domain builder -- i386 and x86_64 bits.
 *
 * Most architecture-specific code for x86 goes here.
 *   - prepare page tables.
 *   - fill architecture-specific structs.
 *
 * This code is licensed under the GPL.
 * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>

#include <xen/xen.h>
#include <xen/foreign/x86_32.h>
#include <xen/foreign/x86_64.h>
#include <xen/hvm/hvm_info_table.h>
#include <xen/io/protocols.h>

#include "xg_private.h"
#include "xc_dom.h"
#include "xenctrl.h"

/* ------------------------------------------------------------------------ */
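
/*
 * One PV superpage is 2 MB: SUPERPAGE_NR_PFNS (1 << 9 = 512) ordinary
 * 4 kB frames.  bits_to_mask()/round_down()/round_up() build and apply
 * a mask covering the low 'bits' bits of an address.
 */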
#define SUPERPAGE_PFN_SHIFT  9
#define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)

#define bits_to_mask(bits)       (((xen_vaddr_t)1 << (bits))-1)
#define round_down(addr, mask)   ((addr) & ~(mask))
#define round_up(addr, mask)     ((addr) | (mask))
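
/*
 * nr_page_tables() returns how many page-table pages one level needs to
 * cover [start, end], where 'bits' is the log2 of the virtual address
 * range covered by a single table at that level (0 means the level is
 * unused).  Example: with bits = 22, bits_to_mask(22) == 0x3fffff,
 * round_down(0x00401000, 0x3fffff) == 0x00400000 and
 * round_up(0x00401000, 0x3fffff) == 0x007fffff, i.e. the enclosing
 * 4 MB-aligned region.
 */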
static unsigned long
nr_page_tables(xen_vaddr_t start, xen_vaddr_t end, unsigned long bits)
{
    xen_vaddr_t mask = bits_to_mask(bits);
    int tables;

    if ( bits == 0 )
        return 0;  /* unused */

    if ( bits == (8 * sizeof(unsigned long)) )
    {
        /* must be pgd, need one */
        start = 0;
        end = -1;
        tables = 1;
    }
    else
    {
        start = round_down(start, mask);
        end = round_up(end, mask);
        tables = ((end - start) >> bits) + 1;
    }

    xc_dom_printf("%s: 0x%016" PRIx64 "/%ld: 0x%016" PRIx64
                  " -> 0x%016" PRIx64 ", %d table(s)\n",
                  __FUNCTION__, mask, bits, start, end, tables);
    return tables;
}
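
/*
 * count_pgtables() iterates until it reaches a fixed point: the page
 * tables themselves occupy virtual address space, so adding them can in
 * turn require more page tables.  The loop stops once the candidate
 * allocation fits below the 4 MB-aligned end address it was sized for.
 */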
static int count_pgtables(struct xc_dom_image *dom, int pae,
                          int l4_bits, int l3_bits, int l2_bits, int l1_bits)
{
    int pages, extra_pages;
    xen_vaddr_t try_virt_end;

    extra_pages = dom->alloc_bootstack ? 1 : 0;
    extra_pages += dom->extra_pages;
    extra_pages += 128; /* 512kB padding */
    pages = extra_pages;
    for ( ; ; )
    {
        try_virt_end = round_up(dom->virt_alloc_end + pages * PAGE_SIZE_X86,
                                bits_to_mask(22)); /* 4MB alignment */
        dom->pg_l4 =
            nr_page_tables(dom->parms.virt_base, try_virt_end, l4_bits);
        dom->pg_l3 =
            nr_page_tables(dom->parms.virt_base, try_virt_end, l3_bits);
        dom->pg_l2 =
            nr_page_tables(dom->parms.virt_base, try_virt_end, l2_bits);
        dom->pg_l1 =
            nr_page_tables(dom->parms.virt_base, try_virt_end, l1_bits);
        if (pae && try_virt_end < 0xc0000000)
        {
            xc_dom_printf("%s: PAE: extra l2 page table for l3#3\n",
                          __FUNCTION__);
            dom->pg_l2++;
        }
        dom->pgtables = dom->pg_l4 + dom->pg_l3 + dom->pg_l2 + dom->pg_l1;
        pages = dom->pgtables + extra_pages;
        if ( dom->virt_alloc_end + pages * PAGE_SIZE_X86 <= try_virt_end + 1 )
            break;
    }
    dom->virt_pgtab_end = try_virt_end + 1;
    return 0;
}

/* ------------------------------------------------------------------------ */
/* i386 pagetables */

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT)

static int count_pgtables_x86_32(struct xc_dom_image *dom)
{
    return count_pgtables(dom, 0, 0, 0, 32, L2_PAGETABLE_SHIFT_I386);
}

static int count_pgtables_x86_32_pae(struct xc_dom_image *dom)
{
    return count_pgtables(dom, 1, 0, 32,
                          L3_PAGETABLE_SHIFT_PAE, L2_PAGETABLE_SHIFT_PAE);
}

#define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86)
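
/*
 * Walk every virtual page from virt_base to virt_pgtab_end, hooking in a
 * fresh L1 table (and its L2 entry) whenever the previous one is full,
 * and mapping the page-table pages themselves read-only as PV guests
 * require.
 */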
static int setup_pgtables_x86_32(struct xc_dom_image *dom)
{
    xen_pfn_t l2pfn = dom->pgtables_seg.pfn;
    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l2;
    l2_pgentry_32_t *l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
    l1_pgentry_32_t *l1tab = NULL;
    unsigned long l2off, l1off;
    xen_vaddr_t addr;
    xen_pfn_t pgpfn;

    for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
          addr += PAGE_SIZE_X86 )
    {
        if ( l1tab == NULL )
        {
            /* get L1 tab, make L2 entry */
            l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
            l2off = l2_table_offset_i386(addr);
            l2tab[l2off] =
                pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
            l1pfn++;
        }

        /* make L1 entry */
        l1off = l1_table_offset_i386(addr);
        pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
        l1tab[l1off] =
            pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
        if ( (addr >= dom->pgtables_seg.vstart) &&
             (addr < dom->pgtables_seg.vend) )
            l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */
        if ( l1off == (L1_PAGETABLE_ENTRIES_I386 - 1) )
            l1tab = NULL;
    }
    return 0;
}

/*
 * Move the l3 page table page below 4G for guests which do not
 * support the extended-cr3 format. The l3 is currently empty so we
 * do not need to preserve the current contents.
 */
static xen_pfn_t move_l3_below_4G(struct xc_dom_image *dom,
                                  xen_pfn_t l3pfn,
                                  xen_pfn_t l3mfn)
{
    xen_pfn_t new_l3mfn;
    struct xc_mmu *mmu;
    void *l3tab;
    int xc = dom->guest_xc;

    mmu = xc_alloc_mmu_updates(xc, dom->guest_domid);
    if ( mmu == NULL )
    {
        xc_dom_printf("%s: failed at %d\n", __FUNCTION__, __LINE__);
        return l3mfn;
    }

    xc_dom_unmap_one(dom, l3pfn);

    new_l3mfn = xc_make_page_below_4G(dom->guest_xc, dom->guest_domid, l3mfn);
    if ( !new_l3mfn )
        goto out;

    dom->p2m_host[l3pfn] = new_l3mfn;
    if ( xc_dom_update_guest_p2m(dom) != 0 )
        goto out;

    if ( xc_add_mmu_update(xc, mmu,
                           (((unsigned long long)new_l3mfn)
                            << XC_DOM_PAGE_SHIFT(dom)) |
                           MMU_MACHPHYS_UPDATE, l3pfn) )
        goto out;

    if ( xc_flush_mmu_updates(xc, mmu) )
        goto out;

    /*
     * This ensures that the entire pgtables_seg is mapped by a single
     * mmap region. arch_setup_bootlate() relies on this to be able to
     * unmap and pin the pagetables.
     */
    if ( xc_dom_seg_to_ptr(dom, &dom->pgtables_seg) == NULL )
        goto out;

    l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
    memset(l3tab, 0, XC_DOM_PAGE_SIZE(dom));

    xc_dom_printf("%s: successfully relocated L3 below 4G. "
                  "(L3 PFN %#"PRIpfn" MFN %#"PRIpfn"=>%#"PRIpfn")\n",
                  __FUNCTION__, l3pfn, l3mfn, new_l3mfn);

    l3mfn = new_l3mfn;

 out:
    free(mmu);

    return l3mfn;
}
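
/*
 * PAE variant of the walk above, with a third level.  The 4-entry L3
 * must itself sit below 4 GB unless the guest advertises extended-cr3
 * support (parms.pae >= 2), hence move_l3_below_4G() for plain PAE
 * guests.  An extra (otherwise empty) L2 is installed in l3[3] when the
 * guest's own mappings stop short of the top gigabyte, matching the
 * extra page reserved in count_pgtables().
 */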
static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
{
    xen_pfn_t l3pfn = dom->pgtables_seg.pfn;
    xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l3;
    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l3 + dom->pg_l2;
    l3_pgentry_64_t *l3tab;
    l2_pgentry_64_t *l2tab = NULL;
    l1_pgentry_64_t *l1tab = NULL;
    unsigned long l3off, l2off, l1off;
    xen_vaddr_t addr;
    xen_pfn_t pgpfn;
    xen_pfn_t l3mfn = xc_dom_p2m_guest(dom, l3pfn);

    if ( dom->parms.pae == 1 )
    {
        if ( l3mfn >= 0x100000 )
            l3mfn = move_l3_below_4G(dom, l3pfn, l3mfn);

        if ( l3mfn >= 0x100000 )
        {
            xc_dom_panic(XC_INTERNAL_ERROR,"%s: cannot move L3 below 4G. "
                         "extended-cr3 not supported by guest. "
                         "(L3 PFN %#"PRIpfn" MFN %#"PRIpfn")\n",
                         __FUNCTION__, l3pfn, l3mfn);
            return -EINVAL;
        }
    }

    l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);

    for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
          addr += PAGE_SIZE_X86 )
    {
        if ( l2tab == NULL )
        {
            /* get L2 tab, make L3 entry */
            l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
            l3off = l3_table_offset_pae(addr);
            l3tab[l3off] =
                pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
            l2pfn++;
        }

        if ( l1tab == NULL )
        {
            /* get L1 tab, make L2 entry */
            l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
            l2off = l2_table_offset_pae(addr);
            l2tab[l2off] =
                pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
            if ( l2off == (L2_PAGETABLE_ENTRIES_PAE - 1) )
                l2tab = NULL;
            l1pfn++;
        }

        /* make L1 entry */
        l1off = l1_table_offset_pae(addr);
        pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
        l1tab[l1off] =
            pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
        if ( (addr >= dom->pgtables_seg.vstart) &&
             (addr < dom->pgtables_seg.vend) )
            l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */
        if ( l1off == (L1_PAGETABLE_ENTRIES_PAE - 1) )
            l1tab = NULL;
    }

    if ( dom->virt_pgtab_end <= 0xc0000000 )
    {
        xc_dom_printf("%s: PAE: extra l2 page table for l3#3\n", __FUNCTION__);
        l3tab[3] = pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
    }
    return 0;
}

#undef L1_PROT
#undef L2_PROT
#undef L3_PROT

/* ------------------------------------------------------------------------ */
/* x86_64 pagetables */

static int count_pgtables_x86_64(struct xc_dom_image *dom)
{
    return count_pgtables(dom, 0,
                          L4_PAGETABLE_SHIFT_X86_64 + 9,
                          L4_PAGETABLE_SHIFT_X86_64,
                          L3_PAGETABLE_SHIFT_X86_64,
                          L2_PAGETABLE_SHIFT_X86_64);
}

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
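
/*
 * Same walk again for the four-level x86_64 layout: a new L3, L2 or L1
 * table is hooked in whenever the previous one at that level is full.
 */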
static int setup_pgtables_x86_64(struct xc_dom_image *dom)
{
    xen_pfn_t l4pfn = dom->pgtables_seg.pfn;
    xen_pfn_t l3pfn = dom->pgtables_seg.pfn + dom->pg_l4;
    xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3;
    xen_pfn_t l1pfn =
        dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3 + dom->pg_l2;
    l4_pgentry_64_t *l4tab = xc_dom_pfn_to_ptr(dom, l4pfn, 1);
    l3_pgentry_64_t *l3tab = NULL;
    l2_pgentry_64_t *l2tab = NULL;
    l1_pgentry_64_t *l1tab = NULL;
    uint64_t l4off, l3off, l2off, l1off;
    uint64_t addr;
    xen_pfn_t pgpfn;

    for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
          addr += PAGE_SIZE_X86 )
    {
        if ( l3tab == NULL )
        {
            /* get L3 tab, make L4 entry */
            l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
            l4off = l4_table_offset_x86_64(addr);
            l4tab[l4off] =
                pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT;
            l3pfn++;
        }

        if ( l2tab == NULL )
        {
            /* get L2 tab, make L3 entry */
            l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
            l3off = l3_table_offset_x86_64(addr);
            l3tab[l3off] =
                pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
            if ( l3off == (L3_PAGETABLE_ENTRIES_X86_64 - 1) )
                l3tab = NULL;
            l2pfn++;
        }

        if ( l1tab == NULL )
        {
            /* get L1 tab, make L2 entry */
            l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
            l2off = l2_table_offset_x86_64(addr);
            l2tab[l2off] =
                pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
            if ( l2off == (L2_PAGETABLE_ENTRIES_X86_64 - 1) )
                l2tab = NULL;
            l1pfn++;
        }

        /* make L1 entry */
        l1off = l1_table_offset_x86_64(addr);
        pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
        l1tab[l1off] =
            pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
        if ( (addr >= dom->pgtables_seg.vstart) &&
             (addr < dom->pgtables_seg.vend) )
            l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */
        if ( l1off == (L1_PAGETABLE_ENTRIES_X86_64 - 1) )
            l1tab = NULL;
    }
    return 0;
}

#undef L1_PROT
#undef L2_PROT
#undef L3_PROT
#undef L4_PROT

/* ------------------------------------------------------------------------ */
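
/*
 * Reserve the "magic" pages every PV guest needs: the phys-to-machine
 * table segment, the start_info page, the xenstore and console ring
 * pages, a shared-info page for auto-translated guests, and flag that a
 * boot-stack page is wanted (alloc_bootstack).
 */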
static int alloc_magic_pages(struct xc_dom_image *dom)
{
    size_t p2m_size = dom->total_pages * dom->arch_hooks->sizeof_pfn;

    /* allocate phys2mach table */
    if ( xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach", 0, p2m_size) )
        return -1;
    dom->p2m_guest = xc_dom_seg_to_ptr(dom, &dom->p2m_seg);

    /* allocate special pages */
    dom->start_info_pfn = xc_dom_alloc_page(dom, "start info");
    dom->xenstore_pfn = xc_dom_alloc_page(dom, "xenstore");
    dom->console_pfn = xc_dom_alloc_page(dom, "console");
    if ( xc_dom_feature_translated(dom) )
        dom->shared_info_pfn = xc_dom_alloc_page(dom, "shared info");
    dom->alloc_bootstack = 1;

    return 0;
}

/* ------------------------------------------------------------------------ */

static int start_info_x86_32(struct xc_dom_image *dom)
{
    start_info_x86_32_t *start_info =
        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
    xen_pfn_t shinfo =
        xc_dom_feature_translated(dom) ?
        dom->shared_info_pfn : dom->shared_info_mfn;

    xc_dom_printf("%s: called\n", __FUNCTION__);

    memset(start_info, 0, sizeof(*start_info));
    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
    start_info->nr_pages = dom->total_pages;
    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
    start_info->pt_base = dom->pgtables_seg.vstart;
    start_info->nr_pt_frames = dom->pgtables;
    start_info->mfn_list = dom->p2m_seg.vstart;

    start_info->flags = dom->flags;
    start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
    start_info->store_evtchn = dom->xenstore_evtchn;
    start_info->console.domU.mfn = xc_dom_p2m_guest(dom, dom->console_pfn);
    start_info->console.domU.evtchn = dom->console_evtchn;

    if ( dom->ramdisk_blob )
    {
        start_info->mod_start = dom->ramdisk_seg.vstart;
        start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
    }

    if ( dom->cmdline )
    {
        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
    }

    return 0;
}

static int start_info_x86_64(struct xc_dom_image *dom)
{
    start_info_x86_64_t *start_info =
        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
    xen_pfn_t shinfo =
        xc_dom_feature_translated(dom) ?
        dom->shared_info_pfn : dom->shared_info_mfn;

    xc_dom_printf("%s: called\n", __FUNCTION__);

    memset(start_info, 0, sizeof(*start_info));
    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
    start_info->nr_pages = dom->total_pages;
    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
    start_info->pt_base = dom->pgtables_seg.vstart;
    start_info->nr_pt_frames = dom->pgtables;
    start_info->mfn_list = dom->p2m_seg.vstart;

    start_info->flags = dom->flags;
    start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
    start_info->store_evtchn = dom->xenstore_evtchn;
    start_info->console.domU.mfn = xc_dom_p2m_guest(dom, dom->console_pfn);
    start_info->console.domU.evtchn = dom->console_evtchn;

    if ( dom->ramdisk_blob )
    {
        start_info->mod_start = dom->ramdisk_seg.vstart;
        start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
    }

    if ( dom->cmdline )
    {
        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
    }

    return 0;
}

static int shared_info_x86_32(struct xc_dom_image *dom, void *ptr)
{
    shared_info_x86_32_t *shared_info = ptr;
    int i;

    xc_dom_printf("%s: called\n", __FUNCTION__);

    memset(shared_info, 0, sizeof(*shared_info));
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    return 0;
}

static int shared_info_x86_64(struct xc_dom_image *dom, void *ptr)
{
    shared_info_x86_64_t *shared_info = ptr;
    int i;

    xc_dom_printf("%s: called\n", __FUNCTION__);

    memset(shared_info, 0, sizeof(*shared_info));
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    return 0;
}

/* ------------------------------------------------------------------------ */

static int vcpu_x86_32(struct xc_dom_image *dom, void *ptr)
{
    vcpu_guest_context_x86_32_t *ctxt = ptr;
    xen_pfn_t cr3_pfn;

    xc_dom_printf("%s: called\n", __FUNCTION__);

    /* clear everything */
    memset(ctxt, 0, sizeof(*ctxt));

    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_32;
    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_32;
    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_32;
    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_32;
    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_32;
    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_32;
    ctxt->user_regs.eip = dom->parms.virt_entry;
    ctxt->user_regs.esp =
        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
    ctxt->user_regs.esi =
        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    ctxt->kernel_ss = ctxt->user_regs.ss;
    ctxt->kernel_sp = ctxt->user_regs.esp;

    ctxt->flags = VGCF_in_kernel_X86_32 | VGCF_online_X86_32;
    if ( dom->parms.pae == 2 /* extended_cr3 */ ||
         dom->parms.pae == 3 /* bimodal */ )
        ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);

    cr3_pfn = xc_dom_p2m_guest(dom, dom->pgtables_seg.pfn);
    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_32(cr3_pfn);
    xc_dom_printf("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "\n",
                  __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);

    return 0;
}

static int vcpu_x86_64(struct xc_dom_image *dom, void *ptr)
{
    vcpu_guest_context_x86_64_t *ctxt = ptr;
    xen_pfn_t cr3_pfn;

    xc_dom_printf("%s: called\n", __FUNCTION__);

    /* clear everything */
    memset(ctxt, 0, sizeof(*ctxt));

    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_64;
    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_64;
    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_64;
    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_64;
    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_64;
    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_64;
    ctxt->user_regs.rip = dom->parms.virt_entry;
    ctxt->user_regs.rsp =
        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
    ctxt->user_regs.rsi =
        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
    ctxt->user_regs.rflags = 1 << 9; /* Interrupt Enable */

    ctxt->kernel_ss = ctxt->user_regs.ss;
    ctxt->kernel_sp = ctxt->user_regs.esp;

    ctxt->flags = VGCF_in_kernel_X86_64 | VGCF_online_X86_64;
    cr3_pfn = xc_dom_p2m_guest(dom, dom->pgtables_seg.pfn);
    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_64(cr3_pfn);
    xc_dom_printf("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "\n",
                  __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);

    return 0;
}

/* ------------------------------------------------------------------------ */

static struct xc_dom_arch xc_dom_32 = {
    .guest_type = "xen-3.0-x86_32",
    .native_protocol = XEN_IO_PROTO_ABI_X86_32,
    .page_shift = PAGE_SHIFT_X86,
    .sizeof_pfn = 4,
    .alloc_magic_pages = alloc_magic_pages,
    .count_pgtables = count_pgtables_x86_32,
    .setup_pgtables = setup_pgtables_x86_32,
    .start_info = start_info_x86_32,
    .shared_info = shared_info_x86_32,
    .vcpu = vcpu_x86_32,
};
static struct xc_dom_arch xc_dom_32_pae = {
    .guest_type = "xen-3.0-x86_32p",
    .native_protocol = XEN_IO_PROTO_ABI_X86_32,
    .page_shift = PAGE_SHIFT_X86,
    .sizeof_pfn = 4,
    .alloc_magic_pages = alloc_magic_pages,
    .count_pgtables = count_pgtables_x86_32_pae,
    .setup_pgtables = setup_pgtables_x86_32_pae,
    .start_info = start_info_x86_32,
    .shared_info = shared_info_x86_32,
    .vcpu = vcpu_x86_32,
};

static struct xc_dom_arch xc_dom_64 = {
    .guest_type = "xen-3.0-x86_64",
    .native_protocol = XEN_IO_PROTO_ABI_X86_64,
    .page_shift = PAGE_SHIFT_X86,
    .sizeof_pfn = 8,
    .alloc_magic_pages = alloc_magic_pages,
    .count_pgtables = count_pgtables_x86_64,
    .setup_pgtables = setup_pgtables_x86_64,
    .start_info = start_info_x86_64,
    .shared_info = shared_info_x86_64,
    .vcpu = vcpu_x86_64,
};

static void __init register_arch_hooks(void)
{
    xc_dom_register_arch_hooks(&xc_dom_32);
    xc_dom_register_arch_hooks(&xc_dom_32_pae);
    xc_dom_register_arch_hooks(&xc_dom_64);
}
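
/*
 * Tell the hypervisor how wide the guest is: XEN_DOMCTL_set_address_size
 * is issued for 32-bit PAE ("xen-3.0-x86_32p") and 64-bit
 * ("xen-3.0-x86_64") guests; for any other guest type the call is
 * skipped.
 */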
static int x86_compat(int xc, domid_t domid, char *guest_type)
{
    static const struct {
        char *guest;
        uint32_t size;
    } types[] = {
        { "xen-3.0-x86_32p", 32 },
        { "xen-3.0-x86_64",  64 },
    };
    DECLARE_DOMCTL;
    int i, rc;

    memset(&domctl, 0, sizeof(domctl));
    domctl.domain = domid;
    domctl.cmd = XEN_DOMCTL_set_address_size;
    for ( i = 0; i < sizeof(types)/sizeof(types[0]); i++ )
        if ( !strcmp(types[i].guest, guest_type) )
            domctl.u.address_size.size = types[i].size;
    if ( domctl.u.address_size.size == 0 )
        /* nothing to do */
        return 0;

    xc_dom_printf("%s: guest %s, address size %" PRId32 "\n", __FUNCTION__,
                  guest_type, domctl.u.address_size.size);
    rc = do_domctl(xc, &domctl);
    if ( rc != 0 )
        xc_dom_printf("%s: warning: failed (rc=%d)\n",
                      __FUNCTION__, rc);
    return rc;
}
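
/*
 * Auto-translated PV guests run on shadow page tables, so enable shadow
 * mode with refcounting and p2m translation before any memory is
 * populated.
 */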
static int x86_shadow(int xc, domid_t domid)
{
    int rc, mode;

    xc_dom_printf("%s: called\n", __FUNCTION__);

    mode = XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT |
        XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE;

    rc = xc_shadow_control(xc, domid,
                           XEN_DOMCTL_SHADOW_OP_ENABLE,
                           NULL, 0, NULL, mode, NULL);
    if ( rc != 0 )
    {
        xc_dom_panic(XC_INTERNAL_ERROR,
                     "%s: SHADOW_OP_ENABLE (mode=0x%x) failed (rc=%d)\n",
                     __FUNCTION__, mode, rc);
        return rc;
    }
    xc_dom_printf("%s: shadow enabled (mode=0x%x)\n", __FUNCTION__, mode);
    return rc;
}

int arch_setup_meminit(struct xc_dom_image *dom, int superpages)
{
    int rc;
    xen_pfn_t pfn, allocsz, i;

    rc = x86_compat(dom->guest_xc, dom->guest_domid, dom->guest_type);
    if ( rc )
        return rc;
    if ( xc_dom_feature_translated(dom) )
    {
        dom->shadow_enabled = 1;
        rc = x86_shadow(dom->guest_xc, dom->guest_domid);
        if ( rc )
            return rc;
    }

    dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * dom->total_pages);
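
    /*
     * Superpage path (new in this changeset): request total_pages >> 9
     * extents of order SUPERPAGE_PFN_SHIFT.  Each extents[] entry goes in
     * holding the first guest pfn of a 2M region and comes back holding
     * the base mfn of the superpage actually allocated; the p2m is then
     * filled with the 512 consecutive mfns.  There is no fallback: if the
     * hypervisor cannot supply contiguous 2M extents, creation fails.
     */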
    if (superpages)
    {
        int count = dom->total_pages >> SUPERPAGE_PFN_SHIFT;
        xen_pfn_t extents[count];

        xc_dom_printf("Populating memory with %d superpages\n", count);
        for (pfn = 0; pfn < count; pfn++)
            extents[pfn] = pfn << SUPERPAGE_PFN_SHIFT;
        rc = xc_domain_memory_populate_physmap(dom->guest_xc, dom->guest_domid,
                                               count, SUPERPAGE_PFN_SHIFT, 0,
                                               extents);
        if (!rc)
        {
            int i, j;
            xen_pfn_t mfn;

            /* Expand the returned mfn into the p2m array */
            pfn = 0;
            for (i = 0; i < count; i++)
            {
                mfn = extents[i];
                for (j = 0; j < SUPERPAGE_NR_PFNS; j++, pfn++)
                {
                    dom->p2m_host[pfn] = mfn + j;
                }
            }
        }
    } else
    {
        /* setup initial p2m */
        for ( pfn = 0; pfn < dom->total_pages; pfn++ )
            dom->p2m_host[pfn] = pfn;

        /* allocate guest memory */
        for ( i = rc = allocsz = 0; (i < dom->total_pages) && !rc; i += allocsz )
        {
            allocsz = dom->total_pages - i;
            if ( allocsz > 1024*1024 )
                allocsz = 1024*1024;
            rc = xc_domain_memory_populate_physmap(
                dom->guest_xc, dom->guest_domid, allocsz, 0, 0, &dom->p2m_host[i]);
        }
    }

    return rc;
}

int arch_setup_bootearly(struct xc_dom_image *dom)
{
    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
    return 0;
}
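
/*
 * Late boot setup.  For plain PV guests the top-level page table is
 * unmapped and pinned (L2/L3/L4 depending on the guest type).  For
 * auto-translated guests the shared-info frame and the grant-table
 * frames are instead added to the guest physmap.  Finally the
 * shared-info page is mapped and initialised via the arch hook.
 */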
int arch_setup_bootlate(struct xc_dom_image *dom)
{
    static const struct {
        char *guest;
        unsigned long pgd_type;
    } types[] = {
        { "xen-3.0-x86_32",  MMUEXT_PIN_L2_TABLE},
        { "xen-3.0-x86_32p", MMUEXT_PIN_L3_TABLE},
        { "xen-3.0-x86_64",  MMUEXT_PIN_L4_TABLE},
    };
    unsigned long pgd_type = 0;
    shared_info_t *shared_info;
    xen_pfn_t shinfo;
    int i, rc;

    for ( i = 0; i < sizeof(types) / sizeof(types[0]); i++ )
        if ( !strcmp(types[i].guest, dom->guest_type) )
            pgd_type = types[i].pgd_type;

    if ( !xc_dom_feature_translated(dom) )
    {
        /* paravirtualized guest */
        xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
        rc = pin_table(dom->guest_xc, pgd_type,
                       xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
                       dom->guest_domid);
        if ( rc != 0 )
        {
            xc_dom_panic(XC_INTERNAL_ERROR,
                         "%s: pin_table failed (pfn 0x%" PRIpfn ", rc=%d)\n",
                         __FUNCTION__, dom->pgtables_seg.pfn, rc);
            return rc;
        }
        shinfo = dom->shared_info_mfn;
    }
    else
    {
        /* paravirtualized guest with auto-translation */
        struct xen_add_to_physmap xatp;
        int i;

        /* Map shared info frame into guest physmap. */
        xatp.domid = dom->guest_domid;
        xatp.space = XENMAPSPACE_shared_info;
        xatp.idx = 0;
        xatp.gpfn = dom->shared_info_pfn;
        rc = xc_memory_op(dom->guest_xc, XENMEM_add_to_physmap, &xatp);
        if ( rc != 0 )
        {
            xc_dom_panic(XC_INTERNAL_ERROR, "%s: mapping shared_info failed "
                         "(pfn=0x%" PRIpfn ", rc=%d)\n",
                         __FUNCTION__, xatp.gpfn, rc);
            return rc;
        }

        /* Map grant table frames into guest physmap. */
        for ( i = 0; ; i++ )
        {
            xatp.domid = dom->guest_domid;
            xatp.space = XENMAPSPACE_grant_table;
            xatp.idx = i;
            xatp.gpfn = dom->total_pages + i;
            rc = xc_memory_op(dom->guest_xc, XENMEM_add_to_physmap, &xatp);
            if ( rc != 0 )
            {
                if ( (i > 0) && (errno == EINVAL) )
                {
                    xc_dom_printf("%s: %d grant tables mapped\n", __FUNCTION__,
                                  i);
                    break;
                }
                xc_dom_panic(XC_INTERNAL_ERROR,
                             "%s: mapping grant tables failed " "(pfn=0x%"
                             PRIpfn ", rc=%d)\n", __FUNCTION__, xatp.gpfn, rc);
                return rc;
            }
        }
        shinfo = dom->shared_info_pfn;
    }

    /* setup shared_info page */
    xc_dom_printf("%s: shared_info: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "\n",
                  __FUNCTION__, dom->shared_info_pfn, dom->shared_info_mfn);
    shared_info = xc_map_foreign_range(dom->guest_xc, dom->guest_domid,
                                       PAGE_SIZE_X86,
                                       PROT_READ | PROT_WRITE,
                                       shinfo);
    if ( shared_info == NULL )
        return -1;
    dom->arch_hooks->shared_info(dom, shared_info);
    munmap(shared_info, PAGE_SIZE_X86);

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */