ia64/xen-unstable

view stubdom/grub/kexec.c @ 18026:f454f2cac170

x86 hvm: New boot option 'softtsc' to cause RDTSC to be trapped-and-emulated.

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jul 10 15:45:18 2008 +0100 (2008-07-10)
parents c8d9ade45781
children
line source
1 /*
2 * This supports booting another PV kernel from Mini-OS
3 *
4 * The idea is to set it up using libxc, answering day0 memory allocation
5 * requests, and to use a trampoline boot page to switch to the new page table.
6 *
7 * The procedure of the boot page is:
8 * - map itself at the target position (that may overwrite some C stuff, but we
9 * do not care any more)
10 * - jump there
11 * - switch to the target page table
12 * - unpin the old page table
13 * - jump to the new kernel
14 *
15 * Samuel Thibault <Samuel.Thibault@eu.citrix.com>, May 2008
16 */
17 #include <stdio.h>
18 #include <unistd.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
22 #include <xenctrl.h>
23 #include <xc_dom.h>
25 #include <kernel.h>
26 #include <console.h>
27 #include <os.h>
28 #include <blkfront.h>
29 #include <netfront.h>
30 #include <fbfront.h>
31 #include <shared.h>
33 #include "mini-os.h"
35 #if 0 /* change to 1 to route DEBUG() to printk() */
36 #define DEBUG(fmt, ...) printk(fmt, ## __VA_ARGS__)
37 #else
38 #define DEBUG(fmt, ...) (void)0 /* debugging disabled: DEBUG() expands to a no-op */
39 #endif
41 /* Assembly boot page from boot.S */
42 extern void _boot_page; /* only its address is used: kexec() takes virt_to_mfn(&_boot_page) */
43 extern pgentry_t _boot_page_entry; /* set by kexec() to PFN_PHYS(boot page MFN) | L1_PROT */
44 extern unsigned long _boot_pdmfn; /* set by kexec() to the p2m_host MFN of the first PFN of the new page tables */
45 extern unsigned long _boot_stack, _boot_target, _boot_start_info, _boot_start; /* trampoline parameters, all filled in by kexec() before it calls _boot() */
46 extern xen_pfn_t _boot_oldpdmfn; /* set by kexec() to virt_to_mfn(start_info.pt_base) */
47 extern void _boot(void); /* trampoline entry point; the last thing kexec() calls */
49 static unsigned long *pages; /* pages[i]: value returned by alloc_page() for day0 PFN i */
50 static unsigned long *pages_mfns; /* pages_mfns[i]: MFN backing pages[i] */
51 static unsigned long allocated; /* number of valid entries in pages[] and pages_mfns[] */
53 int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
54 domid_t dom); /* no definition in the visible source; kexec() uses it to pin the new page table base */
56 /* We need mfn to appear as target_pfn, so exchange with the MFN there */
57 static void do_exchange(struct xc_dom_image *dom, xen_pfn_t target_pfn, xen_pfn_t source_mfn)
58 {
59 xen_pfn_t source_pfn;
60 xen_pfn_t target_mfn;
62 for (source_pfn = 0; source_pfn < start_info.nr_pages; source_pfn++)
63 if (dom->p2m_host[source_pfn] == source_mfn)
64 break;
65 ASSERT(source_pfn < start_info.nr_pages);
67 target_mfn = dom->p2m_host[target_pfn];
69 /* Put target MFN at source PFN */
70 dom->p2m_host[source_pfn] = target_mfn;
72 /* Put source MFN at target PFN */
73 dom->p2m_host[target_pfn] = source_mfn;
74 }
76 int kexec_allocate(struct xc_dom_image *dom, xen_vaddr_t up_to)
77 {
78 unsigned long new_allocated = (up_to - dom->parms.virt_base) / PAGE_SIZE;
79 unsigned long i;
81 pages = realloc(pages, new_allocated * sizeof(*pages));
82 pages_mfns = realloc(pages_mfns, new_allocated * sizeof(*pages_mfns));
83 for (i = allocated; i < new_allocated; i++) {
84 /* Exchange old page of PFN i with a newly allocated page. */
85 xen_pfn_t old_mfn = dom->p2m_host[i];
86 xen_pfn_t new_pfn;
87 xen_pfn_t new_mfn;
89 pages[i] = alloc_page();
90 memset((void*) pages[i], 0, PAGE_SIZE);
91 new_pfn = PHYS_PFN(to_phys(pages[i]));
92 pages_mfns[i] = new_mfn = pfn_to_mfn(new_pfn);
94 /* Put old page at new PFN */
95 dom->p2m_host[new_pfn] = old_mfn;
97 /* Put new page at PFN i */
98 dom->p2m_host[i] = new_mfn;
99 }
101 allocated = new_allocated;
103 return 0;
104 }
106 void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline)
107 {
108 struct xc_dom_image *dom;
109 int rc;
110 domid_t domid = DOMID_SELF;
111 xen_pfn_t pfn;
112 int xc_handle;
113 unsigned long i;
114 void *seg;
115 xen_pfn_t boot_page_mfn = virt_to_mfn(&_boot_page);
116 char features[] = "";
117 struct mmu_update *m2p_updates;
118 unsigned long nr_m2p_updates;
120 DEBUG("booting with cmdline %s\n", cmdline);
121 xc_handle = xc_interface_open();
123 dom = xc_dom_allocate(cmdline, features);
124 dom->allocate = kexec_allocate;
126 dom->kernel_blob = kernel;
127 dom->kernel_size = kernel_size;
129 dom->ramdisk_blob = module;
130 dom->ramdisk_size = module_size;
132 dom->flags = 0;
133 dom->console_evtchn = start_info.console.domU.evtchn;
134 dom->xenstore_evtchn = start_info.store_evtchn;
136 if ( (rc = xc_dom_boot_xen_init(dom, xc_handle, domid)) != 0 ) {
137 grub_printf("xc_dom_boot_xen_init returned %d\n", rc);
138 errnum = ERR_BOOT_FAILURE;
139 goto out;
140 }
141 if ( (rc = xc_dom_parse_image(dom)) != 0 ) {
142 grub_printf("xc_dom_parse_image returned %d\n", rc);
143 errnum = ERR_BOOT_FAILURE;
144 goto out;
145 }
147 #ifdef __i386__
148 if (strcmp(dom->guest_type, "xen-3.0-x86_32p")) {
149 grub_printf("can only boot x86 32 PAE kernels, not %s\n", dom->guest_type);
150 errnum = ERR_EXEC_FORMAT;
151 goto out;
152 }
153 #endif
154 #ifdef __x86_64__
155 if (strcmp(dom->guest_type, "xen-3.0-x86_64")) {
156 grub_printf("can only boot x86 64 kernels, not %s\n", dom->guest_type);
157 errnum = ERR_EXEC_FORMAT;
158 goto out;
159 }
160 #endif
162 /* equivalent of xc_dom_mem_init */
163 dom->arch_hooks = xc_dom_find_arch_hooks(dom->guest_type);
164 dom->total_pages = start_info.nr_pages;
166 /* equivalent of arch_setup_meminit */
168 /* setup initial p2m */
169 dom->p2m_host = malloc(sizeof(*dom->p2m_host) * dom->total_pages);
171 /* Start with our current P2M */
172 for (i = 0; i < dom->total_pages; i++)
173 dom->p2m_host[i] = pfn_to_mfn(i);
175 if ( (rc = xc_dom_build_image(dom)) != 0 ) {
176 grub_printf("xc_dom_build_image returned %d\n", rc);
177 errnum = ERR_BOOT_FAILURE;
178 goto out;
179 }
181 /* copy hypercall page */
182 /* TODO: domctl instead, but requires privileges */
183 if (dom->parms.virt_hypercall != -1) {
184 pfn = PHYS_PFN(dom->parms.virt_hypercall - dom->parms.virt_base);
185 memcpy((void *) pages[pfn], hypercall_page, PAGE_SIZE);
186 }
188 /* Equivalent of xc_dom_boot_image */
189 dom->shared_info_mfn = PHYS_PFN(start_info.shared_info);
191 if (!xc_dom_compat_check(dom)) {
192 grub_printf("xc_dom_compat_check failed\n");
193 errnum = ERR_EXEC_FORMAT;
194 goto out;
195 }
197 /* Move current console, xenstore and boot MFNs to the allocated place */
198 do_exchange(dom, dom->console_pfn, start_info.console.domU.mfn);
199 do_exchange(dom, dom->xenstore_pfn, start_info.store_mfn);
200 DEBUG("virt base at %llx\n", dom->parms.virt_base);
201 DEBUG("bootstack_pfn %lx\n", dom->bootstack_pfn);
202 _boot_target = dom->parms.virt_base + PFN_PHYS(dom->bootstack_pfn);
203 DEBUG("_boot_target %lx\n", _boot_target);
204 do_exchange(dom, PHYS_PFN(_boot_target - dom->parms.virt_base),
205 virt_to_mfn(&_boot_page));
207 /* Make sure the bootstrap page table does not RW-map any of our current
208 * page table frames */
209 kexec_allocate(dom, dom->virt_pgtab_end);
211 if ( (rc = xc_dom_update_guest_p2m(dom))) {
212 grub_printf("xc_dom_update_guest_p2m returned %d\n", rc);
213 errnum = ERR_BOOT_FAILURE;
214 goto out;
215 }
217 if ( dom->arch_hooks->setup_pgtables )
218 if ( (rc = dom->arch_hooks->setup_pgtables(dom))) {
219 grub_printf("setup_pgtables returned %d\n", rc);
220 errnum = ERR_BOOT_FAILURE;
221 goto out;
222 }
224 /* start info page */
225 #undef start_info
226 if ( dom->arch_hooks->start_info )
227 dom->arch_hooks->start_info(dom);
228 #define start_info (start_info_union.start_info)
230 xc_dom_log_memory_footprint(dom);
232 /* Unmap libxc's projection of the boot page table */
233 seg = xc_dom_seg_to_ptr(dom, &dom->pgtables_seg);
234 munmap(seg, dom->pgtables_seg.vend - dom->pgtables_seg.vstart);
236 /* Unmap day0 pages to avoid having a r/w mapping of the future page table */
237 for (pfn = 0; pfn < allocated; pfn++)
238 munmap((void*) pages[pfn], PAGE_SIZE);
240 /* Pin the boot page table base */
241 if ( (rc = pin_table(dom->guest_xc,
242 #ifdef __i386__
243 MMUEXT_PIN_L3_TABLE,
244 #endif
245 #ifdef __x86_64__
246 MMUEXT_PIN_L4_TABLE,
247 #endif
248 xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
249 dom->guest_domid)) != 0 ) {
250 grub_printf("pin_table(%lx) returned %d\n", xc_dom_p2m_host(dom,
251 dom->pgtables_seg.pfn), rc);
252 errnum = ERR_BOOT_FAILURE;
253 goto out_remap;
254 }
256 /* We populate the Mini-OS page table here so that boot.S can just call
257 * update_va_mapping to project itself there. */
258 need_pgt(_boot_target);
259 DEBUG("day0 pages %lx\n", allocated);
260 DEBUG("boot target page %lx\n", _boot_target);
261 DEBUG("boot page %p\n", &_boot_page);
262 DEBUG("boot page mfn %lx\n", boot_page_mfn);
263 _boot_page_entry = PFN_PHYS(boot_page_mfn) | L1_PROT;
264 DEBUG("boot page entry %llx\n", _boot_page_entry);
265 _boot_oldpdmfn = virt_to_mfn(start_info.pt_base);
266 DEBUG("boot old pd mfn %lx\n", _boot_oldpdmfn);
267 DEBUG("boot pd virt %lx\n", dom->pgtables_seg.vstart);
268 _boot_pdmfn = dom->p2m_host[PHYS_PFN(dom->pgtables_seg.vstart - dom->parms.virt_base)];
269 DEBUG("boot pd mfn %lx\n", _boot_pdmfn);
270 _boot_stack = _boot_target + PAGE_SIZE;
271 DEBUG("boot stack %lx\n", _boot_stack);
272 _boot_start_info = dom->parms.virt_base + PFN_PHYS(dom->start_info_pfn);
273 DEBUG("boot start info %lx\n", _boot_start_info);
274 _boot_start = dom->parms.virt_entry;
275 DEBUG("boot start %lx\n", _boot_start);
277 /* Keep only useful entries */
278 for (nr_m2p_updates = pfn = 0; pfn < start_info.nr_pages; pfn++)
279 if (dom->p2m_host[pfn] != pfn_to_mfn(pfn))
280 nr_m2p_updates++;
282 m2p_updates = malloc(sizeof(*m2p_updates) * nr_m2p_updates);
283 for (i = pfn = 0; pfn < start_info.nr_pages; pfn++)
284 if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) {
285 m2p_updates[i].ptr = PFN_PHYS(dom->p2m_host[pfn]) | MMU_MACHPHYS_UPDATE;
286 m2p_updates[i].val = pfn;
287 i++;
288 }
290 for (i = 0; i < blk_nb; i++)
291 shutdown_blkfront(blk_dev[i]);
292 if (net_dev)
293 shutdown_netfront(net_dev);
294 if (kbd_dev)
295 shutdown_kbdfront(kbd_dev);
296 stop_kernel();
298 /* Update M2P */
299 if ((rc = HYPERVISOR_mmu_update(m2p_updates, nr_m2p_updates, NULL, DOMID_SELF)) < 0) {
300 xprintk("Could not update M2P\n");
301 ASSERT(0);
302 }
304 xprintk("go!\n");
306 /* Jump to trampoline boot page */
307 _boot();
309 ASSERT(0);
311 out_remap:
312 for (pfn = 0; pfn < allocated; pfn++)
313 do_map_frames(pages[pfn], &pages_mfns[pfn], 1, 0, 0, DOMID_SELF, 0, L1_PROT);
314 out:
315 xc_dom_release(dom);
316 for (pfn = 0; pfn < allocated; pfn++)
317 free_page((void*)pages[pfn]);
318 free(pages);
319 free(pages_mfns);
320 pages = NULL;
321 pages_mfns = NULL;
322 allocated = 0;
323 xc_interface_close(xc_handle );
324 }