ia64/xen-unstable

view tools/firmware/hvmloader/hvmloader.c @ 18394:dade7f0bdc8d

hvm: Use main memory for video memory.

When creating an HVM domain, if e.g. another domain is created before
qemu allocates video memory, the extra 8MB memory ballooning is not
available any more, because it got consumed by the other domain.

This fixes it by taking video memory from the main memory:

- make hvmloader use e820_malloc to reserve some of the main memory
and notify ioemu of its address through the Xen platform PCI card.
- add XENMAPSPACE_mfn to the xen_add_to_physmap memory op, to allow
ioemu to move the MFNs between the original position and the PCI
mapping, when LFB acceleration is disabled/enabled
- add a remove_from_physmap memory op, to allow ioemu to unmap it
completely for the case of old guests with acceleration disabled.
- add xc_domain_memory_translate_gpfn_list to libxc to allow ioemu to
get the MFNs of the video memory.
- have xend save the PCI memory space instead of ioemu: if a memory
page is there, the guest can access it like ordinary memory, so xend
can safely take responsibility for saving it. An extra benefit is that
live migration will apply the logdirty optimization there too.
- handle old saved images, populating the video memory from ioemu if
really needed.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Aug 27 14:53:39 2008 +0100 (2008-08-27)
parents 2ec019301ad8
children 1ac3e2a44dc9
line source
1 /*
2 * hvmloader.c: HVM bootloader.
3 *
4 * Leendert van Doorn, leendert@watson.ibm.com
5 * Copyright (c) 2005, International Business Machines Corporation.
6 *
7 * Copyright (c) 2006, Keir Fraser, XenSource Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
20 * Place - Suite 330, Boston, MA 02111-1307 USA.
21 */
23 #include "roms.h"
24 #include "acpi/acpi2_0.h"
25 #include "hypercall.h"
26 #include "util.h"
27 #include "config.h"
28 #include "apic_regs.h"
29 #include "pci_regs.h"
30 #include "e820.h"
31 #include "option_rom.h"
32 #include <xen/version.h>
33 #include <xen/hvm/params.h>
/*
 * Entry point and real-mode hand-off stub.
 *
 * _start disables interrupts, points %esp at a 16kB (.skip 0x4000) stack in
 * .bss and calls main(). Once main() returns, the code between
 * trampoline_start and trampoline_end is copied to address 0x0, a GDT whose
 * code and data segments have base 0 and limit 0xffff is loaded, every
 * general-purpose register is zeroed, and control far-jumps to the copied
 * trampoline. The trampoline writes the zeroed %eax into %cr0, reloads the
 * segment registers and the IDT, and far-jumps to F000:FFF0 (presumably the
 * ROMBIOS entry point: main() prints "Invoking ROMBIOS ..." just before it
 * returns here).
 */
asm (
    " .text \n"
    " .globl _start \n"
    "_start: \n"
    /* C runtime kickoff. */
    " cld \n"
    " cli \n"
    " movl $stack_top,%esp \n"
    " movl %esp,%ebp \n"
    " call main \n"
    /* Relocate real-mode trampoline to 0x0. */
    /* %esi = source, %edi = 0 (destination), %ecx = trampoline length. */
    " mov $trampoline_start,%esi \n"
    " xor %edi,%edi \n"
    " mov $trampoline_end,%ecx \n"
    " sub %esi,%ecx \n"
    " rep movsb \n"
    /* Load real-mode compatible segment state (base 0x0000, limit 0xffff). */
    /* Selector 0x10 is the third GDT entry below (the data descriptor). */
    " lgdt gdt_desr \n"
    " mov $0x0010,%ax \n"
    " mov %ax,%ds \n"
    " mov %ax,%es \n"
    " mov %ax,%fs \n"
    " mov %ax,%gs \n"
    " mov %ax,%ss \n"
    /* Initialise all 32-bit GPRs to zero. */
    " xor %eax,%eax \n"
    " xor %ebx,%ebx \n"
    " xor %ecx,%ecx \n"
    " xor %edx,%edx \n"
    " xor %esp,%esp \n"
    " xor %ebp,%ebp \n"
    " xor %esi,%esi \n"
    " xor %edi,%edi \n"
    /* Enter real mode, reload all segment registers and IDT. */
    /* Selector 0x8 is the code descriptor; offset 0x0 is the relocated copy. */
    " ljmp $0x8,$0x0 \n"
    "trampoline_start: .code16 \n"
    /* %eax is still zero here, so this clears every bit of %cr0. */
    " mov %eax,%cr0 \n"
    /* Labels are taken relative to trampoline_start: the code runs at 0x0. */
    " ljmp $0,$1f-trampoline_start\n"
    "1: mov %ax,%ds \n"
    " mov %ax,%es \n"
    " mov %ax,%fs \n"
    " mov %ax,%gs \n"
    " mov %ax,%ss \n"
    " lidt 1f-trampoline_start \n"
    " ljmp $0xf000,$0xfff0 \n"
    /* IDT descriptor consumed by the lidt above: limit 0x3ff, base 0. */
    "1: .word 0x3ff,0,0 \n"
    "trampoline_end: .code32 \n"
    " \n"
    /* GDT pseudo-descriptor: 16-bit limit followed by the 32-bit base. */
    "gdt_desr: \n"
    " .word gdt_end - gdt - 1 \n"
    " .long gdt \n"
    " \n"
    " .align 8 \n"
    "gdt: \n"
    " .quad 0x0000000000000000 \n"
    " .quad 0x00009a000000ffff \n" /* Ring 0 code, base 0 limit 0xffff */
    " .quad 0x000092000000ffff \n" /* Ring 0 data, base 0 limit 0xffff */
    "gdt_end: \n"
    " \n"
    /* Stack used by the C code: 0x4000 bytes, growing down from stack_top. */
    " .bss \n"
    " .align 8 \n"
    "stack: \n"
    " .skip 0x4000 \n"
    "stack_top: \n"
    " .text \n"
    );
/*
 * Which emulated VGA adaptor was found on the PCI bus. Set by pci_setup()
 * from the vendor/device ID of a class-0x0300 device and read by main() to
 * choose the VGABIOS image to load; stays VGA_none if no known adaptor is seen.
 */
static enum { VGA_none, VGA_std, VGA_cirrus } virtual_vga = VGA_none;
104 static void
105 init_hypercalls(void)
106 {
107 uint32_t eax, ebx, ecx, edx;
108 unsigned long i;
109 char signature[13];
110 xen_extraversion_t extraversion;
112 cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
114 *(uint32_t *)(signature + 0) = ebx;
115 *(uint32_t *)(signature + 4) = ecx;
116 *(uint32_t *)(signature + 8) = edx;
117 signature[12] = '\0';
119 BUG_ON(strcmp("XenVMMXenVMM", signature) || (eax < 0x40000002));
121 /* Fill in hypercall transfer pages. */
122 cpuid(0x40000002, &eax, &ebx, &ecx, &edx);
123 for ( i = 0; i < eax; i++ )
124 wrmsr(ebx, HYPERCALL_PHYSICAL_ADDRESS + (i << 12) + i);
126 /* Print version information. */
127 cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
128 hypercall_xen_version(XENVER_extraversion, extraversion);
129 printf("Detected Xen v%u.%u%s\n", eax >> 16, eax & 0xffff, extraversion);
130 }
132 static void apic_setup(void)
133 {
134 /* Set the IOAPIC ID to tha static value used in the MP/ACPI tables. */
135 ioapic_write(0x00, IOAPIC_ID);
137 /* Set up Virtual Wire mode. */
138 lapic_write(APIC_SPIV, APIC_SPIV_APIC_ENABLED | 0xFF);
139 lapic_write(APIC_LVT0, APIC_MODE_EXTINT << 8);
140 lapic_write(APIC_LVT1, APIC_MODE_NMI << 8);
141 }
143 static void pci_setup(void)
144 {
145 uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd;
146 uint16_t class, vendor_id, device_id;
147 unsigned int bar, pin, link, isa_irq;
149 /* Resources assignable to PCI devices via BARs. */
150 struct resource {
151 uint32_t base, max;
152 } *resource;
153 struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE };
154 struct resource io_resource = { 0xc000, 0x10000 };
156 /* Create a list of device BARs in descending order of size. */
157 struct bars {
158 uint32_t devfn, bar_reg, bar_sz;
159 } *bars = (struct bars *)SCRATCH_PHYSICAL_ADDRESS;
160 unsigned int i, nr_bars = 0;
162 /* Program PCI-ISA bridge with appropriate link routes. */
163 isa_irq = 0;
164 for ( link = 0; link < 4; link++ )
165 {
166 do { isa_irq = (isa_irq + 1) & 15;
167 } while ( !(PCI_ISA_IRQ_MASK & (1U << isa_irq)) );
168 pci_writeb(PCI_ISA_DEVFN, 0x60 + link, isa_irq);
169 printf("PCI-ISA link %u routed to IRQ%u\n", link, isa_irq);
170 }
172 /* Program ELCR to match PCI-wired IRQs. */
173 outb(0x4d0, (uint8_t)(PCI_ISA_IRQ_MASK >> 0));
174 outb(0x4d1, (uint8_t)(PCI_ISA_IRQ_MASK >> 8));
176 /* Scan the PCI bus and map resources. */
177 for ( devfn = 0; devfn < 128; devfn++ )
178 {
179 class = pci_readw(devfn, PCI_CLASS_DEVICE);
180 vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
181 device_id = pci_readw(devfn, PCI_DEVICE_ID);
182 if ( (vendor_id == 0xffff) && (device_id == 0xffff) )
183 continue;
185 ASSERT((devfn != PCI_ISA_DEVFN) ||
186 ((vendor_id == 0x8086) && (device_id == 0x7000)));
188 switch ( class )
189 {
190 case 0x0300:
191 if ( (vendor_id == 0x1234) && (device_id == 0x1111) )
192 virtual_vga = VGA_std;
193 if ( (vendor_id == 0x1013) && (device_id == 0xb8) )
194 virtual_vga = VGA_cirrus;
195 break;
196 case 0x0680:
197 /* PIIX4 ACPI PM. Special device with special PCI config space. */
198 ASSERT((vendor_id == 0x8086) && (device_id == 0x7113));
199 pci_writew(devfn, 0x20, 0x0000); /* No smb bus IO enable */
200 pci_writew(devfn, 0x22, 0x0000);
201 pci_writew(devfn, 0x3c, 0x0009); /* Hardcoded IRQ9 */
202 pci_writew(devfn, 0x3d, 0x0001);
203 break;
204 case 0x0101:
205 if ( vendor_id == 0x8086 )
206 {
207 /* Intel ICHs since PIIX3: enable IDE legacy mode. */
208 pci_writew(devfn, 0x40, 0x8000); /* enable IDE0 */
209 pci_writew(devfn, 0x42, 0x8000); /* enable IDE1 */
210 }
211 break;
212 }
214 /* Map the I/O memory and port resources. */
215 for ( bar = 0; bar < 7; bar++ )
216 {
217 bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
218 if ( bar == 6 )
219 bar_reg = PCI_ROM_ADDRESS;
221 bar_data = pci_readl(devfn, bar_reg);
222 pci_writel(devfn, bar_reg, ~0);
223 bar_sz = pci_readl(devfn, bar_reg);
224 pci_writel(devfn, bar_reg, bar_data);
225 if ( bar_sz == 0 )
226 continue;
228 bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
229 PCI_BASE_ADDRESS_SPACE_MEMORY) ?
230 PCI_BASE_ADDRESS_MEM_MASK :
231 (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
232 bar_sz &= ~(bar_sz - 1);
234 for ( i = 0; i < nr_bars; i++ )
235 if ( bars[i].bar_sz < bar_sz )
236 break;
238 if ( i != nr_bars )
239 memmove(&bars[i+1], &bars[i], (nr_bars-i) * sizeof(*bars));
241 bars[i].devfn = devfn;
242 bars[i].bar_reg = bar_reg;
243 bars[i].bar_sz = bar_sz;
245 nr_bars++;
246 }
248 /* Map the interrupt. */
249 pin = pci_readb(devfn, PCI_INTERRUPT_PIN);
250 if ( pin != 0 )
251 {
252 /* This is the barber's pole mapping used by Xen. */
253 link = ((pin - 1) + (devfn >> 3)) & 3;
254 isa_irq = pci_readb(PCI_ISA_DEVFN, 0x60 + link);
255 pci_writeb(devfn, PCI_INTERRUPT_LINE, isa_irq);
256 printf("pci dev %02x:%x INT%c->IRQ%u\n",
257 devfn>>3, devfn&7, 'A'+pin-1, isa_irq);
258 }
259 }
261 /* Assign iomem and ioport resources in descending order of size. */
262 for ( i = 0; i < nr_bars; i++ )
263 {
264 devfn = bars[i].devfn;
265 bar_reg = bars[i].bar_reg;
266 bar_sz = bars[i].bar_sz;
268 bar_data = pci_readl(devfn, bar_reg);
270 if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
271 PCI_BASE_ADDRESS_SPACE_MEMORY )
272 {
273 resource = &mem_resource;
274 bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
275 }
276 else
277 {
278 resource = &io_resource;
279 bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
280 }
282 base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
283 bar_data |= base;
284 base += bar_sz;
286 if ( (base < resource->base) || (base > resource->max) )
287 {
288 printf("pci dev %02x:%x bar %02x size %08x: no space for "
289 "resource!\n", devfn>>3, devfn&7, bar_reg, bar_sz);
290 continue;
291 }
293 resource->base = base;
295 pci_writel(devfn, bar_reg, bar_data);
296 printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
297 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
299 /* Now enable the memory or I/O mapping. */
300 cmd = pci_readw(devfn, PCI_COMMAND);
301 if ( (bar_reg == PCI_ROM_ADDRESS) ||
302 ((bar_data & PCI_BASE_ADDRESS_SPACE) ==
303 PCI_BASE_ADDRESS_SPACE_MEMORY) )
304 cmd |= PCI_COMMAND_MEMORY;
305 else
306 cmd |= PCI_COMMAND_IO;
307 pci_writew(devfn, PCI_COMMAND, cmd);
308 }
309 }
/* Return 1 if the byte read from I/O port 0x404 equals 1, otherwise 0. */
static int must_load_extboot(void)
{
    if ( inb(0x404) == 1 )
        return 1;

    return 0;
}
316 /*
317 * Scan the PCI bus for the first NIC supported by etherboot, and copy
318 * the corresponding rom data to *copy_rom_dest. Returns the length of the
319 * selected rom, or 0 if no NIC found.
320 */
321 static int scan_etherboot_nic(void *copy_rom_dest)
322 {
323 struct option_rom_header *rom;
324 struct option_rom_pnp_header *pnph;
325 struct option_rom_pci_header *pcih;
326 uint32_t devfn;
327 uint16_t class, vendor_id, device_id;
328 uint8_t csum;
329 int i;
331 for ( devfn = 0; devfn < 128; devfn++ )
332 {
333 class = pci_readw(devfn, PCI_CLASS_DEVICE);
334 vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
335 device_id = pci_readw(devfn, PCI_DEVICE_ID);
337 if ( (vendor_id == 0xffff) && (device_id == 0xffff) )
338 continue;
340 /* We're only interested in NICs. */
341 if ( class != 0x0200 )
342 continue;
344 rom = (struct option_rom_header *)etherboot;
345 for ( ; ; )
346 {
347 /* Invalid signature means we're out of option ROMs. */
348 if ( strncmp((char *)rom->signature, "\x55\xaa", 2) ||
349 (rom->rom_size == 0) )
350 break;
352 /* Invalid checksum means we're out of option ROMs. */
353 csum = 0;
354 for ( i = 0; i < (rom->rom_size * 512); i++ )
355 csum += ((uint8_t *)rom)[i];
356 if ( csum != 0 )
357 break;
359 /* Check the PCI PnP header (if any) for a match. */
360 pcih = (struct option_rom_pci_header *)
361 ((char *)rom + rom->pci_header_offset);
362 if ( (rom->pci_header_offset != 0) &&
363 !strncmp((char *)pcih->signature, "PCIR", 4) &&
364 (pcih->vendor_id == vendor_id) &&
365 (pcih->device_id == device_id) )
366 goto found;
368 rom = (struct option_rom_header *)
369 ((char *)rom + rom->rom_size * 512);
370 }
371 }
373 return 0;
375 found:
376 /* Find the PnP expansion header (if any). */
377 pnph = ((rom->expansion_header_offset != 0)
378 ? ((struct option_rom_pnp_header *)
379 ((char *)rom + rom->expansion_header_offset))
380 : ((struct option_rom_pnp_header *)NULL));
381 while ( (pnph != NULL) && strncmp((char *)pnph->signature, "$PnP", 4) )
382 pnph = ((pnph->next_header_offset != 0)
383 ? ((struct option_rom_pnp_header *)
384 ((char *)rom + pnph->next_header_offset))
385 : ((struct option_rom_pnp_header *)NULL));
387 printf("Loading PXE ROM ...\n");
388 if ( (pnph != NULL) && (pnph->manufacturer_name_offset != 0) )
389 printf(" - Manufacturer: %s\n",
390 (char *)rom + pnph->manufacturer_name_offset);
391 if ( (pnph != NULL) && (pnph->product_name_offset != 0) )
392 printf(" - Product name: %s\n",
393 (char *)rom + pnph->product_name_offset);
394 memcpy(copy_rom_dest, rom, rom->rom_size * 512);
395 return rom->rom_size * 512;
396 }
398 /* Replace possibly erroneous memory-size CMOS fields with correct values. */
399 static void cmos_write_memory_size(void)
400 {
401 struct e820entry *map = HVM_E820;
402 int i, nr = *HVM_E820_NR;
403 uint32_t base_mem = 640, ext_mem = 0, alt_mem = 0;
405 for ( i = 0; i < nr; i++ )
406 if ( (map[i].addr >= 0x100000) && (map[i].type == E820_RAM) )
407 break;
409 if ( i != nr )
410 {
411 alt_mem = ext_mem = map[i].addr + map[i].size;
412 ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
413 if ( ext_mem > 0xffff )
414 ext_mem = 0xffff;
415 alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
416 }
418 /* All BIOSes: conventional memory (CMOS *always* reports 640kB). */
419 cmos_outb(0x15, (uint8_t)(base_mem >> 0));
420 cmos_outb(0x16, (uint8_t)(base_mem >> 8));
422 /* All BIOSes: extended memory (1kB chunks above 1MB). */
423 cmos_outb(0x17, (uint8_t)( ext_mem >> 0));
424 cmos_outb(0x18, (uint8_t)( ext_mem >> 8));
425 cmos_outb(0x30, (uint8_t)( ext_mem >> 0));
426 cmos_outb(0x31, (uint8_t)( ext_mem >> 8));
428 /* Some BIOSes: alternative extended memory (64kB chunks above 16MB). */
429 cmos_outb(0x34, (uint8_t)( alt_mem >> 0));
430 cmos_outb(0x35, (uint8_t)( alt_mem >> 8));
431 }
433 static uint16_t init_xen_platform_io_base(void)
434 {
435 struct bios_info *bios_info = (struct bios_info *)ACPI_PHYSICAL_ADDRESS;
436 uint32_t devfn, bar_data;
437 uint16_t vendor_id, device_id;
439 bios_info->xen_pfiob = 0;
441 for ( devfn = 0; devfn < 128; devfn++ )
442 {
443 vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
444 device_id = pci_readw(devfn, PCI_DEVICE_ID);
445 if ( (vendor_id != 0x5853) || (device_id != 0x0001) )
446 continue;
447 bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0);
448 bios_info->xen_pfiob = bar_data & PCI_BASE_ADDRESS_IO_MASK;
449 }
451 return bios_info->xen_pfiob;
452 }
454 int main(void)
455 {
456 int vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz;
457 int extboot_sz = 0;
458 uint32_t vga_ram = 0;
459 uint16_t xen_pfiob;
461 printf("HVM Loader\n");
463 init_hypercalls();
465 printf("CPU speed is %u MHz\n", get_cpu_mhz());
467 smp_initialise();
469 perform_tests();
471 printf("Writing SMBIOS tables ...\n");
472 smbios_sz = hvm_write_smbios_tables();
474 printf("Loading ROMBIOS ...\n");
475 rombios_sz = sizeof(rombios);
476 if ( rombios_sz > 0x10000 )
477 rombios_sz = 0x10000;
478 memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, rombios_sz);
479 highbios_setup();
481 apic_setup();
482 pci_setup();
484 if ( (get_vcpu_nr() > 1) || get_apic_mode() )
485 create_mp_tables();
487 switch ( virtual_vga )
488 {
489 case VGA_cirrus:
490 printf("Loading Cirrus VGABIOS ...\n");
491 memcpy((void *)VGABIOS_PHYSICAL_ADDRESS,
492 vgabios_cirrusvga, sizeof(vgabios_cirrusvga));
493 vgabios_sz = sizeof(vgabios_cirrusvga);
494 break;
495 case VGA_std:
496 printf("Loading Standard VGABIOS ...\n");
497 memcpy((void *)VGABIOS_PHYSICAL_ADDRESS,
498 vgabios_stdvga, sizeof(vgabios_stdvga));
499 vgabios_sz = sizeof(vgabios_stdvga);
500 break;
501 default:
502 printf("No emulated VGA adaptor ...\n");
503 break;
504 }
506 if ( virtual_vga != VGA_none )
507 {
508 vga_ram = e820_malloc(8 << 20, 4096);
509 printf("VGA RAM at %08x\n", vga_ram);
510 }
512 etherboot_sz = scan_etherboot_nic((void*)ETHERBOOT_PHYSICAL_ADDRESS);
514 if ( must_load_extboot() )
515 {
516 printf("Loading EXTBOOT ...\n");
517 memcpy((void *)EXTBOOT_PHYSICAL_ADDRESS,
518 extboot, sizeof(extboot));
519 extboot_sz = sizeof(extboot);
520 }
522 if ( get_acpi_enabled() )
523 {
524 printf("Loading ACPI ...\n");
525 acpi_build_tables();
526 }
528 cmos_write_memory_size();
530 printf("BIOS map:\n");
531 if ( vgabios_sz )
532 printf(" %05x-%05x: VGA BIOS\n",
533 VGABIOS_PHYSICAL_ADDRESS,
534 VGABIOS_PHYSICAL_ADDRESS + vgabios_sz - 1);
535 if ( etherboot_sz )
536 printf(" %05x-%05x: Etherboot ROM\n",
537 ETHERBOOT_PHYSICAL_ADDRESS,
538 ETHERBOOT_PHYSICAL_ADDRESS + etherboot_sz - 1);
539 if ( extboot_sz )
540 printf(" %05x-%05x: Extboot ROM\n",
541 EXTBOOT_PHYSICAL_ADDRESS,
542 EXTBOOT_PHYSICAL_ADDRESS + extboot_sz - 1);
543 if ( smbios_sz )
544 printf(" %05x-%05x: SMBIOS tables\n",
545 SMBIOS_PHYSICAL_ADDRESS,
546 SMBIOS_PHYSICAL_ADDRESS + smbios_sz - 1);
547 if ( rombios_sz )
548 printf(" %05x-%05x: Main BIOS\n",
549 ROMBIOS_PHYSICAL_ADDRESS,
550 ROMBIOS_PHYSICAL_ADDRESS + rombios_sz - 1);
552 xen_pfiob = init_xen_platform_io_base();
553 if ( xen_pfiob && vga_ram )
554 outl(xen_pfiob + 4, vga_ram);
556 printf("Invoking ROMBIOS ...\n");
557 return 0;
558 }
560 /*
561 * Local variables:
562 * mode: C
563 * c-set-style: "BSD"
564 * c-basic-offset: 4
565 * tab-width: 4
566 * indent-tabs-mode: nil
567 * End:
568 */