ia64/xen-unstable

view xen/common/kernel.c @ 938:eaaf88bbc222

bitkeeper revision 1.597 (3fb26743975c08pGlW-UntXWf-IBnQ)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/labyrinth/br260/xeno.bk
author br260@labyrinth.cl.cam.ac.uk
date Wed Nov 12 17:00:51 2003 +0000 (2003-11-12)
parents 18d30acdfb3e 106bc1c68738
children 151801bd4e5e
line source
1 /******************************************************************************
2 * kernel.c
3 *
4 * This file should contain architecture-independent bootstrap and low-level
5 * help routines. It's a bit x86/PC specific right now!
6 *
7 * Copyright (c) 2002-2003 K A Fraser
8 */
10 #include <stdarg.h>
11 #include <xeno/lib.h>
12 #include <xeno/errno.h>
13 #include <xeno/spinlock.h>
14 #include <xeno/multiboot.h>
15 #include <xeno/sched.h>
16 #include <xeno/mm.h>
17 #include <xeno/delay.h>
18 #include <xeno/skbuff.h>
19 #include <xeno/interrupt.h>
20 #include <xeno/compile.h>
21 #include <xeno/version.h>
22 #include <xeno/netdevice.h>
23 #include <asm/io.h>
24 #include <asm/msr.h>
25 #include <asm/uaccess.h>
26 #include <hypervisor-ifs/dom0_ops.h>
27 #include <asm/byteorder.h>
28 #include <linux/if_ether.h>
29 #include <asm/domain_page.h>
30 #include <xeno/console.h>
31 #include <xeno/net_headers.h>
33 kmem_cache_t *task_struct_cachep;
35 static int xpos, ypos;
36 static volatile unsigned char *video;
38 spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
40 struct e820entry {
41 unsigned long addr_lo, addr_hi; /* start of memory segment */
42 unsigned long size_lo, size_hi; /* size of memory segment */
43 unsigned long type; /* type of memory segment */
44 };
46 void init_vga(void);
47 void init_serial(void);
48 void start_of_day(void);
50 /* opt_console: If true, Xen sends logging to the VGA console. */
51 int opt_console = 1;
52 /* opt_ser_baud: Baud rate at which logging is sent to COM1. */
53 /* NB. Default (0) means that serial I/O is disabled. */
54 unsigned int opt_ser_baud = 0;
55 /* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
56 unsigned int opt_dom0_mem = 16000;
57 /* opt_ifname: Name of physical network interface to use. */
58 unsigned char opt_ifname[10] = "eth0";
59 /* opt_noht: If true, Hyperthreading is ignored. */
60 int opt_noht=0;
61 /* opt_noacpi: If true, ACPI tables are not parsed. */
62 int opt_noacpi=0;
63 /* opt_nosmp: If true, secondary processors are ignored. */
64 int opt_nosmp=0;
65 /* opt_noreboot: If true, machine will need manual reset on error. */
66 int opt_noreboot=0;
67 /* opt_ignorebiostables: If true, ACPI and MP tables are ignored. */
68 /* NB. This flag implies 'nosmp' and 'noacpi'. */
69 int opt_ignorebiostables=0;
70 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
71 int opt_watchdog=0;
73 static struct {
74 unsigned char *name;
75 enum { OPT_IP, OPT_STR, OPT_UINT, OPT_BOOL } type;
76 void *var;
77 } opts[] = {
78 { "console", OPT_UINT, &opt_console },
79 { "ser_baud", OPT_UINT, &opt_ser_baud },
80 { "dom0_mem", OPT_UINT, &opt_dom0_mem },
81 { "ifname", OPT_STR, &opt_ifname },
82 { "noht", OPT_BOOL, &opt_noht },
83 { "noacpi", OPT_BOOL, &opt_noacpi },
84 { "nosmp", OPT_BOOL, &opt_nosmp },
85 { "noreboot", OPT_BOOL, &opt_noreboot },
86 { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables },
87 { "watchdog", OPT_BOOL, &opt_watchdog },
88 { NULL, 0, NULL }
89 };
92 void cmain (unsigned long magic, multiboot_info_t *mbi)
93 {
94 struct task_struct *new_dom;
95 dom0_createdomain_t dom0_params;
96 unsigned long max_page;
97 unsigned char *cmdline;
98 module_t *mod;
99 int i;
101 /*
102 * Note that serial output cannot be done properly until after
103 * command-line arguments have been parsed, and the required baud rate is
104 * known. Any messages before that will be output using the settings of
105 * the bootloader, for example.
106 */
108 if ( magic != MULTIBOOT_BOOTLOADER_MAGIC )
109 {
110 init_vga();
111 cls();
112 printk("Invalid magic number: 0x%x\n", (unsigned)magic);
113 for ( ; ; ) ;
114 }
116 /* Parse the command line. */
117 cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
118 if ( cmdline != NULL )
119 {
120 unsigned char *opt_end, *opt;
121 while ( *cmdline == ' ' ) cmdline++;
122 cmdline = strchr(cmdline, ' ');
123 while ( cmdline != NULL )
124 {
125 while ( *cmdline == ' ' ) cmdline++;
126 if ( *cmdline == '\0' ) break;
127 opt_end = strchr(cmdline, ' ');
128 if ( opt_end != NULL ) *opt_end++ = '\0';
129 opt = strchr(cmdline, '=');
130 if ( opt != NULL ) *opt++ = '\0';
131 for ( i = 0; opts[i].name != NULL; i++ )
132 {
133 if ( strcmp(opts[i].name, cmdline ) != 0 ) continue;
134 switch ( opts[i].type )
135 {
136 case OPT_IP:
137 if ( opt != NULL )
138 *(unsigned long *)opts[i].var = str_to_quad(opt);
139 break;
140 case OPT_STR:
141 if ( opt != NULL )
142 strcpy(opts[i].var, opt);
143 break;
144 case OPT_UINT:
145 if ( opt != NULL )
146 *(unsigned int *)opts[i].var =
147 simple_strtol(opt, (char **)&opt, 0);
148 break;
149 case OPT_BOOL:
150 *(int *)opts[i].var = 1;
151 break;
152 }
153 }
154 cmdline = opt_end;
155 }
156 }
158 init_serial();
159 init_console_ring();
160 init_vga();
161 cls();
163 printk(XEN_BANNER);
164 printk(" http://www.cl.cam.ac.uk/netos/xen\n");
165 printk(" University of Cambridge Computer Laboratory\n\n");
166 printk(" Xen version %d.%d%s (%s@%s) (%s) %s\n\n",
167 XEN_VERSION, XEN_SUBVERSION, XEN_EXTRAVERSION,
168 XEN_COMPILE_BY, XEN_COMPILE_DOMAIN,
169 XEN_COMPILER, XEN_COMPILE_DATE);
171 /* We require memory and module information. */
172 if ( (mbi->flags & 9) != 9 )
173 {
174 printk("FATAL ERROR: Bad flags passed by bootloader: 0x%x\n",
175 (unsigned)mbi->flags);
176 for ( ; ; ) ;
177 }
179 if ( mbi->mods_count == 0 )
180 {
181 printk("Require at least one Multiboot module!\n");
182 for ( ; ; ) ;
183 }
185 set_current(&idle0_task);
187 max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
188 init_frametable(max_page);
189 printk("Initialised all memory on a %luMB machine\n",
190 max_page >> (20-PAGE_SHIFT));
192 init_page_allocator(__pa(&_end), MAX_MONITOR_ADDRESS);
194 /* These things will get done by do_createdomain() for all other tasks. */
195 current->shared_info = (void *)get_free_page(GFP_KERNEL);
196 memset(current->shared_info, 0, sizeof(shared_info_t));
197 set_fs(USER_DS);
199 /* Initialise the slab allocator. */
200 kmem_cache_init();
201 kmem_cache_sizes_init(max_page);
203 task_struct_cachep = kmem_cache_create(
204 "task_struct_cache", sizeof(struct task_struct),
205 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
206 if ( task_struct_cachep == NULL )
207 panic("No slab cache for task structs.");
209 start_of_day();
211 /* Create initial domain 0. */
212 dom0_params.memory_kb = opt_dom0_mem;
213 new_dom = do_createdomain(0, 0);
214 if ( new_dom == NULL ) panic("Error creating domain 0\n");
216 /*
217 * We're going to setup domain0 using the module(s) that we stashed safely
218 * above our MAX_DIRECTMAP_ADDRESS in boot/Boot.S The second module, if
219 * present, is an initrd ramdisk
220 */
221 mod = (module_t *)__va(mbi->mods_addr);
222 if ( setup_guestos(new_dom,
223 &dom0_params, 1,
224 (char *)MAX_DIRECTMAP_ADDRESS,
225 mod[mbi->mods_count-1].mod_end - mod[0].mod_start,
226 __va(mod[0].string),
227 (mbi->mods_count == 2) ?
228 (mod[1].mod_end - mod[1].mod_start):0)
229 != 0 ) panic("Could not set up DOM0 guest OS\n");
231 wake_up(new_dom);
233 startup_cpu_idle_loop();
234 }
237 #define SERIAL_BASE 0x3f8
238 #define RX_BUF 0
239 #define TX_HOLD 0
240 #define INT_ENABLE 1
241 #define INT_IDENT 2
242 #define DATA_FORMAT 3
243 #define LINE_CTL 4
244 #define LINE_STATUS 5
245 #define LINE_IN 6
246 #define DIVISOR_LO 0
247 #define DIVISOR_HI 1
249 void init_serial(void)
250 {
251 if ( !SERIAL_ENABLED )
252 return;
254 /* 'opt_ser_baud' baud, no parity, 1 stop bit, 8 data bits. */
255 outb(0x83, SERIAL_BASE+DATA_FORMAT);
256 outb(115200/opt_ser_baud, SERIAL_BASE+DIVISOR_LO);
257 outb(0, SERIAL_BASE+DIVISOR_HI);
258 outb(0x03, SERIAL_BASE+DATA_FORMAT);
260 /* DTR and RTS should both be high, to keep other end happy. */
261 outb(0x02, SERIAL_BASE+LINE_CTL);
263 /* No interrupts. */
264 outb(0x00, SERIAL_BASE+INT_ENABLE);
265 }
#ifdef CONFIG_OUTPUT_SERIAL
/*
 * Send one character to the serial port, busy-waiting until bit 5 of the
 * line-status register is set. A newline is preceded by a carriage return.
 */
void putchar_serial(unsigned char c)
{
    if ( SERIAL_ENABLED )
    {
        if ( c == '\n' )
            putchar_serial('\r');

        while ( (inb(SERIAL_BASE+LINE_STATUS) & (1<<5)) == 0 )
            barrier();

        outb(c, SERIAL_BASE+TX_HOLD);
    }
}
#else
/* Serial output compiled out: discard the character. */
void putchar_serial(unsigned char c)
{
}
#endif
282 #ifdef CONFIG_OUTPUT_CONSOLE
284 /* VGA text (mode 3) definitions. */
285 #define COLUMNS 80
286 #define LINES 25
287 #define ATTRIBUTE 7
288 #define VIDEO __va(0xB8000)
290 int detect_video(void *video_base)
291 {
292 volatile u16 *p = (volatile u16 *)video_base;
293 u16 saved1 = p[0], saved2 = p[1];
294 int video_found = 1;
296 p[0] = 0xAA55;
297 p[1] = 0x55AA;
298 if ( (p[0] != 0xAA55) || (p[1] != 0x55AA) )
299 video_found = 0;
301 p[0] = 0x55AA;
302 p[1] = 0xAA55;
303 if ( (p[0] != 0x55AA) || (p[1] != 0xAA55) )
304 video_found = 0;
306 p[0] = saved1;
307 p[1] = saved2;
309 return video_found;
310 }
/*
 * Report whether any video adaptor appears to be present. Returns 1 as soon
 * as one probe succeeds, otherwise 0.
 */
int detect_vga(void)
{
    /*
     * Look at a number of well-known locations. Even if video is not at
     * 0xB8000 right now, it will appear there when we set up text mode 3.
     *
     * We assume if there is any sign of a video adaptor then it is at least
     * VGA-compatible (surely no one runs CGA, EGA, .... these days?).
     *
     * These checks are basically to detect headless server boxes.
     */
    if ( detect_video(__va(0xA0000)) )
        return 1;
    if ( detect_video(__va(0xB0000)) )
        return 1;
    if ( detect_video(__va(0xB8000)) )
        return 1;
    return 0;
}
328 /* This is actually code from vgaHWRestore in an old version of XFree86 :-) */
329 void init_vga(void)
330 {
331 /* The following VGA state was saved from a chip in text mode 3. */
332 static unsigned char regs[] = {
333 /* Sequencer registers */
334 0x03, 0x00, 0x03, 0x00, 0x02,
335 /* CRTC registers */
336 0x5f, 0x4f, 0x50, 0x82, 0x55, 0x81, 0xbf, 0x1f, 0x00, 0x4f, 0x20,
337 0x0e, 0x00, 0x00, 0x01, 0xe0, 0x9c, 0x8e, 0x8f, 0x28, 0x1f, 0x96,
338 0xb9, 0xa3, 0xff,
339 /* Graphic registers */
340 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x0e, 0x00, 0xff,
341 /* Attribute registers */
342 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x14, 0x07, 0x38, 0x39, 0x3a,
343 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x0c, 0x00, 0x0f, 0x08, 0x00
344 };
346 int i, j = 0;
347 volatile unsigned char tmp;
349 if ( !opt_console )
350 return;
352 if ( !detect_vga() )
353 {
354 printk("No VGA adaptor detected!\n");
355 opt_console = 0;
356 return;
357 }
359 tmp = inb(0x3da);
360 outb(0x00, 0x3c0);
362 for ( i = 0; i < 5; i++ )
363 outw((regs[j++] << 8) | i, 0x3c4);
365 /* Ensure CRTC registers 0-7 are unlocked by clearing bit 7 of CRTC[17]. */
366 outw(((regs[5+17] & 0x7F) << 8) | 17, 0x3d4);
368 for ( i = 0; i < 25; i++ )
369 outw((regs[j++] << 8) | i, 0x3d4);
371 for ( i = 0; i < 9; i++ )
372 outw((regs[j++] << 8) | i, 0x3ce);
374 for ( i = 0; i < 21; i++ )
375 {
376 tmp = inb(0x3da);
377 outb(i, 0x3c0);
378 outb(regs[j++], 0x3c0);
379 }
381 tmp = inb(0x3da);
382 outb(0x20, 0x3c0);
383 }
386 /* Clear the screen and initialize VIDEO, XPOS and YPOS. */
387 void cls(void)
388 {
389 int i;
391 if ( !opt_console )
392 return;
394 video = (unsigned char *) VIDEO;
396 for (i = 0; i < COLUMNS * LINES * 2; i++)
397 *(video + i) = 0;
399 xpos = 0;
400 ypos = 0;
402 outw(10+(1<<(5+8)), 0x3d4); /* cursor off */
403 }
406 static void put_newline(void)
407 {
408 xpos = 0;
409 ypos++;
411 if (ypos >= LINES)
412 {
413 static char zeroarr[2*COLUMNS] = { 0 };
414 ypos = LINES-1;
415 memcpy((char*)video,
416 (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS);
417 memcpy((char*)video + (LINES-1)*2*COLUMNS,
418 zeroarr, 2*COLUMNS);
419 }
420 }
423 void putchar_console(int c)
424 {
425 if ( !opt_console )
426 return;
428 if ( c == '\n' )
429 {
430 put_newline();
431 }
432 else
433 {
434 *(video + (xpos + ypos * COLUMNS) * 2) = c & 0xFF;
435 *(video + (xpos + ypos * COLUMNS) * 2 + 1) = ATTRIBUTE;
437 xpos++;
438 if (xpos >= COLUMNS)
439 put_newline();
440 }
441 }
443 #else
/* VGA console support compiled out: these entry points do nothing. */
void init_vga(void)
{
}

void cls(void)
{
}

void putchar_console(int c)
{
}
449 #endif
#ifdef CONFIG_OUTPUT_CONSOLE_RING

/*
 * Append one character to the console ring buffer. Characters are silently
 * dropped once CONSOLE_RING_SIZE characters have been stored.
 */
void putchar_console_ring(int c)
{
    if ( console_ring.len >= CONSOLE_RING_SIZE )
        return;

    console_ring.buf[console_ring.len] = (char)c;
    console_ring.len++;
}

#else

/* Console ring compiled out: discard the character. */
void putchar_console_ring(int c)
{
}

#endif
/*
 * Emit one character on every console output (serial, VGA, ring buffer).
 * Only newline and the characters 32..126 are passed through; anything else
 * is dropped.
 */
static void putchar(int c)
{
    int printable = (c == '\n') || ((c >= 32) && (c <= 126));

    if ( !printable )
        return;

    putchar_serial(c);
    putchar_console(c);
    putchar_console_ring(c);
}
/* Emit a NUL-terminated string one character at a time. Takes no lock. */
static inline void __putstr(const char *str)
{
    const char *s;

    for ( s = str; *s != '\0'; s++ )
        putchar(*s);
}
481 void printf (const char *fmt, ...)
482 {
483 va_list args;
484 char buf[128];
485 const char *p = fmt;
486 unsigned long flags;
488 /*
489 * If the format string contains '%' descriptors then we have to parse it
490 * before printing it. We parse it into a fixed-length buffer. Long
491 * strings should therefore _not_ contain '%' characters!
492 */
493 if ( strchr(fmt, '%') != NULL )
494 {
495 va_start(args, fmt);
496 (void)vsnprintf(buf, sizeof(buf), fmt, args);
497 va_end(args);
498 p = buf;
499 }
501 spin_lock_irqsave(&console_lock, flags);
502 while ( *p ) putchar(*p++);
503 spin_unlock_irqrestore(&console_lock, flags);
504 }
507 void panic(const char *fmt, ...)
508 {
509 va_list args;
510 char buf[128];
511 unsigned long flags;
512 extern void machine_restart(char *);
514 va_start(args, fmt);
515 (void)vsnprintf(buf, sizeof(buf), fmt, args);
516 va_end(args);
518 /* Spit out multiline message in one go. */
519 spin_lock_irqsave(&console_lock, flags);
520 __putstr("\n****************************************\n");
521 __putstr(buf);
522 __putstr("Aieee! CPU");
523 sprintf(buf, "%d", smp_processor_id());
524 __putstr(buf);
525 __putstr(" is toast...\n");
526 __putstr("****************************************\n\n");
527 __putstr("Reboot in five seconds...\n");
528 spin_unlock_irqrestore(&console_lock, flags);
530 mdelay(5000);
531 machine_restart(0);
532 }
535 /* No-op syscall. */
536 asmlinkage long sys_ni_syscall(void)
537 {
538 return -ENOSYS;
539 }
/*
 * Compute the one's-complement checksum of 'count' 16-bit words starting at
 * 'buf': the words are summed, carries above bit 15 are folded back into the
 * low 16 bits, and the complement of the result is returned.
 *
 * A zero or negative 'count' sums nothing and returns 0xffff; a negative
 * count must not be allowed to walk off the end of the buffer.
 */
unsigned short compute_cksum(unsigned short *buf, int count)
{
    unsigned long sum = 0;

    while ( count-- > 0 )
        sum += *buf++;

    /* Fold carries until the sum fits in 16 bits. */
    while ( sum >> 16 )
        sum = (sum & 0xffff) + (sum >> 16);

    return (unsigned short) ~sum;
}
553 /*
554 * Function written by ek247. Exports console output from all domains upwards
555 * to domain0, by stuffing it into a fake network packet.
556 */
557 int console_export(char *str, int len)
558 {
559 struct sk_buff *skb;
560 struct iphdr *iph = NULL;
561 struct udphdr *udph = NULL;
562 struct ethhdr *ethh = NULL;
563 int hdr_size = sizeof(struct iphdr) + sizeof(struct udphdr);
564 u8 *skb_data;
566 skb = dev_alloc_skb(sizeof(struct ethhdr) +
567 hdr_size + len + 20);
568 if ( skb == NULL ) return 0;
570 skb->dev = the_dev;
571 skb_data = (u8 *)map_domain_mem((skb->pf - frame_table) << PAGE_SHIFT);
572 skb_reserve(skb, 2);
574 /* Get a pointer to each header. */
575 ethh = (struct ethhdr *)
576 (skb_data + (skb->data - skb->head));
577 iph = (struct iphdr *)(ethh + 1);
578 udph = (struct udphdr *)(iph + 1);
580 skb_reserve(skb, sizeof(struct ethhdr));
581 skb_put(skb, hdr_size + len);
583 /* Build IP header. */
584 iph->version = 4;
585 iph->ihl = 5;
586 iph->tos = 0;
587 iph->tot_len = htons(hdr_size + len);
588 iph->id = 0xdead;
589 iph->frag_off= 0;
590 iph->ttl = 255;
591 iph->protocol= 17;
592 iph->daddr = htonl(0xa9fe0100); /* 169.254.1.0 */
593 iph->saddr = htonl(0xa9fefeff); /* 169.254.254.255 */
594 iph->check = 0;
595 iph->check = compute_cksum((__u16 *)iph, sizeof(struct iphdr)/2);
597 /* Build UDP header. */
598 udph->source = htons(current->domain);
599 udph->dest = htons(666);
600 udph->len = htons(sizeof(struct udphdr) + len);
601 udph->check = 0;
603 /* Build the UDP payload. */
604 memcpy((char *)(udph + 1), str, len);
606 /* Fix Ethernet header. */
607 memset(ethh->h_source, 0, ETH_ALEN);
608 memset(ethh->h_dest, 0, ETH_ALEN);
609 ethh->h_proto = htons(ETH_P_IP);
610 skb->mac.ethernet= (struct ethhdr *)ethh;
612 unmap_domain_mem(skb_data);
614 skb->dst_vif = find_vif_by_id(0);
615 (void)netif_rx(skb);
617 return 1;
618 }
621 long do_console_write(char *str, unsigned int count)
622 {
623 #define SIZEOF_BUF 256
624 unsigned char safe_str[SIZEOF_BUF+1];
625 unsigned char exported_str[SIZEOF_BUF+2];
626 unsigned char dom_id[5];
627 unsigned char *p;
628 unsigned long flags;
629 int j;
631 if ( count == 0 )
632 return 0;
634 if ( count > SIZEOF_BUF )
635 count = SIZEOF_BUF;
637 if ( copy_from_user(safe_str, str, count) )
638 return -EFAULT;
639 safe_str[count] = '\0';
641 p = safe_str;
642 while ( *p != '\0' )
643 {
644 j = 0;
646 spin_lock_irqsave(&console_lock, flags);
648 __putstr("DOM");
649 sprintf(dom_id, "%d", current->domain);
650 __putstr(dom_id);
651 __putstr(": ");
653 while ( (*p != '\0') && (*p != '\n') )
654 {
655 exported_str[j++] = *p;
656 putchar(*p);
657 p++;
658 }
660 if ( *p == '\n' )
661 p++;
663 putchar('\n');
665 spin_unlock_irqrestore(&console_lock, flags);
667 if ( current->domain != 0 )
668 {
669 exported_str[j++] = '\n';
670 exported_str[j++] = '\0';
671 console_export(exported_str, j);
672 }
673 }
675 return 0;
676 }
/*
 * Report a BUG raised from a header file at the given line, then invoke
 * BUG(). Spins forever should BUG() return.
 */
void __out_of_line_bug(int line)
{
    printk("kernel BUG in header file at line %d\n", line);
    BUG();

    while ( 1 )
        continue;
}
687 /*
688 * GRAVEYARD
689 */
690 #if 0
691 if ( (mbi->flags & (1<<6)) )
692 {
693 memory_map_t *mmap = (memory_map_t *)mbi->mmap_addr;
694 struct e820entry *e820 = E820_MAP;
696 while ( (unsigned long)mmap < (mbi->mmap_addr + mbi->mmap_length) )
697 {
698 e820->addr_lo = mmap->base_addr_low;
699 e820->addr_hi = mmap->base_addr_high;
700 e820->size_lo = mmap->length_low;
701 e820->size_hi = mmap->length_high;
702 e820->type = mmap->type;
703 e820++;
704 mmap = (memory_map_t *)
705 ((unsigned long)mmap + mmap->size + sizeof (mmap->size));
706 }
707 }
708 #endif