ia64/xen-unstable

changeset 1269:369b77bc3884

bitkeeper revision 1.834 (406d7297L8YEnoDy3p-Js-U7j2roug)

manual merge as two conflicting dom0 ops
author iap10@tetris.cl.cam.ac.uk
date Fri Apr 02 14:03:03 2004 +0000 (2004-04-02)
parents b9b7a37dc588 5864a35a6c29
children b046dc7976c7
files .rootkeys docs/pdb.txt extras/mini-os/Makefile extras/mini-os/head.S tools/xc/lib/xc.h tools/xc/lib/xc_atropos.c tools/xc/lib/xc_bvtsched.c tools/xc/lib/xc_linux_build.c tools/xc/lib/xc_misc.c tools/xc/lib/xc_netbsd_build.c tools/xc/lib/xc_private.h tools/xc/lib/xc_rrobin.c tools/xc/py/Xc.c tools/xend/lib/main.py tools/xend/lib/manager.py tools/xend/lib/utils.c xen/Rules.mk xen/arch/i386/Rules.mk xen/arch/i386/entry.S xen/arch/i386/io_apic.c xen/arch/i386/irq.c xen/arch/i386/mm.c xen/arch/i386/pdb-stub.c xen/arch/i386/setup.c xen/arch/i386/traps.c xen/common/debug-linux.c xen/common/debug.c xen/common/dom0_ops.c xen/common/domain.c xen/common/event_channel.c xen/common/kernel.c xen/common/keyhandler.c xen/common/memory.c xen/common/page_alloc.c xen/common/physdev.c xen/common/sched_atropos.c xen/common/sched_bvt.c xen/common/sched_rrobin.c xen/common/schedule.c xen/drivers/Makefile xen/drivers/char/console.c xen/drivers/pci/pci.c xen/include/asm-i386/elf.h xen/include/asm-i386/irq.h xen/include/asm-i386/pdb.h xen/include/asm-x86_64/elf.h xen/include/asm-x86_64/irq.h xen/include/hypervisor-ifs/dom0_ops.h xen/include/hypervisor-ifs/event_channel.h xen/include/hypervisor-ifs/hypervisor-if.h xen/include/hypervisor-ifs/physdev.h xen/include/hypervisor-ifs/sched_ctl.h xen/include/xen/blkdev.h xen/include/xen/elf.h xen/include/xen/event.h xen/include/xen/interrupt.h xen/include/xen/irq.h xen/include/xen/mm.h xen/include/xen/sched-if.h xen/include/xen/sched.h xenolinux-2.4.25-sparse/arch/xen/Makefile xenolinux-2.4.25-sparse/arch/xen/boot/Makefile xenolinux-2.4.25-sparse/arch/xen/config.in xenolinux-2.4.25-sparse/arch/xen/defconfig xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c 
xenolinux-2.4.25-sparse/arch/xen/kernel/head.S xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.c xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.h xenolinux-2.4.25-sparse/arch/xen/kernel/pci-irq.c xenolinux-2.4.25-sparse/arch/xen/kernel/pci-pc.c xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c xenolinux-2.4.25-sparse/arch/xen/mm/init.c xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h xenolinux-2.4.25-sparse/include/asm-xen/fixmap.h xenolinux-2.4.25-sparse/include/asm-xen/highmem.h xenolinux-2.4.25-sparse/include/asm-xen/pgtable.h xenolinux-2.4.25-sparse/include/asm-xen/system.h xenolinux-2.4.25-sparse/include/asm-xen/xor.h xenolinux-2.4.25-sparse/mkbuildtree xenolinux-2.4.25-sparse/mm/highmem.c
line diff
     1.1 --- a/.rootkeys	Fri Apr 02 13:58:27 2004 +0000
     1.2 +++ b/.rootkeys	Fri Apr 02 14:03:03 2004 +0000
     1.3 @@ -175,6 +175,7 @@ 3e54c38dkHAev597bPr71-hGzTdocg xen/commo
     1.4  4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c
     1.5  4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c
     1.6  3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
     1.7 +4064773cJ31vZt-zhbSoxqft1Jaw0w xen/common/sched_atropos.c
     1.8  40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c
     1.9  40589968be_t_n0-w6ggceW7h-sx0w xen/common/sched_rrobin.c
    1.10  3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
    1.11 @@ -410,7 +411,6 @@ 3ddb79c3r9-31dIsewPV3P3i8HALsQ xen/inclu
    1.12  3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen/include/asm-i386/desc.h
    1.13  3e564149UkU91RX7onzpCAmbj_IFjw xen/include/asm-i386/dma.h
    1.14  3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-i386/domain_page.h
    1.15 -3ddb79c2O729EttZTYu1c8LcsUO_GQ xen/include/asm-i386/elf.h
    1.16  3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen/include/asm-i386/fixmap.h
    1.17  3e2d29944GI24gf7vOP_7x8EyuqxeA xen/include/asm-i386/flushtlb.h
    1.18  3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen/include/asm-i386/hardirq.h
    1.19 @@ -459,7 +459,6 @@ 404f1b9eRm9rtcM29P5O2nrPFOGSow xen/inclu
    1.20  404f1b9fl6AQ_a-T1TDK3fuwTPXmHw xen/include/asm-x86_64/desc.h
    1.21  404f1ba05mjpUREtosjzz3PPL5cTJA xen/include/asm-x86_64/dma.h
    1.22  404f1ba13mnjeZT2ytPm0DB63703nA xen/include/asm-x86_64/domain_page.h
    1.23 -404f1ba2IXQ7E0x9NlqpR5hgYtC9RQ xen/include/asm-x86_64/elf.h
    1.24  404f1ba31i0gS-cdqvd0RZX1HVnxsA xen/include/asm-x86_64/fixmap.h
    1.25  404f1ba4KXQ_V7HOkenF04KRU7Tl7w xen/include/asm-x86_64/flushtlb.h
    1.26  404f1ba5Sqzc22eXORShvCF9-rpMbA xen/include/asm-x86_64/hardirq.h
    1.27 @@ -533,6 +532,7 @@ 3f0c428eIwGr7n9fj4FkBdX2YvA_Rw xen/inclu
    1.28  3ddb79c1V44RD26YqCUm-kqIupM37A xen/include/xen/ctype.h
    1.29  3ddb79c05DdHQ0UxX_jKsXdR4QlMCA xen/include/xen/delay.h
    1.30  3ddb79c1uaWQZj551j1O0B5z8AnHOg xen/include/xen/elevator.h
    1.31 +3ddb79c2O729EttZTYu1c8LcsUO_GQ xen/include/xen/elf.h
    1.32  3ddb79c0HIghfBF8zFUdmXhOU8i6hA xen/include/xen/errno.h
    1.33  3ddb79c0rMjudDKkJku_mkm0J-BZgw xen/include/xen/etherdevice.h
    1.34  3ddb79c0T3X07lFnM9OSE-W5bqIDSQ xen/include/xen/ethtool.h
    1.35 @@ -611,6 +611,7 @@ 3e5a4e65IEPjnWPZ5w3TxS5scV8Ewg xenolinux
    1.36  3e5a4e65n-KhsEAs-A4ULiStBp-r6w xenolinux-2.4.25-sparse/arch/xen/boot/Makefile
    1.37  3e5a4e65OV_j_DBtjzt5vej771AJsA xenolinux-2.4.25-sparse/arch/xen/config.in
    1.38  3e5a4e65TNEycLeXqPSXQJQm_xGecA xenolinux-2.4.25-sparse/arch/xen/defconfig
    1.39 +40648526SxcA4lGIHB_k7ID8VlRSzw xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev
    1.40  3e6377f5xwPfYZkPHPrDbEq1PRN7uQ xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/Makefile
    1.41  3e6377f8Me8IqtvEhb70XFgOvqQH7A xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/balloon.c
    1.42  3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.25-sparse/arch/xen/drivers/block/Makefile
    1.43 @@ -637,9 +638,6 @@ 3e5a4e65MEvZhlr070sK5JsfAQlv7Q xenolinux
    1.44  3e5a4e653U6cELGv528IxOLHvCq8iA xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c
    1.45  3e5a4e65muT6SU3ck47IP87Q7Ti5hA xenolinux-2.4.25-sparse/arch/xen/kernel/ldt.c
    1.46  4051db84bZeRX7a_Kh6VyyDuT5FOIg xenolinux-2.4.25-sparse/arch/xen/kernel/pci-dma.c
    1.47 -4051db89iiHs38tWGkoW_RukNyaBHw xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.c
    1.48 -4051db8dJYX86ZCLA-WfTW2dAyrehw xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.h
    1.49 -4051db91BenvDZEMZxQCGkQyJYoG5w xenolinux-2.4.25-sparse/arch/xen/kernel/pci-irq.c
    1.50  4051db95N9N99FjsRwi49YKUNHWI8A xenolinux-2.4.25-sparse/arch/xen/kernel/pci-pc.c
    1.51  3e5a4e65IGt3WwQDNiL4h-gYWgNTWQ xenolinux-2.4.25-sparse/arch/xen/kernel/process.c
    1.52  3e5a4e66tR-qJMLj3MppcKqmvuI2XQ xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c
    1.53 @@ -664,7 +662,7 @@ 4048c0ddxnIa2GpBAVR-mY6mNSdeJg xenolinux
    1.54  3e5a4e66HdSkvIV6SJ1evG_xmTmXHA xenolinux-2.4.25-sparse/include/asm-xen/desc.h
    1.55  4048c0e0_P2wUTiT6UqgPhn0s7yFcA xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h
    1.56  3e5a4e66SYp_UpAVcF8Lc1wa3Qtgzw xenolinux-2.4.25-sparse/include/asm-xen/fixmap.h
    1.57 -3e5a4e67w_DWgjIJ17Tlossu1LGujQ xenolinux-2.4.25-sparse/include/asm-xen/highmem.h
    1.58 +406aeeaaQvl4RNtmd9hDEugBURbFpQ xenolinux-2.4.25-sparse/include/asm-xen/highmem.h
    1.59  3e5a4e67YtcyDLQsShhCfQwPSELfvA xenolinux-2.4.25-sparse/include/asm-xen/hw_irq.h
    1.60  3e5a4e677VBavzM1UZIEcH1B-RlXMA xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h
    1.61  4060044fVx7-tokvNLKBf_6qBB4lqQ xenolinux-2.4.25-sparse/include/asm-xen/io.h
    1.62 @@ -687,6 +685,7 @@ 4062f7e2PzFOUGT0PaE7A0VprTU3JQ xenolinux
    1.63  3e5a4e68mTr0zcp9SXDbnd-XLrrfxw xenolinux-2.4.25-sparse/include/asm-xen/system.h
    1.64  3f1056a9L_kqHcFheV00KbKBzv9j5w xenolinux-2.4.25-sparse/include/asm-xen/vga.h
    1.65  3f689063nhrIRsMMZjZxMFk7iEINqQ xenolinux-2.4.25-sparse/include/asm-xen/xen_proc.h
    1.66 +40659defgWA92arexpMGn8X3QMDj3w xenolinux-2.4.25-sparse/include/asm-xen/xor.h
    1.67  3f056927gMHl7mWB89rb73JahbhQIA xenolinux-2.4.25-sparse/include/linux/blk.h
    1.68  3e5a4e68WLX3B8owTvktP3HHOtznPQ xenolinux-2.4.25-sparse/include/linux/major.h
    1.69  401c0590D_kwJDU59X8NyvqSv_Cl2A xenolinux-2.4.25-sparse/include/linux/sched.h
    1.70 @@ -697,6 +696,7 @@ 3e5a4e68TJJavrunYwTAnLRSBxSYqQ xenolinux
    1.71  3f9d4b44247udoqWEgFkaHiWv6Uvyg xenolinux-2.4.25-sparse/kernel/time.c
    1.72  401c059bjLBFYHRD4Py2uM3eA1D4zQ xenolinux-2.4.25-sparse/kernel/timer.c
    1.73  3e6e7c1efbQe93xCvOpOVCnXTMmQ5w xenolinux-2.4.25-sparse/mkbuildtree
    1.74 +406aeeafkrnCuIVWLFv3kfn4uAD5Eg xenolinux-2.4.25-sparse/mm/highmem.c
    1.75  3e5a4e68GxCIaFH4sy01v1wjapetaA xenolinux-2.4.25-sparse/mm/memory.c
    1.76  3f108af5VxPkLv13tXpXgoRKALQtXQ xenolinux-2.4.25-sparse/mm/mprotect.c
    1.77  3e5a4e681xMPdF9xCMwpyfuYMySU5g xenolinux-2.4.25-sparse/mm/mremap.c
     2.1 --- a/docs/pdb.txt	Fri Apr 02 13:58:27 2004 +0000
     2.2 +++ b/docs/pdb.txt	Fri Apr 02 14:03:03 2004 +0000
     2.3 @@ -1,7 +1,7 @@
     2.4  Pervasive Debugging 
     2.5  ===================
     2.6  
     2.7 -040205 Alex Ho (alex.ho at cl.cam.ac.uk)
     2.8 +Alex Ho (alex.ho at cl.cam.ac.uk)
     2.9  
    2.10  Introduction
    2.11  ------------
    2.12 @@ -79,12 +79,32 @@ Serial Port Configuration
    2.13  
    2.14    See:  xeno.bk/tools/nsplitd
    2.15  
    2.16 +  nsplitd configuration
    2.17 +  ---------------------
    2.18 +  hostname$ more /etc/xinetd.d/nsplit
    2.19 +  service nsplit1
    2.20 +  {
    2.21 +        socket_type             = stream
    2.22 +        protocol                = tcp
    2.23 +        wait                    = no
    2.24 +        user                    = wanda
    2.25 +        server                  = /usr/sbin/in.nsplitd
    2.26 +        server_args             = serial.cl.cam.ac.uk:wcons00
    2.27 +        disable                 = no
    2.28 +        only_from               = 128.232.0.0/17 127.0.0.1
    2.29 +  }
    2.30 +
    2.31 +  hostname$ egrep 'wcons00|nsplit1' /etc/services
    2.32 +  wcons00         9600/tcp        # Wanda remote console
    2.33 +  nsplit1         12010/tcp       # Nemesis console splitter ports.
    2.34 +
    2.35    Note: nsplitd was originally written for the Nemesis project
    2.36    at Cambridge.
    2.37  
    2.38 -  After nsplitd accepts a connection on <port>, it starts listening
    2.39 -  on port <port + 1>.  Characters sent to the <port + 1> will have the
    2.40 -  high bit set and vice versa for characters received.
    2.41 +  After nsplitd accepts a connection on <port> (12010 in the above
    2.42 +  example), it starts listening on port <port + 1>.  Characters sent 
    2.43 +  to the <port + 1> will have the high bit set and vice versa for 
    2.44 +  characters received.
    2.45  
    2.46    You can connect to the nsplitd using
    2.47    'tools/xenctl/lib/console_client.py <host> <port>'
    2.48 @@ -100,8 +120,9 @@ Usage
    2.49  1. Boot Xen and Linux
    2.50  2. Interrupt Xen by pressing 'D' at the console
    2.51     You should see the console message: 
    2.52 -   pdb_handle_exception [0x88][0xfc5c9d88]
    2.53 -   At this point Xen is waiting for gdb commands on the serial line.
    2.54 +   (XEN) pdb_handle_exception [0x88][0x101000:0xfc5e72ac]
    2.55 +   At this point Xen is frozen and the pdb stub is waiting for gdb commands 
    2.56 +   on the serial line.
    2.57  3. Attach with gdb
    2.58     (gdb) file xeno.bk/xen/xen
    2.59     Reading symbols from xeno.bk/xen/xen...done.
    2.60 @@ -146,60 +167,107 @@ 4. You can add additional symbols to gdb
    2.61     Program received signal SIGTRAP, Trace/breakpoint trap.
    2.62     release_task (p=0xc2da0000) at exit.c:51
    2.63     (gdb) print *p
    2.64 -$3 = {state = 4, flags = 4, sigpending = 0, addr_limit = {seg = 3221225472}, 
    2.65 -  exec_domain = 0xc016a040, need_resched = 0, ptrace = 0, lock_depth = -1, 
    2.66 -  counter = 1, nice = 0, policy = 0, mm = 0x0, processor = 0, 
    2.67 -  cpus_runnable = 1, cpus_allowed = 4294967295, run_list = {next = 0x0, 
    2.68 -    prev = 0x0}, sleep_time = 18995, next_task = 0xc017c000, 
    2.69 -  prev_task = 0xc2f94000, active_mm = 0x0, local_pages = {next = 0xc2da0054, 
    2.70 -    prev = 0xc2da0054}, allocation_order = 0, nr_local_pages = 0, 
    2.71 -  binfmt = 0xc016c6a0, exit_code = 0, exit_signal = 17, pdeath_signal = 0, 
    2.72 -  personality = 0, did_exec = -1, task_dumpable = 1, pid = 917, pgrp = 914, 
    2.73 -  tty_old_pgrp = 0, session = 914, tgid = 917, leader = 0, 
    2.74 -  p_opptr = 0xc2f94000, p_pptr = 0xc2f94000, p_cptr = 0x0, p_ysptr = 0x0, 
    2.75 -  p_osptr = 0x0, thread_group = {next = 0xc2da00a8, prev = 0xc2da00a8}, 
    2.76 -  pidhash_next = 0x0, pidhash_pprev = 0xc01900b8, wait_chldexit = {
    2.77 -    lock = <incomplete type>, task_list = {next = 0xc2da00b8, 
    2.78 -      prev = 0xc2da00b8}}, vfork_done = 0x0, rt_priority = 0, 
    2.79 -  it_real_value = 0, it_prof_value = 0, it_virt_value = 0, it_real_incr = 0, 
    2.80 -  it_prof_incr = 0, it_virt_incr = 0, real_timer = {list = {next = 0x0, 
    2.81 -      prev = 0x0}, expires = 18950, data = 3269066752, 
    2.82 -    function = 0xc000ce30 <it_real_fn>}, times = {tms_utime = 0, 
    2.83 -    tms_stime = 0, tms_cutime = 0, tms_cstime = 0}, start_time = 18989, 
    2.84 -  per_cpu_utime = {1}, per_cpu_stime = {310}, min_flt = 13, maj_flt = 104, 
    2.85 -  nswap = 0, cmin_flt = 0, cmaj_flt = 0, cnswap = 0, swappable = -1, uid = 0, 
    2.86 -  euid = 0, suid = 0, fsuid = 0, gid = 0, egid = 0, sgid = 0, fsgid = 0, 
    2.87 -  ngroups = 7, groups = {0, 1, 2, 3, 4, 6, 10, 0 <repeats 25 times>}, 
    2.88 -  cap_effective = 4294967039, cap_inheritable = 0, cap_permitted = 4294967039, 
    2.89 -  keep_capabilities = 0, user = 0xc016b18c, rlim = {{rlim_cur = 4294967295, 
    2.90 -      rlim_max = 4294967295}, {rlim_cur = 4294967295, rlim_max = 4294967295}, {
    2.91 -      rlim_cur = 4294967295, rlim_max = 4294967295}, {rlim_cur = 8388608, 
    2.92 -      rlim_max = 4294967295}, {rlim_cur = 0, rlim_max = 4294967295}, {
    2.93 -      rlim_cur = 4294967295, rlim_max = 4294967295}, {rlim_cur = 512, 
    2.94 -      rlim_max = 512}, {rlim_cur = 1024, rlim_max = 1024}, {
    2.95 -      rlim_cur = 4294967295, rlim_max = 4294967295}, {rlim_cur = 4294967295, 
    2.96 -      rlim_max = 4294967295}, {rlim_cur = 4294967295, rlim_max = 4294967295}}, 
    2.97 -  used_math = 0, comm = "id\000h\000og\000\000\000\000\000\000\000\000", 
    2.98 -  link_count = 0, total_link_count = 1, tty = 0xc3ed1000, locks = 0, 
    2.99 -  semundo = 0x0, semsleeping = 0x0, thread = {esp0 = 3269074944, 
   2.100 -    eip = 3221249046, esp = 3269074792, fs = 0, gs = 0, io_pl = 3, debugreg = {
   2.101 -      0, 0, 0, 0, 0, 0, 0, 0}, cr2 = 0, trap_no = 0, error_code = 0, i387 = {
   2.102 -      fsave = {cwd = 2098047, swd = 125632512, twd = 1073944696, fip = 2091, 
   2.103 -        fcs = -1073745032, foo = 2099, fos = 8064, st_space = {
   2.104 -          0 <repeats 20 times>}, status = 0}, fxsave = {cwd = 895, swd = 32, 
   2.105 -        twd = 0, fop = 1917, fip = 1073944696, fcs = 2091, foo = -1073745032, 
   2.106 -        fos = 2099, mxcsr = 8064, reserved = 0, st_space = {
   2.107 -          0 <repeats 24 times>, 1449431204, -1774489361, 16383, 0, 1, 
   2.108 -          -1891252224, 16404, 0}, xmm_space = {0 <repeats 32 times>}, 
   2.109 -        padding = {0 <repeats 56 times>}}, soft = {cwd = 2098047, 
   2.110 -        swd = 125632512, twd = 1073944696, fip = 2091, fcs = -1073745032, 
   2.111 -        foo = 2099, fos = 8064, st_space = {0 <repeats 20 times>}, 
   2.112 -        ftop = 0 '\0', changed = 0 '\0', lookahead = 0 '\0', 
   2.113 -        no_update = 0 '\0', rm = 0 '\0', alimit = 0 '\0', info = 0x0, 
   2.114 -        entry_eip = 0}}, vm86_info = 0x0, screen_bitmap = 0, v86flags = 0, 
   2.115 -    v86mask = 0, saved_esp0 = 0}, fs = 0x0, files = 0x0, namespace = 0x0, 
   2.116 -  sigmask_lock = <incomplete type>, sig = 0x0, blocked = {sig = {0, 0}}, 
   2.117 -  pending = {head = 0x0, tail = 0xc2da04f8, signal = {sig = {0, 0}}}, 
   2.118 -  sas_ss_sp = 0, sas_ss_size = 0, notifier = 0, notifier_data = 0x0, 
   2.119 -  notifier_mask = 0x0, parent_exec_id = 7, self_exec_id = 8, 
   2.120 -  alloc_lock = <incomplete type>, journal_info = 0x0}
   2.121 +   $3 = {state = 4, flags = 4, sigpending = 0, addr_limit = {seg = 3221225472},
   2.122 +     exec_domain = 0xc016a040, need_resched = 0, ptrace = 0, lock_depth = -1, 
   2.123 +     counter = 1, nice = 0, policy = 0, mm = 0x0, processor = 0, 
   2.124 +     cpus_runnable = 1, cpus_allowed = 4294967295, run_list = {next = 0x0, 
   2.125 +       prev = 0x0}, sleep_time = 18995, next_task = 0xc017c000, 
   2.126 +     prev_task = 0xc2f94000, active_mm = 0x0, local_pages = {next = 0xc2da0054,
   2.127 +       prev = 0xc2da0054}, allocation_order = 0, nr_local_pages = 0, 
   2.128 +     ...
   2.129 +5. To resume Xen, enter the "continue" command to gdb.
   2.130 +   This sends the packet $c#63 along the serial channel.
   2.131 +
   2.132 +   (gdb) cont
   2.133 +   Continuing.
   2.134 +
   2.135 +Debugging Multiple Domains & Processes
   2.136 +--------------------------------------
   2.137 +
   2.138 +pdb supports debugging multiple domains & processes.  You can switch
   2.139 +between different domains and processes within domains and examine
   2.140 +variables in each.
   2.141 +
   2.142 +The pdb context identifies the current debug target.  It is stored
   2.143 +in the xen variable pdb_ctx and defaults to xen.
   2.144 +
   2.145 +   target    pdb_ctx.domain    pdb_ctx.process
   2.146 +   ------    --------------    ---------------
   2.147 +    xen           -1                 -1
   2.148 +  guest os      0,1,2,...            -1
   2.149 +   process      0,1,2,...          0,1,2,...
   2.150 +
   2.151 +Unfortunately, gdb doesn't understand debugging multiple processes
   2.152 +simultaneously (we're working on it), so at present you are limited 
   2.153 +to just one set of symbols for symbolic debugging.  When debugging
   2.154 +processes, pdb currently supports just Linux 2.4.
   2.155 +
   2.156 +   define setup
   2.157 +      file xeno-clone/xeno.bk/xen/xen
   2.158 +      add-sym xeno-clone/xenolinux-2.4.25/vmlinux
   2.159 +      add-sym ~ach61/a.out
   2.160 +   end
   2.161 +
   2.162 +
   2.163 +1. Connect with gdb as before.  A couple of Linux-specific 
   2.164 +   symbols need to be defined.
   2.165 +
   2.166 +   (gdb) target remote <hostname>:<port + 1>              /* contact nsplitd */
   2.167 +   Remote debugging using serial.srg:12131
   2.168 +   continue_cpu_idle_loop () at current.h:10
   2.169 +   warning: shared library handler failed to enable breakpoint
   2.170 +   (gdb) set pdb_pidhash_addr = &pidhash
   2.171 +   (gdb) set pdb_init_task_union_addr = &init_task_union
   2.172 +
   2.173 +2. The pdb context defaults to Xen and we can read Xen's memory.
   2.174 +   An attempt to access domain 0 memory fails.
   2.175 +  
   2.176 +   (gdb) print pdb_ctx
   2.177 +   $1 = {valid = 0, domain = -1, process = -1, ptbr = 1052672}
   2.178 +   (gdb) print hexchars
   2.179 +   $2 = "0123456789abcdef"
   2.180 +   (gdb) print cpu_vendor_names
   2.181 +   Cannot access memory at address 0xc0191f80
   2.182 +
   2.183 +3. Now we change to domain 0.  In addition to changing pdb_ctx.domain,
   2.184 +   we need to change pdb_ctx.valid to signal the change to pdb.
   2.185 +   It is now possible to examine Xen and Linux memory.
   2.186 +
   2.187 +   (gdb) set pdb_ctx.domain=0
   2.188 +   (gdb) set pdb_ctx.valid=1
   2.189 +   (gdb) print hexchars
   2.190 +   $3 = "0123456789abcdef"
   2.191 +   (gdb) print cpu_vendor_names
   2.192 +   $4 = {0xc0158b46 "Intel", 0xc0158c37 "Cyrix", 0xc0158b55 "AMD", 
   2.193 +     0xc0158c3d "UMC", 0xc0158c41 "NexGen", 0xc0158c48 "Centaur", 
   2.194 +     0xc0158c50 "Rise", 0xc0158c55 "Transmeta"}
   2.195 +
   2.196 +4. Now change to a process within domain 0.  Again, we need to
   2.197 +   change pdb_ctx.valid in addition to pdb_ctx.process.
   2.198 +
   2.199 +   (gdb) set pdb_ctx.process=962
   2.200 +   (gdb) set pdb_ctx.valid =1
   2.201 +   (gdb) print pdb_ctx
   2.202 +   $1 = {valid = 0, domain = 0, process = 962, ptbr = 52998144}
   2.203 +   (gdb) print aho_a
   2.204 +   $2 = 20
   2.205 +
   2.206 +5. Now we can read the same variable from another process running
   2.207 +   the same executable in another domain.
   2.208 +
   2.209 +   (gdb) set pdb_ctx.domain=1
   2.210 +   (gdb) set pdb_ctx.process=1210
   2.211 +   (gdb) set pdb_ctx.valid=1
   2.212 +   (gdb) print pdb_ctx
   2.213 +   $3 = {valid = 0, domain = 1, process = 1210, ptbr = 70574080}
   2.214 +   (gdb) print aho_a
   2.215 +   $4 = 27
   2.216 +
   2.217 +
   2.218 +
   2.219 +
   2.220 +Changes
   2.221 +-------
   2.222 +
   2.223 +04.02.05 aho creation
   2.224 +04.03.31 aho add description on debugging multiple domains
     3.1 --- a/extras/mini-os/Makefile	Fri Apr 02 13:58:27 2004 +0000
     3.2 +++ b/extras/mini-os/Makefile	Fri Apr 02 14:03:03 2004 +0000
     3.3 @@ -24,17 +24,8 @@ hypervisor-ifs:
     3.4  	ln -sf ../../../xen/include/hypervisor-ifs h/hypervisor-ifs
     3.5  
     3.6  $(TARGET): hypervisor-ifs head.o $(OBJS)
     3.7 -	# Image will load at 0xC0000000. First bytes from head.o
     3.8 -	#$(LD) -N -Ttext 0xC0000000 head.o $(OBJS) -o $@.elf
     3.9  	$(LD) -N -T minios.lds head.o $(OBJS) -o $@.elf
    3.10 -	# Guest OS header -- first 8 bytes are identifier 'XenGuest'.
    3.11 -	echo -e -n 'XenGuest' >$@ 
    3.12 -	# Guest OS header -- next 4 bytes are load address (0xC0000000).
    3.13 -	echo -e -n '\000\000\000\300' >>$@
    3.14 -	# Create a raw bag of bytes from the ELF image.
    3.15 -	objcopy -O binary -R .note -R .comment $@.elf $@.raw
    3.16 -	# Guest OS header is immediately followed by raw OS image.
    3.17 -	cat $@.raw >>$@
    3.18 +	objcopy -R .note -R .comment $@.elf $@
    3.19  	gzip -f -9 -c $@ >$@.gz
    3.20  
    3.21  clean:
     4.1 --- a/extras/mini-os/head.S	Fri Apr 02 13:58:27 2004 +0000
     4.2 +++ b/extras/mini-os/head.S	Fri Apr 02 14:03:03 2004 +0000
     4.3 @@ -1,48 +1,18 @@
     4.4  #include <os.h>
     4.5  
     4.6 -/* Offsets in start_info structure */
     4.7 -#define MOD_START 20
     4.8 -#define MOD_LEN   24
     4.9 -
    4.10  .globl _start, shared_info
    4.11                          
    4.12  _start:
    4.13          cld
    4.14 -        
    4.15          lss stack_start,%esp
    4.16 -        
    4.17 -        /* Copy any module somewhere safe before it's clobbered by BSS. */
    4.18 -        mov  MOD_LEN(%esi),%ecx
    4.19 -        shr  $2,%ecx
    4.20 -        jz   2f        /* bail from copy loop if no module */
    4.21 -        
    4.22 -        mov  $_end,%edi
    4.23 -        add  MOD_LEN(%esi),%edi
    4.24 -        mov  MOD_START(%esi),%eax
    4.25 -        add  MOD_LEN(%esi),%eax
    4.26 -1:      sub  $4,%eax
    4.27 -        sub  $4,%edi
    4.28 -        mov  (%eax),%ebx
    4.29 -        mov  %ebx,(%edi)
    4.30 -        loop 1b
    4.31 -        mov  %edi,MOD_START(%esi)
    4.32 -
    4.33 -        /* Clear BSS first so that there are no surprises... */
    4.34 -2:      xorl %eax,%eax
    4.35 -        movl $__bss_start,%edi
    4.36 -        movl $_end,%ecx
    4.37 -        subl %edi,%ecx
    4.38 -        rep stosb
    4.39 -
    4.40          push %esi 
    4.41          call start_kernel
    4.42  
    4.43 -
    4.44  stack_start:
    4.45  	.long stack+8192, __KERNEL_DS
    4.46  
    4.47 -
    4.48 -        /* Unpleasant -- we actually use this PTE to map shared_info :-) */
    4.49 +        /* Unpleasant -- the PTE that maps this page is actually overwritten */
    4.50 +        /* to map the real shared-info page! :-)                             */
    4.51          .org 0x1000
    4.52  shared_info:
    4.53          .org 0x2000
     5.1 --- a/tools/xc/lib/xc.h	Fri Apr 02 13:58:27 2004 +0000
     5.2 +++ b/tools/xc/lib/xc.h	Fri Apr 02 14:03:03 2004 +0000
     5.3 @@ -74,6 +74,7 @@ int xc_netbsd_build(int xc_handle,
     5.4  
     5.5  int xc_bvtsched_global_set(int xc_handle,
     5.6                             unsigned long ctx_allow);
     5.7 +
     5.8  int xc_bvtsched_domain_set(int xc_handle,
     5.9                             u64 domid,
    5.10                             unsigned long mcuadv,
    5.11 @@ -81,13 +82,32 @@ int xc_bvtsched_domain_set(int xc_handle
    5.12                             unsigned long warpl,
    5.13                             unsigned long warpu);
    5.14  
    5.15 +int xc_bvtsched_global_get(int xc_handle,
    5.16 +			   unsigned long *ctx_allow);
    5.17 +
    5.18 +int xc_bvtsched_domain_get(int xc_handle,
    5.19 +                           u64 domid,
    5.20 +                           unsigned long *mcuadv,
    5.21 +                           unsigned long *warp,
    5.22 +                           unsigned long *warpl,
    5.23 +                           unsigned long *warpu);
    5.24 +
    5.25  int xc_atropos_domain_set(int xc_handle,
    5.26  			  u64 domid,
    5.27 +			  u64 period, u64 slice, u64 latency,
    5.28  			  int xtratime);
    5.29  
    5.30 +int xc_atropos_domain_get(int xc_handle,
    5.31 +                          u64 domid,
    5.32 +                          u64* period, u64 *slice, u64 *latency,
    5.33 +                          int *xtratime);
    5.34 +
    5.35  int xc_rrobin_global_set(int xc_handle,
    5.36  			 u64 slice);
    5.37  
    5.38 +int xc_rrobin_global_get(int xc_handle,
    5.39 +                         u64 *slice);
    5.40 +
    5.41  typedef struct {
    5.42      unsigned long credit_bytes;
    5.43      unsigned long credit_usec;
     6.1 --- a/tools/xc/lib/xc_atropos.c	Fri Apr 02 13:58:27 2004 +0000
     6.2 +++ b/tools/xc/lib/xc_atropos.c	Fri Apr 02 14:03:03 2004 +0000
     6.3 @@ -8,29 +8,44 @@
     6.4  
     6.5  #include "xc_private.h"
     6.6  
     6.7 -int xc_atropos_global_set(int xc_handle,
     6.8 -			  unsigned long ctx_allow)
     6.9 +int xc_atropos_domain_set(int xc_handle,
    6.10 +			  u64 domid, u64 period, u64 slice, u64 latency,
    6.11 +                          int xtratime)
    6.12  {
    6.13      dom0_op_t op;
    6.14 +    struct atropos_adjdom *p = &op.u.adjustdom.u.atropos;
    6.15  
    6.16 -    op.cmd = DOM0_SCHEDCTL;
    6.17 -    op.u.schedctl.sched_id = SCHED_BVT;
    6.18 +    op.cmd = DOM0_ADJUSTDOM;
    6.19 +    op.u.adjustdom.domain  = (domid_t)domid;
    6.20 +    op.u.adjustdom.sched_id = SCHED_ATROPOS;
    6.21 +    op.u.adjustdom.direction = SCHED_INFO_PUT;
    6.22  
    6.23 -    op.u.schedctl.u.bvt.ctx_allow = ctx_allow;
    6.24 +    p->period   = period;
    6.25 +    p->slice    = slice;
    6.26 +    p->latency  = latency;
    6.27 +    p->xtratime = xtratime;
    6.28  
    6.29      return do_dom0_op(xc_handle, &op);
    6.30  }
    6.31  
    6.32 -int xc_atropos_domain_set(int xc_handle,
    6.33 -			  u64 domid, int xtratime)
    6.34 +int xc_atropos_domain_get(int xc_handle, u64 domid, u64 *period,
    6.35 +                          u64 *slice, u64 *latency, int *xtratime)
    6.36  {
    6.37      dom0_op_t op;
    6.38 +    int ret;
    6.39 +    struct atropos_adjdom *p = &op.u.adjustdom.u.atropos;
    6.40  
    6.41 -    op.cmd = DOM0_ADJUSTDOM;
    6.42 -    op.u.adjustdom.domain  = (domid_t)domid;
    6.43 +    op.cmd = DOM0_ADJUSTDOM;    
    6.44 +    op.u.adjustdom.domain = (domid_t)domid;
    6.45      op.u.adjustdom.sched_id = SCHED_ATROPOS;
    6.46 +    op.u.adjustdom.direction = SCHED_INFO_GET;
    6.47  
    6.48 -    op.u.adjustdom.u.atropos.xtratime = xtratime;
    6.49 +    ret = do_dom0_op(xc_handle, &op);
    6.50  
    6.51 -    return do_dom0_op(xc_handle, &op);
    6.52 +    *period   = p->period;
    6.53 +    *slice    = p->slice;
    6.54 +    *latency  = p->latency;
    6.55 +    *xtratime = p->xtratime;
    6.56 +
    6.57 +    return ret;
    6.58  }
     7.1 --- a/tools/xc/lib/xc_bvtsched.c	Fri Apr 02 13:58:27 2004 +0000
     7.2 +++ b/tools/xc/lib/xc_bvtsched.c	Fri Apr 02 14:03:03 2004 +0000
     7.3 @@ -15,12 +15,29 @@ int xc_bvtsched_global_set(int xc_handle
     7.4  
     7.5      op.cmd = DOM0_SCHEDCTL;
     7.6      op.u.schedctl.sched_id = SCHED_BVT;
     7.7 -
     7.8 +    op.u.schedctl.direction = SCHED_INFO_PUT;
     7.9      op.u.schedctl.u.bvt.ctx_allow = ctx_allow;
    7.10  
    7.11      return do_dom0_op(xc_handle, &op);
    7.12  }
    7.13  
    7.14 +int xc_bvtsched_global_get(int xc_handle,
    7.15 +			   unsigned long *ctx_allow)
    7.16 +{
    7.17 +    dom0_op_t op;
    7.18 +    int ret;
    7.19 +    
    7.20 +    op.cmd = DOM0_SCHEDCTL;
    7.21 +    op.u.schedctl.sched_id = SCHED_BVT;
    7.22 +    op.u.schedctl.direction = SCHED_INFO_GET;
    7.23 +
    7.24 +    ret = do_dom0_op(xc_handle, &op);
    7.25 +
    7.26 +    *ctx_allow = op.u.schedctl.u.bvt.ctx_allow;
    7.27 +
    7.28 +    return ret;
    7.29 +}
    7.30 +
    7.31  int xc_bvtsched_domain_set(int xc_handle,
    7.32                             u64 domid,
    7.33                             unsigned long mcuadv,
    7.34 @@ -34,11 +51,38 @@ int xc_bvtsched_domain_set(int xc_handle
    7.35      op.cmd = DOM0_ADJUSTDOM;
    7.36      op.u.adjustdom.domain  = (domid_t)domid;
    7.37      op.u.adjustdom.sched_id = SCHED_BVT;
    7.38 +    op.u.adjustdom.direction = SCHED_INFO_PUT;
    7.39  
    7.40      bvtadj->mcu_adv = mcuadv;
    7.41      bvtadj->warp    = warp;
    7.42      bvtadj->warpl   = warpl;
    7.43      bvtadj->warpu   = warpu;
    7.44 -
    7.45      return do_dom0_op(xc_handle, &op);
    7.46  }
    7.47 +
    7.48 +
    7.49 +int xc_bvtsched_domain_get(int xc_handle,
    7.50 +			   u64 domid,
    7.51 +			   unsigned long *mcuadv,
    7.52 +			   unsigned long *warp,
    7.53 +                           unsigned long *warpl,
    7.54 +                           unsigned long *warpu)
    7.55 +{
    7.56 +    
    7.57 +    dom0_op_t op;
    7.58 +    int ret;
    7.59 +    struct bvt_adjdom *adjptr = &op.u.adjustdom.u.bvt;
    7.60 +
    7.61 +    op.cmd = DOM0_ADJUSTDOM;
    7.62 +    op.u.adjustdom.domain  = (domid_t)domid;
    7.63 +    op.u.adjustdom.sched_id = SCHED_BVT;
    7.64 +    op.u.adjustdom.direction = SCHED_INFO_GET;
    7.65 +
    7.66 +    ret = do_dom0_op(xc_handle, &op);
    7.67 +
    7.68 +    *mcuadv = adjptr->mcu_adv;
    7.69 +    *warp   = adjptr->warp;
    7.70 +    *warpl  = adjptr->warpl;
    7.71 +    *warpu  = adjptr->warpu;
    7.72 +    return ret;
    7.73 +}
     8.1 --- a/tools/xc/lib/xc_linux_build.c	Fri Apr 02 13:58:27 2004 +0000
     8.2 +++ b/tools/xc/lib/xc_linux_build.c	Fri Apr 02 14:03:03 2004 +0000
     8.3 @@ -3,15 +3,24 @@
     8.4   */
     8.5  
     8.6  #include "xc_private.h"
     8.7 +#define ELFSIZE 32
     8.8 +#include "xc_elf.h"
     8.9  #include <zlib.h>
    8.10  
    8.11 -/* This string is written to the head of every guest kernel image. */
    8.12 -#define GUEST_SIG   "XenGuest"
    8.13 -#define SIG_LEN    8
    8.14 -
    8.15  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
    8.16  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    8.17  
    8.18 +#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    8.19 +#define round_pgdown(_p)  ((_p)&PAGE_MASK)
    8.20 +
    8.21 +static int readelfimage_base_and_size(char *elfbase, 
    8.22 +                                      unsigned long elfsize,
    8.23 +                                      unsigned long *pkernstart,
    8.24 +                                      unsigned long *pkernend,
    8.25 +                                      unsigned long *pkernentry);
    8.26 +static int loadelfimage(char *elfbase, int pmh, unsigned long *parray,
    8.27 +                        unsigned long vstart);
    8.28 +
    8.29  static long get_tot_pages(int xc_handle, u64 domid)
    8.30  {
    8.31      dom0_op_t op;
    8.32 @@ -43,25 +52,6 @@ static int get_pfn_list(int xc_handle,
    8.33      return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
    8.34  }
    8.35  
    8.36 -/* Read the kernel header, extracting the image size and load address. */
    8.37 -static int read_kernel_header(gzFile gfd, long dom_size, 
    8.38 -                              unsigned long *load_addr)
    8.39 -{
    8.40 -    char signature[SIG_LEN];
    8.41 -
    8.42 -    gzread(gfd, signature, SIG_LEN);
    8.43 -    if ( strncmp(signature, GUEST_SIG, SIG_LEN) )
    8.44 -    {
    8.45 -        ERROR("Kernel image does not contain required signature");
    8.46 -        return -1;
    8.47 -    }
    8.48 -
    8.49 -    /* Read the load address which immediately follows the Xen signature. */
    8.50 -    gzread(gfd, load_addr, sizeof(unsigned long));
    8.51 -
    8.52 -    return 0;
    8.53 -}
    8.54 -
    8.55  static int copy_to_domain_page(int pm_handle,
    8.56                                 unsigned long dst_pfn, 
    8.57                                 void *src_page)
    8.58 @@ -75,12 +65,11 @@ static int copy_to_domain_page(int pm_ha
    8.59  }
    8.60  
    8.61  static int setup_guestos(int xc_handle,
    8.62 -                         u64 dom, 
    8.63 -                         gzFile kernel_gfd, 
    8.64 -                         gzFile initrd_gfd, 
    8.65 -                         unsigned long tot_pages,
    8.66 -                         unsigned long *virt_startinfo_addr, 
    8.67 -                         unsigned long virt_load_addr, 
    8.68 +                         u64 dom,
    8.69 +                         char *image, unsigned long image_size,
    8.70 +                         gzFile initrd_gfd, unsigned long initrd_len,
    8.71 +                         unsigned long nr_pages,
    8.72 +                         unsigned long *pvsi, unsigned long *pvke,
    8.73                           dom0_builddomain_t *builddomain, 
    8.74                           const char *cmdline,
    8.75                           unsigned long shared_info_frame)
    8.76 @@ -88,140 +77,184 @@ static int setup_guestos(int xc_handle,
    8.77      l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    8.78      l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    8.79      unsigned long *page_array = NULL;
    8.80 -    int alloc_index, num_pt_pages;
    8.81      unsigned long l2tab;
    8.82      unsigned long l1tab;
    8.83 -    unsigned long count, pt_start, i, j;
    8.84 -    unsigned long initrd_addr = 0, initrd_len = 0;
    8.85 +    unsigned long count, i;
    8.86      start_info_t *start_info;
    8.87      shared_info_t *shared_info;
    8.88 -    unsigned long ksize;
    8.89      mmu_t *mmu = NULL;
    8.90 -    int pm_handle;
    8.91 +    int pm_handle=-1, rc;
    8.92 +
    8.93 +    unsigned long nr_pt_pages;
    8.94 +    unsigned long ppt_alloc;
    8.95 +    unsigned long *physmap, *physmap_e, physmap_pfn;
    8.96 +
    8.97 +    unsigned long v_start;
    8.98 +    unsigned long vkern_start;
    8.99 +    unsigned long vkern_entry;
   8.100 +    unsigned long vkern_end;
   8.101 +    unsigned long vinitrd_start;
   8.102 +    unsigned long vinitrd_end;
   8.103 +    unsigned long vphysmap_start;
   8.104 +    unsigned long vphysmap_end;
   8.105 +    unsigned long vstartinfo_start;
   8.106 +    unsigned long vstartinfo_end;
   8.107 +    unsigned long vstack_start;
   8.108 +    unsigned long vstack_end;
   8.109 +    unsigned long vpt_start;
   8.110 +    unsigned long vpt_end;
   8.111 +    unsigned long v_end;
   8.112 +
   8.113 +    rc = readelfimage_base_and_size(image, image_size, 
   8.114 +                                    &vkern_start, &vkern_end, &vkern_entry);
   8.115 +    if ( rc != 0 )
   8.116 +        goto error_out;
   8.117 +    
   8.118 +    /*
   8.119 +     * Why do we need this? The number of page-table frames depends on the 
   8.120 +     * size of the bootstrap address space. But the size of the address space 
   8.121 +     * depends on the number of page-table frames (since each one is mapped 
   8.122 +     * read-only). We have a pair of simultaneous equations in two unknowns, 
   8.123 +     * which we solve by exhaustive search.
   8.124 +     */
   8.125 +    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
   8.126 +    {
   8.127 +        v_start          = vkern_start & ~((1<<22)-1);
   8.128 +        vinitrd_start    = round_pgup(vkern_end);
   8.129 +        vinitrd_end      = vinitrd_start + initrd_len;
   8.130 +        vphysmap_start   = round_pgup(vinitrd_end);
   8.131 +        vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
   8.132 +        vpt_start        = round_pgup(vphysmap_end);
   8.133 +        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
   8.134 +        vstartinfo_start = vpt_end;
   8.135 +        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
   8.136 +        vstack_start     = vstartinfo_end;
   8.137 +        vstack_end       = vstack_start + PAGE_SIZE;
   8.138 +        v_end            = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
   8.139 +        if ( (v_end - vstack_end) < (512 << 10) )
   8.140 +            v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
   8.141 +        if ( (((v_end - v_start) >> L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
   8.142 +            break;
   8.143 +    }
   8.144 +
   8.145 +    if ( (v_end - v_start) > (nr_pages * PAGE_SIZE) )
   8.146 +    {
   8.147 +        printf("Initial guest OS requires too much space\n"
   8.148 +               "(%luMB is greater than %luMB limit)\n",
   8.149 +               (v_end-v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
   8.150 +        goto error_out;
   8.151 +    }
   8.152 +
   8.153 +    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
   8.154 +           " Loaded kernel: %08lx->%08lx\n"
   8.155 +           " Init. ramdisk: %08lx->%08lx\n"
   8.156 +           " Phys-Mach map: %08lx->%08lx\n"
   8.157 +           " Page tables:   %08lx->%08lx\n"
   8.158 +           " Start info:    %08lx->%08lx\n"
   8.159 +           " Boot stack:    %08lx->%08lx\n"
   8.160 +           " TOTAL:         %08lx->%08lx\n",
   8.161 +           vkern_start, vkern_end, 
   8.162 +           vinitrd_start, vinitrd_end,
   8.163 +           vphysmap_start, vphysmap_end,
   8.164 +           vpt_start, vpt_end,
   8.165 +           vstartinfo_start, vstartinfo_end,
   8.166 +           vstack_start, vstack_end,
   8.167 +           v_start, v_end);
   8.168 +    printf(" ENTRY ADDRESS: %08lx\n", vkern_entry);
   8.169  
   8.170      memset(builddomain, 0, sizeof(*builddomain));
   8.171  
   8.172      if ( (pm_handle = init_pfn_mapper()) < 0 )
   8.173          goto error_out;
   8.174  
   8.175 -    if ( (page_array = malloc(tot_pages * sizeof(unsigned long))) == NULL )
   8.176 +    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
   8.177      {
   8.178          PERROR("Could not allocate memory");
   8.179          goto error_out;
   8.180      }
   8.181  
   8.182 -    if ( get_pfn_list(xc_handle, dom, page_array, tot_pages) != tot_pages )
   8.183 +    if ( get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
   8.184      {
   8.185          PERROR("Could not get the page frame list");
   8.186          goto error_out;
   8.187      }
   8.188  
   8.189 -    /* Load the guest OS image. Let it take no more than 1/2 memory.*/
   8.190 -    for ( i = 0; i < ((tot_pages/2)*PAGE_SIZE); i += PAGE_SIZE )
   8.191 -    {
   8.192 -        char page[PAGE_SIZE];
   8.193 -        int size;
   8.194 -        if ( (size = gzread(kernel_gfd, page, PAGE_SIZE)) == -1 )
   8.195 -        {
   8.196 -            PERROR("Error reading kernel image, could not"
   8.197 -                   " read the whole image.");
   8.198 -            goto error_out;
   8.199 -        }
   8.200 -        if ( size == 0 )
   8.201 -            goto kernel_copied;
   8.202 -        copy_to_domain_page(pm_handle, page_array[i>>PAGE_SHIFT], page);
   8.203 -    }
   8.204 -    ERROR("Kernel too big to safely fit in domain memory");
   8.205 -    goto error_out;
   8.206 -
   8.207 - kernel_copied:
   8.208 -    /* ksize is kernel-image size rounded up to a page boundary. */
   8.209 -    ksize = i;
   8.210 +    loadelfimage(image, pm_handle, page_array, v_start);
   8.211  
   8.212      /* Load the initial ramdisk image. */
   8.213 -    if ( initrd_gfd )
   8.214 +    if ( initrd_len != 0 )
   8.215      {
   8.216 -        int size;
   8.217 -
   8.218 -        for ( j=0, i=ksize; i < ((tot_pages/2) * PAGE_SIZE); i += PAGE_SIZE )
   8.219 +        for ( i = (vinitrd_start - v_start); 
   8.220 +              i < (vinitrd_end - v_start); i += PAGE_SIZE )
   8.221          {
   8.222              char page[PAGE_SIZE];
   8.223 -            if ( (size = gzread(initrd_gfd, page, PAGE_SIZE)) == -1 )
   8.224 +            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
   8.225              {
   8.226                  PERROR("Error reading initrd image, could not");
   8.227                  goto error_out;
   8.228              }
   8.229 -            j += size;
   8.230 -            if ( size > 0 )
   8.231 -                copy_to_domain_page(pm_handle, 
   8.232 -                                    page_array[i>>PAGE_SHIFT], page);
   8.233 -            if ( size < PAGE_SIZE )
   8.234 -                goto initrd_copied;
   8.235 +            copy_to_domain_page(pm_handle, 
   8.236 +                                page_array[i>>PAGE_SHIFT], page);
   8.237          }
   8.238 -        ERROR("Kernel/initrd too big to safely fit in domain memory");
   8.239 -        goto error_out;
   8.240 -        
   8.241 -    initrd_copied: 
   8.242 -        initrd_addr = virt_load_addr + ksize;
   8.243 -        initrd_len  = j;
   8.244      }
   8.245  
   8.246 -    alloc_index = tot_pages - 1;
   8.247 -
   8.248 -    /* Count bottom-level PTs, rounding up. */
   8.249 -    num_pt_pages = (l1_table_offset(virt_load_addr) + tot_pages + 1023) / 1024;
   8.250 -
   8.251 -    /* We must also count the page directory. */
   8.252 -    num_pt_pages++;
   8.253 -
   8.254 -    /* Index of first PT page. */
   8.255 -    pt_start = tot_pages - num_pt_pages;
   8.256 -
   8.257 -    /*
   8.258 -     * First allocate page for page dir. Allocation goes backwards from the end
   8.259 -     * of the allocated physical address space.
   8.260 -     */
   8.261 -    l2tab = page_array[alloc_index] << PAGE_SHIFT;
   8.262 -    alloc_index--;
   8.263 -    builddomain->ctxt.pt_base = l2tab;
   8.264 -
   8.265      if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
   8.266          goto error_out;
   8.267  
   8.268 +    /* First allocate page for page dir. */
   8.269 +    ppt_alloc = (vpt_start - v_start) >> PAGE_SHIFT;
   8.270 +    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
   8.271 +    builddomain->ctxt.pt_base = l2tab;
   8.272 +
   8.273      /* Initialise the page tables. */
   8.274      if ( (vl2tab = map_pfn_writeable(pm_handle, l2tab >> PAGE_SHIFT)) == NULL )
   8.275          goto error_out;
   8.276      memset(vl2tab, 0, PAGE_SIZE);
   8.277 -    vl2e = &vl2tab[l2_table_offset(virt_load_addr)];
   8.278 -    for ( count = 0; count < tot_pages; count++ )
   8.279 +    vl2e = &vl2tab[l2_table_offset(v_start)];
   8.280 +    for ( count = 0; count < ((v_end-v_start)>>PAGE_SHIFT); count++ )
   8.281      {    
   8.282          if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
   8.283          {
   8.284 -            l1tab = page_array[alloc_index--] << PAGE_SHIFT;
   8.285 +            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
   8.286              if ( vl1tab != NULL )
   8.287                  unmap_pfn(pm_handle, vl1tab);
   8.288              if ( (vl1tab = map_pfn_writeable(pm_handle, 
   8.289                                               l1tab >> PAGE_SHIFT)) == NULL )
   8.290                  goto error_out;
   8.291              memset(vl1tab, 0, PAGE_SIZE);
   8.292 -            vl1e = &vl1tab[l1_table_offset(virt_load_addr + 
   8.293 -                                           (count<<PAGE_SHIFT))];
   8.294 +            vl1e = &vl1tab[l1_table_offset(v_start + (count<<PAGE_SHIFT))];
   8.295              *vl2e++ = l1tab | L2_PROT;
   8.296          }
   8.297  
   8.298          *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
   8.299 -        if ( count >= pt_start )
   8.300 +        if ( (count >= ((vpt_start-v_start)>>PAGE_SHIFT)) && 
   8.301 +             (count <  ((vpt_end  -v_start)>>PAGE_SHIFT)) )
   8.302              *vl1e &= ~_PAGE_RW;
   8.303          vl1e++;
   8.304 +    }
   8.305 +    unmap_pfn(pm_handle, vl1tab);
   8.306 +    unmap_pfn(pm_handle, vl2tab);
   8.307  
   8.308 +    /* Write the phys->machine and machine->phys table entries. */
   8.309 +    physmap_pfn = (vphysmap_start - v_start) >> PAGE_SHIFT;
   8.310 +    physmap = physmap_e = 
   8.311 +        map_pfn_writeable(pm_handle, page_array[physmap_pfn++]);
   8.312 +    for ( count = 0; count < nr_pages; count++ )
   8.313 +    {
   8.314          if ( add_mmu_update(xc_handle, mmu,
   8.315                              (page_array[count] << PAGE_SHIFT) | 
   8.316                              MMU_MACHPHYS_UPDATE, count) )
   8.317              goto error_out;
   8.318 +        *physmap_e++ = page_array[count];
   8.319 +        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
   8.320 +        {
   8.321 +            unmap_pfn(pm_handle, physmap);
   8.322 +            physmap = physmap_e = 
   8.323 +                map_pfn_writeable(pm_handle, page_array[physmap_pfn++]);
   8.324 +        }
   8.325      }
   8.326 -    unmap_pfn(pm_handle, vl1tab);
   8.327 -    unmap_pfn(pm_handle, vl2tab);
   8.328 -
   8.329 +    unmap_pfn(pm_handle, physmap);
   8.330 +    
   8.331      /*
   8.332       * Pin down l2tab addr as page dir page - causes hypervisor to provide
   8.333       * correct protection for the page
   8.334 @@ -230,17 +263,20 @@ static int setup_guestos(int xc_handle,
   8.335                          l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
   8.336          goto error_out;
   8.337  
   8.338 -    *virt_startinfo_addr =
   8.339 -        virt_load_addr + ((alloc_index-1) << PAGE_SHIFT);
   8.340 -
   8.341 -    start_info = map_pfn_writeable(pm_handle, page_array[alloc_index-1]);
   8.342 +    start_info = map_pfn_writeable(
   8.343 +        pm_handle, page_array[(vstartinfo_start-v_start)>>PAGE_SHIFT]);
   8.344      memset(start_info, 0, sizeof(*start_info));
   8.345 -    start_info->pt_base     = virt_load_addr + ((tot_pages-1) << PAGE_SHIFT);
   8.346 -    start_info->mod_start   = initrd_addr;
   8.347 -    start_info->mod_len     = initrd_len;
   8.348 -    start_info->nr_pages    = tot_pages;
   8.349 -    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
   8.350 -    start_info->flags       = 0;
   8.351 +    start_info->nr_pages     = nr_pages;
   8.352 +    start_info->shared_info  = shared_info_frame << PAGE_SHIFT;
   8.353 +    start_info->flags        = 0;
   8.354 +    start_info->pt_base      = vpt_start;
   8.355 +    start_info->nr_pt_frames = nr_pt_pages;
   8.356 +    start_info->mfn_list     = vphysmap_start;
   8.357 +    if ( initrd_len != 0 )
   8.358 +    {
   8.359 +        start_info->mod_start    = vinitrd_start;
   8.360 +        start_info->mod_len      = initrd_len;
   8.361 +    }
   8.362      strncpy(start_info->cmd_line, cmdline, MAX_CMD_LEN);
   8.363      start_info->cmd_line[MAX_CMD_LEN-1] = '\0';
   8.364      unmap_pfn(pm_handle, start_info);
   8.365 @@ -248,7 +284,9 @@ static int setup_guestos(int xc_handle,
   8.366      /* shared_info page starts its life empty. */
   8.367      shared_info = map_pfn_writeable(pm_handle, shared_info_frame);
   8.368      memset(shared_info, 0, PAGE_SIZE);
   8.369 -    shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */
   8.370 +    /* Mask all upcalls... */
   8.371 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   8.372 +        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
   8.373      unmap_pfn(pm_handle, shared_info);
   8.374  
   8.375      /* Send the page update requests down to the hypervisor. */
   8.376 @@ -258,6 +296,10 @@ static int setup_guestos(int xc_handle,
   8.377      free(mmu);
   8.378      (void)close_pfn_mapper(pm_handle);
   8.379      free(page_array);
   8.380 +
   8.381 +    *pvsi = vstartinfo_start;
   8.382 +    *pvke = vkern_entry;
   8.383 +
   8.384      return 0;
   8.385  
   8.386   error_out:
   8.387 @@ -270,6 +312,69 @@ static int setup_guestos(int xc_handle,
   8.388      return -1;
   8.389  }
   8.390  
   8.391 +static unsigned long get_filesz(int fd)
   8.392 +{
   8.393 +    u16 sig = 0; /* stays 0 (never the gzip magic) if the 2-byte read fails or is short */
   8.394 +    u32 _sz = 0;
   8.395 +    unsigned long sz;
   8.396 +    /* Result: the on-disk length, or for gzip input the 32-bit value stored in its last 4 bytes. */
   8.397 +    lseek(fd, 0, SEEK_SET);
   8.398 +    read(fd, &sig, sizeof(sig));
   8.399 +    sz = lseek(fd, 0, SEEK_END); /* NOTE(review): an lseek failure is not detected here */
   8.400 +    if ( sig == 0x8b1f ) /* GZIP signature? */
   8.401 +    {
   8.402 +        lseek(fd, -4, SEEK_END);
   8.403 +        read(fd, &_sz, 4); /* NOTE(review): read in host byte order; on failure _sz stays 0 */
   8.404 +        sz = _sz;
   8.405 +    }
   8.406 +    lseek(fd, 0, SEEK_SET); /* leave the descriptor rewound for the caller */
   8.407 +
   8.408 +    return sz;
   8.409 +}
   8.410 +/* Read the whole kernel image through zlib into a malloc()ed buffer; returns NULL on failure. */
   8.411 +static char *read_kernel_image(const char *filename, unsigned long *size)
   8.412 +{
   8.413 +    int kernel_fd = -1;
   8.414 +    gzFile kernel_gfd = NULL;
   8.415 +    char *image = NULL;
   8.416 +    int bytes; /* gzread() returns int, negative on error */
   8.417 +
   8.418 +    if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
   8.419 +    {
   8.420 +        PERROR("Could not open kernel image");
   8.421 +        goto out;
   8.422 +    }
   8.423 +
   8.424 +    *size = get_filesz(kernel_fd); /* note: *size is written even if a later step fails */
   8.425 +
   8.426 +    if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
   8.427 +    {
   8.428 +        PERROR("Could not allocate decompression state for kernel image");
   8.429 +        goto out;
   8.430 +    }
   8.431 +
   8.432 +    if ( (image = malloc(*size)) == NULL )
   8.433 +    {
   8.434 +        PERROR("Could not allocate memory for kernel image");
   8.435 +        goto out;
   8.436 +    }
   8.437 +
   8.438 +    if ( (unsigned long)(bytes = gzread(kernel_gfd, image, *size)) != *size )
   8.439 +    {
   8.440 +        PERROR("Error reading kernel image, could not"
   8.441 +               " read the whole image (%d != %lu).", bytes, *size);
   8.442 +        free(image);
   8.443 +        image = NULL; /* report the short/failed read as NULL */
   8.444 +    }
   8.445 +
   8.446 + out:
   8.447 +    if ( kernel_gfd != NULL )
   8.448 +        gzclose(kernel_gfd); /* per zlib, this also closes the fd given to gzdopen() */
   8.449 +    else if ( kernel_fd >= 0 )
   8.450 +        close(kernel_fd); /* no gzFile was created, so close the raw fd ourselves */
   8.451 +    return image;
   8.452 +}
   8.453 +
   8.454  int xc_linux_build(int xc_handle,
   8.455                     u64 domid,
   8.456                     const char *image_name,
   8.457 @@ -277,67 +382,39 @@ int xc_linux_build(int xc_handle,
   8.458                     const char *cmdline)
   8.459  {
   8.460      dom0_op_t launch_op, op;
   8.461 -    unsigned long load_addr;
   8.462 -    long tot_pages;
   8.463 -    int kernel_fd = -1, initrd_fd = -1;
   8.464 -    gzFile kernel_gfd = NULL, initrd_gfd = NULL;
   8.465 +    int initrd_fd = -1;
   8.466 +    gzFile initrd_gfd = NULL;
   8.467      int rc, i;
   8.468      full_execution_context_t *ctxt;
   8.469 -    unsigned long virt_startinfo_addr;
   8.470 +    unsigned long nr_pages;
   8.471 +    char         *image = NULL;
   8.472 +    unsigned long image_size, initrd_size=0;
   8.473 +    unsigned long vstartinfo_start, vkern_entry;
   8.474  
   8.475 -    if ( (tot_pages = get_tot_pages(xc_handle, domid)) < 0 )
   8.476 +    if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
   8.477      {
   8.478          PERROR("Could not find total pages for domain");
   8.479 -        return 1;
   8.480 -    }
   8.481 -
   8.482 -    kernel_fd = open(image_name, O_RDONLY);
   8.483 -    if ( kernel_fd < 0 )
   8.484 -    {
   8.485 -        PERROR("Could not open kernel image");
   8.486 -        return 1;
   8.487 -    }
   8.488 -
   8.489 -    if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
   8.490 -    {
   8.491 -        PERROR("Could not allocate decompression state for state file");
   8.492 -        close(kernel_fd);
   8.493 -        return 1;
   8.494 -    }
   8.495 -
   8.496 -    rc = read_kernel_header(kernel_gfd,
   8.497 -                            tot_pages << (PAGE_SHIFT - 10), 
   8.498 -                            &load_addr);
   8.499 -    if ( rc < 0 )
   8.500 -        goto error_out;
   8.501 -    
   8.502 -    if ( (load_addr & (PAGE_SIZE-1)) != 0 )
   8.503 -    {
   8.504 -        ERROR("We can only deal with page-aligned load addresses");
   8.505          goto error_out;
   8.506      }
   8.507  
   8.508 -    if ( (load_addr + (tot_pages << PAGE_SHIFT)) > HYPERVISOR_VIRT_START )
   8.509 -    {
   8.510 -        ERROR("Cannot map all domain memory without hitting Xen space");
   8.511 +    if ( (image = read_kernel_image(image_name, &image_size)) == NULL )
   8.512          goto error_out;
   8.513 -    }
   8.514  
   8.515      if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
   8.516      {
   8.517 -        initrd_fd = open(ramdisk_name, O_RDONLY);
   8.518 -        if ( initrd_fd < 0 )
   8.519 +        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
   8.520          {
   8.521              PERROR("Could not open the initial ramdisk image");
   8.522              goto error_out;
   8.523          }
   8.524  
   8.525 +        initrd_size = get_filesz(initrd_fd);
   8.526 +
   8.527          if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
   8.528          {
   8.529              PERROR("Could not allocate decompression state for initrd");
   8.530              goto error_out;
   8.531          }
   8.532 -
   8.533      }
   8.534  
   8.535      op.cmd = DOM0_GETDOMAININFO;
   8.536 @@ -355,23 +432,22 @@ int xc_linux_build(int xc_handle,
   8.537          goto error_out;
   8.538      }
   8.539  
   8.540 -    if ( setup_guestos(xc_handle, domid, kernel_gfd, initrd_gfd, tot_pages,
   8.541 -                       &virt_startinfo_addr,
   8.542 -                       load_addr, &launch_op.u.builddomain, cmdline,
   8.543 +    if ( setup_guestos(xc_handle, domid, image, image_size, 
   8.544 +                       initrd_gfd, initrd_size, nr_pages, 
   8.545 +                       &vstartinfo_start, &vkern_entry,
   8.546 +                       &launch_op.u.builddomain, cmdline,
   8.547                         op.u.getdomaininfo.shared_info_frame) < 0 )
   8.548      {
   8.549          ERROR("Error constructing guest OS");
   8.550          goto error_out;
   8.551      }
   8.552  
   8.553 -    if ( kernel_fd >= 0 )
   8.554 -        close(kernel_fd);
   8.555 -    if( kernel_gfd )
   8.556 -        gzclose(kernel_gfd);
   8.557      if ( initrd_fd >= 0 )
   8.558          close(initrd_fd);
   8.559 -    if( initrd_gfd )
   8.560 +    if ( initrd_gfd )
   8.561          gzclose(initrd_gfd);
   8.562 +    if ( image != NULL )
   8.563 +        free(image);
   8.564  
   8.565      ctxt = &launch_op.u.builddomain.ctxt;
   8.566  
   8.567 @@ -392,9 +468,9 @@ int xc_linux_build(int xc_handle,
   8.568      ctxt->cpu_ctxt.gs = FLAT_GUESTOS_DS;
   8.569      ctxt->cpu_ctxt.ss = FLAT_GUESTOS_DS;
   8.570      ctxt->cpu_ctxt.cs = FLAT_GUESTOS_CS;
   8.571 -    ctxt->cpu_ctxt.eip = load_addr;
   8.572 -    ctxt->cpu_ctxt.esp = virt_startinfo_addr;
   8.573 -    ctxt->cpu_ctxt.esi = virt_startinfo_addr;
   8.574 +    ctxt->cpu_ctxt.eip = vkern_entry;
   8.575 +    ctxt->cpu_ctxt.esp = vstartinfo_start;
   8.576 +    ctxt->cpu_ctxt.esi = vstartinfo_start;
   8.577      ctxt->cpu_ctxt.eflags = (1<<9) | (1<<2);
   8.578  
   8.579      /* FPU is set up to default initial state. */
   8.580 @@ -416,7 +492,7 @@ int xc_linux_build(int xc_handle,
   8.581  
   8.582      /* Ring 1 stack is the initial stack. */
   8.583      ctxt->guestos_ss  = FLAT_GUESTOS_DS;
   8.584 -    ctxt->guestos_esp = virt_startinfo_addr;
   8.585 +    ctxt->guestos_esp = vstartinfo_start;
   8.586  
   8.587      /* No debugging. */
   8.588      memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
   8.589 @@ -436,14 +512,152 @@ int xc_linux_build(int xc_handle,
   8.590      return rc;
   8.591  
   8.592   error_out:
   8.593 -    if ( kernel_fd >= 0 )
   8.594 -        close(kernel_fd);
   8.595 -    if( kernel_gfd )
   8.596 -        gzclose(kernel_gfd);
   8.597 -    if ( initrd_fd >= 0 )
   8.598 +    if ( initrd_gfd != NULL )
   8.599 +        gzclose(initrd_gfd);
   8.600 +    else if ( initrd_fd >= 0 )
   8.601          close(initrd_fd);
   8.602 -    if( initrd_gfd )
   8.603 -        gzclose(initrd_gfd);
   8.604 +    if ( image != NULL )
   8.605 +        free(image);
   8.606  
   8.607      return -1;
   8.608  }
   8.609 +/* Non-zero for a PT_LOAD program header that is writable and/or executable. */
   8.610 +static inline int is_loadable_phdr(Elf_Phdr *phdr)
   8.611 +{
   8.612 +    return ((phdr->p_type == PT_LOAD) &&
   8.613 +            ((phdr->p_flags & (PF_W|PF_X)) != 0)); /* PT_LOAD with neither flag is rejected */
   8.614 +}
   8.615 +/* Validate a Xen-ELF image; report its virtual start, end and entry. Returns 0 or -EINVAL. */
   8.616 +static int readelfimage_base_and_size(char *elfbase, 
   8.617 +                                      unsigned long elfsize,
   8.618 +                                      unsigned long *pkernstart,
   8.619 +                                      unsigned long *pkernend,
   8.620 +                                      unsigned long *pkernentry)
   8.621 +{
   8.622 +    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
   8.623 +    Elf_Phdr *phdr;
   8.624 +    Elf_Shdr *shdr;
   8.625 +    unsigned long kernstart = ~0UL, kernend=0UL; /* start > end until a segment is seen */
   8.626 +    char *shstrtab, *guestinfo;
   8.627 +    int h;
   8.628 +
   8.629 +    if ( (elfsize < sizeof(*ehdr)) || !IS_ELF(*ehdr) ) /* size first: never read a header that isn't there */
   8.630 +    {
   8.631 +        ERROR("Kernel image does not have an ELF header.");
   8.632 +        return -EINVAL;
   8.633 +    }
   8.634 +
   8.635 +    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
   8.636 +    {
   8.637 +        ERROR("ELF program headers extend beyond end of image.");
   8.638 +        return -EINVAL;
   8.639 +    }
   8.640 +
   8.641 +    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
   8.642 +    {
   8.643 +        ERROR("ELF section headers extend beyond end of image.");
   8.644 +        return -EINVAL;
   8.645 +    }
   8.646 +
   8.647 +    /* Find the section-header strings table. */
   8.648 +    if ( (ehdr->e_shstrndx == SHN_UNDEF) || (ehdr->e_shstrndx >= ehdr->e_shnum) ) /* index must lie in the table checked above */
   8.649 +    {
   8.650 +        ERROR("ELF image has no section-header strings table (shstrtab).");
   8.651 +        return -EINVAL;
   8.652 +    }
   8.653 +    shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff + 
   8.654 +                        (ehdr->e_shstrndx*ehdr->e_shentsize));
   8.655 +    shstrtab = elfbase + shdr->sh_offset;
   8.656 +    /* NOTE(review): sh_offset and sh_name are still used without being checked against elfsize. */
   8.657 +    /* Find the special '__xen_guest' section and check its contents. */
   8.658 +    for ( h = 0; h < ehdr->e_shnum; h++ )
   8.659 +    {
   8.660 +        shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff + (h*ehdr->e_shentsize));
   8.661 +        if ( strcmp(&shstrtab[shdr->sh_name], "__xen_guest") != 0 )
   8.662 +            continue;
   8.663 +        guestinfo = elfbase + shdr->sh_offset;
   8.664 +        if ( (strstr(guestinfo, "GUEST_OS=linux") == NULL) ||
   8.665 +             (strstr(guestinfo, "XEN_VER=1.3") == NULL) )
   8.666 +        {
   8.667 +            ERROR("Will only load Linux images built for Xen v1.3");
   8.668 +            ERROR("Actually saw: '%s'", guestinfo);
   8.669 +            return -EINVAL;
   8.670 +        }
   8.671 +        break;
   8.672 +    }
   8.673 +    if ( h == ehdr->e_shnum ) /* loop ran to completion: no matching section */
   8.674 +    {
   8.675 +        ERROR("Not a Xen-ELF image: '__xen_guest' section not found.");
   8.676 +        return -EINVAL;
   8.677 +    }
   8.678 +    /* Span of the loadable segments: lowest p_vaddr up to highest p_vaddr + p_memsz. */
   8.679 +    for ( h = 0; h < ehdr->e_phnum; h++ ) 
   8.680 +    {
   8.681 +        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
   8.682 +        if ( !is_loadable_phdr(phdr) )
   8.683 +            continue;
   8.684 +        if ( phdr->p_vaddr < kernstart )
   8.685 +            kernstart = phdr->p_vaddr;
   8.686 +        if ( (phdr->p_vaddr + phdr->p_memsz) > kernend )
   8.687 +            kernend = phdr->p_vaddr + phdr->p_memsz;
   8.688 +    }
   8.689 +
   8.690 +    if ( (kernstart > kernend) || /* also true when no loadable segment was found */
   8.691 +         (ehdr->e_entry < kernstart) || 
   8.692 +         (ehdr->e_entry > kernend) )
   8.693 +    {
   8.694 +        ERROR("Malformed ELF image.");
   8.695 +        return -EINVAL;
   8.696 +    }
   8.697 +
   8.698 +    *pkernstart = kernstart; /* outputs are written only on success */
   8.699 +    *pkernend   = kernend;
   8.700 +    *pkernentry = ehdr->e_entry;
   8.701 +
   8.702 +    return 0;
   8.703 +}
   8.704 +/* Copy every loadable segment into the frames listed in parray[]; 0 on success, -1 if a map fails. */
   8.705 +static int loadelfimage(char *elfbase, int pmh, unsigned long *parray,
   8.706 +                        unsigned long vstart)
   8.707 +{
   8.708 +    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
   8.709 +    Elf_Phdr *phdr;
   8.710 +    int h;
   8.711 +
   8.712 +    char         *va; /* kept at the pointer map_pfn_writeable() returned, so it can be unmapped as-is */
   8.713 +    unsigned long pa, done, chunksz; /* pa: byte offset from vstart; pa>>PAGE_SHIFT indexes parray[] */
   8.714 +
   8.715 +    for ( h = 0; h < ehdr->e_phnum; h++ ) 
   8.716 +    {
   8.717 +        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
   8.718 +        if ( !is_loadable_phdr(phdr) )
   8.719 +            continue;
   8.720 +        /* File-backed part: copy p_filesz bytes, never crossing a page boundary in one pass. */
   8.721 +        for ( done = 0; done < phdr->p_filesz; done += chunksz )
   8.722 +        {
   8.723 +            pa = (phdr->p_vaddr + done) - vstart;
   8.724 +            va = map_pfn_writeable(pmh, parray[pa>>PAGE_SHIFT]);
   8.725 +            if ( va == NULL ) return -1; /* mapping failed; nothing to unmap */
   8.726 +            chunksz = phdr->p_filesz - done;
   8.727 +            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
   8.728 +                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
   8.729 +            memcpy(va + (pa & (PAGE_SIZE-1)), elfbase + phdr->p_offset + done, chunksz);
   8.730 +            unmap_pfn(pmh, va); /* unmodified mapping address, not an interior pointer */
   8.731 +        }
   8.732 +        /* Remainder up to p_memsz has no file data: zero-fill it the same way. */
   8.733 +        for ( ; done < phdr->p_memsz; done += chunksz )
   8.734 +        {
   8.735 +            pa = (phdr->p_vaddr + done) - vstart;
   8.736 +            va = map_pfn_writeable(pmh, parray[pa>>PAGE_SHIFT]);
   8.737 +            if ( va == NULL ) return -1; /* mapping failed; nothing to unmap */
   8.738 +            chunksz = phdr->p_memsz - done;
   8.739 +            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
   8.740 +                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
   8.741 +            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
   8.742 +            unmap_pfn(pmh, va);
   8.743 +        }
   8.744 +    }
   8.745 +
   8.746 +    return 0;
   8.747 +}
   8.748 +
     9.1 --- a/tools/xc/lib/xc_misc.c	Fri Apr 02 13:58:27 2004 +0000
     9.2 +++ b/tools/xc/lib/xc_misc.c	Fri Apr 02 14:03:03 2004 +0000
     9.3 @@ -68,3 +68,20 @@ int xc_physinfo(int xc_handle,
     9.4      return 0;
     9.5  }
     9.6  
     9.7 +/* Issue DOM0_SCHED_ID and store the scheduler ID from the reply in *sched_id. */
     9.8 +int xc_sched_id(int xc_handle,
     9.9 +                int *sched_id)
    9.10 +{
    9.11 +    int ret;
    9.12 +    dom0_op_t op;
    9.13 +    
    9.14 +    op.cmd = DOM0_SCHED_ID;
    9.15 +    op.interface_version = DOM0_INTERFACE_VERSION;
    9.16 +    
    9.17 +    if((ret = do_dom0_op(xc_handle, &op))) return ret; /* non-zero result is passed up; *sched_id untouched */
    9.18 +    
    9.19 +    *sched_id = op.u.sched_id.sched_id;
    9.20 +    
    9.21 +    return 0;
    9.22 +}
    9.23 +
    10.1 --- a/tools/xc/lib/xc_netbsd_build.c	Fri Apr 02 13:58:27 2004 +0000
    10.2 +++ b/tools/xc/lib/xc_netbsd_build.c	Fri Apr 02 14:03:03 2004 +0000
    10.3 @@ -75,7 +75,7 @@ static int setup_guestos(int xc_handle,
    10.4      shared_info_t *shared_info;
    10.5      unsigned long ksize;
    10.6      mmu_t *mmu = NULL;
    10.7 -    int pm_handle;
    10.8 +    int pm_handle, i;
    10.9  
   10.10      memset(builddomain, 0, sizeof(*builddomain));
   10.11  
   10.12 @@ -183,7 +183,9 @@ static int setup_guestos(int xc_handle,
   10.13      /* shared_info page starts its life empty. */
   10.14      shared_info = map_pfn_writeable(pm_handle, shared_info_frame);
   10.15      memset(shared_info, 0, PAGE_SIZE);
   10.16 -    shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */
   10.17 +    /* Mask all upcalls... */
   10.18 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   10.19 +        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
   10.20      unmap_pfn(pm_handle, shared_info);
   10.21  
   10.22      /* Send the page update requests down to the hypervisor. */
    11.1 --- a/tools/xc/lib/xc_private.h	Fri Apr 02 13:58:27 2004 +0000
    11.2 +++ b/tools/xc/lib/xc_private.h	Fri Apr 02 14:03:03 2004 +0000
    11.3 @@ -55,11 +55,12 @@ typedef unsigned long l2_pgentry_t;
    11.4  #define l2_table_offset(_a) \
    11.5            ((_a) >> L2_PAGETABLE_SHIFT)
    11.6  
    11.7 -#define ERROR(_m)  \
    11.8 -    fprintf(stderr, "ERROR: %s\n", (_m))
    11.9 +#define ERROR(_m, _a...)  \
   11.10 +    fprintf(stderr, "ERROR: " _m "\n" , ## _a )
   11.11  
   11.12 -#define PERROR(_m) \
   11.13 -    fprintf(stderr, "ERROR: %s (%d = %s)\n", (_m), errno, strerror(errno))
   11.14 +#define PERROR(_m, _a...) \
   11.15 +    fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a , \
   11.16 +            errno, strerror(errno))
   11.17  
   11.18  static inline int do_privcmd(int xc_handle,
   11.19                               unsigned int cmd, 
    12.1 --- a/tools/xc/lib/xc_rrobin.c	Fri Apr 02 13:58:27 2004 +0000
    12.2 +++ b/tools/xc/lib/xc_rrobin.c	Fri Apr 02 14:03:03 2004 +0000
    12.3 @@ -11,11 +11,27 @@
     12.4  int xc_rrobin_global_set(int xc_handle, u64 slice)
     12.5  {
     12.6      dom0_op_t op;
     12.7 +    op.cmd = DOM0_SCHEDCTL; /* Set the Round Robin slice via DOM0_SCHEDCTL. */
     12.8 +    op.u.schedctl.sched_id = SCHED_RROBIN;
     12.9 +    op.u.schedctl.direction = SCHED_INFO_PUT;
    12.10 +    /* Payload is the caller's slice; do_dom0_op()'s result is returned as-is. */
    12.11 +    op.u.schedctl.u.rrobin.slice = slice;
    12.12 +    return do_dom0_op(xc_handle, &op);
    12.13 +}
   12.14 +
   12.15 +
   12.16 +int xc_rrobin_global_get(int xc_handle, u64 *slice)
   12.17 +{
   12.18 +    dom0_op_t op;
   12.19 +    int ret;
   12.20  
   12.21      op.cmd = DOM0_SCHEDCTL;
   12.22      op.u.schedctl.sched_id = SCHED_RROBIN;
   12.23 +    op.u.schedctl.direction = SCHED_INFO_GET;
   12.24  
   12.25 -    op.u.schedctl.u.rrobin.slice = slice;
   12.26 +    ret = do_dom0_op(xc_handle, &op);
   12.27  
   12.28 -    return do_dom0_op(xc_handle, &op);
   12.29 +    *slice = op.u.schedctl.u.rrobin.slice;
   12.30 +
   12.31 +    return ret;
   12.32  }
    13.1 --- a/tools/xc/py/Xc.c	Fri Apr 02 13:58:27 2004 +0000
    13.2 +++ b/tools/xc/py/Xc.c	Fri Apr 02 14:03:03 2004 +0000
    13.3 @@ -281,6 +281,23 @@ static PyObject *pyxc_bvtsched_global_se
    13.4      return zero;
    13.5  }
    13.6  
    13.7 +static PyObject *pyxc_bvtsched_global_get(PyObject *self,
    13.8 +					  PyObject *args,
    13.9 +					  PyObject *kwds)
   13.10 +{
   13.11 +    XcObject *xc = (XcObject *)self;
   13.12 +    
   13.13 +    unsigned long ctx_allow;
   13.14 +    
   13.15 +    if ( !PyArg_ParseTuple(args, "") )
   13.16 +        return NULL;
   13.17 +    
   13.18 +    if ( xc_bvtsched_global_get(xc->xc_handle, &ctx_allow) != 0 )
   13.19 +        return PyErr_SetFromErrno(xc_error);
   13.20 +    
   13.21 +    return Py_BuildValue("s:l", "ctx_allow", ctx_allow);
   13.22 +}
   13.23 +
   13.24  static PyObject *pyxc_bvtsched_domain_set(PyObject *self,
   13.25                                            PyObject *args,
   13.26                                            PyObject *kwds)
   13.27 @@ -305,6 +322,31 @@ static PyObject *pyxc_bvtsched_domain_se
   13.28      return zero;
   13.29  }
   13.30  
    13.31 +static PyObject *pyxc_bvtsched_domain_get(PyObject *self,
    13.32 +                                          PyObject *args,
    13.33 +                                          PyObject *kwds)
    13.34 +{
    13.35 +    XcObject *xc = (XcObject *)self;
    13.36 +    u64 dom;
    13.37 +    unsigned long mcuadv, warp, warpl, warpu;
    13.38 +    /* Python binding: look up one domain's BVT parameters, returned as a dict. */
    13.39 +    static char *kwd_list[] = { "dom", NULL };
    13.40 +    /* Single argument "dom", parsed with the 64-bit 'L' format unit. */
    13.41 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "L", kwd_list, &dom) )
    13.42 +        return NULL;
    13.43 +    /* A non-zero result is reported as xc_error built from errno. */
    13.44 +    if ( xc_bvtsched_domain_get(xc->xc_handle, dom, &mcuadv, &warp,
    13.45 +                                &warpl, &warpu) != 0 )
    13.46 +        return PyErr_SetFromErrno(xc_error);
    13.47 +    /* Keys: domain ('L'), then mcuadv, warp, warpl, warpu (each 'l'). */
    13.48 +    return Py_BuildValue("{s:L,s:l,s:l,s:l,s:l}",
    13.49 +                         "domain", dom,
    13.50 +                         "mcuadv", mcuadv,
    13.51 +                         "warp",   warp,
    13.52 +                         "warpl",  warpl,
    13.53 +                         "warpu",  warpu);
    13.54 +}
   13.55 +
   13.56  static PyObject *pyxc_vif_scheduler_set(PyObject *self,
   13.57                                          PyObject *args,
   13.58                                          PyObject *kwds)
   13.59 @@ -879,22 +921,52 @@ static PyObject *pyxc_atropos_domain_set
   13.60                                           PyObject *kwds)
   13.61  {
   13.62      XcObject *xc = (XcObject *)self;
   13.63 +    u64 domid;
   13.64 +    u64 period, slice, latency;
   13.65      int xtratime;
   13.66 -    u64 domid;
   13.67  
   13.68 -    static char *kwd_list[] = { "dom", "xtratime", NULL };
   13.69 +    static char *kwd_list[] = { "dom", "period", "slice", "latency",
   13.70 +				"xtratime", NULL };
   13.71      
   13.72 -    if( !PyArg_ParseTupleAndKeywords(args, kwds, "Li", kwd_list, &domid,
   13.73 -                                     &xtratime) )
   13.74 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, "LLLLi", kwd_list, &domid,
   13.75 +                                     &period, &slice, &latency, &xtratime) )
   13.76          return NULL;
   13.77     
   13.78 -    if ( xc_atropos_domain_set(xc->xc_handle, domid, xtratime) != 0 )
   13.79 +    if ( xc_atropos_domain_set(xc->xc_handle, domid, period, slice,
   13.80 +			       latency, xtratime) != 0 )
   13.81          return PyErr_SetFromErrno(xc_error);
   13.82  
   13.83      Py_INCREF(zero);
   13.84      return zero;
   13.85  }
   13.86  
    13.87 +static PyObject *pyxc_atropos_domain_get(PyObject *self,
    13.88 +                                         PyObject *args,
    13.89 +                                         PyObject *kwds)
    13.90 +{
    13.91 +    XcObject *xc = (XcObject *)self;
    13.92 +    u64 domid;
    13.93 +    u64 period, slice, latency;
    13.94 +    int xtratime;
    13.95 +    /* Python binding: fetch one domain's Atropos parameters as a dict. */
    13.96 +    static char *kwd_list[] = { "dom", NULL };
    13.97 +
    13.98 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, "L", kwd_list, &domid) )
    13.99 +        return NULL;
   13.100 +    /* Any non-zero result is reported as xc_error built from errno. */
   13.101 +    if ( xc_atropos_domain_get( xc->xc_handle, domid, &period,
   13.102 +                                &slice, &latency, &xtratime ) )
   13.103 +        return PyErr_SetFromErrno(xc_error);
   13.104 +    /* Keys: domain, period, slice, latency (64-bit 'L'); xtratime (int 'i'). */
   13.105 +    return Py_BuildValue("{s:L,s:L,s:L,s:L,s:i}",
   13.106 +                         "domain",  domid,
   13.107 +                         "period",  period,
   13.108 +                         "slice",   slice,
   13.109 +                         "latency", latency,
   13.110 +                         "xtratime", xtratime);
   13.111 +}
  13.112 +
  13.113 +
  13.114  static PyObject *pyxc_rrobin_global_set(PyObject *self,
  13.115                                          PyObject *args,
  13.116                                          PyObject *kwds)
  13.117 @@ -936,6 +1008,22 @@ static PyObject *pyxc_shadow_control(PyO
  13.118      return zero;
  13.119  }
  13.120  
  13.121 +static PyObject *pyxc_rrobin_global_get(PyObject *self,
  13.122 +                                        PyObject *args,
  13.123 +                                        PyObject *kwds)
  13.124 +{
  13.125 +    XcObject *xc = (XcObject *)self;
  13.126 +    u64 slice;
  13.127 +
  13.128 +    if ( !PyArg_ParseTuple(args, "") )
  13.129 +        return NULL;
  13.130 +
  13.131 +    if ( xc_rrobin_global_get(xc->xc_handle, &slice) != 0 )
  13.132 +        return PyErr_SetFromErrno(xc_error);
  13.133 +    
  13.134 +    return Py_BuildValue("s:L", "slice", slice);
  13.135 +}
  13.136 +
  13.137  
  13.138  static PyMethodDef pyxc_methods[] = {
  13.139      { "domain_create", 
  13.140 @@ -1037,6 +1125,13 @@ static PyMethodDef pyxc_methods[] = {
  13.141        " ctx_allow [int]: Minimal guaranteed quantum (I think!).\n\n"
  13.142        "Returns: [int] 0 on success; -1 on error.\n" },
  13.143  
  13.144 +    { "bvtsched_global_get",
  13.145 +      (PyCFunction)pyxc_bvtsched_global_get,
  13.146 +      METH_KEYWORDS, "\n"
  13.147 +      "Get global tuning parameters for BVT scheduler.\n"
  13.148 +      "Returns: [dict]:\n"
  13.149 +      " ctx_allow [int]: context switch allowance\n" },
  13.150 +
  13.151      { "bvtsched_domain_set",
  13.152        (PyCFunction)pyxc_bvtsched_domain_set,
  13.153        METH_VARARGS | METH_KEYWORDS, "\n"
  13.154 @@ -1048,21 +1143,56 @@ static PyMethodDef pyxc_methods[] = {
  13.155        " warpu  [int]:  Internal BVT parameter.\n\n"
  13.156        "Returns: [int] 0 on success; -1 on error.\n" },
  13.157  
  13.158 +    { "bvtsched_domain_get",
  13.159 +      (PyCFunction)pyxc_bvtsched_domain_get,
  13.160 +      METH_KEYWORDS, "\n"
  13.161 +      "Get per-domain tuning parameters under the BVT scheduler.\n"
  13.162 +      " dom [long]: Identifier of domain to be queried.\n"
  13.163 +      "Returns [dict]:\n"
  13.164 +      " domain [long]: Domain ID.\n"
  13.165 +      " mcuadv [long]: MCU Advance.\n"
  13.166 +      " warp   [long]: Warp.\n"
  13.167 +      " warpu  [long]:\n"
  13.168 +      " warpl  [long]: Warp limit,\n"
  13.169 +    },
  13.170 +
  13.171      { "atropos_domain_set",
  13.172        (PyCFunction)pyxc_atropos_domain_set,
  13.173 -      METH_VARARGS | METH_KEYWORDS, "\n"
  13.174 -      "Set the extra time flag for a domain when running with Atropos.\n"
  13.175 -      " dom [long]: domain to set\n"
  13.176 +      METH_KEYWORDS, "\n"
  13.177 +      "Set the scheduling parameters for a domain when running with Atropos.\n"
  13.178 +      " dom      [long]: domain to set\n"
  13.179 +      " period   [long]: domain's scheduling period\n"
  13.180 +      " slice    [long]: domain's slice per period\n"
  13.181 +      " latency  [long]: wakeup latency hint\n"
  13.182        " xtratime [int]: boolean\n"
  13.183        "Returns: [int] 0 on success; -1 on error.\n" },
  13.184  
  13.185 +    { "atropos_domain_get",
  13.186 +      (PyCFunction)pyxc_atropos_domain_get,
  13.187 +      METH_KEYWORDS, "\n"
  13.188 +      "Get the current scheduling parameters for a domain when running with\n"
  13.189 +      "the Atropos scheduler."
  13.190 +      " dom      [long]: domain to query\n"
  13.191 +      "Returns:  [dict]\n"
  13.192 +      " domain   [long]: domain ID\n"
  13.193 +      " period   [long]: scheduler period\n"
  13.194 +      " slice    [long]: CPU reservation per period\n"
  13.195 +      " latency  [long]: unblocking latency hint\n"
  13.196 +      " xtratime [int] : 0 if not using slack time, nonzero otherwise\n" },
  13.197 +
  13.198      { "rrobin_global_set",
  13.199        (PyCFunction)pyxc_rrobin_global_set,
  13.200        METH_KEYWORDS, "\n"
  13.201        "Set Round Robin scheduler slice.\n"
  13.202        " slice [long]: Round Robin scheduler slice\n"
  13.203 -      "Returns: [int] 0 on success, throws an exception on failure\n"
  13.204 -    },
  13.205 +      "Returns: [int] 0 on success, throws an exception on failure\n" },
  13.206 +
  13.207 +    { "rrobin_global_get",
  13.208 +      (PyCFunction)pyxc_rrobin_global_get,
  13.209 +      METH_KEYWORDS, "\n"
  13.210 +      "Get Round Robin scheduler settings\n"
  13.211 +      "Returns [dict]:\n"
  13.212 +      " slice  [long]: Scheduler time slice.\n" },    
  13.213  
  13.214      { "vif_scheduler_set", 
  13.215        (PyCFunction)pyxc_vif_scheduler_set, 
    14.1 --- a/tools/xend/lib/main.py	Fri Apr 02 13:58:27 2004 +0000
    14.2 +++ b/tools/xend/lib/main.py	Fri Apr 02 14:03:03 2004 +0000
    14.3 @@ -41,8 +41,7 @@ def daemon_loop():
    14.4  
    14.5      # Interface via which we receive event notifications from other guest
    14.6      # OSes. This interface also allows us to clear/acknowledge outstanding
    14.7 -    # notifications --- successive notifications for the same channel are
    14.8 -    # dropped until the first notification is cleared.
    14.9 +    # notifications.
   14.10      notifier = xend.utils.notifier()
   14.11  
   14.12      ##
   14.13 @@ -169,56 +168,48 @@ def daemon_loop():
   14.14                  break
   14.15              (idx, type) = notification
   14.16  
   14.17 +            if not control_list.has_key(idx):
   14.18 +                continue
   14.19 +
   14.20 +            (port, rbuf, wbuf, con_if) = control_list[idx]
   14.21 +            work_done = False
   14.22 +
   14.23              # If we pick up a disconnect notification then we do any necessary
   14.24 -            # cleanup, even if the event channel doesn't belong to us.
   14.25 -            # This is intended to prevent the event-channel port space from
   14.26 -            # getting clogged with stale connections.
   14.27 -            if type == notifier.DISCONNECT:
   14.28 +            # cleanup.
   14.29 +            if type == notifier.EXCEPTION:
   14.30                  ret = xc.evtchn_status(idx)
   14.31 -                if ret['status'] == 'interdomain':
   14.32 -                    notifier.clear(idx, notifier.NORMAL)
   14.33 -                    notifier.clear(idx, notifier.DISCONNECT)
   14.34 -                    if control_list.has_key(idx):
   14.35 -                        (port, rbuf, wbuf, con_if) =  control_list[idx]
   14.36 -                        con_if.close()
   14.37 -                        del control_list[idx], port, rbuf, wbuf, con_if
   14.38 -                elif ret['status'] == 'unbound':
   14.39 -                    # There's noone to do the closure for us...
   14.40 -                    xc.evtchn_close(idx)
   14.41 -
   14.42 -            # A standard notification: probably means there are messages to
   14.43 -            # read or that there is space to write messages.
   14.44 -            elif type == notifier.NORMAL and control_list.has_key(idx):
   14.45 -                (port, rbuf, wbuf, con_if) = control_list[idx]
   14.46 -                work_done = False
   14.47 +                if ret['status'] == 'unbound':
   14.48 +                    notifier.unbind(idx)
   14.49 +                    con_if.close()
   14.50 +                    del control_list[idx], port, rbuf, wbuf, con_if
   14.51 +                    continue
   14.52  
   14.53 -                # We clear the notification before doing any work, to avoid
   14.54 -                # races.
   14.55 -                notifier.clear(idx, notifier.NORMAL)
   14.56 +            # Read incoming requests. Currently assume that request
   14.57 +            # message always contains console data.
   14.58 +            while port.request_to_read():
   14.59 +                msg = port.read_request()
   14.60 +                rbuf.write(msg.get_payload())
   14.61 +                port.write_response(msg)
   14.62 +                work_done = True
   14.63  
   14.64 -                # Read incoming requests. Currently assume that request
   14.65 -                # message always containb console data.
   14.66 -                while port.request_to_read():
   14.67 -                    msg = port.read_request()
   14.68 -                    rbuf.write(msg.get_payload())
   14.69 -                    port.write_response(msg)
   14.70 -                    work_done = True
   14.71 +            # Incoming responses are currently thrown on the floor.
   14.72 +            while port.response_to_read():
   14.73 +                msg = port.read_response()
   14.74 +                work_done = True
   14.75  
   14.76 -                # Incoming responses are currently thrown on the floor.
   14.77 -                while port.response_to_read():
   14.78 -                    msg = port.read_response()
   14.79 -                    work_done = True
   14.80 +            # Send as much pending console data as there is room for.
   14.81 +            while not wbuf.empty() and port.space_to_write_request():
   14.82 +                msg = xend.utils.message(0, 0, 0)
   14.83 +                msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
   14.84 +                port.write_request(msg)
   14.85 +                work_done = True
   14.86  
   14.87 -                # Send as much pending console data as there is room for.
   14.88 -                while not wbuf.empty() and port.space_to_write_request():
   14.89 -                    msg = xend.utils.message(0, 0, 0)
   14.90 -                    msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
   14.91 -                    port.write_request(msg)
   14.92 -                    work_done = True
   14.93 +            # Finally, notify the remote end of any work that we did.
   14.94 +            if work_done:
   14.95 +                port.notify()
   14.96  
   14.97 -                # Finally, notify the remote end of any work that we did.
   14.98 -                if work_done:
   14.99 -                    port.notify()
  14.100 +            # Unmask notifications for this port.
  14.101 +            notifier.unmask(idx)
  14.102  
  14.103  
  14.104  
    15.1 --- a/tools/xend/lib/manager.py	Fri Apr 02 13:58:27 2004 +0000
    15.2 +++ b/tools/xend/lib/manager.py	Fri Apr 02 14:03:03 2004 +0000
    15.3 @@ -14,10 +14,9 @@ import xend.console, xend.main, xend.uti
    15.4  ##  automatically allocated.
    15.5  ##
    15.6  def new_control_interface(dom, console_port=-1):
    15.7 -    # Allocate an event channel. Clear pending notifications.
     15.8 +    # Allocate an event channel and bind to it.
    15.9      port = xend.utils.port(dom)
   15.10 -    xend.main.notifier.clear(port.local_port, xend.main.notifier.NORMAL)
   15.11 -    xend.main.notifier.clear(port.local_port, xend.main.notifier.DISCONNECT)
   15.12 +    xend.main.notifier.bind(port.local_port)
   15.13      
   15.14      # If necessary, compute a suitable TCP port for console I/O.
   15.15      if console_port < 0:
    16.1 --- a/tools/xend/lib/utils.c	Fri Apr 02 13:58:27 2004 +0000
    16.2 +++ b/tools/xend/lib/utils.c	Fri Apr 02 14:03:03 2004 +0000
    16.3 @@ -33,9 +33,15 @@
    16.4  #define EVTCHN_DEV_MAJOR 10
    16.5  #define EVTCHN_DEV_MINOR 200
    16.6  #define PORT_NORMAL     0x0000   /* A standard event notification.      */ 
    16.7 -#define PORT_DISCONNECT 0x8000   /* A port-disconnect notification.     */
    16.8 +#define PORT_EXCEPTION  0x8000   /* An exceptional notification.        */
    16.9  #define PORTIDX_MASK    0x7fff   /* Strip subtype to obtain port index. */
   16.10 -#define EVTCHN_RESET _IO('E', 1) /* Clear notification buffer. Clear errors. */
   16.11 +/* /dev/xen/evtchn ioctls: */
   16.12 +/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */
   16.13 +#define EVTCHN_RESET  _IO('E', 1)
   16.14 +/* EVTCHN_BIND: Bind to the specified event-channel port. */
   16.15 +#define EVTCHN_BIND   _IO('E', 2)
   16.16 +/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */
   16.17 +#define EVTCHN_UNBIND _IO('E', 3)
   16.18  
   16.19  /* Size of a machine page frame. */
   16.20  #define PAGE_SIZE 4096
   16.21 @@ -76,18 +82,48 @@ static PyObject *xu_notifier_read(PyObje
   16.22      return Py_None;
   16.23  }
   16.24  
   16.25 -static PyObject *xu_notifier_clear(PyObject *self, PyObject *args)
    16.26 +static PyObject *xu_notifier_unmask(PyObject *self, PyObject *args)
    16.27  {
    16.28      xu_notifier_object *xun = (xu_notifier_object *)self;
    16.29      u16 v;
    16.30 -    int idx, type;
    16.31 +    int idx;
    16.32  
    16.33 -    if ( !PyArg_ParseTuple(args, "ii", &idx, &type) )
    16.34 +    if ( !PyArg_ParseTuple(args, "i", &idx) )
    16.35          return NULL;
    16.36 +    /* Unmask a port by writing its index, truncated to u16, to the evtchn fd. */
    16.37 +    v = (u16)idx;
    16.38      
    16.39 -    v = (u16)idx | (u16)type;
    16.40 +    (void)write(xun->evtchn_fd, &v, sizeof(v));
    16.41 +    /* The write() result is deliberately discarded; None is always returned. */
    16.42 +    Py_INCREF(Py_None);
    16.43 +    return Py_None;
    16.44 +}
    16.45 +/* Python method: issue the EVTCHN_BIND ioctl on the notifier's fd for a port. */
    16.46 +static PyObject *xu_notifier_bind(PyObject *self, PyObject *args)
    16.47 +{
    16.48 +    xu_notifier_object *xun = (xu_notifier_object *)self;
    16.49 +    int idx;
    16.50 +    /* Expects exactly one integer argument, passed to the ioctl unchanged. */
    16.51 +    if ( !PyArg_ParseTuple(args, "i", &idx) )
    16.52 +        return NULL;
    16.53  
    16.54 -    (void)write(xun->evtchn_fd, &v, sizeof(v));
    16.55 +    if ( ioctl(xun->evtchn_fd, EVTCHN_BIND, idx) != 0 )
    16.56 +        return PyErr_SetFromErrno(PyExc_IOError);
    16.57 +    /* ioctl succeeded: return None. */
    16.58 +    Py_INCREF(Py_None);
    16.59 +    return Py_None;
    16.60 +}
   16.61 +
   16.62 +static PyObject *xu_notifier_unbind(PyObject *self, PyObject *args)
   16.63 +{
   16.64 +    xu_notifier_object *xun = (xu_notifier_object *)self;
   16.65 +    int idx;
   16.66 +
   16.67 +    if ( !PyArg_ParseTuple(args, "i", &idx) )
   16.68 +        return NULL;
   16.69 +
   16.70 +    if ( ioctl(xun->evtchn_fd, EVTCHN_UNBIND, idx) != 0 )
   16.71 +        return PyErr_SetFromErrno(PyExc_IOError);
   16.72  
   16.73      Py_INCREF(Py_None);
   16.74      return Py_None;
   16.75 @@ -105,10 +141,20 @@ static PyMethodDef xu_notifier_methods[]
   16.76        METH_VARARGS,
   16.77        "Read a (@port, @type) pair.\n" },
   16.78  
   16.79 -    { "clear", 
   16.80 -      (PyCFunction)xu_notifier_clear,
   16.81 +    { "unmask", 
   16.82 +      (PyCFunction)xu_notifier_unmask,
   16.83        METH_VARARGS,
   16.84 -      "Clear a (@port, @type) pair.\n" },
   16.85 +      "Unmask notifications for a @port.\n" },
   16.86 +
   16.87 +    { "bind", 
   16.88 +      (PyCFunction)xu_notifier_bind,
   16.89 +      METH_VARARGS,
   16.90 +      "Get notifications for a @port.\n" },
   16.91 +
   16.92 +    { "unbind", 
   16.93 +      (PyCFunction)xu_notifier_unbind,
   16.94 +      METH_VARARGS,
   16.95 +      "No longer get notifications for a @port.\n" },
   16.96  
   16.97      { "fileno", 
   16.98        (PyCFunction)xu_notifier_fileno,
   16.99 @@ -147,8 +193,8 @@ static PyObject *xu_notifier_new(PyObjec
  16.100  
  16.101  static PyObject *xu_notifier_getattr(PyObject *obj, char *name)
  16.102  {
  16.103 -    if ( strcmp(name, "DISCONNECT") == 0 )
  16.104 -        return PyInt_FromLong(PORT_DISCONNECT);
  16.105 +    if ( strcmp(name, "EXCEPTION") == 0 )
  16.106 +        return PyInt_FromLong(PORT_EXCEPTION);
  16.107      if ( strcmp(name, "NORMAL") == 0 )
  16.108          return PyInt_FromLong(PORT_NORMAL);
  16.109      return Py_FindMethod(xu_notifier_methods, obj, name);
    17.1 --- a/xen/Rules.mk	Fri Apr 02 13:58:27 2004 +0000
    17.2 +++ b/xen/Rules.mk	Fri Apr 02 14:03:03 2004 +0000
    17.3 @@ -1,6 +1,8 @@
    17.4  COMPILE_ARCH := $(shell uname -m | sed -e s/i.86/i386/)
    17.5  TARGET_ARCH  ?= $(COMPILE_ARCH)
    17.6  
    17.7 +nodev ?= n
    17.8 +
    17.9  TARGET  := $(BASEDIR)/xen
   17.10  HDRS    := $(wildcard $(BASEDIR)/include/xen/*.h)
   17.11  HDRS    += $(wildcard $(BASEDIR)/include/scsi/*.h)
   17.12 @@ -20,11 +22,13 @@ ALL_OBJS += $(BASEDIR)/net/network.o
   17.13  ALL_OBJS += $(BASEDIR)/drivers/char/driver.o
   17.14  ALL_OBJS += $(BASEDIR)/drivers/pci/driver.o
   17.15  ALL_OBJS += $(BASEDIR)/drivers/net/driver.o
   17.16 +ifneq ($(nodev),y)
   17.17  ALL_OBJS += $(BASEDIR)/drivers/block/driver.o
   17.18  ALL_OBJS += $(BASEDIR)/drivers/cdrom/driver.o
   17.19  ALL_OBJS += $(BASEDIR)/drivers/ide/driver.o
   17.20  ALL_OBJS += $(BASEDIR)/drivers/scsi/driver.o
   17.21  ALL_OBJS += $(BASEDIR)/drivers/message/fusion/driver.o
   17.22 +endif
   17.23  ALL_OBJS += $(BASEDIR)/arch/$(TARGET_ARCH)/arch.o
   17.24  
   17.25  HOSTCC     = gcc
   17.26 @@ -32,6 +36,11 @@ HOSTCFLAGS = -Wall -Wstrict-prototypes -
   17.27  
   17.28  include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
   17.29  
   17.30 +ifeq ($(nodev),y)
   17.31 +CFLAGS += -DNO_DEVICES_IN_XEN
   17.32 +CFLAGS := $(subst -Werror,,$(CFLAGS))
   17.33 +endif
   17.34 +
   17.35  %.o: %.c $(HDRS) Makefile
   17.36  	$(CC) $(CFLAGS) -c $< -o $@
   17.37  
    18.1 --- a/xen/arch/i386/Rules.mk	Fri Apr 02 13:58:27 2004 +0000
    18.2 +++ b/xen/arch/i386/Rules.mk	Fri Apr 02 14:03:03 2004 +0000
    18.3 @@ -7,8 +7,8 @@ LD := ld
    18.4  MONITOR_BASE := 0xFC500000
    18.5  # Bootloader should load monitor to this real address
    18.6  LOAD_BASE    := 0x00100000
    18.7 -CFLAGS  := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing 
    18.8 -CFLAGS  += -iwithprefix include -O3 -Wall -Werror -DMONITOR_BASE=$(MONITOR_BASE)
    18.9 +CFLAGS  := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -O3
   18.10 +CFLAGS  += -iwithprefix include -Wall -Werror -DMONITOR_BASE=$(MONITOR_BASE)
   18.11  CFLAGS  += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -DNDEBUG
   18.12  #CFLAGS  += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
   18.13  CFLAGS  += -Wno-pointer-arith -Wredundant-decls -m32
    19.1 --- a/xen/arch/i386/entry.S	Fri Apr 02 13:58:27 2004 +0000
    19.2 +++ b/xen/arch/i386/entry.S	Fri Apr 02 14:03:03 2004 +0000
    19.3 @@ -112,8 +112,8 @@ FAILSAFE_SEL    = 32
    19.4  FAILSAFE_ADDR   = 36
    19.5  
    19.6  /* Offsets in shared_info_t */
    19.7 -UPCALL_PENDING  = 0
    19.8 -UPCALL_MASK     = 4
    19.9 +#define UPCALL_PENDING /* 0 */
   19.10 +#define UPCALL_MASK       1
   19.11  
   19.12  /* Offsets in guest_trap_bounce */
   19.13  GTB_ERROR_CODE   =  0
   19.14 @@ -368,12 +368,11 @@ test_all_events:
   19.15          jnz  process_hyp_events
   19.16  /*test_guest_events:*/
   19.17          movl SHARED_INFO(%ebx),%eax
   19.18 -        movl UPCALL_MASK(%eax),%ecx
   19.19 -        notl %ecx
   19.20 -        andl UPCALL_PENDING(%eax),%ecx  # ECX = pending & ~mask
   19.21 -        andl $1,%ecx                    # Is bit 0 pending and not masked?
   19.22 +        testb $0xFF,UPCALL_MASK(%eax)
   19.23 +        jnz  restore_all_guest
   19.24 +        testb $0xFF,UPCALL_PENDING(%eax)
   19.25          jz   restore_all_guest
   19.26 -        lock btsl $0,UPCALL_MASK(%eax)  # Upcalls are masked during delivery
   19.27 +        movb $1,UPCALL_MASK(%eax)       # Upcalls are masked during delivery
   19.28  /*process_guest_events:*/
   19.29          movzwl PROCESSOR(%ebx),%edx
   19.30          shl  $4,%edx                    # sizeof(guest_trap_bounce) == 16
   19.31 @@ -712,7 +711,11 @@ ENTRY(hypervisor_call_table)
   19.32          .long SYMBOL_NAME(do_sched_op)
   19.33          .long SYMBOL_NAME(do_dom0_op)
   19.34          .long SYMBOL_NAME(do_network_op)         /* 10 */
   19.35 +#ifndef NO_DEVICES_IN_XEN
   19.36          .long SYMBOL_NAME(do_block_io_op)
   19.37 +#else
   19.38 +        .long SYMBOL_NAME(do_ni_syscall)
   19.39 +#endif
   19.40          .long SYMBOL_NAME(do_set_debugreg)
   19.41          .long SYMBOL_NAME(do_get_debugreg)
   19.42          .long SYMBOL_NAME(do_update_descriptor)
    20.1 --- a/xen/arch/i386/io_apic.c	Fri Apr 02 13:58:27 2004 +0000
    20.2 +++ b/xen/arch/i386/io_apic.c	Fri Apr 02 14:03:03 2004 +0000
    20.3 @@ -138,14 +138,10 @@ static void __init replace_pin_at_irq(un
    20.4  	static void name##_IO_APIC_irq (unsigned int irq)		\
    20.5  	__DO_ACTION(R, ACTION, FINAL)
    20.6  
    20.7 -DO_ACTION( __mask,             0, |= 0x00010000, io_apic_sync(entry->apic) )
    20.8 -						/* mask = 1 */
    20.9 -DO_ACTION( __unmask,           0, &= 0xfffeffff, )
   20.10 -						/* mask = 0 */
   20.11 -DO_ACTION( __mask_and_edge,    0, = (reg & 0xffff7fff) | 0x00010000, )
   20.12 -						/* mask = 1, trigger = 0 */
   20.13 -DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffeffff) | 0x00008000, )
   20.14 -						/* mask = 0, trigger = 1 */
   20.15 +DO_ACTION( __mask,    0, |= 0x00010000, io_apic_sync(entry->apic) )
   20.16 +DO_ACTION( __unmask,  0, &= 0xfffeffff, )
   20.17 +DO_ACTION( __edge,    0, &= 0xffff7fff, )
   20.18 +DO_ACTION( __level,   0, |= 0x00008000, )
   20.19  
   20.20  static void mask_IO_APIC_irq (unsigned int irq)
   20.21  {
   20.22 @@ -1365,13 +1361,15 @@ static unsigned int startup_level_ioapic
   20.23  	return 0; /* don't check for pending */
   20.24  }
   20.25  
   20.26 -static void end_level_ioapic_irq (unsigned int irq)
   20.27 +static void mask_and_ack_level_ioapic_irq(unsigned int irq)
   20.28  {
   20.29  	unsigned long v;
   20.30  	int i;
   20.31  
   20.32  	balance_irq(irq);
   20.33  
   20.34 +	mask_IO_APIC_irq(irq);
   20.35 +
   20.36  /*
   20.37   * It appears there is an erratum which affects at least version 0x11
   20.38   * of I/O APIC (that's the 82093AA and cores integrated into various
   20.39 @@ -1405,7 +1403,7 @@ static void end_level_ioapic_irq (unsign
   20.40  		atomic_inc(&irq_mis_count);
   20.41  #endif
   20.42  		spin_lock(&ioapic_lock);
   20.43 -		__mask_and_edge_IO_APIC_irq(irq);
   20.44 +		__edge_IO_APIC_irq(irq);
   20.45  #ifdef APIC_LOCKUP_DEBUG
   20.46  		for (entry = irq_2_pin + irq;;) {
   20.47  			unsigned int reg;
   20.48 @@ -1421,12 +1419,15 @@ static void end_level_ioapic_irq (unsign
   20.49  			entry = irq_2_pin + entry->next;
   20.50  		}
   20.51  #endif
   20.52 -		__unmask_and_level_IO_APIC_irq(irq);
   20.53 +		__level_IO_APIC_irq(irq);
   20.54  		spin_unlock(&ioapic_lock);
   20.55  	}
   20.56  }
   20.57  
   20.58 -static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ }
    20.59 +static void end_level_ioapic_irq(unsigned int irq)
    20.60 +{
    20.61 +	unmask_IO_APIC_irq(irq);	/* re-enable the line; mask_and_ack_level_ioapic_irq() masked it */
    20.62 +}
   20.63  
   20.64  static inline void init_IO_APIC_traps(void)
   20.65  {
    21.1 --- a/xen/arch/i386/irq.c	Fri Apr 02 13:58:27 2004 +0000
    21.2 +++ b/xen/arch/i386/irq.c	Fri Apr 02 14:03:03 2004 +0000
    21.3 @@ -24,6 +24,7 @@
    21.4  #include <xen/interrupt.h>
    21.5  #include <xen/irq.h>
    21.6  #include <xen/slab.h>
    21.7 +#include <xen/event.h>
    21.8  #include <asm/mpspec.h>
    21.9  #include <asm/io_apic.h>
   21.10  #include <asm/msr.h>
   21.11 @@ -66,6 +67,8 @@ irq_desc_t irq_desc[NR_IRQS] __cacheline
   21.12  unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
   21.13  #endif
   21.14  
   21.15 +static void __do_IRQ_guest(int irq);
   21.16 +
   21.17  /*
   21.18   * Special irq handlers.
   21.19   */
   21.20 @@ -333,7 +336,9 @@ void __global_restore_flags(unsigned lon
   21.21   * waste of time and is not what some drivers would
   21.22   * prefer.
   21.23   */
   21.24 -int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
   21.25 +static int handle_IRQ_event(unsigned int irq, 
   21.26 +                            struct pt_regs * regs, 
   21.27 +                            struct irqaction * action)
   21.28  {
   21.29      int status;
   21.30      int cpu = smp_processor_id();
   21.31 @@ -483,6 +488,7 @@ asmlinkage unsigned int do_IRQ(struct pt
   21.32  
   21.33      spin_lock(&desc->lock);
   21.34      desc->handler->ack(irq);
   21.35 +
   21.36      /*
   21.37        REPLAY is when Linux resends an IRQ that was dropped earlier
   21.38        WAITING is used by probe to mark irqs that are being tested
   21.39 @@ -490,6 +496,14 @@ asmlinkage unsigned int do_IRQ(struct pt
   21.40      status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
   21.41      status |= IRQ_PENDING; /* we _want_ to handle it */
   21.42  
   21.43 +    /* We hook off guest-bound IRQs for special handling. */
   21.44 +    if ( status & IRQ_GUEST )
   21.45 +    {
   21.46 +        __do_IRQ_guest(irq);
   21.47 +        spin_unlock(&desc->lock);
   21.48 +        return 1;
   21.49 +    }
   21.50 +
   21.51      /*
   21.52       * If the IRQ is disabled for whatever reason, we cannot use the action we 
   21.53       * have.
   21.54 @@ -883,6 +897,13 @@ int setup_irq(unsigned int irq, struct i
   21.55       * The following block of code has to be executed atomically
   21.56       */
   21.57      spin_lock_irqsave(&desc->lock,flags);
   21.58 +
   21.59 +    if ( desc->status & IRQ_GUEST )
   21.60 +    {
   21.61 +        spin_unlock_irqrestore(&desc->lock,flags);
   21.62 +        return -EBUSY;
   21.63 +    }
   21.64 +
   21.65      p = &desc->action;
   21.66      if ((old = *p) != NULL) {
   21.67          /* Can't share interrupts unless both agree to */
   21.68 @@ -906,7 +927,164 @@ int setup_irq(unsigned int irq, struct i
   21.69          desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
   21.70          desc->handler->startup(irq);
   21.71      }
   21.72 +
   21.73      spin_unlock_irqrestore(&desc->lock,flags);
   21.74  
   21.75      return 0;
   21.76  }
   21.77 +
   21.78 +
   21.79 +
   21.80 +/*
   21.81 + * HANDLING OF GUEST-BOUND PHYSICAL IRQS
   21.82 + */
   21.83 +
   21.84 +#define IRQ_MAX_GUESTS 7 /* capacity of guest[] below: most guests that may share one IRQ */
   21.85 +typedef struct {
   21.86 +    u8 nr_guests; /* number of valid entries in guest[] */
   21.87 +    u8 in_flight; /* bound guests whose pirq_mask bit for this IRQ is currently set */
   21.88 +    u8 shareable; /* will_share value supplied by the first binder */
   21.89 +    struct task_struct *guest[IRQ_MAX_GUESTS]; /* guests bound to this IRQ, packed from index 0 */
   21.90 +} irq_guest_action_t; /* kept in desc->action (cast to struct irqaction *) while IRQ_GUEST is set */
   21.91 +/* Forward physical IRQ 'irq' to every guest bound to it; do_IRQ() calls this with desc->lock held. */
   21.92 +static void __do_IRQ_guest(int irq)
   21.93 +{
   21.94 +    irq_desc_t *desc = &irq_desc[irq];
   21.95 +    irq_guest_action_t *action = (irq_guest_action_t *)desc->action; /* valid because the caller saw IRQ_GUEST set */
   21.96 +    struct task_struct *p;
   21.97 +    int i;
   21.98 +
   21.99 +    for ( i = 0; i < action->nr_guests; i++ )
  21.100 +    {
  21.101 +        p = action->guest[i];
  21.102 +        if ( !test_and_set_bit(irq, &p->pirq_mask) ) /* count a guest only on the 0->1 transition of its bit */
  21.103 +            action->in_flight++;
  21.104 +        send_guest_pirq(p, irq); /* sent on every interrupt, even if the guest was already counted */
  21.105 +    }
  21.106 +}
  21.107 +/* Re-check each pirq still flagged in p->pirq_mask and finish those whose event channel is unmasked. Always returns 0. */
  21.108 +int pirq_guest_unmask(struct task_struct *p)
  21.109 +{
  21.110 +    irq_desc_t *desc;
  21.111 +    int i, j, pirq;
  21.112 +    u32 m;
  21.113 +    shared_info_t *s = p->shared_info;
  21.114 +
  21.115 +    for ( i = 0; i < 2; i++ ) /* two 32-bit words of pirq_mask, i.e. pirqs 0..63 */
  21.116 +    {
  21.117 +        m = p->pirq_mask[i]; /* snapshot taken without desc->lock; each bit is re-tested under the lock below */
  21.118 +        while ( (j = ffs(m)) != 0 )
  21.119 +        {
  21.120 +            m &= ~(1 << --j); /* j was 1-based; make it the 0-based bit index and clear it from the snapshot */
  21.121 +            pirq = (i << 5) + j;
  21.122 +            desc = &irq_desc[pirq];
  21.123 +            spin_lock_irq(&desc->lock);
  21.124 +            if ( !test_bit(p->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && /* guest has the event channel unmasked */
  21.125 +                 test_and_clear_bit(pirq, &p->pirq_mask) && /* and this guest was still counted as in flight */
  21.126 +                 (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) /* and it was the last such guest */
  21.127 +                desc->handler->end(pirq);
  21.128 +            spin_unlock_irq(&desc->lock);
  21.129 +        }
  21.130 +    }
  21.131 +
  21.132 +    return 0;
  21.133 +}
  21.134 +/* Bind privileged guest 'p' to physical IRQ 'irq'; returns 0, -EPERM, -EBUSY or -ENOMEM. */
  21.135 +int pirq_guest_bind(struct task_struct *p, int irq, int will_share)
  21.136 +{
  21.137 +    unsigned long flags;
  21.138 +    irq_desc_t *desc = &irq_desc[irq];
  21.139 +    irq_guest_action_t *action;
  21.140 +    int rc = 0;
  21.141 +
  21.142 +    if ( !IS_PRIV(p) )
  21.143 +        return -EPERM;
  21.144 +
  21.145 +    spin_lock_irqsave(&desc->lock, flags);
  21.146 +
  21.147 +    action = (irq_guest_action_t *)desc->action; /* only used as a guest action when IRQ_GUEST is already set */
  21.148 +
  21.149 +    if ( !(desc->status & IRQ_GUEST) ) /* first guest binding for this IRQ */
  21.150 +    {
  21.151 +        if ( desc->action != NULL ) /* a non-guest irqaction already owns the line */
  21.152 +        {
  21.153 +            DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n",
  21.154 +                    irq, desc->action->name);
  21.155 +            rc = -EBUSY;
  21.156 +            goto out;
  21.157 +        }
  21.158 +
  21.159 +        action = kmalloc(sizeof(irq_guest_action_t), GFP_KERNEL); /* NOTE(review): allocation happens with desc->lock held and IRQs off -- confirm kmalloc cannot sleep here */
  21.160 +        if ( (desc->action = (struct irqaction *)action) == NULL )
  21.161 +        {
  21.162 +            DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
  21.163 +            rc = -ENOMEM;
  21.164 +            goto out;
  21.165 +        }
  21.166 +
  21.167 +        action->nr_guests = 0;
  21.168 +        action->in_flight = 0;
  21.169 +        action->shareable = will_share;
  21.170 +        /* Mark the line guest-owned, then clear state and start it as setup_irq() does. */
  21.171 +        desc->depth = 0;
  21.172 +        desc->status |= IRQ_GUEST;
  21.173 +        desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
  21.174 +        desc->handler->startup(irq);
  21.175 +    }
  21.176 +    else if ( !will_share || !action->shareable ) /* sharing needs both the new binder and the first binder to agree */
  21.177 +    {
  21.178 +        DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n",
  21.179 +                irq);
  21.180 +        rc = -EBUSY;
  21.181 +        goto out;
  21.182 +    }
  21.183 +
  21.184 +    if ( action->nr_guests == IRQ_MAX_GUESTS ) /* guest[] is full */
  21.185 +    {
  21.186 +        DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq);
  21.187 +        rc = -EBUSY;
  21.188 +        goto out;
  21.189 +    }
  21.190 +
  21.191 +    action->guest[action->nr_guests++] = p; /* NOTE(review): no check that 'p' is not already in guest[] */
  21.192 +
  21.193 + out:
  21.194 +    spin_unlock_irqrestore(&desc->lock, flags);
  21.195 +    return rc;
  21.196 +}
  21.197 +
  21.198 +/*
  21.199 + * Remove guest 'p' from the set of guests bound to physical IRQ 'irq'.
  21.200 + * Returns 0 on success, or -EINVAL if the IRQ is not guest-bound or 'p'
  21.201 + * holds no binding on it.
  21.202 + */
  21.203 +int pirq_guest_unbind(struct task_struct *p, int irq)
  21.204 +{
  21.205 +    unsigned long flags;
  21.206 +    irq_desc_t *desc = &irq_desc[irq];
  21.207 +    irq_guest_action_t *action;
  21.208 +    int i, rc = -EINVAL;
  21.209 +
  21.210 +    spin_lock_irqsave(&desc->lock, flags);
  21.211 +
  21.212 +    /* desc->action is an irq_guest_action_t only while IRQ_GUEST is set. */
  21.213 +    action = (irq_guest_action_t *)desc->action;
  21.214 +    if ( !(desc->status & IRQ_GUEST) || (action == NULL) )
  21.215 +        goto out;
  21.216 +
  21.217 +    /* Find p's slot without scanning past the populated entries. */
  21.218 +    for ( i = 0; i < action->nr_guests; i++ )
  21.219 +        if ( action->guest[i] == p )
  21.220 +            break;
  21.221 +    if ( i == action->nr_guests )
  21.222 +        goto out;
  21.223 +
  21.224 +    /* Drop p's outstanding delivery, ending the IRQ if it was the last one. */
  21.225 +    if ( test_and_clear_bit(irq, &p->pirq_mask) &&
  21.226 +         (--action->in_flight == 0) )
  21.227 +        desc->handler->end(irq);
  21.228 +
  21.229 +    if ( action->nr_guests == 1 )
  21.230 +    {
  21.231 +        /* Last guest gone: release the action and shut the line down. */
  21.232 +        desc->action = NULL;
  21.233 +        kfree(action);
  21.234 +        desc->status |= IRQ_DISABLED;
  21.235 +        desc->status &= ~IRQ_GUEST;
  21.236 +        desc->handler->shutdown(irq);
  21.237 +    }
  21.238 +    else
  21.239 +    {
  21.240 +        /* Close the gap. memmove() takes a byte count, not an entry count. */
  21.241 +        memmove(&action->guest[i], &action->guest[i+1],
  21.242 +                (action->nr_guests - i - 1) * sizeof(action->guest[0]));
  21.243 +        action->nr_guests--;
  21.244 +    }
  21.245 +
  21.246 +    rc = 0;
  21.247 +
  21.248 + out:
  21.249 +    spin_unlock_irqrestore(&desc->lock, flags);
  21.250 +    return rc;
  21.251 +}
    22.1 --- a/xen/arch/i386/mm.c	Fri Apr 02 13:58:27 2004 +0000
    22.2 +++ b/xen/arch/i386/mm.c	Fri Apr 02 14:03:03 2004 +0000
    22.3 @@ -81,6 +81,13 @@ void __init paging_init(void)
    22.4  {
    22.5      unsigned long addr;
    22.6      void *ioremap_pt;
    22.7 +    int i;
    22.8 +
    22.9 +    /* Idle page table 1:1 maps the first part of physical memory. */
   22.10 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   22.11 +        idle_pg_table[i] = 
   22.12 +            mk_l2_pgentry((i << L2_PAGETABLE_SHIFT) | 
   22.13 +                          __PAGE_HYPERVISOR | _PAGE_PSE);
   22.14  
   22.15      /*
   22.16       * Fixed mappings, only the page table structure has to be
    23.1 --- a/xen/arch/i386/pdb-stub.c	Fri Apr 02 13:58:27 2004 +0000
    23.2 +++ b/xen/arch/i386/pdb-stub.c	Fri Apr 02 14:03:03 2004 +0000
    23.3 @@ -1,17 +1,41 @@
    23.4 +
    23.5 +/*
    23.6 + * pervasive debugger
    23.7 + * www.cl.cam.ac.uk/netos/pdb
    23.8 + *
    23.9 + * alex ho
   23.10 + * 2004
   23.11 + * university of cambridge computer laboratory
   23.12 + *
   23.13 + * code adapted originally from kgdb, nemesis, & gdbserver
   23.14 + */
   23.15 +
   23.16  #include <xen/lib.h>
   23.17  #include <xen/sched.h>
   23.18  #include <asm-i386/ptrace.h>
   23.19  #include <xen/keyhandler.h> 
   23.20  #include <asm/apic.h>
   23.21 +#include <asm/domain_page.h>                           /* [un]map_domain_mem */
   23.22 +#include <asm/processor.h>
   23.23  #include <asm/pdb.h>
   23.24  #include <xen/list.h>
   23.25  #include <xen/serial.h>
   23.26  
   23.27 +#undef PDB_DEBUG_TRACE
   23.28 +#ifdef PDB_DEBUG_TRACE
   23.29 +#define TRC(_x) _x
   23.30 +#else
   23.31 +#define TRC(_x)
   23.32 +#endif
   23.33 +
   23.34 +#define DEBUG_EXCEPTION     0x01
   23.35 +#define BREAKPT_EXCEPTION   0x03
   23.36 +#define PDB_LIVE_EXCEPTION  0x58
   23.37 +#define KEYPRESS_EXCEPTION  0x88
   23.38 +
   23.39  #define BUFMAX 400
   23.40  
   23.41 -#define PDB_DOMAIN_OFFSET 2              /* all domains are positive numbers */
   23.42 -
   23.43 -static const char hexchars[]="0123456789abcdef";
   23.44 +static const char hexchars[] = "0123456789abcdef";
   23.45  
   23.46  static int remote_debug;
   23.47  
   23.48 @@ -23,14 +47,31 @@ static int  pdb_in_buffer_ptr;
   23.49  static unsigned char  pdb_in_checksum;
   23.50  static unsigned char  pdb_xmit_checksum;
   23.51  
   23.52 -static int pdb_ctrl_thread = -1;
   23.53 -static int pdb_info_thread = -1;
   23.54 -static int pdb_stepping = 0;
   23.55 +/* function pointers in the near future... */
   23.56 +unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
   23.57 +void pdb_linux_get_values(char *buffer, int length, unsigned long address,
   23.58 +			  int pid, unsigned long cr3);
   23.59 +
   23.60 +struct pdb_context
   23.61 +{
   23.62 +    int valid;
   23.63 +    int domain;
   23.64 +    int process;
   23.65 +    unsigned long ptbr;                   /* cached page table base register */
   23.66 +};
   23.67 +struct pdb_context pdb_ctx;
   23.68 +
   23.69 +int pdb_continue_thread = 0;
   23.70 +int pdb_general_thread = 0;
   23.71  
   23.72  void pdb_put_packet (unsigned char *buffer, int ack);
   23.73  
   23.74  int pdb_initialized = 0;
   23.75 -static int pdb_serhnd      = -1;
   23.76 +int pdb_page_fault_possible = 0;
   23.77 +int pdb_page_fault_scratch = 0;                     /* just a handy variable */
   23.78 +int pdb_page_fault = 0;
   23.79 +static int pdb_serhnd = -1;
   23.80 +static int pdb_stepping = 0;
   23.81  
   23.82  static inline void pdb_put_char(unsigned char c)
   23.83  {
   23.84 @@ -42,16 +83,6 @@ static inline unsigned char pdb_get_char
   23.85      return serial_getc(pdb_serhnd);
   23.86  }
   23.87  
   23.88 -static volatile int mem_err = 0;
   23.89 -void set_mem_err (void)                                   /* NOT USED YET... */
   23.90 -{
   23.91 -    mem_err = 1;
   23.92 -}
   23.93 -
   23.94 -/* These are separate functions so that they are so short and sweet
   23.95 -   that the compiler won't save any registers (if there is a fault
   23.96 -   to mem_fault, they won't get restored, so there better not be any
   23.97 -   saved).  */
   23.98  int
   23.99  get_char (char *addr)
  23.100  {
  23.101 @@ -73,48 +104,127 @@ pdb_process_query (char *ptr)
  23.102      }
  23.103      else if (strcmp(ptr, "fThreadInfo") == 0)
  23.104      {
  23.105 +#ifdef PDB_PAST
  23.106          struct task_struct *p;
  23.107          u_long flags;
  23.108 -	int count = 0, buf_idx = 0;
  23.109 -
  23.110 -        read_lock_irqsave (&tasklist_lock, flags);
  23.111 -
  23.112 -	pdb_out_buffer[buf_idx++] = 'm';
  23.113 -        for_each_domain ( p )
  23.114 -	{
  23.115 -	    domid_t domain = p->domain + PDB_DOMAIN_OFFSET;
  23.116 +#endif /* PDB_PAST */
  23.117  
  23.118 -	    if (count > 0)
  23.119 -	        pdb_out_buffer[buf_idx++] = ',';
  23.120 -	    /*
  23.121 -              if (domain < 0)
  23.122 -              {   pdb_out_buffer[buf_idx++] = '-'; domain = domain * -1; }
  23.123 -	    */
  23.124 -	    if (domain > 15)
  23.125 -	    {
  23.126 -	        pdb_out_buffer[buf_idx++] = hexchars[domain >> 4];
  23.127 -	    }
  23.128 -	    pdb_out_buffer[buf_idx++] = hexchars[domain % 16];
  23.129 -	    count++;
  23.130 -	}
  23.131 +        int buf_idx = 0;
  23.132 +
  23.133  	pdb_out_buffer[buf_idx++] = 'l';
  23.134  	pdb_out_buffer[buf_idx++] = 0;
  23.135  
  23.136 -        read_unlock_irqrestore(&tasklist_lock, flags);
  23.137 +#ifdef PDB_PAST
  23.138 +	switch (pdb_level)
  23.139 +	{
  23.140 +	case PDB_LVL_XEN:                        /* return a list of domains */
  23.141 +	{
  23.142 +	    int count = 0;
  23.143 +
  23.144 +	    read_lock_irqsave (&tasklist_lock, flags);
  23.145 +
  23.146 +	    pdb_out_buffer[buf_idx++] = 'm';
  23.147 +	    for_each_domain ( p )
  23.148 +	    {
  23.149 +	        domid_t domain = p->domain + PDB_ID_OFFSET;
  23.150 +
  23.151 +		if (count > 0)
  23.152 +		{
  23.153 +		    pdb_out_buffer[buf_idx++] = ',';
  23.154 +		}
  23.155 +		if (domain > 15)
  23.156 +		{
  23.157 +		    pdb_out_buffer[buf_idx++] = hexchars[domain >> 4];
  23.158 +		}
  23.159 +		pdb_out_buffer[buf_idx++] = hexchars[domain % 16];
  23.160 +		count++;
  23.161 +	    }
  23.162 +	    pdb_out_buffer[buf_idx++] = 0;
  23.163 +
  23.164 +	    read_unlock_irqrestore(&tasklist_lock, flags);
  23.165 +	    break;
  23.166 +	}
  23.167 +	case PDB_LVL_GUESTOS:                  /* return a list of processes */
  23.168 +	{
  23.169 +	    int foobar[20];
  23.170 +	    int loop, total;
  23.171 +
  23.172 +                                                       /* this cr3 is wrong! */
  23.173 +	    total = pdb_linux_process_list(pdb_ctx[pdb_level].info_cr3,
  23.174 +					   foobar, 20);
  23.175 +
  23.176 +	    pdb_out_buffer[buf_idx++] = 'm';     
  23.177 +	    pdb_out_buffer[buf_idx++] = '1';              /* 1 is to go back */
  23.178 +	    for (loop = 0; loop < total; loop++)
  23.179 +	    {
  23.180 +	        int pid = foobar[loop] + PDB_ID_OFFSET;
  23.181 +
  23.182 +		pdb_out_buffer[buf_idx++] = ',';
  23.183 +		if (pid > 15)
  23.184 +		{
  23.185 +		    pdb_out_buffer[buf_idx++] = hexchars[pid >> 4];
  23.186 +		}
  23.187 +		pdb_out_buffer[buf_idx++] = hexchars[pid % 16];
  23.188 +	    }
  23.189 +	    pdb_out_buffer[buf_idx++] = 0;
  23.190 +	    break;
  23.191 +	}
  23.192 +	case PDB_LVL_PROCESS:                                     /* hmmm... */
  23.193 +	{
  23.194 +	    pdb_out_buffer[buf_idx++] = 'm';
  23.195 +	    pdb_out_buffer[buf_idx++] = '1';              /* 1 is to go back */
  23.196 +	    break;
  23.197 +	}
  23.198 +	default:
  23.199 +	    break;
  23.200 +	}
  23.201 +#endif /* PDB_PAST */
  23.202 +
  23.203      }
  23.204      else if (strcmp(ptr, "sThreadInfo") == 0)
  23.205      {
  23.206 +        int buf_idx = 0;
  23.207 +
  23.208 +	pdb_out_buffer[buf_idx++] = 'l';
  23.209 +	pdb_out_buffer[buf_idx++] = 0;
  23.210      }
  23.211      else if (strncmp(ptr, "ThreadExtraInfo,", 16) == 0)
  23.212      {
  23.213          int thread = 0;
  23.214 -	char *message = "whatever!";
  23.215 +	char *message = "foobar ?";
  23.216 +
  23.217 +	ptr += 16;
  23.218 +        if (hexToInt (&ptr, &thread))
  23.219 +	{
  23.220 +            mem2hex (message, pdb_out_buffer, strlen(message) + 1);
  23.221 +	}
  23.222 +
  23.223 +#ifdef PDB_PAST
  23.224 +        int thread = 0;
  23.225 +	char message[16];
  23.226 +	struct task_struct *p;
  23.227 +
  23.228 +	p = find_domain_by_id(pdb_ctx[pdb_level].info);
  23.229 +	strncpy (message, p->name, 16);
  23.230 +	put_task_struct(p);
  23.231  
  23.232  	ptr += 16;
  23.233          if (hexToInt (&ptr, &thread))
  23.234  	{
  23.235              mem2hex ((char *)message, pdb_out_buffer, strlen(message) + 1);
  23.236  	}
  23.237 +#endif /* PDB_PAST */
  23.238 +
  23.239 +#ifdef PDB_FUTURE
  23.240 +      {
  23.241 +	char string[task_struct_comm_length];
  23.242 +
  23.243 +	string[0] = 0;
  23.244 +	pdb_linux_process_details (cr3, pid, string);
  23.245 +	printk (" (%s)", string);
  23.246 +      }
  23.247 +#endif /* PDB_FUTURE*/
  23.248 +
  23.249      }
  23.250      else if (strcmp(ptr, "Offsets") == 0)
  23.251      {
  23.252 @@ -126,7 +236,7 @@ pdb_process_query (char *ptr)
  23.253      }
  23.254      else
  23.255      {
  23.256 -        printk("pdb_process_query: unknown query [%s]\n", ptr);
  23.257 +        printk("pdb: error, unknown query [%s]\n", ptr);
  23.258      }
  23.259  }
  23.260  
  23.261 @@ -206,18 +316,62 @@ pdb_gdb_to_x86_regs (struct pt_regs *reg
  23.262  }
  23.263  
  23.264  int
  23.265 -pdb_process_command (char *ptr, struct pt_regs *regs)
  23.266 +pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3,
  23.267 +		     int sigval)
  23.268  {
  23.269 -    int sigval = 10;
  23.270      int length;
  23.271      unsigned long addr;
  23.272      int ack = 1;                           /* wait for ack in pdb_put_packet */
  23.273      int go = 0;
  23.274  
  23.275 -    DPRINTK("pdb: [%s]\n", ptr);
  23.276 +    TRC(printf("pdb: [%s]\n", ptr));
  23.277  
  23.278      pdb_out_buffer[0] = 0;
  23.279  
  23.280 +    if (pdb_ctx.valid == 1)
  23.281 +    {
  23.282 +        if (pdb_ctx.domain == -1)                        /* pdb context: xen */
  23.283 +	{
  23.284 +	    struct task_struct *p;
  23.285 +
  23.286 +	    p = &idle0_task;
  23.287 +	    if (p->mm.shadow_mode)
  23.288 +	        pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table);
  23.289 +	    else
  23.290 +	        pdb_ctx.ptbr = pagetable_val(p->mm.pagetable);
  23.291 +	}
  23.292 +	else if (pdb_ctx.process == -1)             /* pdb context: guest os */
  23.293 +	{
  23.294 +	    struct task_struct *p;
  23.295 +
  23.296 +	    p = find_domain_by_id(pdb_ctx.domain);
  23.297 +	    if (p->mm.shadow_mode)
  23.298 +	        pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table);
  23.299 +	    else
  23.300 +	        pdb_ctx.ptbr = pagetable_val(p->mm.pagetable);
  23.301 +	    put_task_struct(p);
  23.302 +	}
  23.303 +	else                                         /* pdb context: process */
  23.304 +	{
  23.305 +	    struct task_struct *p;
  23.306 +	    unsigned long domain_ptbr;
  23.307 +
  23.308 +	    p = find_domain_by_id(pdb_ctx.domain);
  23.309 +	    if (p->mm.shadow_mode)
  23.310 +	        domain_ptbr = pagetable_val(p->mm.shadow_table);
  23.311 +	    else
  23.312 +	        domain_ptbr = pagetable_val(p->mm.pagetable);
  23.313 +	    put_task_struct(p);
  23.314 +
  23.315 +	    pdb_ctx.ptbr = domain_ptbr;
  23.316 +	    /* pdb_ctx.ptbr = pdb_linux_pid_ptbr(domain_ptbr, pdb_ctx.process); */
  23.317 +	}
  23.318 +
  23.319 +	pdb_ctx.valid = 0;
  23.320 +	TRC(printk ("pdb change context (dom:%d, proc:%d) now 0x%lx\n",
  23.321 +		    pdb_ctx.domain, pdb_ctx.process, pdb_ctx.ptbr));
  23.322 +    }
  23.323 +
  23.324      switch (*ptr++)
  23.325      {
  23.326      case '?':
  23.327 @@ -228,15 +382,14 @@ pdb_process_command (char *ptr, struct p
  23.328          break;
  23.329      case 'S':                                            /* step with signal */
  23.330      case 's':                                                        /* step */
  23.331 -        regs->eflags |= 0x100;
  23.332 +        regs->eflags |= X86_EFLAGS_TF;
  23.333          pdb_stepping = 1;
  23.334          return 1;                                        
  23.335          /* not reached */
  23.336      case 'C':                                        /* continue with signal */
  23.337      case 'c':                                                    /* continue */
  23.338 -        regs->eflags &= ~0x100;
  23.339 -        /* jump out before replying to gdb */
  23.340 -        return 1;
  23.341 +        regs->eflags &= ~X86_EFLAGS_TF;
  23.342 +        return 1;                         /* jump out before replying to gdb */
  23.343          /* not reached */
  23.344      case 'd':
  23.345          remote_debug = !(remote_debug);                 /* toggle debug flag */
  23.346 @@ -301,24 +454,23 @@ pdb_process_command (char *ptr, struct p
  23.347      {
  23.348          int thread;
  23.349          char *next = &ptr[1];
  23.350 +
  23.351          if (hexToInt (&next, &thread))
  23.352          {
  23.353 -            if (thread > 0)
  23.354 -            {
  23.355 -                thread = thread - PDB_DOMAIN_OFFSET;
  23.356 -            }
  23.357              if (*ptr == 'c')
  23.358              {
  23.359 -                pdb_ctrl_thread = thread;
  23.360 +	        pdb_continue_thread = thread;
  23.361              }
  23.362              else if (*ptr == 'g')
  23.363              {
  23.364 -                pdb_info_thread = thread;
  23.365 +	        pdb_general_thread = thread;
  23.366              }
  23.367              else
  23.368              {
  23.369 -                printk ("ack, unknown command %c (thread: %d)\n", 
  23.370 +                printk ("pdb error: unknown set thread command %c (%d)\n", 
  23.371                          *ptr, thread);
  23.372 +		strcpy (pdb_out_buffer, "E00");
  23.373 +		break;
  23.374              }
  23.375          }
  23.376          strcpy (pdb_out_buffer, "OK");
  23.377 @@ -347,19 +499,31 @@ pdb_process_command (char *ptr, struct p
  23.378                  if (hexToInt (&ptr, &length))
  23.379                  {
  23.380                      ptr = 0;
  23.381 -                    mem_err = 0;
  23.382  
  23.383 -                    if (pdb_info_thread >= 0)
  23.384 +		    pdb_page_fault_possible = 1;
  23.385 +		    pdb_page_fault = 0;
  23.386 +		    if (addr >= PAGE_OFFSET)
  23.387 +		    {
  23.388 +                        mem2hex ((char *) addr, pdb_out_buffer, length); 
  23.389 +		    }
  23.390 +		    else if (pdb_ctx.process != -1)
  23.391 +		    {
  23.392 +		        pdb_linux_get_values(pdb_buffer, length, addr, 
  23.393 +					     pdb_ctx.process, pdb_ctx.ptbr);
  23.394 +                        mem2hex (pdb_buffer, pdb_out_buffer, length); 
  23.395 +		    }
  23.396 +                    else
  23.397                      {
  23.398 -                        pdb_get_values(pdb_info_thread, pdb_buffer, addr, length);
  23.399 +		        pdb_get_values (pdb_buffer, length, 
  23.400 +					pdb_ctx.ptbr, addr);
  23.401                          mem2hex (pdb_buffer, pdb_out_buffer, length);
  23.402                      }
  23.403 -                    else
  23.404 -                        mem2hex ((char *) addr, pdb_out_buffer, length); 
  23.405 -                    if (mem_err)
  23.406 -                    {
  23.407 +
  23.408 +		    pdb_page_fault_possible = 0;
  23.409 +		    if (pdb_page_fault)
  23.410 +		    {
  23.411                          strcpy (pdb_out_buffer, "E03");
  23.412 -                    }
  23.413 +		    }
  23.414                  }
  23.415  	    
  23.416          if (ptr)
  23.417 @@ -378,12 +542,20 @@ pdb_process_command (char *ptr, struct p
  23.418                  if (hexToInt (&ptr, &length))
  23.419                      if (*(ptr++) == ':')
  23.420                      {
  23.421 -                        mem_err = 0;
  23.422  
  23.423 -                        pdb_set_values(pdb_info_thread, 
  23.424 -                                       ptr, addr, length);
  23.425 -
  23.426 -                        if (mem_err)
  23.427 +		        pdb_page_fault_possible = 1;
  23.428 +			pdb_page_fault = 0;
  23.429 +			if (addr >= PAGE_OFFSET)
  23.430 +			{
  23.431 +			    hex2mem (ptr, (char *)addr, length);
  23.432 +			}
  23.433 +			else
  23.434 +			{
  23.435 +			    pdb_set_values (ptr, length,
  23.436 +					    pdb_ctx.ptbr, addr);
  23.437 +			}
  23.438 +			pdb_page_fault_possible = 0;
  23.439 +                        if (pdb_page_fault)
  23.440                          {
  23.441                              strcpy (pdb_out_buffer, "E03");
  23.442                          }
  23.443 @@ -402,20 +574,65 @@ pdb_process_command (char *ptr, struct p
  23.444      }
  23.445      case 'T':
  23.446      {
  23.447 -        int thread;
  23.448 -        if (hexToInt (&ptr, &thread))
  23.449 +        int id;
  23.450 +
  23.451 +        if (hexToInt (&ptr, &id))
  23.452          {
  23.453 -	    struct task_struct *p;
  23.454 -            thread -= PDB_DOMAIN_OFFSET;
  23.455 -            if ( (p = find_domain_by_id(thread)) == NULL)
  23.456 -                strcpy (pdb_out_buffer, "E00");
  23.457 -            else
  23.458 -                strcpy (pdb_out_buffer, "OK");
  23.459 -            put_task_struct(p);
  23.460 +	    strcpy (pdb_out_buffer, "E00");
  23.461 +
  23.462 +#ifdef PDB_PAST
  23.463 +
  23.464 +	    switch (pdb_level)                             /* previous level */
  23.465 +	    {
  23.466 +	        case PDB_LVL_XEN:
  23.467 +		{
  23.468 +		    struct task_struct *p;
  23.469 +		    id -= PDB_ID_OFFSET;
  23.470 +		    if ( (p = find_domain_by_id(id)) == NULL)
  23.471 +		        strcpy (pdb_out_buffer, "E00");
  23.472 +		    else
  23.473 +		        strcpy (pdb_out_buffer, "OK");
  23.474 +		    put_task_struct(p);
  23.475 +
  23.476 +		    pdb_level = PDB_LVL_GUESTOS;
  23.477 +		    pdb_ctx[pdb_level].ctrl = id;
  23.478 +		    pdb_ctx[pdb_level].info = id;
  23.479 +		    break;
  23.480 +		}
  23.481 +	        case PDB_LVL_GUESTOS:
  23.482 +		{
  23.483 +		    if (pdb_level == -1)
  23.484 +		    {
  23.485 +		        pdb_level = PDB_LVL_XEN;
  23.486 +		    }
  23.487 +		    else
  23.488 +		    {
  23.489 +		        pdb_level = PDB_LVL_PROCESS;
  23.490 +			pdb_ctx[pdb_level].ctrl = id;
  23.491 +			pdb_ctx[pdb_level].info = id;
  23.492 +		    }
  23.493 +		    break;
  23.494 +		}
  23.495 +	        case PDB_LVL_PROCESS:
  23.496 +		{
  23.497 +		    if (pdb_level == -1)
  23.498 +		    {
  23.499 +		        pdb_level = PDB_LVL_GUESTOS;
  23.500 +		    }
  23.501 +		    break;
  23.502 +		}
  23.503 +	        default:
  23.504 +		{
  23.505 +		    printk ("pdb internal error: invalid level [%d]\n", 
  23.506 +			    pdb_level);
  23.507 +		}
  23.508 +	    }
  23.509 +
  23.510 +#endif /* PDB_PAST */
  23.511          }
  23.512          break;
  23.513      }
  23.514 -    }                                                          /* switch */
  23.515 +    }
  23.516  
  23.517      /* reply to the request */
  23.518      pdb_put_packet (pdb_out_buffer, ack);
  23.519 @@ -436,6 +653,9 @@ int pdb_serial_input(u_char c, struct pt
  23.520  {
  23.521      int out = 1;
  23.522      int loop, count;
  23.523 +    unsigned long cr3;
  23.524 +
  23.525 +    __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
  23.526  
  23.527      switch (pdb_debug_state)
  23.528      {
  23.529 @@ -489,7 +709,8 @@ int pdb_serial_input(u_char c, struct pt
  23.530  		    pdb_in_buffer[loop - 3] = pdb_in_buffer[loop];
  23.531  	    }
  23.532  
  23.533 -	    pdb_process_command (pdb_in_buffer, regs);
  23.534 +	    pdb_process_command (pdb_in_buffer, regs, cr3,
  23.535 +				 PDB_LIVE_EXCEPTION);
  23.536  	}
  23.537  	pdb_debug_state = 0;
  23.538  	break;
  23.539 @@ -594,18 +815,20 @@ hexToInt (char **ptr, int *intValue)
  23.540   */
  23.541  struct pdb_breakpoint breakpoints;
  23.542  
  23.543 -void pdb_bkpt_add (unsigned long address)
  23.544 +void pdb_bkpt_add (unsigned long cr3, unsigned long address)
  23.545  {
  23.546      struct pdb_breakpoint *bkpt = kmalloc(sizeof(*bkpt), GFP_KERNEL);
  23.547 +    bkpt->cr3 = cr3;
  23.548      bkpt->address = address;
  23.549      list_add(&bkpt->list, &breakpoints.list);
  23.550  }
  23.551  
  23.552  /*
  23.553   * Check to see of the breakpoint is in the list of known breakpoints 
  23.554 - * Return 1 if it has been set, 0 otherwise.
  23.555 + * Return a pointer to the matching breakpoint, NULL otherwise.
  23.556   */
  23.557 -struct pdb_breakpoint* pdb_bkpt_search (unsigned long address)
  23.558 +struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3, 
  23.559 +					unsigned long address)
  23.560  {
  23.561      struct list_head *list_entry;
  23.562      struct pdb_breakpoint *bkpt;
  23.563 @@ -613,7 +836,7 @@ struct pdb_breakpoint* pdb_bkpt_search (
  23.564      list_for_each(list_entry, &breakpoints.list)
  23.565      {
  23.566          bkpt = list_entry(list_entry, struct pdb_breakpoint, list);
  23.567 -	if ( bkpt->address == address )
  23.568 +	if ( bkpt->cr3 == cr3 && bkpt->address == address )
  23.569              return bkpt;
  23.570      }
  23.571  
  23.572 @@ -624,7 +847,7 @@ struct pdb_breakpoint* pdb_bkpt_search (
  23.573   * Remove a breakpoint to the list of known breakpoints.
  23.574   * Return 1 if the element was not found, otherwise 0.
  23.575   */
  23.576 -int pdb_bkpt_remove (unsigned long address)
  23.577 +int pdb_bkpt_remove (unsigned long cr3, unsigned long address)
  23.578  {
  23.579      struct list_head *list_entry;
  23.580      struct pdb_breakpoint *bkpt;
  23.581 @@ -632,7 +855,7 @@ int pdb_bkpt_remove (unsigned long addre
  23.582      list_for_each(list_entry, &breakpoints.list)
  23.583      {
  23.584          bkpt = list_entry(list_entry, struct pdb_breakpoint, list);
  23.585 -	if ( bkpt->address == address )
  23.586 +	if ( bkpt->cr3 == cr3 && bkpt->address == address )
  23.587  	{
  23.588              list_del(&bkpt->list);
  23.589              kfree(bkpt);
  23.590 @@ -643,6 +866,183 @@ int pdb_bkpt_remove (unsigned long addre
  23.591      return 1;
  23.592  }
  23.593  
  23.594 +/* Check whether a memory write is really gdb inserting or removing a breakpoint:
  23.595 + * a one-byte write whose text in "buffer" is "cc" records (cr3, addr); any other
  23.596 + * write at a recorded (cr3, addr) drops that record. */
  23.597 +void pdb_bkpt_check (u_char *buffer, int length,
  23.598 +		     unsigned long cr3, unsigned long addr)
  23.599 +{
  23.600 +    if (length == 1 && buffer[0] == 'c' && buffer[1] == 'c') /* two chars for one byte: buffer is presumably hex text, "cc" = 0xCC -- confirm */
  23.601 +    {
  23.602 +        /* inserting a new breakpoint: remember it in the breakpoint list */
  23.603 +        pdb_bkpt_add(cr3, addr);
  23.604 +        TRC(printk("pdb breakpoint detected at 0x%lx:0x%lx\n", cr3, addr));
  23.605 +    }
  23.606 +    else if ( pdb_bkpt_remove(cr3, addr) == 0 )
  23.607 +    {
  23.608 +        /* removing a breakpoint: pdb_bkpt_remove() found and freed the entry */
  23.609 +        TRC(printk("pdb breakpoint cleared at 0x%lx:0x%lx\n", cr3, addr));
  23.610 +    }
  23.611 +}
  23.612 +
  23.613 +/***********************************************************************/
  23.614 +
  23.615 +int pdb_change_values(u_char *buffer, int length,
  23.616 +		      unsigned long cr3, unsigned long addr, int rw);
  23.617 +int pdb_change_values_one_page(u_char *buffer, int length,
  23.618 +			       unsigned long cr3, unsigned long addr, int rw);
  23.619 +/* Values for the "rw" argument of the two functions declared above. */
  23.620 +#define __PDB_GET_VAL 1 /* passed by pdb_get_values(): read */
  23.621 +#define __PDB_SET_VAL 2 /* passed by pdb_set_values(): write */
  23.622 +
  23.623 +/*
  23.624 + * Write memory in the address space whose page tables are rooted at "cr3":
  23.625 + * store "length" bytes at "addr" from "buffer", then call pdb_bkpt_check()
  23.626 + * so breakpoint writes are tracked.  Returns pdb_change_values()'s count.
  23.627 + */
  23.628 +
  23.629 +int pdb_set_values(u_char *buffer, int length,
  23.630 +		   unsigned long cr3, unsigned long addr)
  23.631 +{
  23.632 +    int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL);
  23.633 +    pdb_bkpt_check(buffer, length, cr3, addr); /* runs regardless of whether the write succeeded */
  23.634 +    return count;
  23.635 +}
  23.636 +
  23.637 +/*
  23.638 + * Read memory from the address space whose page tables are rooted at "cr3".
  23.639 + * Fetch "length" bytes at "addr" into "buffer" via pdb_change_values().
  23.640 + * Returns that function's byte count (reportedly 0 if there was a problem).
  23.641 + */
  23.642 +
  23.643 +int pdb_get_values(u_char *buffer, int length,
  23.644 +		   unsigned long cr3, unsigned long addr)
  23.645 +{
  23.646 +  return pdb_change_values(buffer, length, cr3, addr, __PDB_GET_VAL);
  23.647 +}
  23.648 +
  23.649 +/*
  23.650 + * Read or write ("rw") "length" bytes at "addr" under "cr3", page by page.
  23.651 + */
  23.652 +int pdb_change_values(u_char *buffer, int length,
  23.653 +		      unsigned long cr3, unsigned long addr, int rw)
  23.654 +{
  23.655 +    int remaining;                /* number of bytes to touch past this page */
  23.656 +    int bytes = 0;
  23.657 +    /* While the range runs past the last byte of addr's page, handle the in-page part and advance. */
  23.658 +    while ( (remaining = (addr + length - 1) - (addr | (PAGE_SIZE - 1))) > 0)
  23.659 +    {
  23.660 +        bytes += pdb_change_values_one_page(buffer, length - remaining, 
  23.661 +					    cr3, addr, rw);
  23.662 +	buffer = buffer + (2 * (length - remaining)); /* NOTE(review): two buffer chars per byte fits hex text on writes -- confirm the stride for reads */
  23.663 +	length = remaining;
  23.664 +	addr = (addr | (PAGE_SIZE - 1)) + 1; /* first byte of the next page */
  23.665 +    }
  23.666 +    /* What is left now lies within a single page. */
  23.667 +    bytes += pdb_change_values_one_page(buffer, length, cr3, addr, rw);
  23.668 +    return bytes;
  23.669 +}
  23.670 +
  23.671 +/*
  23.672 + * Change memory in a process' address space in one page
  23.673 + * Read or write "length" bytes at "addr" into/from "buffer"
  23.674 + * from the virtual address space referenced by "cr3".
  23.675 + * Return the bytes read/written, 0 on a problem; all mappings are released.
  23.676 + */
  23.677 +
  23.678 +int pdb_change_values_one_page(u_char *buffer, int length,
  23.679 +			       unsigned long cr3, unsigned long addr, int rw)
  23.680 +{
  23.681 +    l2_pgentry_t* l2_table = NULL;
  23.682 +    l1_pgentry_t* l1_table = NULL;
  23.683 +    u_char *page;
  23.684 +    int bytes = 0;
  23.685 +
  23.686 +    l2_table = map_domain_mem(cr3); 
  23.687 +    l2_table += l2_table_offset(addr);
  23.688 +    if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT)) 
  23.689 +    {
  23.690 +	if (pdb_page_fault_possible == 1)
  23.691 +	{
  23.692 +	    pdb_page_fault = 1;
  23.693 +	    TRC(printk("pdb: L2 error (0x%lx)\n", addr));
  23.694 +	}
  23.695 +	else
  23.696 +	{
  23.697 +	    struct task_struct *p = find_domain_by_id(0);
  23.698 +	    printk ("pdb error: cr3: 0x%lx    dom0cr3:  0x%lx\n",  cr3,
  23.699 +		    p->mm.shadow_mode ? pagetable_val(p->mm.shadow_table)
  23.700 +		    : pagetable_val(p->mm.pagetable));
  23.701 +	    put_task_struct(p);
  23.702 +	    printk ("pdb error: L2:0x%p (0x%lx)\n", 
  23.703 +		    l2_table, l2_pgentry_val(*l2_table));
  23.704 +	}
  23.705 +	goto exit2;
  23.706 +    }
  23.707 +
  23.708 +    if (l2_pgentry_val(*l2_table) & _PAGE_PSE)
  23.709 +    {
  23.710 +#define PSE_PAGE_SHIFT           L2_PAGETABLE_SHIFT
  23.711 +#define PSE_PAGE_SIZE	         (1UL << PSE_PAGE_SHIFT)
  23.712 +#define PSE_PAGE_MASK	         (~(PSE_PAGE_SIZE-1))
  23.713 +
  23.714 +#define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << L1_PAGETABLE_SHIFT )
  23.715 +
  23.716 +#define pse_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PSE_PAGE_MASK)
  23.717 +
  23.718 +        page = map_domain_mem(pse_pgentry_to_phys(*l2_table) +    /* 10 bits */
  23.719 +			      (addr & L1_PAGE_BITS));             /* 10 bits */
  23.720 +	page += addr & (PAGE_SIZE - 1);                           /* 12 bits */
  23.721 +    }
  23.722 +    else
  23.723 +    {
  23.724 +        l1_table = map_domain_mem(l2_pgentry_to_phys(*l2_table));
  23.725 +	l1_table += l1_table_offset(addr); 
  23.726 +	if (!(l1_pgentry_val(*l1_table) & _PAGE_PRESENT))
  23.727 +	{
  23.728 +	    if (pdb_page_fault_possible == 1)
  23.729 +	    {
  23.730 +	        pdb_page_fault = 1;
  23.731 +		TRC(printk ("pdb: L1 error (0x%lx)\n", addr));
  23.732 +	    }
  23.733 +	    else
  23.734 +	    {
  23.735 +	        printk ("L2:0x%p (0x%lx) L1:0x%p (0x%lx)\n", 
  23.736 +			l2_table, l2_pgentry_val(*l2_table),
  23.737 +			l1_table, l1_pgentry_val(*l1_table));
  23.738 +	    }
  23.739 +	    goto exit1;
  23.740 +	}
  23.741 +
  23.742 +	page = map_domain_mem(l1_pgentry_to_phys(*l1_table));
  23.743 +	page += addr & (PAGE_SIZE - 1);
  23.744 +    }
  23.745 +
  23.746 +    switch (rw)
  23.747 +    {
  23.748 +    case __PDB_GET_VAL:                                              /* read */
  23.749 +        memcpy (buffer, page, length);
  23.750 +	bytes = length;
  23.751 +	break;
  23.752 +    case __PDB_SET_VAL:                                             /* write */
  23.753 +        hex2mem (buffer, page, length);
  23.754 +	bytes = length;
  23.755 +	break;
  23.756 +    default:                                                      /* unknown */
  23.757 +        printk ("error: unknown RW flag: %d\n", rw);
  23.758 +	break;          /* bytes is still 0; do not return with pages mapped */
  23.759 +    }
  23.760 +
  23.761 +    unmap_domain_mem((void *)page); 
  23.762 +exit1:
  23.763 +    if (l1_table != NULL)
  23.764 +        unmap_domain_mem((void *)l1_table);
  23.765 +exit2:
  23.766 +    unmap_domain_mem((void *)l2_table);
  23.767 +
  23.768 +    return bytes;
  23.769 +}
  23.770 +
  23.771  /***********************************************************************/
  23.772  
  23.773  void breakpoint(void);
  23.774 @@ -716,7 +1116,7 @@ void pdb_get_packet(char *buffer)
  23.775  	        pdb_put_char('+');
  23.776  		if (buffer[2] == ':')
  23.777  		{
  23.778 -		    printk ("gdb packet found with sequence ID\n");
  23.779 +		    printk ("pdb: obsolete gdb packet (sequence ID)\n");
  23.780  		}
  23.781  	    }
  23.782  	    else
  23.783 @@ -735,46 +1135,45 @@ void pdb_get_packet(char *buffer)
  23.784   * be propagated to the guest os.
  23.785   */
  23.786  
  23.787 -#define DEBUG_EXCEPTION     0x01
  23.788 -#define BREAKPT_EXCEPTION   0x03
  23.789 -#define KEYPRESS_EXCEPTION  0x88
  23.790 -
  23.791  int pdb_handle_exception(int exceptionVector,
  23.792  			 struct pt_regs *xen_regs)
  23.793  {
  23.794      int signal = 0;
  23.795 +    struct pdb_breakpoint* bkpt;
  23.796      int watchdog_save;
  23.797 +    unsigned long cr3;
  23.798 +
  23.799 +    __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
  23.800  
  23.801      /*
  23.802       * If PDB didn't set the breakpoint, is not single stepping, and the user
  23.803       * didn't press the magic debug key, then we don't handle the exception.
  23.804       */
  23.805 -    if ( (pdb_bkpt_search(xen_regs->eip - 1) == NULL) &&
  23.806 -         !pdb_stepping && (exceptionVector != KEYPRESS_EXCEPTION) )
  23.807 +    bkpt = pdb_bkpt_search(cr3, xen_regs->eip - 1);
  23.808 +    if ( (bkpt == NULL) &&
  23.809 +         !pdb_stepping && (exceptionVector != KEYPRESS_EXCEPTION) &&
  23.810 +	 xen_regs->eip < 0xc0000000)                   /* xenolinux for now! */
  23.811      {
  23.812 -        DPRINTK("pdb: external breakpoint at 0x%lx\n", xen_regs->eip);
  23.813 +        TRC(printf("pdb: user bkpt at 0x%lx:0x%lx\n", cr3, xen_regs->eip));
  23.814  	return 1;
  23.815      }
  23.816  
  23.817 -    printk("pdb_handle_exception [0x%x][0x%lx]\n",
  23.818 -           exceptionVector, xen_regs->eip);
  23.819 +    printk("pdb_handle_exception [0x%x][0x%lx:0x%lx]\n",
  23.820 +	   exceptionVector, cr3, xen_regs->eip);
  23.821  
  23.822      if ( pdb_stepping )
  23.823      {
  23.824          /* Stepped one instruction; now return to normal execution. */
  23.825 -        xen_regs->eflags &= ~0x100;
  23.826 +        xen_regs->eflags &= ~X86_EFLAGS_TF;
  23.827          pdb_stepping = 0;
  23.828      }
  23.829  
  23.830 -    if ( exceptionVector == BREAKPT_EXCEPTION )
  23.831 +    if ( exceptionVector == BREAKPT_EXCEPTION && bkpt != NULL)
  23.832      {
  23.833          /* Executed Int3: replace breakpoint byte with real program byte. */
  23.834          xen_regs->eip--;
  23.835      }
  23.836  
  23.837 -    watchdog_save = watchdog_on;
  23.838 -    watchdog_on = 0;
  23.839 -
  23.840      /* Generate a signal for GDB. */
  23.841      switch ( exceptionVector )
  23.842      {
  23.843 @@ -796,11 +1195,14 @@ int pdb_handle_exception(int exceptionVe
  23.844      pdb_out_buffer[3] = 0;
  23.845      pdb_put_packet(pdb_out_buffer, 1);
  23.846  
  23.847 +    watchdog_save = watchdog_on;
  23.848 +    watchdog_on = 0;
  23.849 +
  23.850      do {
  23.851          pdb_out_buffer[0] = 0;
  23.852  	pdb_get_packet(pdb_in_buffer);
  23.853      }
  23.854 -    while ( pdb_process_command(pdb_in_buffer, xen_regs) == 0 );
  23.855 +    while ( pdb_process_command(pdb_in_buffer, xen_regs, cr3, signal) == 0 );
  23.856  
  23.857      watchdog_on = watchdog_save;
  23.858  
  23.859 @@ -818,7 +1220,7 @@ void initialize_pdb()
  23.860      extern char opt_pdb[];
  23.861  
  23.862      /* Certain state must be initialised even when PDB will not be used. */
  23.863 -    breakpoints.address = 0;
  23.864 +    memset((void *) &breakpoints, 0, sizeof(breakpoints));
  23.865      INIT_LIST_HEAD(&breakpoints.list);
  23.866      pdb_stepping = 0;
  23.867  
  23.868 @@ -827,14 +1229,20 @@ void initialize_pdb()
  23.869  
  23.870      if ( (pdb_serhnd = parse_serial_handle(opt_pdb)) == -1 )
  23.871      {
  23.872 -        printk("Failed to initialise PDB on port %s\n", opt_pdb);
  23.873 +        printk("error: failed to initialize PDB on port %s\n", opt_pdb);
  23.874          return;
  23.875      }
  23.876  
  23.877 -    printk("Initialised pervasive debugger (PDB) on port %s\n", opt_pdb);
  23.878 +    pdb_ctx.valid = 1;
  23.879 +    pdb_ctx.domain = -1;
  23.880 +    pdb_ctx.process = -1;
  23.881 +    pdb_ctx.ptbr = 0;
  23.882 +
  23.883 +    printk("pdb: pervasive debugger (%s)   www.cl.cam.ac.uk/netos/pdb\n", 
  23.884 +	   opt_pdb);
  23.885  
  23.886      /* Acknowledge any spurious GDB packets. */
  23.887 -    serial_putc(pdb_serhnd, '+');
  23.888 +    pdb_put_char('+');
  23.889  
  23.890      add_key_handler('D', pdb_key_pressed, "enter pervasive debugger");
  23.891  
    24.1 --- a/xen/arch/i386/setup.c	Fri Apr 02 13:58:27 2004 +0000
    24.2 +++ b/xen/arch/i386/setup.c	Fri Apr 02 14:03:03 2004 +0000
    24.3 @@ -411,8 +411,6 @@ void __init start_of_day(void)
    24.4  
    24.5      check_nmi_watchdog();
    24.6  
    24.7 -    zap_low_mappings();
    24.8 -
    24.9  #ifdef CONFIG_PCI
   24.10      pci_init();
   24.11  #endif
   24.12 @@ -420,7 +418,9 @@ void __init start_of_day(void)
   24.13      if ( !setup_network_devices() )
   24.14          panic("Must have a network device!\n");
   24.15      net_init();            /* initializes virtual network system. */
   24.16 +#ifndef NO_DEVICES_IN_XEN
   24.17      initialize_block_io(); /* setup block devices */
   24.18 +#endif
   24.19  
   24.20  #ifdef CONFIG_SMP
   24.21      wait_init_idle = cpu_online_map;
    25.1 --- a/xen/arch/i386/traps.c	Fri Apr 02 13:58:27 2004 +0000
    25.2 +++ b/xen/arch/i386/traps.c	Fri Apr 02 14:03:03 2004 +0000
    25.3 @@ -206,7 +206,7 @@ static inline void do_trap(int trapnr, c
    25.4      gtb->cs         = ti->cs;
    25.5      gtb->eip        = ti->address;
    25.6      if ( TI_GET_IF(ti) )
    25.7 -        set_bit(0, &p->shared_info->evtchn_upcall_mask);
    25.8 +        p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
    25.9      return; 
   25.10  
   25.11   fault_in_hypervisor:
   25.12 @@ -277,9 +277,7 @@ asmlinkage void do_int3(struct pt_regs *
   25.13      gtb->cs         = ti->cs;
   25.14      gtb->eip        = ti->address;
   25.15      if ( TI_GET_IF(ti) )
   25.16 -        set_bit(0, &p->shared_info->evtchn_upcall_mask);
   25.17 -    return;
   25.18 -
   25.19 +        p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
   25.20  }
   25.21  
   25.22  asmlinkage void do_double_fault(void)
   25.23 @@ -339,11 +337,9 @@ asmlinkage void do_page_fault(struct pt_
   25.24              return; /* successfully copied the mapping */
   25.25      }
   25.26  
   25.27 -    if ( unlikely( p->mm.shadow_mode ) && addr < PAGE_OFFSET &&
   25.28 -	 shadow_fault( addr, error_code ) )
   25.29 -      {
   25.30 -	return; // return true if fault was handled 
   25.31 -      }
   25.32 +    if ( unlikely(p->mm.shadow_mode) && 
   25.33 +         (addr < PAGE_OFFSET) && shadow_fault(addr, error_code) )
   25.34 +	return; /* Return TRUE if fault was handled. */
   25.35  
   25.36      if ( unlikely(!(regs->xcs & 3)) )
   25.37          goto fault_in_hypervisor;
   25.38 @@ -355,7 +351,7 @@ asmlinkage void do_page_fault(struct pt_
   25.39      gtb->cs         = ti->cs;
   25.40      gtb->eip        = ti->address;
   25.41      if ( TI_GET_IF(ti) )
   25.42 -        set_bit(0, &p->shared_info->evtchn_upcall_mask);
   25.43 +        p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
   25.44      return; 
   25.45  
   25.46   fault_in_hypervisor:
   25.47 @@ -363,7 +359,8 @@ asmlinkage void do_page_fault(struct pt_
   25.48      if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
   25.49      {
   25.50          perfc_incrc(copy_user_faults);
   25.51 -        //DPRINTK("copy_user fault: %08lx -> %08lx\n", regs->eip, fixup);
   25.52 +        if ( !p->mm.shadow_mode )
   25.53 +            DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup);
   25.54          regs->eip = fixup;
   25.55          regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
   25.56          return;
   25.57 @@ -386,6 +383,15 @@ asmlinkage void do_page_fault(struct pt_
   25.58  #endif
   25.59      }
   25.60  
   25.61 +    if (pdb_page_fault_possible)
   25.62 +    {
   25.63 +        pdb_page_fault = 1;
   25.64 +	/* make eax & edx valid to complete the instruction */
   25.65 +	regs->eax = (long)&pdb_page_fault_scratch;
   25.66 +	regs->edx = (long)&pdb_page_fault_scratch;
   25.67 +	return;
   25.68 +    }
   25.69 +
   25.70      show_registers(regs);
   25.71      panic("CPU%d FATAL PAGE FAULT\n"
   25.72            "[error_code=%08x]\n"
   25.73 @@ -444,7 +450,7 @@ asmlinkage void do_general_protection(st
   25.74      gtb->cs         = ti->cs;
   25.75      gtb->eip        = ti->address;
   25.76      if ( TI_GET_IF(ti) )
   25.77 -        set_bit(0, &p->shared_info->evtchn_upcall_mask);
   25.78 +        p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
   25.79      return;
   25.80  
   25.81   gp_in_kernel:
    26.1 --- a/xen/common/debug-linux.c	Fri Apr 02 13:58:27 2004 +0000
    26.2 +++ b/xen/common/debug-linux.c	Fri Apr 02 14:03:03 2004 +0000
    26.3 @@ -1,13 +1,21 @@
    26.4 +
    26.5 +/*
    26.6 + * pervasive debugger
    26.7 + * www.cl.cam.ac.uk/netos/pdb
    26.8 + *
    26.9 + * alex ho
   26.10 + * 2004
   26.11 + * university of cambridge computer laboratory
   26.12 + *
   26.13 + * linux specific pdb stuff 
   26.14 + */
   26.15 +
   26.16  #include <xen/config.h>
   26.17  #include <xen/types.h>
   26.18  #include <xen/lib.h>
   26.19  #include <hypervisor-ifs/dom0_ops.h>
   26.20  #include <asm/pdb.h>
   26.21  
   26.22 -/* 
   26.23 - * linux specific pdb stuff 
   26.24 - */
   26.25 -
   26.26  /* from linux/sched.h */
   26.27  #define PIDHASH_SZ (4096 >> 2)
   26.28  #define pid_hashfn(x)	((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
   26.29 @@ -30,96 +38,191 @@
   26.30  #define ENTRIES_PER_L1_PAGETABLE 1024
   26.31  #define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << PAGE_SHIFT )
   26.32  
   26.33 +void pdb_linux_process_details (unsigned long cr3, int pid, char *buffer);
   26.34  
   26.35  /* adapted from asm-xen/page.h */
   26.36 -static inline unsigned long machine_to_phys(domid_t domain, 
   26.37 +static inline unsigned long machine_to_phys(unsigned long cr3,
   26.38                                              unsigned long machine)
   26.39  {
   26.40    unsigned long phys;
   26.41 -  pdb_get_values(domain, (u_char *) &phys,
   26.42 +  pdb_get_values((u_char *) &phys, sizeof(phys), cr3,
   26.43  		 (unsigned long) machine_to_phys_mapping + 
   26.44 -                 (machine >> PAGE_SHIFT) * 4,
   26.45 -		 sizeof(phys));
   26.46 +                 (machine >> PAGE_SHIFT) * 4);
   26.47    phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
   26.48    return phys;
   26.49  }
   26.50  
   26.51 -
   26.52 -#define pidhash_addr 0xc018f260UL
   26.53 +unsigned long pdb_pidhash_addr         = 0xc01971e0UL;
   26.54 +unsigned long pdb_init_task_union_addr = 0xc0182000UL;
   26.55  
   26.56 -#define task_struct_mm_offset  0x2c
   26.57 -#define task_struct_pid_offset 0x7c
   26.58 +#define task_struct_mm_offset        0x2c
   26.59 +#define task_struct_next_task_offset 0x48
   26.60 +#define task_struct_pid_offset       0x7c
   26.61  #define task_struct_pidhash_next_offset 0xb0
   26.62 -#define mm_struct_pgd_offset   0x0c
   26.63 +#define task_struct_comm_offset      0x23e
   26.64 +#define task_struct_comm_length      0x10
   26.65 +
   26.66 +#define mm_struct_pgd_offset         0x0c
   26.67  
   26.68  /*
   26.69 -  static inline struct task_struct *find_task_by_pid(int pid)
   26.70 -  {
   26.71 -    struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
   26.72 -
   26.73 -    for(p = *htable; p && p->pid != pid; p = p->pidhash_next) ;
   26.74 -    return p;
   26.75 -  }
   26.76 -*/
   26.77 -
   26.78 -/* read a byte from a process */
   26.79 -u_char pdb_linux_get_value(domid_t domain, int pid, unsigned long addr)
   26.80 + * find the task structure of a process (pid)
   26.81 + * given the cr3 of the guest os.
   26.82 + */
   26.83 +unsigned long pdb_linux_pid_task_struct (unsigned long cr3, int pid)
   26.84  {
   26.85 -  u_char result = 0;
   26.86 -  unsigned long task_struct_p, mm_p, pgd, task_struct_pid;
   26.87 -  unsigned long l2tab, page;
   26.88 +  unsigned long task_struct_p = (unsigned long) NULL;
   26.89 +  unsigned long task_struct_pid;
   26.90  
   26.91    /* find the task_struct of the given process */
   26.92 -  pdb_get_values(domain, (u_char *) &task_struct_p, 
   26.93 -		 pidhash_addr + pid_hashfn(pid) * 4,
   26.94 -		 sizeof(task_struct_p));
   26.95 +  pdb_get_values((u_char *) &task_struct_p, sizeof(task_struct_p),
   26.96 +		 cr3, pdb_pidhash_addr + pid_hashfn(pid) * 4);
   26.97  
   26.98    /* find the correct task struct */
   26.99    while (task_struct_p != (unsigned long)NULL)
  26.100    {
  26.101 -    pdb_get_values(domain, (u_char *) &task_struct_pid, 
  26.102 -		   task_struct_p + task_struct_pid_offset,
  26.103 -		   sizeof(task_struct_pid));
  26.104 +    pdb_get_values((u_char *) &task_struct_pid, sizeof(task_struct_pid),
  26.105 +		   cr3, task_struct_p + task_struct_pid_offset);
  26.106      if (task_struct_pid == pid)
  26.107      {
  26.108        break;
  26.109      }
  26.110 -    
  26.111 -    pdb_get_values(domain, (u_char *) &task_struct_p, 
  26.112 -		   task_struct_p + task_struct_pidhash_next_offset,
  26.113 -		   sizeof(task_struct_p));
  26.114 +
  26.115 +    pdb_get_values((u_char *) &task_struct_p, sizeof(task_struct_p),
  26.116 +		   cr3, task_struct_p + task_struct_pidhash_next_offset);
  26.117    }
  26.118 -  if (task_struct_p == (unsigned long)NULL)
  26.119 +  if (task_struct_p == (unsigned long) NULL)
  26.120    {
  26.121      /* oops */
  26.122 -    printk ("error: couldn't find process 0x%x in domain %llu\n", pid, domain);
  26.123 -    return 0;
  26.124 +    printk ("pdb error: couldn't find process 0x%x (0x%lx)\n", pid, cr3);
  26.125 +  }
  26.126 +
  26.127 +  return task_struct_p;
  26.128 +}
  26.129 +
  26.130 +/*
  26.131 + * find the ptbr of a process (pid)
  26.132 + * given the cr3 of the guest os.
  26.133 + */
  26.134 +unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid)
  26.135 +{
  26.136 +  unsigned long task_struct_p;
  26.137 +  unsigned long mm_p, pgd;
  26.138 +
  26.139 +  task_struct_p = pdb_linux_pid_task_struct(cr3, pid);
  26.140 +  if (task_struct_p == (unsigned long) NULL)
  26.141 +  {
  26.142 +    return (unsigned long) NULL;
  26.143    }
  26.144  
  26.145    /* get the mm_struct within the task_struct */
  26.146 -  pdb_get_values(domain, (u_char *) &mm_p, 
  26.147 -		 task_struct_p + task_struct_mm_offset,
  26.148 -		 sizeof(mm_p));
  26.149 +  pdb_get_values((u_char *) &mm_p, sizeof(mm_p),
  26.150 +		 cr3, task_struct_p + task_struct_mm_offset);
  26.151    /* get the page global directory (cr3) within the mm_struct */
  26.152 -  pdb_get_values(domain, (u_char *) &pgd, 
  26.153 -		 mm_p + mm_struct_pgd_offset,
  26.154 -		 sizeof(pgd));
  26.155 +  pdb_get_values((u_char *) &pgd, sizeof(pgd),
  26.156 +		 cr3, mm_p + mm_struct_pgd_offset);
  26.157 +
  26.158 +  return pgd;
  26.159 +}
  26.160 +
  26.161 +
  26.162 +
  26.163 +/* read a byte from a process 
  26.164 + *
  26.165 + * in: pid: process id
  26.166 + *     cr3: ptbr for the process' domain
  26.167 + *     addr: address to read
  26.168 + */
  26.169 +
  26.170 +u_char pdb_linux_get_value(int pid, unsigned long cr3, unsigned long addr)
  26.171 +{
  26.172 +  u_char result = 0;
  26.173 +  unsigned long pgd;
  26.174 +  unsigned long l2tab, page;
  26.175 +
  26.176 +  /* get the process' pgd */
  26.177 +  pgd = pdb_linux_pid_ptbr(cr3, pid);
  26.178  
  26.179    /* get the l2 table entry */
  26.180 -  pdb_get_values(domain, (u_char *) &l2tab, 
  26.181 -		 pgd + (addr >> PGDIR_SHIFT) * 4,
  26.182 -		 sizeof(l2tab));
  26.183 -  l2tab = (unsigned long)__va(machine_to_phys(domain, l2tab) & PAGE_MASK);
  26.184 +  pdb_get_values((u_char *) &l2tab, sizeof(l2tab),
  26.185 +		 cr3, pgd + (addr >> PGDIR_SHIFT) * 4);
  26.186 +  l2tab = (unsigned long)__va(machine_to_phys(cr3, l2tab) & PAGE_MASK);
  26.187  
  26.188    /* get the page table entry */
  26.189 -  pdb_get_values(domain, (u_char *) &page,
  26.190 -		 l2tab + ((addr & L1_PAGE_BITS) >> PAGE_SHIFT) * 4,
  26.191 -		 sizeof(page));
  26.192 -  page = (unsigned long)__va(machine_to_phys(domain, page) & PAGE_MASK);
  26.193 +  pdb_get_values((u_char *) &page, sizeof(page),
  26.194 +		 cr3, l2tab + ((addr & L1_PAGE_BITS) >> PAGE_SHIFT) * 4);
  26.195 +  page = (unsigned long)__va(machine_to_phys(cr3, page) & PAGE_MASK);
  26.196  
  26.197    /* get the byte */
  26.198 -  pdb_get_values(domain, (u_char *) &result, page + (addr & ~PAGE_MASK),
  26.199 -		 sizeof(result));
  26.200 +  pdb_get_values((u_char *) &result, sizeof(result),
  26.201 +		 cr3, page + (addr & ~PAGE_MASK));
  26.202  
  26.203    return result;
  26.204  }
  26.205 +
  26.206 +void pdb_linux_get_values(char *buffer, int length, unsigned long address,
  26.207 +			  int pid, unsigned long cr3)
  26.208 +{
  26.209 +    int loop;
  26.210 +
  26.211 +    /* one pdb_linux_get_value() call per byte; can be optimized... a lot */
  26.212 +    for (loop = 0; loop < length; loop++)
  26.213 +    {
  26.214 +        buffer[loop] = pdb_linux_get_value(pid, cr3, address + loop);
  26.215 +    }
  26.216 +}
  26.217 +
  26.218 +/*
  26.219 + * return 1 if the virtual address is in the operating system's
  26.220 + * address space (at or above PAGE_OFFSET), else 0
  26.221 + */
  26.222 +int pdb_linux_address_space (unsigned long addr)
  26.223 +{
  26.224 +    return (addr >= PAGE_OFFSET);
  26.225 +}
  26.226 +
  26.227 +/* store the pids of at most "max" processes in "array"
  26.228 + * return: number of tasks found (may exceed "max"; the extras are not stored)
  26.229 + *
  26.230 + *   init_task -> init_task_union.task
  26.231 + *   while (next_task != init_task) {}
  26.232 + */
  26.233 +int pdb_linux_process_list (unsigned long cr3, int array[], int max)
  26.234 +{
  26.235 +  unsigned long task_p = pdb_init_task_union_addr, next_p;
  26.236 +  int pid;
  26.237 +  int count = 0;
  26.238 +
  26.239 +  /* task_p = init_task->next_task (left at init_task if the read fails) */
  26.240 +  pdb_get_values((u_char *) &task_p, sizeof(task_p),
  26.241 +		 cr3, pdb_init_task_union_addr + task_struct_next_task_offset);
  26.242 +  
  26.243 +  while (task_p != pdb_init_task_union_addr)
  26.244 +  {
  26.245 +      pdb_get_values((u_char *) &pid, sizeof(pid),
  26.246 +		     cr3, task_p + task_struct_pid_offset);
  26.247 +
  26.248 +      if (count < max) array[count] = pid;   /* never wrap or divide by max */
  26.249 +      count++;
  26.250 +      next_p = pdb_init_task_union_addr;     /* a failed read ends the walk */
  26.251 +      pdb_get_values((u_char *) &next_p, sizeof(next_p),
  26.252 +		     cr3, task_p + task_struct_next_task_offset);
  26.253 +      task_p = next_p;
  26.254 +  }
  26.255 +
  26.256 +  return count;
  26.257 +}
  26.258 +
  26.259 +/*
  26.260 + * get additional details about a particular process
  26.261 + */
  26.262 +void pdb_linux_process_details (unsigned long cr3, int pid, char *buffer)
  26.263 +{
  26.264 +  unsigned long task_struct_p = pdb_linux_pid_task_struct(cr3, pid);
  26.265 +
  26.266 +  buffer[0] = 0;                   /* empty name when the pid is not found */
  26.267 +  if (task_struct_p != (unsigned long) NULL)
  26.268 +    pdb_get_values((u_char *) buffer, task_struct_comm_length,
  26.269 +		   cr3, task_struct_p + task_struct_comm_offset);
  26.270 +  return;
  26.271 +}
  26.272 +
    27.1 --- a/xen/common/debug.c	Fri Apr 02 13:58:27 2004 +0000
    27.2 +++ b/xen/common/debug.c	Fri Apr 02 14:03:03 2004 +0000
    27.3 @@ -11,7 +11,6 @@
    27.4  #include <xen/sched.h>
    27.5  #include <xen/event.h>
    27.6  #include <asm/page.h>
    27.7 -#include <asm/domain_page.h>                           /* [un]map_domain_mem */
    27.8  #include <asm/pdb.h>
    27.9  
   27.10  #undef DEBUG_TRACE
   27.11 @@ -23,148 +22,8 @@
   27.12  
   27.13  /****************************************************************************/
   27.14  
   27.15 -extern int pdb_change_values(domid_t domain, 
   27.16 -                             u_char *buffer, unsigned long addr,
   27.17 -                             int length, int rw);
   27.18 -extern u_char pdb_linux_get_value(domid_t domain, int pid, unsigned long addr);
   27.19 -
   27.20 -/*
   27.21 - * Set memory in a domain's address space
   27.22 - * Set "length" bytes at "address" from "domain" to the values in "buffer".
   27.23 - * Return the number of bytes set, 0 if there was a problem.
   27.24 - *
   27.25 - * THIS WILL BECOME A MACRO
   27.26 - */
   27.27 -
   27.28 -int pdb_set_values(domid_t domain, 
   27.29 -                   u_char *buffer, unsigned long addr, int length)
   27.30 -{
   27.31 -    int count = pdb_change_values(domain, buffer, addr, length, 2);
   27.32 -
   27.33 -    /* this is a bit x86 specific at the moment... */
   27.34 -    if (length == 1 && buffer[0] == 'c' && buffer[1] == 'c')
   27.35 -    {
   27.36 -        /* inserting a new breakpoint */
   27.37 -        pdb_bkpt_add(addr);
   27.38 -        TRC(printk("pdb breakpoint detected at 0x%lx\n", addr));
   27.39 -    }
   27.40 -    else if ( pdb_bkpt_remove(addr) == 0 )
   27.41 -    {
   27.42 -        /* removing a breakpoint */
   27.43 -        TRC(printk("pdb breakpoint cleared at 0x%lx\n", addr));
   27.44 -    }
   27.45 -
   27.46 -    return count;
   27.47 -}
   27.48 -
   27.49 -/*
   27.50 - * Read memory from a domain's address space.
   27.51 - * Fetch "length" bytes at "address" from "domain" into "buffer".
   27.52 - * Return the number of bytes read, 0 if there was a problem.
   27.53 - *
   27.54 - * THIS WILL BECOME A MACRO
   27.55 - */
   27.56 -
   27.57 -int pdb_get_values(domid_t domain, 
   27.58 -                   u_char *buffer, unsigned long addr, int length)
   27.59 -{
   27.60 -    return pdb_change_values(domain, buffer, addr, length, 1);
   27.61 -}
   27.62 -
   27.63 -/*
   27.64 - * Change memory in  a domain's address space.
   27.65 - * Read or write "length" bytes at "address" from "domain" into/from "buffer".
   27.66 - * Return the number of bytes read, 0 if there was a problem.
   27.67 - * RW: 1 = read, 2 = write
   27.68 - */
   27.69 -
   27.70 -int pdb_change_values(domid_t domain, u_char *buffer, unsigned long addr,
   27.71 -                      int length, int rw)
   27.72 -{
   27.73 -    struct task_struct *p;
   27.74 -    l2_pgentry_t* l2_table = NULL;
   27.75 -    l1_pgentry_t* l1_table = NULL;
   27.76 -    u_char *page;
   27.77 -    int bytes = 0;
   27.78 -
   27.79 -    p = find_domain_by_id(domain);
   27.80 -
   27.81 -    if ((addr >> PAGE_SHIFT) == ((addr + length - 1) >> PAGE_SHIFT))
   27.82 -    {
   27.83 -        if (p->mm.shadow_mode )
   27.84 -          l2_table = map_domain_mem(pagetable_val(p->mm.shadow_table));
   27.85 -	else
   27.86 -          l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
   27.87 -
   27.88 -	l2_table += l2_table_offset(addr);
   27.89 -	if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT)) 
   27.90 -	{
   27.91 -	    printk ("L2:0x%p (0x%lx) \n", l2_table, l2_pgentry_val(*l2_table));
   27.92 -	    goto exit2;
   27.93 -	}
   27.94 -
   27.95 -	if (l2_pgentry_val(*l2_table) & _PAGE_PSE)
   27.96 -	{
   27.97 -#define PSE_PAGE_SHIFT           L2_PAGETABLE_SHIFT
   27.98 -#define PSE_PAGE_SIZE	         (1UL << PSE_PAGE_SHIFT)
   27.99 -#define PSE_PAGE_MASK	         (~(PSE_PAGE_SIZE-1))
  27.100 -
  27.101 -#define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << L1_PAGETABLE_SHIFT )
  27.102 -
  27.103 -#define pse_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PSE_PAGE_MASK)
  27.104 -
  27.105 -	    page = map_domain_mem(pse_pgentry_to_phys(*l2_table) +/* 10 bits */
  27.106 -				  (addr & L1_PAGE_BITS));         /* 10 bits */
  27.107 -	    page += addr & (PAGE_SIZE - 1);                       /* 12 bits */
  27.108 -	}
  27.109 -	else
  27.110 -	{
  27.111 -	    l1_table = map_domain_mem(l2_pgentry_to_phys(*l2_table));
  27.112 -	    l1_table += l1_table_offset(addr); 
  27.113 -	    if (!(l1_pgentry_val(*l1_table) & _PAGE_PRESENT))
  27.114 -	    {
  27.115 -	        printk ("L2:0x%p (0x%lx) L1:0x%p (0x%lx)\n", 
  27.116 -			l2_table, l2_pgentry_val(*l2_table),
  27.117 -			l1_table, l1_pgentry_val(*l1_table));
  27.118 -		goto exit1;
  27.119 -	    }
  27.120 -
  27.121 -	    page = map_domain_mem(l1_pgentry_to_phys(*l1_table));
  27.122 -	    page += addr & (PAGE_SIZE - 1);
  27.123 -	}
  27.124 -
  27.125 -	switch (rw)
  27.126 -	{
  27.127 -	case 1:                                                      /* read */
  27.128 -	    memcpy (buffer, page, length);
  27.129 -	    bytes = length;
  27.130 -	    break;
  27.131 -	case 2:                                                     /* write */
  27.132 -	    hex2mem (buffer, page, length);
  27.133 -	    bytes = length;
  27.134 -	    break;
  27.135 -	default:                                                  /* unknown */
  27.136 -	    printk ("error: unknown RW flag: %d\n", rw);
  27.137 -	    return 0;
  27.138 -	}
  27.139 -
  27.140 -	unmap_domain_mem((void *)page); 
  27.141 -    exit1:
  27.142 -	if (l1_table != NULL)
  27.143 -	    unmap_domain_mem((void *)l1_table);
  27.144 -    exit2:
  27.145 -	unmap_domain_mem((void *)l2_table);
  27.146 -    }
  27.147 -    else
  27.148 -    {
  27.149 -        /* read spans pages. need to recurse */
  27.150 -        printk ("pdb memory SPAN! addr:0x%lx l: %x\n", addr, length);
  27.151 -    }
  27.152 -
  27.153 -    put_task_struct(p);
  27.154 -    return bytes;
  27.155 -}
  27.156 -
  27.157 +extern u_char pdb_linux_get_value(int pid, unsigned long cr3, 
  27.158 +				  unsigned long addr);
  27.159  
  27.160  /*
  27.161   * interactively call pervasive debugger from a privileged domain
  27.162 @@ -210,6 +69,14 @@ void pdb_do_debug (dom0_op_t *op)
  27.163          {
  27.164              int loop;
  27.165              u_char x;
  27.166 +	    unsigned long cr3;
  27.167 +	    struct task_struct *p;
  27.168 +
  27.169 +	    p = find_domain_by_id(op->u.debug.domain);
  27.170 +	    if (p->mm.shadow_mode)
  27.171 +	      cr3 = pagetable_val(p->mm.shadow_table);
  27.172 +	    else
  27.173 +	      cr3 = pagetable_val(p->mm.pagetable);
  27.174  
  27.175              for (loop = 0; loop < op->u.debug.in2; loop++)         /* length */
  27.176              { 
  27.177 @@ -217,15 +84,14 @@ void pdb_do_debug (dom0_op_t *op)
  27.178                  {
  27.179                      printk ("\n%08x ", op->u.debug.in1 + loop);
  27.180                  }
  27.181 -                x = pdb_linux_get_value(op->u.debug.domain,        /* domain */
  27.182 -					op->u.debug.in3,             /* pid */
  27.183 -					op->u.debug.in1 + loop);     /* addr */
  27.184 +                x = pdb_linux_get_value(op->u.debug.in3,
  27.185 +					cr3, op->u.debug.in1 + loop);
  27.186                  printk (" %02x", x);
  27.187              }
  27.188              printk ("\n");
  27.189 +	    put_task_struct(p);
  27.190              break;
  27.191          }
  27.192 -
  27.193          case 's' :
  27.194  	{
  27.195  	    struct task_struct * p = find_domain_by_id(op->u.debug.domain);
  27.196 @@ -233,7 +99,9 @@ void pdb_do_debug (dom0_op_t *op)
  27.197  	    if (p != NULL)
  27.198  	    {
  27.199  	        if (p->state != TASK_STOPPED)
  27.200 -                    send_guest_virq(p, VIRQ_STOP);
  27.201 +		{
  27.202 +		  send_guest_virq(p, VIRQ_STOP);
  27.203 +		}
  27.204  		put_task_struct(p);
  27.205  	    }
  27.206  	    else
    28.1 --- a/xen/common/dom0_ops.c	Fri Apr 02 13:58:27 2004 +0000
    28.2 +++ b/xen/common/dom0_ops.c	Fri Apr 02 14:03:03 2004 +0000
    28.3 @@ -209,12 +209,14 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    28.4      case DOM0_SCHEDCTL:
    28.5      {
    28.6          ret = sched_ctl(&op->u.schedctl);
    28.7 +        copy_to_user(u_dom0_op, op, sizeof(*op));
    28.8      }
    28.9      break;
   28.10  
   28.11      case DOM0_ADJUSTDOM:
   28.12      {
   28.13          ret = sched_adjdom(&op->u.adjustdom);
   28.14 +        copy_to_user(u_dom0_op, op, sizeof(*op));
   28.15      }
   28.16      break;
   28.17  
   28.18 @@ -284,7 +286,6 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
   28.19              if ( (p->state == TASK_STOPPED) || (p->state == TASK_DYING) )
   28.20                  op->u.getdomaininfo.state = DOMSTATE_STOPPED;
   28.21              op->u.getdomaininfo.hyp_events  = p->hyp_events;
   28.22 -//            op->u.getdomaininfo.mcu_advance = p->mcu_advance;
   28.23              op->u.getdomaininfo.tot_pages   = p->tot_pages;
   28.24              op->u.getdomaininfo.cpu_time    = p->cpu_time;
   28.25              op->u.getdomaininfo.shared_info_frame = 
   28.26 @@ -507,6 +508,16 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
   28.27          }
   28.28      }
   28.29      break;
   28.30 +
   28.31 +    case DOM0_SCHED_ID:
   28.32 +    {
   28.33 +        /* Hand the value of sched_id() back to the caller via the op. */
   28.34 +        op->u.sched_id.sched_id = sched_id();
   28.35 +
   28.36 +        copy_to_user(u_dom0_op, op, sizeof(*op));
   28.37 +        ret = 0;
   28.38 +    }
   28.39 +    break; /* Must not fall through to 'default', which sets -ENOSYS. */
   28.40       
   28.41      default:
   28.42          ret = -ENOSYS;
    29.1 --- a/xen/common/domain.c	Fri Apr 02 13:58:27 2004 +0000
    29.2 +++ b/xen/common/domain.c	Fri Apr 02 14:03:03 2004 +0000
    29.3 @@ -20,6 +20,13 @@
    29.4  #include <xen/vbd.h>
    29.5  #include <asm/i387.h>
    29.6  
    29.7 +#ifdef CONFIG_X86_64BITMODE
    29.8 +#define ELFSIZE 64
    29.9 +#else
   29.10 +#define ELFSIZE 32
   29.11 +#endif
   29.12 +#include <xen/elf.h>
   29.13 +
   29.14  #if !defined(CONFIG_X86_64BITMODE)
   29.15  /* No ring-3 access in initial page tables. */
   29.16  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
   29.17 @@ -31,6 +38,9 @@
   29.18  #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
   29.19  #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
   29.20  
   29.21 +#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
   29.22 +#define round_pgdown(_p)  ((_p)&PAGE_MASK)
   29.23 +
   29.24  /* Both these structures are protected by the tasklist_lock. */
   29.25  rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
   29.26  struct task_struct *task_hash[TASK_HASH_SIZE];
   29.27 @@ -459,7 +469,7 @@ unsigned int alloc_new_dom_mem(struct ta
   29.28                                     (PAGE_SHIFT-10))) )
   29.29          {
   29.30              free_all_dom_mem(p);
   29.31 -            return -1;
   29.32 +            return -ENOMEM;
   29.33          }
   29.34      }
   29.35  
   29.36 @@ -555,39 +565,166 @@ int final_setup_guestos(struct task_stru
   29.37      return 0;
   29.38  }
   29.39  
   29.40 -static unsigned long alloc_page_from_domain(unsigned long * cur_addr, 
   29.41 -    unsigned long * index)
   29.42 +static inline int is_loadable_phdr(Elf_Phdr *phdr)
   29.43  {
   29.44 -    unsigned long ret = *cur_addr;
   29.45 -    struct list_head *ent = frame_table[ret >> PAGE_SHIFT].list.prev;
   29.46 -    *cur_addr = list_entry(ent, struct pfn_info, list) - frame_table;
   29.47 -    *cur_addr <<= PAGE_SHIFT;
   29.48 -    (*index)--;    
   29.49 -    return ret;
   29.50 +    return ((phdr->p_type == PT_LOAD) &&
   29.51 +            ((phdr->p_flags & (PF_W|PF_X)) != 0));
   29.52  }
   29.53  
   29.54 -/*
   29.55 - * setup_guestos is used for building dom0 solely. other domains are built in
   29.56 - * userspace dom0 and final setup is being done by final_setup_guestos.
   29.57 - */
   29.58 -int setup_guestos(struct task_struct *p, dom0_createdomain_t *params, 
   29.59 -                  unsigned int num_vifs,
   29.60 -                  char *phy_data_start, unsigned long data_len, 
   29.61 -		  char *cmdline, unsigned long initrd_len)
   29.62 +static int readelfimage_base_and_size(char *elfbase, 
   29.63 +                                      unsigned long elfsize,
   29.64 +                                      unsigned long *pkernstart,
   29.65 +                                      unsigned long *pkernend,
   29.66 +                                      unsigned long *pkernentry)
   29.67  {
   29.68 -    struct list_head *list_ent;
   29.69 -    char *src, *vsrc, *dst, *data_start;
   29.70 -    int i;
   29.71 +    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
   29.72 +    Elf_Phdr *phdr;
   29.73 +    Elf_Shdr *shdr;
   29.74 +    unsigned long kernstart = ~0UL, kernend=0UL;
   29.75 +    char *shstrtab, *guestinfo;
   29.76 +    int h;
   29.77 +
   29.78 +    if ( !IS_ELF(*ehdr) )
   29.79 +    {
   29.80 +        printk("Kernel image does not have an ELF header.\n");
   29.81 +        return -EINVAL;
   29.82 +    }
   29.83 +
   29.84 +    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
   29.85 +    {
   29.86 +	printk("ELF program headers extend beyond end of image.\n");
   29.87 +        return -EINVAL;
   29.88 +    }
   29.89 +
   29.90 +    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
   29.91 +    {
   29.92 +	printk("ELF section headers extend beyond end of image.\n");
   29.93 +        return -EINVAL;
   29.94 +    }
   29.95 +
   29.96 +    /* Find the section-header strings table. */
   29.97 +    if ( ehdr->e_shstrndx == SHN_UNDEF )
   29.98 +    {
   29.99 +        printk("ELF image has no section-header strings table (shstrtab).\n");
  29.100 +        return -EINVAL;
  29.101 +    }
  29.102 +    shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff + 
  29.103 +                        (ehdr->e_shstrndx*ehdr->e_shentsize));
  29.104 +    shstrtab = elfbase + shdr->sh_offset;
  29.105 +    
  29.106 +    /* Find the special '__xen_guest' section and check its contents. */
  29.107 +    for ( h = 0; h < ehdr->e_shnum; h++ )
  29.108 +    {
  29.109 +        shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff + (h*ehdr->e_shentsize));
  29.110 +        if ( strcmp(&shstrtab[shdr->sh_name], "__xen_guest") != 0 )
  29.111 +            continue;
  29.112 +        guestinfo = elfbase + shdr->sh_offset;
  29.113 +        printk("Xen-ELF header found: '%s'\n", guestinfo);
  29.114 +        if ( (strstr(guestinfo, "GUEST_OS=linux") == NULL) ||
  29.115 +             (strstr(guestinfo, "XEN_VER=1.3") == NULL) )
  29.116 +        {
  29.117 +            printk("ERROR: Xen will only load Linux built for Xen v1.3\n");
  29.118 +            return -EINVAL;
  29.119 +        }
  29.120 +        break;
  29.121 +    }
  29.122 +    if ( h == ehdr->e_shnum )
  29.123 +    {
  29.124 +        printk("Not a Xen-ELF image: '__xen_guest' section not found.\n");
  29.125 +        return -EINVAL;
  29.126 +    }
  29.127 +
  29.128 +    for ( h = 0; h < ehdr->e_phnum; h++ ) 
  29.129 +    {
  29.130 +        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
  29.131 +        if ( !is_loadable_phdr(phdr) )
  29.132 +            continue;
  29.133 +        if ( phdr->p_vaddr < kernstart )
  29.134 +            kernstart = phdr->p_vaddr;
  29.135 +        if ( (phdr->p_vaddr + phdr->p_memsz) > kernend )
  29.136 +            kernend = phdr->p_vaddr + phdr->p_memsz;
  29.137 +    }
  29.138 +
  29.139 +    if ( (kernstart > kernend) || 
  29.140 +         (ehdr->e_entry < kernstart) || 
  29.141 +         (ehdr->e_entry > kernend) )
  29.142 +    {
  29.143 +        printk("Malformed ELF image.\n");
  29.144 +        return -EINVAL;
  29.145 +    }
  29.146 +
  29.147 +    *pkernstart = kernstart;
  29.148 +    *pkernend   = kernend;
  29.149 +    *pkernentry = ehdr->e_entry;
  29.150 +
  29.151 +    return 0;
  29.152 +}
  29.153 +
  29.154 +static int loadelfimage(char *elfbase)
  29.155 +{
  29.156 +    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
  29.157 +    Elf_Phdr *phdr;
  29.158 +    int h;
  29.159 +  
  29.160 +    for ( h = 0; h < ehdr->e_phnum; h++ ) 
  29.161 +    {
  29.162 +        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
  29.163 +        if ( !is_loadable_phdr(phdr) )
  29.164 +	    continue;
  29.165 +        if ( phdr->p_filesz != 0 )
  29.166 +            memcpy((char *)phdr->p_vaddr, elfbase + phdr->p_offset, 
  29.167 +                   phdr->p_filesz);
  29.168 +        if ( phdr->p_memsz > phdr->p_filesz )
  29.169 +            memset((char *)phdr->p_vaddr + phdr->p_filesz, 0, 
  29.170 +                   phdr->p_memsz - phdr->p_filesz);
  29.171 +    }
  29.172 +
  29.173 +    return 0;
  29.174 +}
  29.175 +
  29.176 +int construct_dom0(struct task_struct *p, 
  29.177 +                   unsigned long alloc_start,
  29.178 +                   unsigned long alloc_end,
  29.179 +                   unsigned int num_vifs,
  29.180 +                   char *image_start, unsigned long image_len, 
  29.181 +                   char *initrd_start, unsigned long initrd_len,
  29.182 +                   char *cmdline)
  29.183 +{
  29.184 +    char *dst;
  29.185 +    int i, rc;
  29.186      domid_t dom = p->domain;
  29.187 -    unsigned long phys_l1tab, phys_l2tab;
  29.188 -    unsigned long cur_address, alloc_address;
  29.189 -    unsigned long virt_load_address, virt_stack_address;
  29.190 -    start_info_t  *virt_startinfo_address;
  29.191 +    unsigned long pfn, mfn;
  29.192 +    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
  29.193 +    unsigned long nr_pt_pages;
  29.194      unsigned long count;
  29.195 -    unsigned long alloc_index;
  29.196      l2_pgentry_t *l2tab, *l2start;
  29.197      l1_pgentry_t *l1tab = NULL, *l1start = NULL;
  29.198      struct pfn_info *page = NULL;
  29.199 +    start_info_t *si;
  29.200 +
  29.201 +    /*
  29.202 +     * This fully describes the memory layout of the initial domain. All 
  29.203 +     * *_start addresses are page-aligned, except v_start (and v_end) which are 
  29.204 +     * superpage-aligned.
  29.205 +     */
  29.206 +    unsigned long v_start;
  29.207 +    unsigned long vkern_start;
  29.208 +    unsigned long vkern_entry;
  29.209 +    unsigned long vkern_end;
  29.210 +    unsigned long vinitrd_start;
  29.211 +    unsigned long vinitrd_end;
  29.212 +    unsigned long vphysmap_start;
  29.213 +    unsigned long vphysmap_end;
  29.214 +    unsigned long vstartinfo_start;
  29.215 +    unsigned long vstartinfo_end;
  29.216 +    unsigned long vstack_start;
  29.217 +    unsigned long vstack_end;
  29.218 +    unsigned long vpt_start;
  29.219 +    unsigned long vpt_end;
  29.220 +    unsigned long v_end;
  29.221 +
  29.222 +    /* Machine address of next candidate page-table page. */
  29.223 +    unsigned long mpt_alloc;
  29.224  
  29.225      extern void physdev_init_dom0(struct task_struct *);
  29.226      extern void ide_probe_devices(xen_disk_info_t *);
  29.227 @@ -597,67 +734,114 @@ int setup_guestos(struct task_struct *p,
  29.228      xen_disk_t *xd;
  29.229  
  29.230      /* Sanity! */
  29.231 -    if ( p->domain != 0 ) BUG();
  29.232 -    if ( test_bit(PF_CONSTRUCTED, &p->flags) ) BUG();
  29.233 +    if ( p->domain != 0 ) 
  29.234 +        BUG();
  29.235 +    if ( test_bit(PF_CONSTRUCTED, &p->flags) ) 
  29.236 +        BUG();
  29.237 +
  29.238 +    printk("*** LOADING DOMAIN 0 ***\n");
  29.239  
  29.240      /*
  29.241       * This is all a bit grim. We've moved the modules to the "safe" physical 
  29.242       * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 
  29.243 -     * routeine, we're going to copy it down into the region that's actually 
  29.244 +     * routine we're going to copy it down into the region that's actually 
  29.245       * been allocated to domain 0. This is highly likely to be overlapping, so 
  29.246       * we use a forward copy.
  29.247       * 
  29.248       * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 
  29.249       * 4GB and lots of network/disk cards that allocate loads of buffers. 
  29.250 -     * We'll have to revist this if we ever support PAE (64GB).
  29.251 +     * We'll have to revisit this if we ever support PAE (64GB).
  29.252       */
  29.253  
  29.254 -    data_start = map_domain_mem((unsigned long)phy_data_start);
  29.255 +    rc = readelfimage_base_and_size(image_start, image_len,
  29.256 +                                    &vkern_start, &vkern_end, &vkern_entry);
  29.257 +    if ( rc != 0 )
  29.258 +        return rc;
  29.259  
  29.260 -    if ( strncmp(data_start, "XenGuest", 8) )
  29.261 +    /*
  29.262 +     * Why do we need this? The number of page-table frames depends on the 
  29.263 +     * size of the bootstrap address space. But the size of the address space 
  29.264 +     * depends on the number of page-table frames (since each one is mapped 
  29.265 +     * read-only). We have a pair of simultaneous equations in two unknowns, 
  29.266 +     * which we solve by exhaustive search.
  29.267 +     */
  29.268 +    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
  29.269      {
  29.270 -        printk("DOM%llu: Invalid guest OS image - bad signature\n", dom);
  29.271 -        unmap_domain_mem(data_start);
  29.272 -        return -1;
  29.273 +        v_start          = vkern_start & ~((1<<22)-1);
  29.274 +        vinitrd_start    = round_pgup(vkern_end);
  29.275 +        vinitrd_end      = vinitrd_start + initrd_len;
  29.276 +        vphysmap_start   = round_pgup(vinitrd_end);
  29.277 +        vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
  29.278 +        vpt_start        = round_pgup(vphysmap_end);
  29.279 +        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
  29.280 +        vstartinfo_start = vpt_end;
  29.281 +        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
  29.282 +        vstack_start     = vstartinfo_end;
  29.283 +        vstack_end       = vstack_start + PAGE_SIZE;
  29.284 +        v_end            = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
  29.285 +        if ( (v_end - vstack_end) < (512 << 10) )
  29.286 +            v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
  29.287 +        if ( (((v_end - v_start) >> L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
  29.288 +            break;
  29.289      }
  29.290  
  29.291 -    virt_load_address = *(unsigned long *)(data_start + 8);
  29.292 -    if ( (virt_load_address & (PAGE_SIZE-1)) )
  29.293 +    if ( (v_end - v_start) > (nr_pages * PAGE_SIZE) )
  29.294      {
  29.295 -        printk("DOM%llu: Guest OS load address not page-aligned (%08lx)\n",
  29.296 -               dom, virt_load_address);
  29.297 -        unmap_domain_mem(data_start);
  29.298 -        return -1;
  29.299 -    }
  29.300 -
  29.301 -    if ( alloc_new_dom_mem(p, params->memory_kb) )
  29.302 -    {
  29.303 -        printk("DOM%llu: Not enough memory --- reduce dom0_mem ??\n", dom);
  29.304 -        unmap_domain_mem(data_start);
  29.305 +        printk("Initial guest OS requires too much space\n"
  29.306 +               "(%luMB is greater than %luMB limit)\n",
  29.307 +               (v_end-v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
  29.308          return -ENOMEM;
  29.309      }
  29.310  
  29.311 -    alloc_address = list_entry(p->page_list.prev, struct pfn_info, list) -
  29.312 -        frame_table;
  29.313 -    alloc_address <<= PAGE_SHIFT;
  29.314 -    alloc_index = p->tot_pages;
  29.315 +    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
  29.316 +           " Kernel image:  %p->%p\n"
  29.317 +           " Initrd image:  %p->%p\n"
  29.318 +           " Dom0 alloc.:   %08lx->%08lx\n",
  29.319 +           image_start, image_start + image_len,
  29.320 +           initrd_start, initrd_start + initrd_len,
  29.321 +           alloc_start, alloc_end);
  29.322 +    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
  29.323 +           " Loaded kernel: %08lx->%08lx\n"
  29.324 +           " Init. ramdisk: %08lx->%08lx\n"
  29.325 +           " Phys-Mach map: %08lx->%08lx\n"
  29.326 +           " Page tables:   %08lx->%08lx\n"
  29.327 +           " Start info:    %08lx->%08lx\n"
  29.328 +           " Boot stack:    %08lx->%08lx\n"
  29.329 +           " TOTAL:         %08lx->%08lx\n",
  29.330 +           vkern_start, vkern_end, 
  29.331 +           vinitrd_start, vinitrd_end,
  29.332 +           vphysmap_start, vphysmap_end,
  29.333 +           vpt_start, vpt_end,
  29.334 +           vstartinfo_start, vstartinfo_end,
  29.335 +           vstack_start, vstack_end,
  29.336 +           v_start, v_end);
  29.337 +    printk(" ENTRY ADDRESS: %08lx\n", vkern_entry);
  29.338  
  29.339 -    if ( data_len > (params->memory_kb << 9) )
  29.340 +    /*
  29.341 +     * Protect the lowest 1GB of memory. We use a temporary mapping there
  29.342 +     * from which we copy the kernel and ramdisk images.
  29.343 +     */
  29.344 +    if ( v_start < (1<<30) )
  29.345      {
  29.346 -        printk("DOM%llu: Guest OS image is too large\n"
  29.347 -               "       (%luMB is greater than %uMB limit for a\n"
  29.348 -               "        %uMB address space)\n",
  29.349 -               dom, data_len>>20,
  29.350 -               (params->memory_kb)>>11,
  29.351 -               (params->memory_kb)>>10);
  29.352 -        unmap_domain_mem(data_start);
  29.353 -        free_all_dom_mem(p);
  29.354 -        return -1;
  29.355 +        printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
  29.356 +        return -EINVAL;
  29.357      }
  29.358  
  29.359 -    printk("DOM%llu: Guest OS virtual load address is %08lx\n", dom,
  29.360 -           virt_load_address);
  29.361 -    
  29.362 +    /* Construct a frame-allocation list for the initial domain. */
  29.363 +    for ( pfn = (alloc_start>>PAGE_SHIFT); 
  29.364 +          pfn < (alloc_end>>PAGE_SHIFT); 
  29.365 +          pfn++ )
  29.366 +    {
  29.367 +        page = &frame_table[pfn];
  29.368 +        page->u.domain        = p;
  29.369 +        page->type_and_flags  = 0;
  29.370 +        page->count_and_flags = PGC_allocated | 1;
  29.371 +        list_add_tail(&page->list, &p->page_list);
  29.372 +        p->tot_pages++;
  29.373 +    }
  29.374 +
  29.375 +    mpt_alloc = (vpt_start - v_start) + alloc_start;
  29.376 +
  29.377      SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
  29.378      SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
  29.379  
  29.380 @@ -671,157 +855,140 @@ int setup_guestos(struct task_struct *p,
  29.381      for ( i = 0; i < 256; i++ ) 
  29.382          p->thread.traps[i].cs = FLAT_GUESTOS_CS;
  29.383  
  29.384 -    /*
  29.385 -     * WARNING: The new domain must have its 'processor' field
  29.386 -     * filled in by now !!
  29.387 -     */
  29.388 -    phys_l2tab = alloc_page_from_domain(&alloc_address, &alloc_index);
  29.389 -    l2start = l2tab = map_domain_mem(phys_l2tab);
  29.390 +    /* WARNING: The new domain must have its 'processor' field filled in! */
  29.391 +    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
  29.392      memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
  29.393 +    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  29.394 +        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
  29.395      l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
  29.396          mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR);
  29.397 -    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  29.398 -        mk_l2_pgentry(phys_l2tab | __PAGE_HYPERVISOR);
  29.399 -    memset(l2tab, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
  29.400 -    p->mm.pagetable = mk_pagetable(phys_l2tab);
  29.401 +    p->mm.pagetable = mk_pagetable((unsigned long)l2start);
  29.402  
  29.403 -    l2tab += l2_table_offset(virt_load_address);
  29.404 -    cur_address = list_entry(p->page_list.next, struct pfn_info, list) -
  29.405 -        frame_table;
  29.406 -    cur_address <<= PAGE_SHIFT;
  29.407 -    for ( count = 0; count < p->tot_pages; count++ )
  29.408 +    l2tab += l2_table_offset(v_start);
  29.409 +    mfn = alloc_start >> PAGE_SHIFT;
  29.410 +    for ( count = 0; count < ((v_end-v_start)>>PAGE_SHIFT); count++ )
  29.411      {
  29.412          if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
  29.413          {
  29.414 -            if ( l1tab != NULL ) unmap_domain_mem(l1start);
  29.415 -            phys_l1tab = alloc_page_from_domain(&alloc_address, &alloc_index);
  29.416 -            *l2tab++ = mk_l2_pgentry(phys_l1tab|L2_PROT);
  29.417 -            l1start = l1tab = map_domain_mem(phys_l1tab);
  29.418 +            l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 
  29.419 +            mpt_alloc += PAGE_SIZE;
  29.420 +            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
  29.421              clear_page(l1tab);
  29.422 -            l1tab += l1_table_offset(
  29.423 -                virt_load_address + (count << PAGE_SHIFT));
  29.424          }
  29.425 -        *l1tab++ = mk_l1_pgentry(cur_address|L1_PROT);
  29.426 +        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
  29.427          
  29.428 -        page = &frame_table[cur_address >> PAGE_SHIFT];
  29.429 +        page = &frame_table[mfn];
  29.430          set_bit(_PGC_tlb_flush_on_type_change, &page->count_and_flags);
  29.431          if ( !get_page_and_type(page, p, PGT_writeable_page) )
  29.432              BUG();
  29.433 -        /* Set up the MPT entry. */
  29.434 -        machine_to_phys_mapping[cur_address >> PAGE_SHIFT] = count;
  29.435  
  29.436 -        list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
  29.437 -        cur_address = list_entry(list_ent, struct pfn_info, list) -
  29.438 -            frame_table;
  29.439 -        cur_address <<= PAGE_SHIFT;
  29.440 +        mfn++;
  29.441      }
  29.442 -    unmap_domain_mem(l1start);
  29.443  
  29.444 -    /* pages that are part of page tables must be read only */
  29.445 -    l2tab = l2start + l2_table_offset(virt_load_address + 
  29.446 -        (alloc_index << PAGE_SHIFT));
  29.447 -    l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
  29.448 -    l1tab += l1_table_offset(virt_load_address + (alloc_index << PAGE_SHIFT));
  29.449 +    /* Pages that are part of page tables must be read only. */
  29.450 +    l2tab = l2start + l2_table_offset(vpt_start);
  29.451 +    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
  29.452 +    l1tab += l1_table_offset(vpt_start);
  29.453      l2tab++;
  29.454 -    for ( count = alloc_index; count < p->tot_pages; count++ ) 
  29.455 +    for ( count = 0; count < nr_pt_pages; count++ ) 
  29.456      {
  29.457          *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
  29.458 -        page = frame_table + l1_pgentry_to_pagenr(*l1tab);
  29.459 -        page->type_and_flags &= ~PGT_type_mask;
  29.460 -        page->type_and_flags |= PGT_l1_page_table;
  29.461 -        get_page(page, p); /* an extra ref because of readable mapping */
  29.462 +        page = &frame_table[l1_pgentry_to_pagenr(*l1tab)];
  29.463 +        if ( count == 0 )
  29.464 +        {
  29.465 +            page->type_and_flags &= ~PGT_type_mask;
  29.466 +            page->type_and_flags |= PGT_l2_page_table;
  29.467 +            get_page(page, p); /* an extra ref because of readable mapping */
  29.468 +            /* Get another ref to L2 page so that it can be pinned. */
  29.469 +            if ( !get_page_and_type(page, p, PGT_l2_page_table) )
  29.470 +                BUG();
  29.471 +            set_bit(_PGC_guest_pinned, &page->count_and_flags);
  29.472 +        }
  29.473 +        else
  29.474 +        {
  29.475 +            page->type_and_flags &= ~PGT_type_mask;
  29.476 +            page->type_and_flags |= PGT_l1_page_table;
  29.477 +            get_page(page, p); /* an extra ref because of readable mapping */
  29.478 +        }
  29.479          l1tab++;
  29.480          if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
  29.481 -        {
  29.482 -            unmap_domain_mem(l1start);
  29.483 -            l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
  29.484 -            l2tab++;
  29.485 -        }
  29.486 +            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
  29.487      }
  29.488 -    /* Rewrite last L1 page to be a L2 page. */
  29.489 -    page->type_and_flags &= ~PGT_type_mask;
  29.490 -    page->type_and_flags |= PGT_l2_page_table;
  29.491 -    /* Get another ref to L2 page so that it can be pinned. */
  29.492 -    if ( !get_page_and_type(page, p, PGT_l2_page_table) )
  29.493 -        BUG();
  29.494 -    set_bit(_PGC_guest_pinned, &page->count_and_flags);
  29.495 -    unmap_domain_mem(l1start);
  29.496  
  29.497 -    /* Set up shared info area. */
  29.498 +    /* Set up shared-info area. */
  29.499      update_dom_time(p->shared_info);
  29.500      p->shared_info->domain_time = 0;
  29.501 -    p->shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */
  29.502 -
  29.503 -    virt_startinfo_address = (start_info_t *)
  29.504 -        (virt_load_address + ((alloc_index - 1) << PAGE_SHIFT));
  29.505 -    virt_stack_address  = (unsigned long)virt_startinfo_address;
  29.506 -    
  29.507 -    unmap_domain_mem(l2start);
  29.508 +    /* Mask all upcalls... */
  29.509 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  29.510 +        p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
  29.511  
  29.512      /* Install the new page tables. */
  29.513      __cli();
  29.514      write_cr3_counted(pagetable_val(p->mm.pagetable));
  29.515  
  29.516 -    /* Copy the guest OS image. */    
  29.517 -    src  = (char *)(phy_data_start + 12);
  29.518 -    vsrc = (char *)(data_start + 12); /* data_start invalid after first page*/
  29.519 -    dst  = (char *)virt_load_address;
  29.520 -    while ( src < (phy_data_start+data_len) )
  29.521 -    {
  29.522 -	*dst++ = *vsrc++;
  29.523 -	src++;
  29.524 -	if ( (((unsigned long)src) & (PAGE_SIZE-1)) == 0 )
  29.525 -        {
  29.526 -	    unmap_domain_mem(vsrc-1);
  29.527 -	    vsrc = map_domain_mem((unsigned long)src);
  29.528 -        }
  29.529 -    }
  29.530 -    unmap_domain_mem(vsrc);
  29.531 +    /* Copy the OS image. */
  29.532 +    (void)loadelfimage(image_start);
  29.533 +
  29.534 +    /* Copy the initial ramdisk. */
  29.535 +    if ( initrd_len != 0 )
  29.536 +        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
  29.537      
  29.538      /* Set up start info area. */
  29.539 -    memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address));
  29.540 -    virt_startinfo_address->nr_pages = p->tot_pages;
  29.541 -    virt_startinfo_address->shared_info = virt_to_phys(p->shared_info);
  29.542 -    virt_startinfo_address->pt_base = virt_load_address + 
  29.543 -        ((p->tot_pages - 1) << PAGE_SHIFT); 
  29.544 +    si = (start_info_t *)vstartinfo_start;
  29.545 +    memset(si, 0, PAGE_SIZE);
  29.546 +    si->nr_pages     = p->tot_pages;
  29.547 +    si->shared_info  = virt_to_phys(p->shared_info);
  29.548 +    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
  29.549 +    si->pt_base      = vpt_start;
  29.550 +    si->nr_pt_frames = nr_pt_pages;
  29.551 +    si->mfn_list     = vphysmap_start;
  29.552  
  29.553 -    virt_startinfo_address->flags  = 0;
  29.554 -    if ( IS_PRIV(p) )
  29.555 -        virt_startinfo_address->flags |= SIF_PRIVILEGED;
  29.556 -    if ( p->domain == 0 )
  29.557 -        virt_startinfo_address->flags |= SIF_INITDOMAIN;
  29.558 -
  29.559 -    if ( initrd_len )
  29.560 +    /* Write the phys->machine and machine->phys table entries. */
  29.561 +    for ( pfn = 0; pfn < p->tot_pages; pfn++ )
  29.562      {
  29.563 -	virt_startinfo_address->mod_start = (unsigned long)dst-initrd_len;
  29.564 -	virt_startinfo_address->mod_len   = initrd_len;
  29.565 -	printk("Initrd len 0x%lx, start at 0x%08lx\n",
  29.566 -	       virt_startinfo_address->mod_len, 
  29.567 -               virt_startinfo_address->mod_start);
  29.568 +        mfn = (alloc_start >> PAGE_SHIFT) + pfn;
  29.569 +        ((unsigned long *)vphysmap_start)[pfn] = mfn;
  29.570 +        machine_to_phys_mapping[mfn] = pfn;
  29.571      }
  29.572  
  29.573 -    /* Add virtual network interfaces and point to them in startinfo. */
  29.574 -    while ( num_vifs-- > 0 )
  29.575 -        (void)create_net_vif(dom);
  29.576 +    if ( initrd_len != 0 )
  29.577 +    {
  29.578 +	si->mod_start = vinitrd_start;
  29.579 +	si->mod_len   = initrd_len;
  29.580 +	printk("Initrd len 0x%lx, start at 0x%08lx\n",
  29.581 +	       si->mod_len, si->mod_start);
  29.582 +    }
  29.583  
  29.584 -    dst = virt_startinfo_address->cmd_line;
  29.585 +    dst = si->cmd_line;
  29.586      if ( cmdline != NULL )
  29.587      {
  29.588          for ( i = 0; i < 255; i++ )
  29.589          {
  29.590 -            if ( cmdline[i] == '\0' ) break;
  29.591 +            if ( cmdline[i] == '\0' )
  29.592 +                break;
  29.593              *dst++ = cmdline[i];
  29.594          }
  29.595      }
  29.596      *dst = '\0';
  29.597  
  29.598 -    /* NB: Give up the VGA console if DOM0 is ocnfigured to grab it. */
  29.599 -    console_endboot(strstr(cmdline, "tty0") != NULL);
  29.600 -
  29.601      /* Reinstate the caller's page tables. */
  29.602      write_cr3_counted(pagetable_val(current->mm.pagetable));
  29.603      __sti();
  29.604  
  29.605 +    /* Destroy low mappings - they were only for our convenience. */
  29.606 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  29.607 +        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
  29.608 +            l2start[i] = mk_l2_pgentry(0);
  29.609 +    zap_low_mappings(); /* Do the same for the idle page tables. */
  29.610 +    
  29.611 +    /* Give up the VGA console if DOM0 is configured to grab it. */
  29.612 +    console_endboot(strstr(cmdline, "tty0") != NULL);
  29.613 +
  29.614 +    /* Add virtual network interfaces. */
  29.615 +    while ( num_vifs-- > 0 )
  29.616 +        (void)create_net_vif(dom);
  29.617 +
  29.618 +#ifndef NO_DEVICES_IN_XEN
  29.619      /* DOM0 gets access to all real block devices. */
  29.620  #define MAX_REAL_DISKS 256
  29.621      xd = kmalloc(MAX_REAL_DISKS * sizeof(xen_disk_t), GFP_KERNEL);
  29.622 @@ -843,20 +1010,18 @@ int setup_guestos(struct task_struct *p,
  29.623              BUG();
  29.624      }
  29.625      kfree(xd);
  29.626 +#endif
  29.627  
  29.628      /* DOM0 gets access to everything. */
  29.629      physdev_init_dom0(p);
  29.630  
  29.631      set_bit(PF_CONSTRUCTED, &p->flags);
  29.632  
  29.633 -#if 0 // XXXXX DO NOT CHECK IN ENBALED !!! (but useful for testing so leave) 
  29.634 +#if 0 // XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) 
  29.635      shadow_mode_enable(&p->mm, SHM_test); 
  29.636  #endif
  29.637  
  29.638 -    new_thread(p, 
  29.639 -               (unsigned long)virt_load_address, 
  29.640 -               (unsigned long)virt_stack_address, 
  29.641 -               (unsigned long)virt_startinfo_address);
  29.642 +    new_thread(p, vkern_entry, vstack_end, vstartinfo_start);
  29.643  
  29.644      return 0;
  29.645  }
    30.1 --- a/xen/common/event_channel.c	Fri Apr 02 13:58:27 2004 +0000
    30.2 +++ b/xen/common/event_channel.c	Fri Apr 02 14:03:03 2004 +0000
    30.3 @@ -21,6 +21,7 @@
    30.4  #include <xen/errno.h>
    30.5  #include <xen/sched.h>
    30.6  #include <xen/event.h>
    30.7 +#include <xen/irq.h>
    30.8  
    30.9  #include <hypervisor-ifs/hypervisor-if.h>
   30.10  #include <hypervisor-ifs/event_channel.h>
   30.11 @@ -45,7 +46,7 @@ static int get_free_port(struct task_str
   30.12          if ( max == MAX_EVENT_CHANNELS )
   30.13              return -ENOSPC;
   30.14          
   30.15 -        max = (max == 0) ? 4 : (max * 2);
   30.16 +        max *= 2;
   30.17          
   30.18          chn = kmalloc(max * sizeof(event_channel_t), GFP_KERNEL);
   30.19          if ( unlikely(chn == NULL) )
   30.20 @@ -153,11 +154,11 @@ static long evtchn_bind_virq(evtchn_bind
   30.21  
   30.22      /*
   30.23       * Port 0 is the fallback port for VIRQs that haven't been explicitly
   30.24 -     * bound yet. The exception is the 'error VIRQ', which is permanently 
   30.25 +     * bound yet. The exception is the 'misdirect VIRQ', which is permanently 
   30.26       * bound to port 0.
   30.27       */
   30.28      if ( ((port = p->virq_to_evtchn[virq]) != 0) ||
   30.29 -         (virq == VIRQ_ERROR) ||
   30.30 +         (virq == VIRQ_MISDIRECT) ||
   30.31           ((port = get_free_port(p)) < 0) )
   30.32          goto out;
   30.33  
   30.34 @@ -181,27 +182,35 @@ static long evtchn_bind_pirq(evtchn_bind
   30.35  {
   30.36      struct task_struct *p = current;
   30.37      int pirq = bind->pirq;
   30.38 -    int port;
   30.39 +    int port, rc;
   30.40  
   30.41      if ( pirq >= ARRAY_SIZE(p->pirq_to_evtchn) )
   30.42          return -EINVAL;
   30.43  
   30.44      spin_lock(&p->event_channel_lock);
   30.45  
   30.46 -    if ( ((port = p->pirq_to_evtchn[pirq]) != 0) ||
   30.47 -         ((port = get_free_port(p)) < 0) )
   30.48 +    if ( ((rc = port = p->pirq_to_evtchn[pirq]) != 0) ||
   30.49 +         ((rc = port = get_free_port(p)) < 0) )
   30.50          goto out;
   30.51  
   30.52 +    p->pirq_to_evtchn[pirq] = port;
   30.53 +    rc = pirq_guest_bind(p, pirq, 
   30.54 +                         !!(bind->flags & BIND_PIRQ__WILL_SHARE));
   30.55 +    if ( rc != 0 )
   30.56 +    {
   30.57 +        p->pirq_to_evtchn[pirq] = 0;
   30.58 +        DPRINTK("Couldn't bind to PIRQ %d (error=%d)\n", pirq, rc);
   30.59 +        goto out;
   30.60 +    }
   30.61 +
   30.62      p->event_channel[port].state  = ECS_PIRQ;
   30.63      p->event_channel[port].u.pirq = pirq;
   30.64  
   30.65 -    p->pirq_to_evtchn[pirq] = port;
   30.66 -
   30.67   out:
   30.68      spin_unlock(&p->event_channel_lock);
   30.69  
   30.70 -    if ( port < 0 )
   30.71 -        return port;
   30.72 +    if ( rc < 0 )
   30.73 +        return rc;
   30.74  
   30.75      bind->port = port;
   30.76      return 0;
   30.77 @@ -220,7 +229,7 @@ static long __evtchn_close(struct task_s
   30.78  
   30.79      chn1 = p1->event_channel;
   30.80  
   30.81 -    /* NB. Port 0 is special (VIRQ_ERROR). Never let it be closed. */
   30.82 +    /* NB. Port 0 is special (VIRQ_MISDIRECT). Never let it be closed. */
   30.83      if ( (port1 <= 0) || (port1 >= p1->max_event_channel) )
   30.84      {
   30.85          rc = -EINVAL;
   30.86 @@ -237,7 +246,8 @@ static long __evtchn_close(struct task_s
   30.87          break;
   30.88  
   30.89      case ECS_PIRQ:
   30.90 -        p1->pirq_to_evtchn[chn1[port1].u.pirq] = 0;
   30.91 +        if ( (rc = pirq_guest_unbind(p1, chn1[port1].u.pirq)) == 0 )
   30.92 +            p1->pirq_to_evtchn[chn1[port1].u.pirq] = 0;
   30.93          break;
   30.94  
   30.95      case ECS_VIRQ:
   30.96 @@ -277,10 +287,7 @@ static long __evtchn_close(struct task_s
   30.97          if ( chn2[port2].u.remote.dom != p1 )
   30.98              BUG();
   30.99  
  30.100 -        chn2[port2].state         = ECS_UNBOUND;
  30.101 -        chn2[port2].u.remote.dom  = NULL;
  30.102 -        chn2[port2].u.remote.port = 0xFFFF;
  30.103 -
  30.104 +        chn2[port2].state = ECS_UNBOUND;
  30.105          evtchn_set_exception(p2, port2);
  30.106  
  30.107          break;
  30.108 @@ -289,10 +296,7 @@ static long __evtchn_close(struct task_s
  30.109          BUG();
  30.110      }
  30.111  
  30.112 -    chn1[port1].state         = ECS_FREE;
  30.113 -    chn1[port1].u.remote.dom  = NULL;
  30.114 -    chn1[port1].u.remote.port = 0xFFFF;
  30.115 -    
  30.116 +    chn1[port1].state = ECS_FREE;
  30.117      evtchn_set_exception(p1, port1);
  30.118  
  30.119   out:
  30.120 @@ -366,6 +370,7 @@ static long evtchn_status(evtchn_status_
  30.121      domid_t             dom = status->dom;
  30.122      int                 port = status->port;
  30.123      event_channel_t    *chn;
  30.124 +    long                rc = 0;
  30.125  
  30.126      if ( dom == DOMID_SELF )
  30.127          dom = current->domain;
  30.128 @@ -381,8 +386,8 @@ static long evtchn_status(evtchn_status_
  30.129  
  30.130      if ( (port < 0) || (port >= p->max_event_channel) )
  30.131      {
  30.132 -        spin_unlock(&p->event_channel_lock);
  30.133 -        return -EINVAL;
  30.134 +        rc = -EINVAL;
  30.135 +        goto out;
  30.136      }
  30.137  
  30.138      switch ( chn[port].state )
  30.139 @@ -410,8 +415,10 @@ static long evtchn_status(evtchn_status_
  30.140          BUG();
  30.141      }
  30.142  
  30.143 + out:
  30.144      spin_unlock(&p->event_channel_lock);
  30.145 -    return 0;
  30.146 +    put_task_struct(p);
  30.147 +    return rc;
  30.148  }
  30.149  
  30.150  
  30.151 @@ -476,7 +483,7 @@ int init_event_channels(struct task_stru
  30.152      p->max_event_channel = INIT_EVENT_CHANNELS;
  30.153      memset(p->event_channel, 0, INIT_EVENT_CHANNELS * sizeof(event_channel_t));
  30.154      p->event_channel[0].state  = ECS_VIRQ;
  30.155 -    p->event_channel[0].u.virq = VIRQ_ERROR;
  30.156 +    p->event_channel[0].u.virq = VIRQ_MISDIRECT;
  30.157      return 0;
  30.158  }
  30.159  
    31.1 --- a/xen/common/kernel.c	Fri Apr 02 13:58:27 2004 +0000
    31.2 +++ b/xen/common/kernel.c	Fri Apr 02 14:03:03 2004 +0000
    31.3 @@ -104,10 +104,12 @@ void cmain(unsigned long magic, multiboo
    31.4      dom0_createdomain_t dom0_params;
    31.5      unsigned long max_page;
    31.6      unsigned char *cmdline;
    31.7 -    module_t *mod;
    31.8 +    module_t *mod = (module_t *)__va(mbi->mods_addr);
    31.9      void *heap_start;
   31.10      int i;
   31.11      unsigned long max_mem;
   31.12 +    unsigned long dom0_memory_start, dom0_memory_end;
   31.13 +    unsigned long initial_images_start, initial_images_end;
   31.14  
   31.15      /* Parse the command-line options. */
   31.16      cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
   31.17 @@ -215,6 +217,19 @@ void cmain(unsigned long magic, multiboo
   31.18             max_page >> (20-PAGE_SHIFT),
   31.19  	   max_mem  >> (20-PAGE_SHIFT) );
   31.20  
   31.21 +    initial_images_start = MAX_DIRECTMAP_ADDRESS;
   31.22 +    initial_images_end   = initial_images_start + 
   31.23 +        (mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
   31.24 +    dom0_memory_start    = (initial_images_end + ((4<<20)-1)) & ~((4<<20)-1);
   31.25 +    dom0_memory_end      = dom0_memory_start + (opt_dom0_mem << 10);
   31.26 +    dom0_memory_end      = (dom0_memory_end + PAGE_SIZE - 1) & PAGE_MASK;
   31.27 +    
   31.28 +    /* Cheesy sanity check: enough memory for DOM0 allocation + some slack? */
   31.29 +    if ( (dom0_memory_end + (8<<20)) > (max_page<<PAGE_SHIFT) )
   31.30 +        panic("Not enough memory to craete initial domain!\n");
   31.31 +
   31.32 +    add_to_domain_alloc_list(dom0_memory_end, max_page << PAGE_SHIFT);
   31.33 +
   31.34      heap_start = memguard_init(&_end);
   31.35  
   31.36      printk("Xen heap size is %luKB\n", 
   31.37 @@ -243,24 +258,30 @@ void cmain(unsigned long magic, multiboo
   31.38      /* Create initial domain 0. */
   31.39      dom0_params.memory_kb = opt_dom0_mem;
   31.40      new_dom = do_createdomain(0, 0);
   31.41 -    if ( new_dom == NULL ) panic("Error creating domain 0\n");
   31.42 +    if ( new_dom == NULL )
   31.43 +        panic("Error creating domain 0\n");
   31.44  
   31.45      set_bit(PF_PRIVILEGED, &new_dom->flags);
   31.46  
   31.47      /*
   31.48       * We're going to setup domain0 using the module(s) that we stashed safely
   31.49 -     * above our MAX_DIRECTMAP_ADDRESS in boot/Boot.S The second module, if
   31.50 -     * present, is an initrd ramdisk
   31.51 +     * above our MAX_DIRECTMAP_ADDRESS in boot/boot.S. The second module, if
   31.52 +     * present, is an initrd ramdisk.
   31.53       */
   31.54 -    mod = (module_t *)__va(mbi->mods_addr);
   31.55 -    if ( setup_guestos(new_dom, 
   31.56 -                       &dom0_params, 1,
   31.57 -                       (char *)MAX_DIRECTMAP_ADDRESS, 
   31.58 -                       mod[mbi->mods_count-1].mod_end - mod[0].mod_start,
   31.59 -                       __va(mod[0].string),
   31.60 -		       (mbi->mods_count == 2) ?
   31.61 -                       (mod[1].mod_end - mod[1].mod_start):0)
   31.62 -         != 0 ) panic("Could not set up DOM0 guest OS\n");
   31.63 +    if ( construct_dom0(new_dom, dom0_memory_start, dom0_memory_end, 1,
   31.64 +                        (char *)initial_images_start, 
   31.65 +                        mod[0].mod_end-mod[0].mod_start,
   31.66 +                        (mbi->mods_count == 1) ? 0 :
   31.67 +                        (char *)initial_images_start + 
   31.68 +                        (mod[1].mod_start-mod[0].mod_start),
   31.69 +                        (mbi->mods_count == 1) ? 0 :
   31.70 +                        mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
   31.71 +                        __va(mod[0].string)) != 0)
   31.72 +        panic("Could not set up DOM0 guest OS\n");
   31.73 +
   31.74 +    /* The stash space for the initial kernel image can now be freed up. */
   31.75 +    add_to_domain_alloc_list(__pa(frame_table) + frame_table_size,
   31.76 +                             dom0_memory_start);
   31.77  
   31.78      wake_up(new_dom);
   31.79  
    32.1 --- a/xen/common/keyhandler.c	Fri Apr 02 13:58:27 2004 +0000
    32.2 +++ b/xen/common/keyhandler.c	Fri Apr 02 14:03:03 2004 +0000
    32.3 @@ -4,6 +4,7 @@
    32.4  #include <xen/event.h>
    32.5  #include <xen/console.h>
    32.6  #include <xen/serial.h>
    32.7 +#include <xen/sched.h>
    32.8  
    32.9  #define KEY_MAX 256
   32.10  #define STR_MAX  64
   32.11 @@ -74,29 +75,6 @@ static void kill_dom0(u_char key, void *
   32.12      kill_other_domain(0, 0);
   32.13  }
   32.14  
   32.15 -
   32.16 -/* XXX SMH: this is keir's fault */
   32.17 -static char *task_states[] = 
   32.18 -{ 
   32.19 -    "Runnable  ", 
   32.20 -    "Int Sleep ", 
   32.21 -    "UInt Sleep", 
   32.22 -    NULL,
   32.23 -    "Stopped   ", 
   32.24 -    NULL,
   32.25 -    NULL,
   32.26 -    NULL,
   32.27 -    "Dying     ",
   32.28 -    NULL,
   32.29 -    NULL,
   32.30 -    NULL,
   32.31 -    NULL,
   32.32 -    NULL,
   32.33 -    NULL,
   32.34 -    NULL,
   32.35 -    "Sched priv"
   32.36 -}; 
   32.37 -
   32.38  void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs) 
   32.39  {
   32.40      unsigned long       flags;
   32.41 @@ -111,13 +89,14 @@ void do_task_queues(u_char key, void *de
   32.42  
   32.43      for_each_domain ( p )
   32.44      {
   32.45 -        printk("Xen: DOM %llu, CPU %d [has=%c], state = %s, "
   32.46 -               "hyp_events = %08x\n", 
   32.47 -               p->domain, p->processor, p->has_cpu ? 'T':'F', 
   32.48 -               task_states[p->state], p->hyp_events); 
   32.49 +        printk("Xen: DOM %llu, CPU %d [has=%c], state = ",
   32.50 +               p->domain, p->processor, p->has_cpu ? 'T':'F'); 
   32.51 +        sched_prn_state(p ->state);
   32.52 +	printk(", hyp_events = %08x\n", p->hyp_events);
   32.53          s = p->shared_info; 
   32.54 -        printk("Guest: upcall_pend = %08lx, upcall_mask = %08lx\n", 
   32.55 -               s->evtchn_upcall_pending, s->evtchn_upcall_mask);
   32.56 +        printk("Guest: upcall_pend = %02x, upcall_mask = %02x\n", 
   32.57 +               s->vcpu_data[0].evtchn_upcall_pending, 
   32.58 +               s->vcpu_data[0].evtchn_upcall_mask);
   32.59          printk("Notifying guest...\n"); 
   32.60          send_guest_virq(p, VIRQ_DEBUG);
   32.61      }
    33.1 --- a/xen/common/memory.c	Fri Apr 02 13:58:27 2004 +0000
    33.2 +++ b/xen/common/memory.c	Fri Apr 02 14:03:03 2004 +0000
    33.3 @@ -170,7 +170,7 @@ unsigned long frame_table_size;
    33.4  unsigned long max_page;
    33.5  
    33.6  struct list_head free_list;
    33.7 -spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED;
    33.8 +spinlock_t free_list_lock;
    33.9  unsigned int free_pfns;
   33.10  
   33.11  /* Used to defer flushing of memory structures. */
   33.12 @@ -191,10 +191,6 @@ static struct {
   33.13   */
   33.14  void __init init_frametable(unsigned long nr_pages)
   33.15  {
   33.16 -    struct pfn_info *pf;
   33.17 -    unsigned long page_index;
   33.18 -    unsigned long flags;
   33.19 -
   33.20      memset(percpu_info, 0, sizeof(percpu_info));
   33.21  
   33.22      max_page = nr_pages;
   33.23 @@ -203,23 +199,28 @@ void __init init_frametable(unsigned lon
   33.24      frame_table = (struct pfn_info *)FRAMETABLE_VIRT_START;
   33.25      memset(frame_table, 0, frame_table_size);
   33.26  
   33.27 +    spin_lock_init(&free_list_lock);
   33.28 +    INIT_LIST_HEAD(&free_list);    
   33.29      free_pfns = 0;
   33.30  
   33.31 -    /* Put all domain-allocatable memory on a free list. */
   33.32 +}
   33.33 +
   33.34 +void add_to_domain_alloc_list(unsigned long ps, unsigned long pe)
   33.35 +{
   33.36 +    struct pfn_info *pf;
   33.37 +    unsigned long i;
   33.38 +    unsigned long flags;
   33.39 +
   33.40      spin_lock_irqsave(&free_list_lock, flags);
   33.41 -    INIT_LIST_HEAD(&free_list);
   33.42 -    for( page_index = (__pa(frame_table) + frame_table_size) >> PAGE_SHIFT; 
   33.43 -         page_index < nr_pages;
   33.44 -         page_index++ )      
   33.45 +    for ( i = ps >> PAGE_SHIFT; i < (pe >> PAGE_SHIFT); i++ )
   33.46      {
   33.47 -        pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
   33.48 +        pf = list_entry(&frame_table[i].list, struct pfn_info, list);
   33.49          list_add_tail(&pf->list, &free_list);
   33.50          free_pfns++;
   33.51      }
   33.52      spin_unlock_irqrestore(&free_list_lock, flags);
   33.53  }
   33.54  
   33.55 -
   33.56  static void __invalidate_shadow_ldt(struct task_struct *p)
   33.57  {
   33.58      int i;
    34.1 --- a/xen/common/page_alloc.c	Fri Apr 02 13:58:27 2004 +0000
    34.2 +++ b/xen/common/page_alloc.c	Fri Apr 02 14:03:03 2004 +0000
    34.3 @@ -56,6 +56,13 @@ static void map_alloc(unsigned long firs
    34.4  {
    34.5      unsigned long start_off, end_off, curr_idx, end_idx;
    34.6  
    34.7 +#ifndef NDEBUG
    34.8 +    unsigned long i;
    34.9 +    /* Check that the block isn't already allocated. */
   34.10 +    for ( i = 0; i < nr_pages; i++ )
   34.11 +        ASSERT(!allocated_in_map(first_page + i));
   34.12 +#endif
   34.13 +
   34.14      curr_idx  = first_page / PAGES_PER_MAPWORD;
   34.15      start_off = first_page & (PAGES_PER_MAPWORD-1);
   34.16      end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
   34.17 @@ -78,6 +85,13 @@ static void map_free(unsigned long first
   34.18  {
   34.19      unsigned long start_off, end_off, curr_idx, end_idx;
   34.20  
   34.21 +#ifndef NDEBUG
   34.22 +    unsigned long i;
   34.23 +    /* Check that the block isn't already freed. */
   34.24 +    for ( i = 0; i < nr_pages; i++ )
   34.25 +        ASSERT(allocated_in_map(first_page + i));
   34.26 +#endif
   34.27 +
   34.28      curr_idx = first_page / PAGES_PER_MAPWORD;
   34.29      start_off = first_page & (PAGES_PER_MAPWORD-1);
   34.30      end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
   34.31 @@ -227,7 +241,7 @@ void __init init_page_allocator(unsigned
   34.32      /* Allocate space for the allocation bitmap. */
   34.33      bitmap_size  = (max+1) >> (PAGE_SHIFT+3);
   34.34      bitmap_size  = round_pgup(bitmap_size);
   34.35 -    alloc_bitmap = (unsigned long *)__va(min);
   34.36 +    alloc_bitmap = (unsigned long *)phys_to_virt(min);
   34.37      min         += bitmap_size;
   34.38      range        = max - min;
   34.39  
   34.40 @@ -240,6 +254,8 @@ void __init init_page_allocator(unsigned
   34.41      min += PAGE_OFFSET;
   34.42      max += PAGE_OFFSET;
   34.43  
   34.44 +    printk("Initialising Xen allocator with %luMB memory\n", range >> 20);
   34.45 +
   34.46      p         = min;
   34.47      remaining = range;
   34.48      while ( remaining != 0 )
   34.49 @@ -280,12 +296,12 @@ retry:
   34.50      spin_lock_irqsave(&alloc_lock, flags);
   34.51  
   34.52      /* Find smallest order which can satisfy the request. */
   34.53 -    for ( i = order; i < FREELIST_SIZE; i++ ) {
   34.54 +    for ( i = order; i < FREELIST_SIZE; i++ )
   34.55  	if ( !FREELIST_EMPTY(i) ) 
   34.56  	    break;
   34.57 -    }
   34.58  
   34.59 -    if ( i == FREELIST_SIZE ) goto no_memory;
   34.60 +    if ( i == FREELIST_SIZE ) 
   34.61 +        goto no_memory;
   34.62   
   34.63      /* Unlink a chunk. */
   34.64      alloc_ch = free_head[i];
   34.65 @@ -315,7 +331,7 @@ retry:
   34.66          GUARD(spare_ch, i);
   34.67      }
   34.68      
   34.69 -    map_alloc(__pa(alloc_ch)>>PAGE_SHIFT, 1<<order);
   34.70 +    map_alloc(virt_to_phys(alloc_ch)>>PAGE_SHIFT, 1<<order);
   34.71  
   34.72      spin_unlock_irqrestore(&alloc_lock, flags);
   34.73  
   34.74 @@ -327,9 +343,10 @@ retry:
   34.75      return((unsigned long)alloc_ch);
   34.76  
   34.77   no_memory:
   34.78 +    spin_unlock_irqrestore(&alloc_lock, flags);
   34.79 +        
   34.80      if ( attempts++ < 8 )
   34.81      {
   34.82 -        spin_unlock_irqrestore(&alloc_lock, flags);
   34.83          kmem_cache_reap(0);
   34.84          goto retry;
   34.85      }
   34.86 @@ -348,14 +365,11 @@ void __free_pages(unsigned long p, int o
   34.87      chunk_head_t *ch;
   34.88      chunk_tail_t *ct;
   34.89      unsigned long flags;
   34.90 -    unsigned long pagenr = __pa(p) >> PAGE_SHIFT;
   34.91 +    unsigned long pfn = virt_to_phys((void *)p) >> PAGE_SHIFT;
   34.92  
   34.93      spin_lock_irqsave(&alloc_lock, flags);
   34.94  
   34.95  #ifdef MEMORY_GUARD
   34.96 -    /* Check that the block isn't already freed. */
   34.97 -    if ( !allocated_in_map(pagenr) )
   34.98 -        BUG();
   34.99      /* Check that the block isn't already guarded. */
  34.100      if ( __put_user(1, (int*)p) )
  34.101          BUG();
  34.102 @@ -363,7 +377,7 @@ void __free_pages(unsigned long p, int o
  34.103      memset((void *)p, 0xaa, size);
  34.104  #endif
  34.105  
  34.106 -    map_free(pagenr, 1<<order);
  34.107 +    map_free(pfn, 1<<order);
  34.108      
  34.109      /* Merge chunks as far as possible. */
  34.110      for ( ; ; )
  34.111 @@ -371,18 +385,19 @@ void __free_pages(unsigned long p, int o
  34.112          if ( (p & size) )
  34.113          {
  34.114              /* Merge with predecessor block? */
  34.115 -            if ( allocated_in_map(pagenr-1) )
  34.116 +            if ( allocated_in_map(pfn-1) )
  34.117                  break;
  34.118              ct = (chunk_tail_t *)p - 1;
  34.119              if ( TAIL_LEVEL(ct) != order )
  34.120                  break;
  34.121 -            ch = (chunk_head_t *)(p - size);
  34.122 -            p -= size;
  34.123 +            p   -= size;
  34.124 +            pfn -= 1<<order;
  34.125 +            ch   = (chunk_head_t *)p;
  34.126          }
  34.127          else
  34.128          {
  34.129              /* Merge with successor block? */
  34.130 -            if ( allocated_in_map(pagenr+(1<<order)) )
  34.131 +            if ( allocated_in_map(pfn+(1<<order)) )
  34.132                  break;
  34.133              ch = (chunk_head_t *)(p + size);
  34.134              if ( HEAD_LEVEL(ch) != order )
    35.1 --- a/xen/common/physdev.c	Fri Apr 02 13:58:27 2004 +0000
    35.2 +++ b/xen/common/physdev.c	Fri Apr 02 14:03:03 2004 +0000
    35.3 @@ -1,41 +1,23 @@
    35.4  /* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    35.5   ****************************************************************************
    35.6 - * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge
    35.7 + * (c) 2004 - Rolf Neugebauer - Intel Research Cambridge
    35.8 + * (c) 2004 - Keir Fraser - University of Cambridge
    35.9   ****************************************************************************
   35.10 - *
   35.11 - *        File: phys_dev.c
   35.12 - *      Author: Rolf Neugebauer (rolf.neugebauer@intel.com)
   35.13 - *        Date: Feb 2004
   35.14   * 
   35.15   * Description: allows a domain to access devices on the PCI bus
   35.16   *
   35.17 - * a guest os may be given access to particular devices on the PCI
   35.18 - * bus. to allow the standard PCI device discovery to work it may
   35.19 - * also have limited access to devices (bridges) in the PCI device
   35.20 - * tree between the device and the PCI root device.
   35.21 - *
   35.22 - * for each domain a list of PCI devices is maintained, describing the
   35.23 + * A guest OS may be given access to particular devices on the PCI bus.
   35.24 + * For each domain a list of PCI devices is maintained, describing the
   35.25   * access mode for the domain. 
   35.26   *
   35.27 - * guests can figure out the virtualised, or better, partioned PCI space
   35.28 - * through normal pci config register access. Some of the accesses, in
   35.29 - * particular write access are faked out. For example the sequence for
   35.30 - * for detecting the IO regions, which require writes to determine the
   35.31 - * size of teh region, is faked out by a very simple state machine, 
   35.32 - * preventing direct writes to the PCI config registers by a guest.
   35.33 - *
   35.34 - * Interrupt handling is currently done in a very cheese fashion.
   35.35 - * We take the default irq controller code and replace it with our own.
   35.36 - * If an interrupt comes in it is acked using the PICs normal routine. Then
   35.37 - * an event is send to the receiving domain which has to explicitly call
   35.38 - * once it is finished dealing with the interrupt. Only then the PICs end
   35.39 - * handler is called. very cheesy with all sorts of problems but it seems 
   35.40 - * to work in normal cases. No shared interrupts are allowed.
   35.41 - *
   35.42 - * XXX this code is not SMP safe at the moment!
   35.43 + * Guests can figure out the virtualised PCI space through normal PCI config
   35.44 + * register access. Some of the accesses, in particular write accesses, are
   35.45 + * faked. For example the sequence for detecting the IO regions, which requires
   35.46 + * writes to determine the size of the region, is faked out by a very simple
   35.47 + * state machine, preventing direct writes to the PCI config registers by a
   35.48 + * guest.
   35.49   */
   35.50  
   35.51 -
   35.52  #include <xen/config.h>
   35.53  #include <xen/lib.h>
   35.54  #include <xen/types.h>
   35.55 @@ -47,51 +29,45 @@
   35.56  #include <hypervisor-ifs/hypervisor-if.h>
   35.57  #include <hypervisor-ifs/physdev.h>
   35.58  
   35.59 -#if 1
   35.60 -#define DBG(_x...)
   35.61 +/* Called by PHYSDEV_PCI_INITIALISE_DEVICE to finalise IRQ routing. */
   35.62 +extern void pcibios_enable_irq(struct pci_dev *dev);
   35.63 +
   35.64 +#if 0
   35.65 +#define VERBOSE_INFO(_f, _a...) printk( _f , ## _a )
   35.66  #else
   35.67 -#define DBG(_x...) printk(_x)
   35.68 +#define VERBOSE_INFO(_f, _a...) ((void)0)
   35.69  #endif
   35.70  
   35.71 +#if 1 || !defined(NDEBUG)
   35.72 +#define INFO(_f, _a...) printk( _f, ## _a )
   35.73 +#else
   35.74 +#define INFO(_f, _a...) ((void)0)
   35.75 +#endif
   35.76 +
   35.77 +
   35.78  #define ACC_READ  1
   35.79  #define ACC_WRITE 2
   35.80  
   35.81 -/* upper bounds for PCI  devices */
   35.82 +/* Upper bounds for PCI-device addressing. */
   35.83  #define PCI_BUSMAX  255
   35.84  #define PCI_DEVMAX   31
   35.85  #define PCI_FUNCMAX   7
   35.86  #define PCI_REGMAX  255
   35.87  
   35.88 -/* bit offsets into state */
   35.89 +/* Bit offsets into state. */
   35.90  #define ST_BASE_ADDRESS  0   /* bits 0-5: are for base address access */
   35.91  #define ST_ROM_ADDRESS   6   /* bit 6: is for rom address access */    
   35.92 -#define ST_IRQ_DELIVERED 7   /* bit 7: waiting for end irq call */    
   35.93  
   35.94 -typedef struct _phys_dev_st
   35.95 -{
   35.96 +typedef struct _phys_dev_st {
   35.97      int flags;                       /* flags for access etc */
   35.98      struct pci_dev *dev;             /* the device */
   35.99      struct list_head node;           /* link to the list */
  35.100      struct task_struct *owner;       /* 'owner of this device' */
  35.101      int state;                       /* state for various checks */
  35.102 -
  35.103 -    hw_irq_controller *new_handler;  /* saved old handler */
  35.104 -    hw_irq_controller *orig_handler; /* saved old handler */
  35.105 -
  35.106  } phys_dev_t;
  35.107  
  35.108  
  35.109 -#define MAX_IRQS 32
  35.110 -/* an array of device descriptors index by IRQ number */
  35.111 -static phys_dev_t *irqs[MAX_IRQS];
  35.112 -
  35.113 -/*
  35.114 - * 
  35.115 - * General functions
  35.116 - * 
  35.117 - */
  35.118 -
  35.119 -/* find a device on the device list */
  35.120 +/* Find a device on a per-domain device list. */
  35.121  static phys_dev_t *find_pdev(struct task_struct *p, struct pci_dev *dev)
  35.122  {
  35.123      phys_dev_t *t, *res = NULL;
  35.124 @@ -109,24 +85,22 @@ static phys_dev_t *find_pdev(struct task
  35.125      return res;
  35.126  }
  35.127  
  35.128 -/* add the device to the list of devices task p can access */
  35.129 +/* Add a device to a per-domain device-access list. */
  35.130  static void add_dev_to_task(struct task_struct *p, 
  35.131                              struct pci_dev *dev, int acc)
  35.132  {
  35.133 -    
  35.134      phys_dev_t *pdev;
  35.135      
  35.136      if ( (pdev = find_pdev(p, dev)) )
  35.137      {
  35.138 -        /* device already on list, update access  */
  35.139 +        /* Device already on list: update access permissions. */
  35.140          pdev->flags = acc;
  35.141          return;
  35.142      }
  35.143  
  35.144 -    /* add device */
  35.145      if ( !(pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL)) )
  35.146      {
  35.147 -        printk("error allocating pdev structure\n");
  35.148 +        INFO("Error allocating pdev structure.\n");
  35.149          return;
  35.150      }
  35.151      
  35.152 @@ -137,7 +111,6 @@ static void add_dev_to_task(struct task_
  35.153  
  35.154      if ( acc == ACC_WRITE )
  35.155          pdev->owner = p;
  35.156 -
  35.157  }
  35.158  
  35.159  /*
  35.160 @@ -161,11 +134,11 @@ int physdev_pci_access_modify(
  35.161  
  35.162      if ( !enable )
  35.163      {
  35.164 -        DPRINTK("Disallowing access is not yet supported.\n");
  35.165 +        INFO("Disallowing access is not yet supported.\n");
  35.166          return -EINVAL;
  35.167      }
  35.168  
  35.169 -    DPRINTK("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func);
  35.170 +    INFO("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func);
  35.171  
  35.172      if ( (p = find_domain_by_id(dom)) == NULL ) 
  35.173          return -ESRCH;
  35.174 @@ -176,36 +149,36 @@ int physdev_pci_access_modify(
  35.175      /* Grant write access to the specified device. */
  35.176      if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
  35.177      {
  35.178 -        DPRINTK("  dev does not exist\n");
  35.179 +        INFO("  dev does not exist\n");
  35.180          return -ENODEV;
  35.181      }
  35.182      add_dev_to_task(p, pdev, ACC_WRITE);
  35.183 -    DPRINTK("  add RW %02x:%02x:%02x\n", pdev->bus->number,
  35.184 -            PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  35.185 +    INFO("  add RW %02x:%02x:%02x\n", pdev->bus->number,
  35.186 +         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  35.187  
  35.188  
  35.189      /* Grant read access to the root device. */
  35.190      if ( (rdev = pci_find_slot(0, PCI_DEVFN(0, 0))) == NULL )
  35.191      {
  35.192 -        DPRINTK("  bizarre -- no PCI root dev\n");
  35.193 +        INFO("  bizarre -- no PCI root dev\n");
  35.194          return -ENODEV;
  35.195      }
  35.196      add_dev_to_task(p, rdev, ACC_READ);
  35.197 -    DPRINTK("  add R0 %02x:%02x:%02x\n", 0, 0, 0);
  35.198 +    INFO("  add R0 %02x:%02x:%02x\n", 0, 0, 0);
  35.199  
  35.200      /* Grant read access to all devices on the path to the root. */
  35.201      for ( tdev = pdev->bus->self; tdev != NULL; tdev = tdev->bus->self )
  35.202      {
  35.203          add_dev_to_task(p, tdev, ACC_READ);
  35.204 -        DPRINTK("  add RO %02x:%02x:%02x\n", tdev->bus->number,
  35.205 -                PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn));
  35.206 +        INFO("  add RO %02x:%02x:%02x\n", tdev->bus->number,
  35.207 +             PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn));
  35.208      }
  35.209  
  35.210      if ( pdev->hdr_type == PCI_HEADER_TYPE_NORMAL )
  35.211          return 0;
  35.212      
  35.213 -    /* The  device is a bridge or cardbus. */
  35.214 -    printk("XXX can't give access to bridge devices yet\n");
  35.215 +    /* The device is a bridge or cardbus. */
  35.216 +    INFO("XXX can't give access to bridge devices yet\n");
  35.217  
  35.218      return 0;
  35.219  }
  35.220 @@ -227,15 +200,15 @@ inline static int check_dev_acc (struct 
  35.221      if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX )
  35.222          return -EINVAL;
  35.223  
  35.224 -    DBG("a=%c b=%x d=%x f=%x ", (acc == ACC_READ) ? 'R' : 'W',
  35.225 -        mask, bus, dev, func);
  35.226 +    VERBOSE_INFO("a=%c b=%x d=%x f=%x ", (acc == ACC_READ) ? 'R' : 'W',
  35.227 +                 mask, bus, dev, func);
  35.228  
  35.229      /* check target device */
  35.230      target_devfn = PCI_DEVFN(dev, func);
  35.231      target_dev   = pci_find_slot(bus, target_devfn);
  35.232      if ( !target_dev )
  35.233      {
  35.234 -        DBG("target does not exist\n");
  35.235 +        VERBOSE_INFO("target does not exist\n");
  35.236          return -ENODEV;
  35.237      }
  35.238  
  35.239 @@ -243,7 +216,7 @@ inline static int check_dev_acc (struct 
  35.240      target_pdev = find_pdev(p, target_dev);
  35.241      if ( !target_pdev )
  35.242      {
  35.243 -        DBG("dom has no access to target\n");
  35.244 +        VERBOSE_INFO("dom has no access to target\n");
  35.245          return -EPERM;
  35.246      }
  35.247  
  35.248 @@ -251,11 +224,6 @@ inline static int check_dev_acc (struct 
  35.249      return 0;
  35.250  }
  35.251  
  35.252 -/*
  35.253 - * 
  35.254 - * PCI config space access
  35.255 - * 
  35.256 - */
  35.257  
  35.258  /*
  35.259   * Base address registers contain the base address for IO regions.
  35.260 @@ -276,60 +244,58 @@ inline static int check_dev_acc (struct 
  35.261   * cleared again. If the guest attempts to "restores" a wrong value an
  35.262   * error is flagged.
  35.263   */
  35.264 -static int do_base_address_access(phys_dev_t *pdev, int acc,
  35.265 -                                  int seg, int bus, int dev, int func, 
  35.266 -                                  int reg, int len, u32 *val)
  35.267 +static int do_base_address_access(phys_dev_t *pdev, int acc, int idx, 
  35.268 +                                  int len, u32 *val)
  35.269  {
  35.270 -    int idx, st_bit, ret = -EINVAL;
  35.271 +    int st_bit, reg = PCI_BASE_ADDRESS_0 + (idx*4), ret = -EINVAL;
  35.272 +    struct pci_dev *dev = pdev->dev;
  35.273      u32 orig_val, sz;
  35.274      struct resource *res;
  35.275  
  35.276 -    idx    = (reg - PCI_BASE_ADDRESS_0)/4;
  35.277 +    if ( len != sizeof(u32) )
  35.278 +    {
  35.279 +        INFO("Guest attempting sub-dword %s to BASE_ADDRESS %d\n", 
  35.280 +             (acc == ACC_READ) ? "read" : "write", idx);
  35.281 +        return -EPERM;
  35.282 +    }
  35.283 +
  35.284      st_bit = idx + ST_BASE_ADDRESS;
  35.285      res    = &(pdev->dev->resource[idx]);
  35.286  
  35.287      if ( acc == ACC_WRITE )
  35.288      {
  35.289 -        if ( *val == 0xffffffff || 
  35.290 -             ((res->flags & IORESOURCE_IO) && *val == 0xffff) )
  35.291 +        if ( (*val == 0xffffffff) || 
  35.292 +             ((res->flags & IORESOURCE_IO) && (*val == 0xffff)) )
  35.293          {
  35.294 -            /* set bit and return */
  35.295 +            /* Set bit and return. */
  35.296              set_bit(st_bit, &pdev->state);
  35.297              ret = 0;
  35.298          }
  35.299          else
  35.300          {
  35.301 -            /* assume guest wants to set the base address */
  35.302 +            /* Assume guest wants to set the base address. */
  35.303              clear_bit(st_bit, &pdev->state);
  35.304  
  35.305              /* check if guest tries to restore orig value */
  35.306 -            ret = pci_config_read(seg, bus, dev, func, reg, len, &orig_val);
  35.307 -            if ( *val != orig_val ) 
  35.308 +            ret = pci_read_config_dword(dev, reg, &orig_val);
  35.309 +            if ( (ret == 0) && (*val != orig_val) ) 
  35.310              {
  35.311 -                printk("caution: guest tried to change base address range.\n");
  35.312 +                INFO("Guest attempting update to BASE_ADDRESS %d\n", idx);
  35.313                  ret = -EPERM;
  35.314              }
  35.315          }
  35.316 -        DBG("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
  35.317 -            " val=0x%08x %lx\n", bus, dev, func, reg, len, *val,
  35.318 -            pdev->state);
  35.319 -
  35.320 +        VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
  35.321 +                     " val=0x%08x %lx\n", 
  35.322 +                     dev->bus->number, PCI_SLOT(dev->devfn), 
  35.323 +                     PCI_FUNC(dev->devfn), reg, len, *val, pdev->state);
  35.324      }
  35.325 -
  35.326      else if ( acc == ACC_READ )
  35.327      {
  35.328 -        if ( !test_bit(st_bit, &pdev->state) )
  35.329 +        ret = pci_read_config_dword(dev, reg, val);
  35.330 +        if ( (ret == 0) && test_bit(st_bit, &pdev->state) )
  35.331          {
  35.332 -            /* just read and return */
  35.333 -            ret = pci_config_read(seg, bus, dev, func, reg, len, val);
  35.334 -        }
  35.335 -        else
  35.336 -        {
  35.337 -            /* fake value */
  35.338 -            ret = pci_config_read(seg, bus, dev, func, reg, len, &orig_val);
  35.339 -
  35.340 +            /* Cook the value. */
  35.341              sz  = res->end - res->start;
  35.342 -
  35.343              if ( res->flags & IORESOURCE_MEM )
  35.344              {
  35.345                  /* this is written out explicitly for clarity */
  35.346 @@ -354,77 +320,75 @@ static int do_base_address_access(phys_d
  35.347                  *val = *val & (sz << 2);
  35.348                  *val = *val | 0x1;
  35.349              }
  35.350 -            ret = 0;
  35.351          }
  35.352 -        DBG("fixed pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x"
  35.353 -            " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state);
  35.354 +        VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x"
  35.355 +                     " val=0x%08x %lx\n", 
  35.356 +                     dev->bus->number, PCI_SLOT(dev->devfn), 
  35.357 +                     PCI_FUNC(dev->devfn), reg, len, *val, pdev->state);
  35.358      }
  35.359  
  35.360      return ret;
  35.361  }
  35.362  
  35.363 -/*
  35.364 - * fake out read/write access to rom address register
  35.365 - * pretty much the same as a above
  35.366 - */
  35.367 -static int do_rom_address_access(phys_dev_t *pdev, int acc,
  35.368 -                                  int seg, int bus, int dev, int func, 
  35.369 -                                  int reg, int len, u32 *val)
  35.370 +/* Fake out guest read/write access to the ROM address register (PCI_ROM_ADDRESS). */
  35.371 +static int do_rom_address_access(phys_dev_t *pdev, int acc, int len, u32 *val)
  35.372  {
  35.373      int st_bit, ret = -EINVAL;
  35.374 +    struct pci_dev *dev = pdev->dev;
  35.375      u32 orig_val, sz;
  35.376      struct resource *res;
  35.377  
  35.378 +    if ( len != sizeof(u32) )
  35.379 +    {
  35.380 +        INFO("Guest attempting sub-dword %s to ROM_ADDRESS\n", 
  35.381 +             (acc == ACC_READ) ? "read" : "write");
  35.382 +        return -EPERM; /* only whole-dword accesses are handled below */
  35.383 +    }
  35.384 +
  35.385      st_bit = ST_ROM_ADDRESS;
  35.386      res = &(pdev->dev->resource[PCI_ROM_RESOURCE]);
  35.387  
  35.388      if ( acc == ACC_WRITE )
  35.389      {
  35.390 -        if ( *val == 0xffffffff || *val == 0xfffffffe)
  35.391 +        if ( (*val == 0xffffffff) || (*val == 0xfffffffe) )
  35.392          {
  35.393 -            /* 0xffffffff would be unusual, but we check anyway */
  35.394 -            /* set bit and return */
  35.395 +            /* NB. 0xffffffff would be unusual, but we trap it anyway. Set the state bit so the next read is cooked. */
  35.396              set_bit(st_bit, &pdev->state);
  35.397              ret = 0;
  35.398          }
  35.399          else
  35.400          {
  35.401 -            /* assume guest wants to set the base address */
  35.402 +            /* Assume guest wants simply to set the base address. */
  35.403              clear_bit(st_bit, &pdev->state);
  35.404              
  35.405 -            /* check if guest tries to restore orig value */
  35.406 -            ret = pci_config_read(seg, bus, dev, func, reg, len, &orig_val);
  35.407 -            if ( (*val != orig_val) ) 
  35.408 +            /* Check if guest tries to restore the original value. */
  35.409 +            ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, &orig_val);
  35.410 +            if ( (ret == 0) && (*val != orig_val) ) 
  35.411              {
  35.412 -                if (*val != 0x00000000 )
  35.413 +                if ( (*val != 0x00000000) )
  35.414                  {
  35.415 -                    printk("caution: guest tried to change rom address.\n");
  35.416 +                    INFO("caution: guest tried to change rom address.\n");
  35.417                      ret = -EPERM;
  35.418                  }
  35.419                  else
  35.420                  {
  35.421 -                    printk ("guest disabled rom access for %02x:%02x:%02x\n",
  35.422 -                            bus, dev, func);
  35.423 -                    ret = 0;
  35.424 +                    INFO("guest disabled rom access for %02x:%02x:%02x\n",
  35.425 +                         dev->bus->number, PCI_SLOT(dev->devfn), 
  35.426 +                         PCI_FUNC(dev->devfn)); /* ret stays 0 from the successful read above */
  35.427                  }
  35.428              }
  35.429 -
  35.430          }
  35.431 -        DBG("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
  35.432 -            " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state);
  35.433 -     
  35.434 +        VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
  35.435 +                     " val=0x%08x %lx\n", 
  35.436 +                     dev->bus->number, PCI_SLOT(dev->devfn), 
  35.437 +                     PCI_FUNC(dev->devfn), reg, len, *val, pdev->state); /* NOTE(review): 'reg' is no longer a parameter or visible local -- confirm this builds with VERBOSE_INFO enabled */
  35.438      }
  35.439      else if ( acc == ACC_READ )
  35.440      {
  35.441 -       if ( !test_bit(st_bit, &pdev->state) )
  35.442 +        ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, val);
  35.443 +        if ( (ret == 0) && test_bit(st_bit, &pdev->state) )
  35.444          {
  35.445 -            /* just read and return */
  35.446 -            ret = pci_config_read(seg, bus, dev, func, reg, len, val);
  35.447 -        }
  35.448 -        else
  35.449 -        {
  35.450 -            /* fake value */
  35.451 -            ret = pci_config_read(seg, bus, dev, func, reg, len, &orig_val);
  35.452 +            /* Cook the value. NOTE(review): orig_val (used below for bit 0) is not assigned on this path in the visible lines -- confirm. */
  35.453              sz  = res->end - res->start;
  35.454              *val = 0xffffffff;
  35.455              /* leave bit 0 untouched */
  35.456 @@ -434,306 +398,162 @@ static int do_rom_address_access(phys_de
  35.457              *val = *val & (sz << 11);
  35.458              *val = *val | (orig_val & 0x1);
  35.459          }
  35.460 +        VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x"
  35.461 +                     " val=0x%08x %lx\n", 
  35.462 +                     dev->bus->number, PCI_SLOT(dev->devfn), 
  35.463 +                     PCI_FUNC(dev->devfn), reg, len, *val, pdev->state); /* NOTE(review): same undeclared 'reg' concern as the write path */
  35.464 +    }
  35.465  
  35.466 -        DBG("fixed pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x"
  35.467 -            " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state);
  35.468 -    }
  35.469      return ret;
  35.470  
  35.471  }
  35.472  
  35.473  /*
  35.474 - * handle a domains pci config space read access if it has access to
  35.475 - * the device.
  35.476 - * For some registers for read-only devices (e.g. address base registers)
  35.477 - * we need to maintain a state machine.
  35.478 + * Handle a PCI config space read access if the domain has access privileges. BAR, ROM-address and interrupt-line reads are intercepted; all other registers are passed to pci_config_read().
  35.479   */
  35.480 -static long pci_cfgreg_read(int seg, int bus, int dev, int func, int reg,
  35.481 +static long pci_cfgreg_read(int bus, int dev, int func, int reg,
  35.482                              int len, u32 *val)
  35.483  {
  35.484 -    int ret = 0;
  35.485 +    int ret;
  35.486      phys_dev_t *pdev;
  35.487  
  35.488 -    ret = check_dev_acc(current, bus, dev, func, &pdev);
  35.489 -    if ( ret != 0 )
  35.490 +    if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 ) /* -EINVAL, -ENODEV or -EPERM on failure */
  35.491          return ret;
  35.492  
  35.493 -    /* fake out read requests for some registers */
  35.494 -    switch (reg)
  35.495 +    /* Fake out read requests for some registers. */
  35.496 +    switch ( reg )
  35.497      {
  35.498      case PCI_BASE_ADDRESS_0:
  35.499 +        ret = do_base_address_access(pdev, ACC_READ, 0, len, val); /* third argument is the BAR index */
  35.500 +        break;
  35.501 +
  35.502      case PCI_BASE_ADDRESS_1:
  35.503 +        ret = do_base_address_access(pdev, ACC_READ, 1, len, val);
  35.504 +        break;
  35.505 +
  35.506      case PCI_BASE_ADDRESS_2:
  35.507 +        ret = do_base_address_access(pdev, ACC_READ, 2, len, val);
  35.508 +        break;
  35.509 +
  35.510      case PCI_BASE_ADDRESS_3:
  35.511 +        ret = do_base_address_access(pdev, ACC_READ, 3, len, val);
  35.512 +        break;
  35.513 +
  35.514      case PCI_BASE_ADDRESS_4:
  35.515 +        ret = do_base_address_access(pdev, ACC_READ, 4, len, val);
  35.516 +        break;
  35.517 +
  35.518      case PCI_BASE_ADDRESS_5:
  35.519 -        ret = do_base_address_access (pdev, ACC_READ, seg, bus, dev, 
  35.520 -                                      func, reg, len, val);
  35.521 -        return ret;
  35.522 +        ret = do_base_address_access(pdev, ACC_READ, 5, len, val);
  35.523          break;
  35.524 +
  35.525      case PCI_ROM_ADDRESS:
  35.526 -        ret = do_rom_address_access (pdev, ACC_READ, seg, bus, dev, 
  35.527 -                                      func, reg, len, val);
  35.528 -        return ret;
  35.529 +        ret = do_rom_address_access(pdev, ACC_READ, len, val);
  35.530          break;        
  35.531 +
  35.532 +    case PCI_INTERRUPT_LINE:
  35.533 +        *val = pdev->dev->irq; /* taken from the pci_dev, not read from the device; NOTE(review): len is not consulted here */
  35.534 +        ret = 0;
  35.535 +        break;
  35.536 +
  35.537      default:
  35.538 +        ret = pci_config_read(0, bus, dev, func, reg, len, val);        
  35.539 +        VERBOSE_INFO("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x "
  35.540 +                     "val=0x%08x\n", bus, dev, func, reg, len, *val);
  35.541          break;
  35.542      }
  35.543  
  35.544 -    ret = pci_config_read(seg, bus, dev, func, reg, len, val);
  35.545 -
  35.546 -    DBG("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x val=0x%08x\n",
  35.547 -        bus, dev, func, reg, len, *val);
  35.548      return ret;
  35.549  }
  35.550  
  35.551 +
  35.552  /*
  35.553 - * handle a domains pci config space write accesses if it has access to
  35.554 - * the device.
  35.555 - * for some registers a state machine is maintained to fake out r/w access.
  35.556 - * By default no write access is allowed but we may change that in the future.
  35.557 + * Handle a PCI config space write access if the domain has access privileges. BAR and ROM-address writes are intercepted; other registers are written only when pdev->flags == ACC_WRITE.
  35.558   */
  35.559 -static long pci_cfgreg_write(int seg, int bus, int dev, int func, int reg,
  35.560 +static long pci_cfgreg_write(int bus, int dev, int func, int reg,
  35.561                               int len, u32 val)
  35.562  {
  35.563 -    int ret = 0;
  35.564 +    int ret;
  35.565      phys_dev_t *pdev;
  35.566  
  35.567 -    ret = check_dev_acc(current, bus, dev, func, &pdev);
  35.568 -    if ( ret != 0 )
  35.569 +    if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 ) /* -EINVAL, -ENODEV or -EPERM on failure */
  35.570          return ret;
  35.571  
  35.572      /* special treatment for some registers */
  35.573      switch (reg)
  35.574      {
  35.575      case PCI_BASE_ADDRESS_0:
  35.576 +        ret = do_base_address_access(pdev, ACC_WRITE, 0, len, &val); /* third argument is the BAR index */
  35.577 +        break;
  35.578 +
  35.579      case PCI_BASE_ADDRESS_1:
  35.580 +        ret = do_base_address_access(pdev, ACC_WRITE, 1, len, &val);
  35.581 +        break;
  35.582 +
  35.583      case PCI_BASE_ADDRESS_2:
  35.584 +        ret = do_base_address_access(pdev, ACC_WRITE, 2, len, &val);
  35.585 +        break;
  35.586 +
  35.587      case PCI_BASE_ADDRESS_3:
  35.588 +        ret = do_base_address_access(pdev, ACC_WRITE, 3, len, &val);
  35.589 +        break;
  35.590 +
  35.591      case PCI_BASE_ADDRESS_4:
  35.592 +        ret = do_base_address_access(pdev, ACC_WRITE, 4, len, &val);
  35.593 +        break;
  35.594 +
  35.595      case PCI_BASE_ADDRESS_5:
  35.596 -        ret = do_base_address_access (pdev, ACC_WRITE, seg, bus, dev, 
  35.597 -                                      func, reg, len, &val);
  35.598 -        return ret;
  35.599 +        ret = do_base_address_access(pdev, ACC_WRITE, 5, len, &val);
  35.600          break;
  35.601 +
  35.602      case PCI_ROM_ADDRESS:
  35.603 -        ret = do_rom_address_access (pdev, ACC_WRITE, seg, bus, dev, 
  35.604 -                                      func, reg, len, &val);
  35.605 -        return ret;
  35.606 +        ret = do_rom_address_access(pdev, ACC_WRITE, len, &val);
  35.607          break;        
  35.608 -#if 0
  35.609 -    case 0xe0: /* XXX some device drivers seem to write to this.... */
  35.610 -        printk("pci write hack allowed %02x:%02x:%02x: "
  35.611 -                   "reg=0x%02x len=0x%02x val=0x%08x\n",
  35.612 -                   bus, dev, func, reg, len, val);
  35.613 -        break;        
  35.614 -#endif
  35.615 +
  35.616      default:
  35.617 -        //if ( pdev->flags != ACC_WRITE ) 
  35.618 -        /* XXX for debug we disallow all write access */
  35.619 +        if ( pdev->flags != ACC_WRITE ) 
  35.620          {
  35.621 -            printk("pci write not allowed %02x:%02x:%02x: "
  35.622 -                   "reg=0x%02x len=0x%02x val=0x%08x\n",
  35.623 -                   bus, dev, func, reg, len, val);
  35.624 -            return -EPERM;
  35.625 +            INFO("pci write not allowed %02x:%02x:%02x: "
  35.626 +                 "reg=0x%02x len=0x%02x val=0x%08x\n",
  35.627 +                 bus, dev, func, reg, len, val);
  35.628 +            ret = -EPERM; /* device was not granted with ACC_WRITE; nothing is written */
  35.629 +        }
  35.630 +        else
  35.631 +        {
  35.632 +            ret = pci_config_write(0, bus, dev, func, reg, len, val); /* first argument was 'seg' before this change; now fixed at 0 */
  35.633 +            VERBOSE_INFO("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x "
  35.634 +                         "val=0x%08x\n", bus, dev, func, reg, len, val);
  35.635          }
  35.636          break;
  35.637      }
  35.638  
  35.639 -    ret = pci_config_write(seg, bus, dev, func, reg, len, val);
  35.640 -
  35.641 -    DBG("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x val=0x%08x\n",
  35.642 -        bus, dev, func, reg, len, val);
  35.643      return ret;
  35.644  }
  35.645  
  35.646 -/*
  35.647 - * 
  35.648 - * Interrupt handling
  35.649 - * 
  35.650 - */
  35.651  
  35.652 -
  35.653 -/*
  35.654 - * return the IRQ xen assigned to the device.
  35.655 - * This may be different to what is in the PCI confic space!
  35.656 - * XXX RN: I'm not sure we need this. we could just intercept PCI config
  35.657 - * reads on PCI_INTERRUPT_LINE and return the correct value.
  35.658 - */
  35.659 -static long pci_find_irq(int seg, int bus, int dev, int func, u32 *val)
  35.660 -{
  35.661 -    int ret = 0;
  35.662 -    phys_dev_t *pdev;
  35.663 -
  35.664 -    ret = check_dev_acc(current, bus, dev, func, &pdev);
  35.665 -    if ( ret != 0 )
  35.666 -        return ret;
  35.667 -
  35.668 -    *val = pdev->dev->irq;
  35.669 -    return 0;
  35.670 -}
  35.671 -
  35.672 -static void phys_dev_interrupt(int irq, void *dev_id, struct pt_regs *ptregs)
  35.673 +static long pci_probe_root_buses(u32 *busmask)
  35.674  {
  35.675 -    phys_dev_t          *pdev;
  35.676 -
  35.677 -    if ( (pdev = (phys_dev_t *)dev_id) == NULL )
  35.678 -    {
  35.679 -        printk("spurious interrupt, no proper device id, %d\n", irq);
  35.680 -        return;
  35.681 -    }
  35.682 -    
  35.683 -    /* XXX KAF: introduced race here? */
  35.684 -    set_bit(ST_IRQ_DELIVERED, &pdev->state);
  35.685 -    send_guest_pirq(pdev->owner, irq);
  35.686 -}
  35.687 -
  35.688 -/* this is called instead of the PICs original end handler. 
  35.689 - * the real end handler is only called once the guest signalled the handling
  35.690 - * of the event. */
  35.691 -static void end_virt_irq (unsigned int i)
  35.692 -{
  35.693 -    /* nothing */
  35.694 -}
  35.695 -
  35.696 -/*
  35.697 - * a guest request an IRQ from a device to be routed to it
  35.698 - * - shared interrupts are not allowed for now
  35.699 - * - we change the hw_irq handler to something else
  35.700 - */
  35.701 -static long pirq_request(int irq)
  35.702 -{
  35.703 -    int err;
  35.704 -    phys_dev_t *pdev = NULL, *t;
  35.705 -    hw_irq_controller *new, *orig;
  35.706 +    phys_dev_t *pdev;
  35.707      struct list_head *tmp;
  35.708  
  35.709 -    printk("request irq %d\n", irq);
  35.710 +    memset(busmask, 0, 256/8);
  35.711  
  35.712 -    /* find pdev */
  35.713 -
  35.714 -    list_for_each(tmp, &current->pcidev_list)
  35.715 +    list_for_each ( tmp, &current->pcidev_list )
  35.716      {
  35.717 -        t = list_entry(tmp,  phys_dev_t, node);
  35.718 -        if ( t->dev->irq == irq )
  35.719 -        {
  35.720 -            pdev = t;
  35.721 -            break;
  35.722 -        }
  35.723 -    }
  35.724 -
  35.725 -    if ( pdev == NULL )
  35.726 -    {
  35.727 -        printk("no device matching IRQ %d\n", irq);
  35.728 -        return -EINVAL;
  35.729 -    }
  35.730 -
  35.731 -    if ( irq >= MAX_IRQS )
  35.732 -    {
  35.733 -        printk("requested IRQ to big %d\n", irq);
  35.734 -        return -EINVAL;
  35.735 +        pdev = list_entry(tmp, phys_dev_t, node);
  35.736 +        set_bit(pdev->dev->bus->number, busmask);
  35.737      }
  35.738  
  35.739 -    if ( irqs[irq] != NULL )
  35.740 -    {
  35.741 -        printk ("irq already in use %d\n", irq);
  35.742 -        return -EPERM;
  35.743 -    }
  35.744 -
  35.745 -    /* allocate a hw_irq controller and copy the original */
  35.746 -    if ( !(new  = kmalloc(sizeof(hw_irq_controller), GFP_KERNEL)) )
  35.747 -    {
  35.748 -        printf("error allocating new irq controller\n");
  35.749 -        return -ENOMEM;
  35.750 -    }
  35.751 -    orig = irq_desc[irq].handler;
  35.752 -    new->typename = orig->typename;
  35.753 -    new->startup = orig->startup;
  35.754 -    new->shutdown = orig->shutdown;
  35.755 -    new->enable = orig->enable;
  35.756 -    new->disable = orig->disable;
  35.757 -    new->ack = orig->ack;
  35.758 -    new->end = orig->end;
  35.759 -    new->set_affinity = orig->set_affinity;
  35.760 -
  35.761 -    /* swap the end routine */
  35.762 -    new->end = end_virt_irq;
  35.763 -
  35.764 -    /* change the irq controllers */
  35.765 -    pdev->orig_handler = orig;
  35.766 -    pdev->new_handler  = new;
  35.767 -    irq_desc[irq].handler = new;
  35.768 -    irqs[irq] = pdev;
  35.769 -    
  35.770 -    printk ("setup handler %d\n", irq);
  35.771 -
  35.772 -    /* request the IRQ. this is not shared and we use a slow handler! */
  35.773 -    err = request_irq(irq, phys_dev_interrupt, SA_INTERRUPT,
  35.774 -                      "foo", (void *)pdev);
  35.775 -    if ( err )
  35.776 -    {
  35.777 -        printk("error requesting irq\n");
  35.778 -        /* restore original */
  35.779 -        irq_desc[irq].handler = pdev->orig_handler;
  35.780 -        /* free memory */
  35.781 -        kfree(new);
  35.782 -        return err;
  35.783 -    }
  35.784 -
  35.785 -    printk ("done\n");
  35.786 -
  35.787 -    return 0;
  35.788 -}
  35.789 -
  35.790 -long pirq_free(int irq)
  35.791 -{
  35.792 -    phys_dev_t *pdev;
  35.793 -
  35.794 -    if ( irq >= MAX_IRQS )
  35.795 -    {
  35.796 -        printk("requested IRQ to big %d\n", irq);
  35.797 -        return -EINVAL;
  35.798 -    }
  35.799 -
  35.800 -    if ( irqs[irq] == NULL )
  35.801 -    {
  35.802 -        printk ("irq not used %d\n", irq);
  35.803 -        return -EINVAL;
  35.804 -    }
  35.805 -
  35.806 -    pdev = irqs[irq];
  35.807 -
  35.808 -    /* shutdown IRQ */
  35.809 -    free_irq(irq, (void *)pdev);
  35.810 -
  35.811 -    /* restore irq controller  */
  35.812 -    irq_desc[irq].handler = pdev->orig_handler;
  35.813 -
  35.814 -    /* clean up */
  35.815 -    pdev->orig_handler = NULL;
  35.816 -    irqs[irq] = NULL;
  35.817 -    kfree(pdev->new_handler);
  35.818 -    pdev->new_handler = NULL;
  35.819 -
  35.820 -    printk("freed irq %d", irq);
  35.821 -    return 0;
  35.822 -}
  35.823 -
  35.824 -static long pci_unmask_irq(void)
  35.825 -{
  35.826 -#if 0
  35.827 -    clear_bit(ST_IRQ_DELIVERED, &pdev->state);
  35.828 -    pdev->orig_handler->end(irq);
  35.829 -#endif
  35.830      return 0;
  35.831  }
  35.832  
  35.833  
  35.834  /*
  35.835 - * demux hypervisor call.
  35.836 + * Demuxing hypercall.
  35.837   */
  35.838  long do_physdev_op(physdev_op_t *uop)
  35.839  {
  35.840 +    phys_dev_t *pdev;
  35.841      physdev_op_t op;
  35.842      long ret;
  35.843  
  35.844 @@ -742,28 +562,39 @@ long do_physdev_op(physdev_op_t *uop)
  35.845  
  35.846      switch ( op.cmd )
  35.847      {
  35.848 -    case PHYSDEVOP_CFGREG_READ:
  35.849 -        ret = pci_cfgreg_read(op.u.cfg_read.seg, op.u.cfg_read.bus,
  35.850 -                              op.u.cfg_read.dev, op.u.cfg_read.func,
  35.851 -                              op.u.cfg_read.reg, op.u.cfg_read.len,
  35.852 -                              &op.u.cfg_read.value);
  35.853 +    case PHYSDEVOP_PCI_CFGREG_READ:
  35.854 +        ret = pci_cfgreg_read(op.u.pci_cfgreg_read.bus,
  35.855 +                              op.u.pci_cfgreg_read.dev, 
  35.856 +                              op.u.pci_cfgreg_read.func,
  35.857 +                              op.u.pci_cfgreg_read.reg, 
  35.858 +                              op.u.pci_cfgreg_read.len,
  35.859 +                              &op.u.pci_cfgreg_read.value);
  35.860          break;
  35.861  
  35.862 -    case PHYSDEVOP_CFGREG_WRITE:
  35.863 -        ret = pci_cfgreg_write(op.u.cfg_write.seg, op.u.cfg_write.bus,
  35.864 -                               op.u.cfg_write.dev, op.u.cfg_write.func,
  35.865 -                               op.u.cfg_write.reg, op.u.cfg_write.len,
  35.866 -                               op.u.cfg_write.value);
  35.867 +    case PHYSDEVOP_PCI_CFGREG_WRITE:
  35.868 +        ret = pci_cfgreg_write(op.u.pci_cfgreg_write.bus,
  35.869 +                               op.u.pci_cfgreg_write.dev, 
  35.870 +                               op.u.pci_cfgreg_write.func,
  35.871 +                               op.u.pci_cfgreg_write.reg, 
  35.872 +                               op.u.pci_cfgreg_write.len,
  35.873 +                               op.u.pci_cfgreg_write.value);
  35.874          break;
  35.875  
  35.876 -    case PHYSDEVOP_FIND_IRQ:
  35.877 -        ret = pci_find_irq(op.u.find_irq.seg, op.u.find_irq.bus,
  35.878 -                           op.u.find_irq.dev, op.u.find_irq.func,
  35.879 -                           &op.u.find_irq.irq);
  35.880 +    case PHYSDEVOP_PCI_INITIALISE_DEVICE:
  35.881 +        if ( (ret = check_dev_acc(current, 
  35.882 +                                  op.u.pci_initialise_device.bus, 
  35.883 +                                  op.u.pci_initialise_device.dev, 
  35.884 +                                  op.u.pci_initialise_device.func, 
  35.885 +                                  &pdev)) == 0 )
  35.886 +            pcibios_enable_irq(pdev->dev);
  35.887 +        break;
  35.888 +
  35.889 +    case PHYSDEVOP_PCI_PROBE_ROOT_BUSES:
  35.890 +        ret = pci_probe_root_buses(op.u.pci_probe_root_buses.busmask);
  35.891          break;
  35.892  
  35.893      case PHYSDEVOP_UNMASK_IRQ:
  35.894 -        ret = pci_unmask_irq();
  35.895 +        ret = pirq_guest_unmask(current);
  35.896          break;
  35.897  
  35.898      default:
  35.899 @@ -776,23 +607,22 @@ long do_physdev_op(physdev_op_t *uop)
  35.900  }
  35.901  
  35.902  
  35.903 -/*
  35.904 - * Domain 0 has read access to all devices.
  35.905 - * XXX this is a bit of a hack
  35.906 - */
  35.907 +/* Domain 0 is given ACC_WRITE access to all normal-header (non-bridge) devices. */
  35.908  void physdev_init_dom0(struct task_struct *p)
  35.909  {
  35.910      struct pci_dev *dev;
  35.911      phys_dev_t *pdev;
  35.912  
  35.913 -    printk("Give DOM0 read access to all PCI devices\n");
  35.914 +    INFO("Give DOM0 read access to all PCI devices\n");
  35.915  
  35.916      pci_for_each_dev(dev)
  35.917      {
  35.918 -        /* add device */
  35.919 +        /* Skip bridges and other peculiarities for now. */
  35.920 +        if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL )
  35.921 +            continue;
  35.922          pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL);
  35.923          pdev->dev = dev;
  35.924 -        pdev->flags = ACC_READ;
  35.925 +        pdev->flags = ACC_WRITE;
  35.926          pdev->state = 0;
  35.927          pdev->owner = p;
  35.928          list_add(&pdev->node, &p->pcidev_list);
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/xen/common/sched_atropos.c	Fri Apr 02 14:03:03 2004 +0000
    36.3 @@ -0,0 +1,602 @@
    36.4 +/*
    36.5 + *	atropos.c
    36.6 + *	---------
    36.7 + *
    36.8 + * Copyright (c) 1994 University of Cambridge Computer Laboratory.
    36.9 + * This is part of Nemesis; consult your contract for terms and conditions.
   36.10 + *
   36.11 + * ID : $Id: atropos.c 1.1 Tue, 13 Apr 1999 13:30:49 +0100 dr10009 $
   36.12 + *
   36.13 + * This is the "atropos" CPU scheduler. 
   36.14 + */
   36.15 +
   36.16 +/* Ported to Xen's generic scheduler interface by Mark Williamson
   36.17 + * these modifications are (C) 2004 Intel Research Cambridge
   36.18 + */
   36.19 +
   36.20 +#include <xen/config.h>
   36.21 +#include <xen/init.h>
   36.22 +#include <xen/lib.h>
   36.23 +#include <xen/time.h>
   36.24 +#include <xen/sched.h>
   36.25 +#include <xen/sched-if.h>
   36.26 +#include <hypervisor-ifs/sched_ctl.h>
   36.27 +#include <xen/trace.h>
   36.28 +
   36.29 +#define ATROPOS_TASK_UNBLOCKED 16
   36.30 +#define ATROPOS_TASK_WAIT      32
   36.31 +
   36.32 +#define Activation_Reason_Allocated 1
   36.33 +#define Activation_Reason_Preempted 2
   36.34 +#define Activation_Reason_Extra     3
   36.35 +
   36.36 +/* Atropos-specific per-domain scheduling data, reached via DOM_INFO(task). */
   36.37 +struct at_dom_info
   36.38 +{
   36.39 +    /* MAW Xen additions */
   36.40 +    struct task_struct *owner; /* the task_struct this data belongs to */
   36.41 +    struct list_head waitq;    /* link in the per-CPU wait queue       */
   36.42 +    int reason;                /* reason domain was last scheduled; presumably an Activation_Reason_* value -- confirm */
   36.43 +
   36.44 +    /* (what remains of) the original fields */
   36.45 +
   36.46 +    s_time_t     deadline;       /* Next deadline; wait/run queues are ordered by it */
   36.47 +    s_time_t     prevddln;       /* Previous deadline            */
   36.48 +    
   36.49 +    s_time_t     remain;         /* Time remaining this period   */
   36.50 +    s_time_t     period;         /* Period of time allocation    */
   36.51 +    s_time_t     slice;          /* Length of allocation         */
   36.52 +    s_time_t     latency;        /* Unblocking latency           */
   36.53 +
   36.54 +    int          xtratime;       /* Prepared to accept extra?    */
   36.55 +};
   36.56 +
   36.57 +
   36.58 +struct at_cpu_info /* Atropos per-CPU data, reached via CPU_INF(schedule_data[cpu]) */
   36.59 +{
   36.60 +    struct list_head waitq; /* head of this CPU's wait queue; entries are at_dom_info.waitq links */
   36.61 +};
   36.62 +
   36.63 +
   36.64 +#define DOM_INFO(_p) ( (struct at_dom_info *)((_p)->sched_priv) ) /* per-domain Atropos data of task pointer _p */
   36.65 +#define CPU_INF(_p)  ( (struct at_cpu_info *)((_p).sched_priv) ) /* per-CPU Atropos data; _p is a schedule_data entry, not a pointer */
   36.66 +#define WAITQ(cpu)   (&( CPU_INF(schedule_data[cpu]) )->waitq ) /* wait queue list head for cpu */
   36.67 +#define RUNQ(cpu)    (&schedule_data[cpu].runqueue) /* run queue list head for cpu */
   36.68 +
   36.69 +#define BESTEFFORT_QUANTUM MILLISECS(5)
   36.70 +
   36.71 +/* SLAB cache for struct at_dom_info objects */
   36.72 +static kmem_cache_t *dom_info_cache;
   36.73 +
   36.74 +/** q_len - count the entries on list q by iterating over it with list_for_each */
   36.75 +static int q_len(struct list_head *q) 
   36.76 +{
   36.77 +    int i = 0;                 /* number of entries seen so far */
   36.78 +    struct list_head *tmp;     /* iteration cursor */
   36.79 +    list_for_each(tmp, q) i++;
   36.80 +    return i;
   36.81 +}
   36.82 +
   36.83 +
   36.84 +/** waitq_el - get the task_struct that owns a wait queue list element; l must be the waitq member of an at_dom_info */
   36.85 +static inline struct task_struct * waitq_el(struct list_head *l)
   36.86 +{
   36.87 +    struct at_dom_info *inf;
   36.88 +    inf = list_entry(l, struct at_dom_info, waitq); /* recover the enclosing at_dom_info from its waitq member */
   36.89 +    return inf->owner;
   36.90 +}
   36.91 +
   36.92 +
   36.93 +/*
   36.94 + * requeue
   36.95 + *
   36.96 + * Places the specified domain on the appropriate queue.
   36.97 + * The wait queue is ordered by the time at which the domain
   36.98 + * will receive more CPU time.  If a domain has no guaranteed time
   36.99 + * left then the domain will be placed on the WAIT queue until
  36.100 + * its next period. 
  36.101 + * The queue is chosen from sdom->state; both queues are kept in ascending deadline order.
  36.102 + * Note that domains can be on the wait queue with remain > 0 
  36.103 + * as a result of being blocked for a short time.
  36.104 + * These are scheduled in preference to domains with remain < 0 
  36.105 + * in an attempt to improve interactive performance.
  36.106 + * Callers in this file dequeue() sdom first; requeue() itself never unlinks it. */
  36.107 +static void requeue(struct task_struct *sdom)
  36.108 +{
  36.109 +    struct at_dom_info *inf = DOM_INFO(sdom);
  36.110 +    struct list_head *prev = WAITQ(sdom->processor); /* node to insert after */
  36.111 +    struct list_head *next;
  36.112 +
  36.113 +    if(sdom->state == ATROPOS_TASK_WAIT ||
  36.114 +       sdom->state == ATROPOS_TASK_UNBLOCKED )
  36.115 +    {
  36.116 +        /* insert into ordered wait queue */
  36.117 +
  36.118 +        prev = WAITQ(sdom->processor);
  36.119 +        list_for_each(next, WAITQ(sdom->processor))
  36.120 +        {
  36.121 +            struct at_dom_info *i = 
  36.122 +                list_entry(next, struct at_dom_info, waitq);
  36.123 +            if ( i->deadline > inf->deadline ) /* first strictly later deadline */
  36.124 +            {
  36.125 +                __list_add(&inf->waitq, prev, next);
  36.126 +                break;
  36.127 +            }
  36.128 +
  36.129 +            prev = next;
  36.130 +        }
  36.131 +
  36.132 +        /* put the domain on the end of the list if it hasn't been put
  36.133 +         * elsewhere (next is back at the head only if the loop never broke) */
  36.134 +        if ( next == WAITQ(sdom->processor) )
  36.135 +            list_add_tail(&inf->waitq, WAITQ(sdom->processor));
  36.136 +    }
  36.137 +    else if ( sdom->state == TASK_RUNNING )
  36.138 +    {
  36.139 +        /* insert into ordered run queue */
  36.140 +        prev = RUNQ(sdom->processor);
  36.141 +
  36.142 +        list_for_each(next, RUNQ(sdom->processor))
  36.143 +        {
  36.144 +            struct task_struct *p = list_entry(next, struct task_struct,
  36.145 +                                               run_list);
  36.146 +            /* stop at the first later deadline, or at the idle task, whichever comes first */
  36.147 +            if( DOM_INFO(p)->deadline > inf->deadline || is_idle_task(p) )
  36.148 +            {
  36.149 +                __list_add(&sdom->run_list, prev, next);
  36.150 +                break;
  36.151 +            }
  36.152 +
  36.153 +            prev = next;
  36.154 +        }
  36.155 +        /* same tail fallback as for the wait queue above */
  36.156 +        if ( next == RUNQ(sdom->processor) )
  36.157 +            list_add_tail(&sdom->run_list, RUNQ(sdom->processor));
  36.158 +    }
  36.159 +    /* silently ignore tasks in other states like BLOCKED, DYING, STOPPED, etc
  36.160 +     * - they shouldn't be on any queue */
  36.161 +}
  36.162 +
  36.163 +/* prepare a task to be added to scheduling: record its owner and install default Atropos parameters */
  36.164 +static void at_add_task(struct task_struct *p)
  36.165 +{
  36.166 +    s_time_t now = NOW();
  36.167 +
  36.168 +    ASSERT( p->sched_priv != NULL ); /* private info must already be allocated (see at_alloc_task) */
  36.169 +
  36.170 +    DOM_INFO(p)->owner = p;
  36.171 +    p->lastschd = now;
  36.172 + 
  36.173 +    if(is_idle_task(p))
  36.174 +      DOM_INFO(p)->slice = MILLISECS(5);
  36.175 +    /* NOTE(review): both branches below assign slice again, so the 5ms idle value above never survives */
  36.176 +    /* DOM 0's scheduling parameters must be set here in order for it to boot
  36.177 +     * the system! */
  36.178 +    if(p->domain == 0)
  36.179 +    {
  36.180 +        DOM_INFO(p)->remain = MILLISECS(15);
  36.181 +        DOM_INFO(p)->period = MILLISECS(20);
  36.182 +        DOM_INFO(p)->slice  = MILLISECS(15);
  36.183 +        DOM_INFO(p)->latency = MILLISECS(10);
  36.184 +        DOM_INFO(p)->xtratime = 1;
  36.185 +        DOM_INFO(p)->deadline = now;
  36.186 +        DOM_INFO(p)->prevddln = now;
  36.187 +    }
  36.188 +    else /* other domains run basically best effort unless otherwise set */
  36.189 +    {
  36.190 +        DOM_INFO(p)->remain = 0;
  36.191 +        DOM_INFO(p)->period = MILLISECS(10000);
  36.192 +        DOM_INFO(p)->slice  = MILLISECS(10);
  36.193 +        DOM_INFO(p)->latency = MILLISECS(10000);
  36.194 +        DOM_INFO(p)->xtratime = 1;
  36.195 +        DOM_INFO(p)->deadline = now + MILLISECS(10000);
  36.196 +        DOM_INFO(p)->prevddln = 0;
  36.197 +    }
  36.198 +    /* leave the wait queue link self-linked, i.e. on no queue yet */
  36.199 +    INIT_LIST_HEAD(&(DOM_INFO(p)->waitq));
  36.200 +}
  36.201 +
  36.202 +
  36.203 +/**
  36.204 + * dequeue - remove a domain from any queues it is on.
  36.205 + * @sdom:    the task to remove; must not be the idle task (asserted below)
  36.206 + */
  36.207 +static void dequeue(struct task_struct *sdom)
  36.208 +{
  36.209 +    struct at_dom_info *inf = DOM_INFO(sdom);
  36.210 +
  36.211 +    ASSERT(sdom->domain != IDLE_DOMAIN_ID);
  36.212 +    
  36.213 +    /* just delete it from all the queues! */
  36.214 +    list_del(&inf->waitq);
  36.215 +    INIT_LIST_HEAD(&inf->waitq); /* re-link to itself so the next list_del() acts on a valid node */
  36.216 +    
  36.217 +    if(__task_on_runqueue(sdom))
  36.218 +        __del_from_runqueue(sdom);
  36.219 +    /* NULL links presumably mark "not on the run queue" for __task_on_runqueue() - TODO confirm */
  36.220 +    sdom->run_list.next = NULL;
  36.221 +    sdom->run_list.prev = NULL;
  36.222 +
  36.223 +}
  36.224 +
  36.225 +
  36.226 +/*
  36.227 + * unblock
  36.228 + *
  36.229 + * This function deals with updating the sdom for a domain
  36.230 + * which has just been unblocked.  
  36.231 + * It is installed as the wake_up hook of sched_atropos_def.
  36.232 + * ASSERT: On entry, the sdom has already been removed from the block
  36.233 + * queue (it can be done more efficiently if we know that it
  36.234 + * is on the head of the queue) but its deadline field has not been
  36.235 + * restored yet.
  36.236 + */
  36.237 +static void unblock(struct task_struct *sdom)
  36.238 +{
  36.239 +    s_time_t time = NOW();
  36.240 +    struct at_dom_info *inf = DOM_INFO(sdom);
  36.241 +    
  36.242 +    dequeue(sdom); /* start from a known state: on neither queue */
  36.243 +
  36.244 +    /* We distinguish two cases... short and long blocks */
  36.245 +    if ( inf->deadline < time ) {
  36.246 +	/* The sdom has passed its deadline since it was blocked. 
  36.247 +	   Give it its new deadline based on the latency value. */
  36.248 +	inf->prevddln = time; 
  36.249 +	inf->deadline = time + inf->latency;
  36.250 +	inf->remain   = inf->slice;
  36.251 +        if(inf->remain > 0)
  36.252 +            sdom->state = TASK_RUNNING;
  36.253 +        else
  36.254 +            sdom->state = ATROPOS_TASK_WAIT;
  36.255 +        
  36.256 +    } else {
  36.257 +	/* We leave REMAIN intact, but put this domain on the WAIT
  36.258 +	   queue marked as recently unblocked.  It will be given
  36.259 +	   priority over other domains on the wait queue while
  36.260 +	   REMAIN>0 in a generous attempt to help it make up for its
  36.261 +	   own foolishness. */
  36.262 +	if(inf->remain > 0)
  36.263 +            sdom->state = ATROPOS_TASK_UNBLOCKED;
  36.264 +        else
  36.265 +            sdom->state = ATROPOS_TASK_WAIT;
  36.266 +    }
  36.267 +    /* requeue() picks the queue from the state chosen above */
  36.268 +    requeue(sdom);
  36.269 +
  36.270 +}
  36.271 +
  36.272 +/**
  36.273 + * ATROPOS - main scheduler function: deschedules the current domain, refills waiters whose deadline has passed and picks what runs next.
  36.274 + * @time: current time as supplied by the caller.  Returns the chosen task and the length of its slice. */
  36.275 +task_slice_t ksched_scheduler(s_time_t time)
  36.276 +{
  36.277 +    struct task_struct	*cur_sdom = current;  /* Current sdom           */
  36.278 +    s_time_t     newtime;
  36.279 +    s_time_t      ranfor;	        /* How long the domain ran      */
  36.280 +    struct task_struct	*sdom;	        /* tmp. scheduling domain	*/
  36.281 +    int   reason;                       /* reason for reschedule        */
  36.282 +    int   cpu;                          /* current CPU                  */
  36.283 +    struct at_dom_info *cur_info;
  36.284 +    static unsigned long waitq_rrobin = 0; /* slack-time cursor; a single instance shared by all CPUs */
  36.285 +    int i;
  36.286 +    task_slice_t ret;
  36.287 +
  36.288 +    ASSERT( cur_sdom != NULL);          /* checked before the first dereference */
  36.289 +    cpu      = cur_sdom->processor;
  36.290 +    cur_info = DOM_INFO(cur_sdom);
  36.291 +
  36.292 +    /* If we were spinning in the idle loop, there is no current
  36.293 +     * domain to deschedule. */
  36.294 +    if (is_idle_task(cur_sdom)) {
  36.295 +	goto deschedule_done;
  36.296 +    }
  36.297 +
  36.298 +    /*****************************
  36.299 +     * 
  36.300 +     * Deschedule the current scheduling domain
  36.301 +     *
  36.302 +     ****************************/
  36.303 +
  36.304 +   /* Record the time the domain was preempted and for how long it
  36.305 +       ran.  Work out if the domain is going to be blocked to save
  36.306 +       some pointless queue shuffling */
  36.307 +    cur_sdom->lastdeschd = time;
  36.308 +
  36.309 +    ranfor = (time - cur_sdom->lastschd);
  36.310 +
  36.311 +    dequeue(cur_sdom);
  36.312 +
  36.313 +    if ((cur_sdom->state == TASK_RUNNING) ||
  36.314 +        (cur_sdom->state == ATROPOS_TASK_UNBLOCKED)) {
  36.315 +
  36.316 +	/* In this block, we are doing accounting for an sdom which has 
  36.317 +	   been running in contracted time.  Note that this could now happen
  36.318 +	   even if the domain is on the wait queue (i.e. if it blocked) */
  36.319 +
  36.320 +	/* Deduct guaranteed time from the domain */
  36.321 +	cur_info->remain  -= ranfor;
  36.322 +
  36.323 +	/* If guaranteed time has run out... */
  36.324 +	if ( cur_info->remain <= 0 ) {
  36.325 +	    /* Move domain to correct position in WAIT queue */
  36.326 +            /* XXX sdom_unblocked doesn't need this since it is 
  36.327 +	     already in the correct place. */
  36.328 +	    cur_sdom->state = ATROPOS_TASK_WAIT;
  36.329 +	}
  36.330 +    }
  36.331 +
  36.332 +    requeue(cur_sdom);
  36.333 +
  36.334 +  deschedule_done:
  36.335 +
  36.336 +    /*****************************
  36.337 +     * 
  36.338 +     * We have now successfully descheduled the current sdom.
  36.339 +     * The next task is the allocate CPU time to any sdom it is due to.
  36.340 +     *
  36.341 +       ****************************/
  36.342 +    cur_sdom = NULL;
  36.343 +
  36.344 +    /*****************************
  36.345 +     * 
  36.346 +     * Allocate CPU time to any waiting domains who have passed their
  36.347 +     * period deadline.  If necessary, move them to run queue.
  36.348 +     *
  36.349 +     ****************************/
  36.350 +    while(!list_empty(WAITQ(cpu)) && 
  36.351 +	  DOM_INFO(sdom = waitq_el(WAITQ(cpu)->next))->deadline <= time ) {
  36.352 +
  36.353 +	struct at_dom_info *inf = DOM_INFO(sdom);
  36.354 +
  36.355 +        dequeue(sdom);
  36.356 +
  36.357 +	/* Domain begins a new period and receives a slice of CPU 
  36.358 +	 * If this domain has been blocking then throw away the
  36.359 +	 * rest of its remain - it can't be trusted */
  36.360 +	if (inf->remain > 0) 
  36.361 +	    inf->remain = inf->slice;
  36.362 +    	else 
  36.363 +	    inf->remain += inf->slice;
  36.364 +	inf->prevddln = inf->deadline;
  36.365 +	inf->deadline += inf->period;
  36.366 +        if(inf->remain > 0)
  36.367 +            sdom->state = TASK_RUNNING;
  36.368 +        else
  36.369 +            sdom->state = ATROPOS_TASK_WAIT;
  36.370 +
  36.371 +	/* Place on the appropriate queue */
  36.372 +	requeue(sdom);
  36.373 +    }
  36.374 +
  36.375 +    /*****************************
  36.376 +     * 
  36.377 +     * Next we need to pick an sdom to run.
  36.378 +     * If anything is actually 'runnable', we run that. 
  36.379 +     * If nothing is, we pick a waiting sdom to run optimistically.
  36.380 +     * If there aren't even any of those, we have to spin waiting for an
  36.381 +     * event or a suitable time condition to happen.
  36.382 +     *
  36.383 +     ****************************/
  36.384 +    
  36.385 +    /* we guarantee there's always something on the runqueue */
  36.386 +    cur_sdom = list_entry(RUNQ(cpu)->next,
  36.387 +                          struct task_struct, run_list);
  36.388 +
  36.389 +    cur_info = DOM_INFO(cur_sdom);
  36.390 +    newtime = time + cur_info->remain;
  36.391 +    reason  = (cur_info->prevddln > cur_sdom->lastschd) ?
  36.392 +      Activation_Reason_Allocated : Activation_Reason_Preempted;
  36.393 +
  36.394 +    /* MAW - the idle domain is always on the run queue.  We run from the
  36.395 +     * runqueue if it's NOT the idle domain or if there's nothing on the wait
  36.396 +     * queue */
  36.397 +    if (cur_sdom->domain == IDLE_DOMAIN_ID && !list_empty(WAITQ(cpu))) {
  36.398 +
  36.399 +        struct list_head *item;
  36.400 +
  36.401 +	/* Try running a domain on the WAIT queue - this part of the
  36.402 +	   scheduler isn't particularly efficient but then again, we
  36.403 +	   don't have any guaranteed domains to worry about. */
  36.404 +	
  36.405 +	/* See if there are any unblocked domains on the WAIT
  36.406 +	   queue who we can give preferential treatment to. */
  36.407 +        list_for_each(item, WAITQ(cpu))
  36.408 +        {
  36.409 +            struct at_dom_info *inf =
  36.410 +                list_entry(item, struct at_dom_info, waitq);
  36.411 +
  36.412 +            sdom = inf->owner;
  36.413 +            
  36.414 +	    if (sdom->state == ATROPOS_TASK_UNBLOCKED) {
  36.415 +		cur_sdom = sdom;
  36.416 +		cur_info  = inf;
  36.417 +		newtime  = time + inf->remain;
  36.418 +		reason   = Activation_Reason_Preempted;
  36.419 +		goto found;
  36.420 +	    }
  36.421 +	}
  36.422 +
  36.423 +        /* init values needed to approximate round-robin for slack time */
  36.424 +        i = 0;
  36.425 +        if ( waitq_rrobin >= q_len(WAITQ(cpu)))
  36.426 +            waitq_rrobin = 0;
  36.427 +        
  36.428 +	/* Last chance: pick a domain on the wait queue with the XTRA
  36.429 +	   flag set.  The waitq_rrobin cursor is used to cheaply achieve
  36.430 +	   an approximation of round-robin order */
  36.431 +        list_for_each(item, WAITQ(cpu))
  36.432 +            {
  36.433 +                struct at_dom_info *inf =
  36.434 +                    list_entry(item, struct at_dom_info, waitq);
  36.435 +                
  36.436 +                sdom = inf->owner;
  36.437 +
  36.438 +                if (inf->xtratime && i >= waitq_rrobin) {
  36.439 +                    cur_sdom = sdom;
  36.440 +                    cur_info  = inf;
  36.441 +                    newtime = time + BESTEFFORT_QUANTUM;
  36.442 +                    reason  = Activation_Reason_Extra;
  36.443 +                    waitq_rrobin = i + 1; /* set this value ready for next */
  36.444 +                    goto found;
  36.445 +                }
  36.446 +
  36.447 +                i++;
  36.448 +            }
  36.449 +        waitq_rrobin = 0; /* nothing eligible at or after the cursor: restart from the head next pass */
  36.450 +    }
  36.451 +
  36.452 +    found:
  36.453 +    /**********************
  36.454 +     * 
  36.455 +     * We now have to work out the time when we next need to
  36.456 +     * make a scheduling decision.  We set the alarm timer
  36.457 +     * to cause an interrupt at that time.
  36.458 +     *
  36.459 +     **********************/
  36.460 +
  36.461 +#define MIN(x,y) ( ( x < y ) ? x : y )
  36.462 +#define MAX(x,y) ( ( x > y ) ? x : y )
  36.463 +
  36.464 +    /* If we might be able to run a waiting domain before this one has */
  36.465 +    /* exhausted its time, cut short the time allocation */
  36.466 +    if (!list_empty(WAITQ(cpu)))
  36.467 +    {
  36.468 +	newtime = MIN(newtime,
  36.469 +                      DOM_INFO(waitq_el(WAITQ(cpu)->next))->deadline);
  36.470 +    }
  36.471 +
  36.472 +    /* don't allow pointlessly small time slices */
  36.473 +    newtime = MAX(newtime, time + BESTEFFORT_QUANTUM);
  36.474 +    
  36.475 +    ret.task = cur_sdom;
  36.476 +    ret.time = newtime - time;
  36.477 +
  36.478 +    cur_sdom->min_slice = newtime - time;
  36.479 +    DOM_INFO(cur_sdom)->reason = reason;
  36.480 +
  36.481 +    TRACE_2D(0, (cur_sdom->domain >> 32), ((u32)cur_sdom->domain));
  36.482 + 
  36.483 +    return ret;
  36.484 +}
  36.485 +
  36.486 +
  36.487 +/* set up some private data structures: per-CPU wait queues and the dom-info cache; 0 on success, -1 on failure */
  36.488 +static int at_init_scheduler()
  36.489 +{
  36.490 +    int i;
  36.491 +    
  36.492 +    for( i = 0; i < NR_CPUS; i++)
  36.493 +    {
  36.494 +        /* store through the field itself: CPU_INF() is a cast, not a portable lvalue */
  36.495 +        if( (schedule_data[i].sched_priv =
  36.496 +                 kmalloc(sizeof(struct at_cpu_info), GFP_KERNEL)) == NULL )
  36.497 +            return -1;
  36.498 +        INIT_LIST_HEAD(WAITQ(i));
  36.499 +    }
  36.500 +
  36.501 +    dom_info_cache = kmem_cache_create("Atropos dom info",
  36.502 +                                       sizeof(struct at_dom_info),
  36.503 +                                       0, 0, NULL, NULL);
  36.504 +
  36.505 +    return ( dom_info_cache == NULL ) ? -1 : 0; /* cache creation can fail too */
  36.506 +}
  36.507 +
  36.508 +/* run queue dump hook: report how many entries sit on @cpu's wait and run queues */
  36.509 +static void at_dump_cpu_state(int cpu)
  36.510 +{
  36.511 +    int nr_waiting  = q_len(WAITQ(cpu));
  36.512 +    int nr_runnable = q_len(RUNQ(cpu));
  36.513 +    printk("Waitq len: %d Runq len: %d ", nr_waiting, nr_runnable);
  36.514 +}
  36.515 +
  36.516 +/* run queue dump hook: print @p's last-scheduled timestamp and its xtratime flag */
  36.517 +static void at_dump_runq_el(struct task_struct *p)
  36.518 +{
  36.519 +    struct at_dom_info *inf = DOM_INFO(p);
  36.520 +    printk("lastschd = %llu, xtratime = %d ", p->lastschd, inf->xtratime);
  36.521 +}
  36.522 +
  36.523 +
  36.524 +/* set or fetch domain scheduling parameters, as selected by cmd->direction; a PUT with period == 0 is refused with -1 */
  36.525 +static int at_adjdom(struct task_struct *p, struct sched_adjdom_cmd *cmd)
  36.526 +{
  36.527 +    if ( cmd->direction == SCHED_INFO_PUT )
  36.528 +    {
  36.529 +        if ( cmd->u.atropos.period == 0 ) return -1; /* the refill loop could never advance the deadline */
  36.530 +        DOM_INFO(p)->period   = cmd->u.atropos.period;
  36.531 +        DOM_INFO(p)->slice    = cmd->u.atropos.slice;
  36.532 +        DOM_INFO(p)->latency  = cmd->u.atropos.latency;
  36.533 +        DOM_INFO(p)->xtratime = !!cmd->u.atropos.xtratime; /* normalise to 0/1 */
  36.534 +    }
  36.535 +    else if ( cmd->direction == SCHED_INFO_GET )
  36.536 +    {
  36.537 +        cmd->u.atropos.period   = DOM_INFO(p)->period;
  36.538 +        cmd->u.atropos.slice    = DOM_INFO(p)->slice;
  36.539 +        cmd->u.atropos.latency  = DOM_INFO(p)->latency;
  36.540 +        cmd->u.atropos.xtratime = DOM_INFO(p)->xtratime;
  36.541 +    }
  36.542 +
  36.543 +    return 0;
  36.544 +}
  36.545 +
  36.546 +/** at_alloc_task - allocate zeroed private info for a task; returns 0, or -1 if the allocation fails */
  36.547 +static int at_alloc_task(struct task_struct *p)
  36.548 +{
  36.549 +    ASSERT(p != NULL);
  36.550 +    /* store through the field itself: DOM_INFO() is a cast, not a portable lvalue */
  36.551 +    if( (p->sched_priv = kmem_cache_alloc(dom_info_cache, GFP_KERNEL)) == NULL )
  36.552 +        return -1;
  36.553 +
  36.554 +    if(p->domain == IDLE_DOMAIN_ID)
  36.555 +      printk("ALLOC IDLE ON CPU %d\n", p->processor);
  36.556 +
  36.557 +    memset(DOM_INFO(p), 0, sizeof(struct at_dom_info));
  36.558 +
  36.559 +    return 0;
  36.560 +}
  36.561 +
  36.562 +
  36.563 +/* hand the task's private scheduler info back to the dom-info cache */
  36.564 +static void at_free_task(struct task_struct *p)
  36.565 +{
  36.566 +    kmem_cache_free(dom_info_cache, p->sched_priv);
  36.567 +}
  36.568 +
  36.569 +/* print the name of an Atropos-private task state; 0 if @state was recognised, -1 otherwise */
  36.570 +static int at_prn_state(int state)
  36.571 +{
  36.572 +    if ( state == ATROPOS_TASK_UNBLOCKED )
  36.573 +    {
  36.574 +        printk("Unblocked");
  36.575 +        return 0;
  36.576 +    }
  36.577 +
  36.578 +    if ( state == ATROPOS_TASK_WAIT )
  36.579 +    {
  36.580 +        printk("Wait");
  36.581 +        return 0;
  36.582 +    }
  36.583 +
  36.584 +    /* any other value is not one of ours:
  36.585 +     * nothing is printed for it */
  36.586 +    return -1;
  36.587 +}
  36.588 +    
  36.589 +/* Atropos hook table; no rem_task, do_block, control or reschedule hook is provided here */
  36.590 +struct scheduler sched_atropos_def = {
  36.591 +    .name           = "Atropos Soft Real Time Scheduler",
  36.592 +    .opt_name       = "atropos",
  36.593 +    .sched_id       = SCHED_ATROPOS,
  36.594 +
  36.595 +    .init_scheduler = at_init_scheduler,
  36.596 +    .alloc_task     = at_alloc_task,
  36.597 +    .add_task       = at_add_task,
  36.598 +    .free_task      = at_free_task,
  36.599 +    .wake_up        = unblock,
  36.600 +    .do_schedule    = ksched_scheduler,
  36.601 +    .adjdom         = at_adjdom,
  36.602 +    .dump_cpu_state = at_dump_cpu_state,
  36.603 +    .dump_runq_el   = at_dump_runq_el,
  36.604 +    .prn_state      = at_prn_state,
  36.605 +};
    37.1 --- a/xen/common/sched_bvt.c	Fri Apr 02 13:58:27 2004 +0000
    37.2 +++ b/xen/common/sched_bvt.c	Fri Apr 02 14:03:03 2004 +0000
    37.3 @@ -151,6 +151,7 @@ void bvt_wake_up(struct task_struct *p)
    37.4      struct bvt_dom_info *inf = BVT_INFO(p);
    37.5  
    37.6      ASSERT(inf != NULL);
    37.7 +    
    37.8  
    37.9      /* set the BVT parameters */
   37.10      if (inf->avt < CPU_SVT(p->processor))
   37.11 @@ -166,19 +167,25 @@ void bvt_wake_up(struct task_struct *p)
   37.12  /* 
   37.13   * Block the currently-executing domain until a pertinent event occurs.
   37.14   */
   37.15 -static long bvt_do_block(struct task_struct *p)
   37.16 +static void bvt_do_block(struct task_struct *p)
   37.17  {
   37.18      BVT_INFO(p)->warpback = 0; 
   37.19 -    return 0;
   37.20  }
   37.21  
   37.22  /* Control the scheduler. */
   37.23  int bvt_ctl(struct sched_ctl_cmd *cmd)
   37.24  {
   37.25      struct bvt_ctl *params = &cmd->u.bvt;
   37.26 +
   37.27 +    if ( cmd->direction == SCHED_INFO_PUT )
   37.28 +    { 
   37.29 +        ctx_allow = params->ctx_allow;
   37.30 +    }
   37.31 +    else
   37.32 +    {
   37.33 +        params->ctx_allow = ctx_allow;
   37.34 +    }
   37.35      
   37.36 -    ctx_allow = params->ctx_allow;
   37.37 -
   37.38      return 0;
   37.39  }
   37.40  
   37.41 @@ -187,24 +194,40 @@ int bvt_adjdom(struct task_struct *p,
   37.42                 struct sched_adjdom_cmd *cmd)
   37.43  {
   37.44      struct bvt_adjdom *params = &cmd->u.bvt;
   37.45 -    unsigned long mcu_adv = params->mcu_adv,
   37.46 -                    warp  = params->warp,
   37.47 -                    warpl = params->warpl,
   37.48 -                    warpu = params->warpu;
   37.49 -    
   37.50 -    struct bvt_dom_info *inf = BVT_INFO(p);
   37.51 +    unsigned long flags;
   37.52  
   37.53 -    /* Sanity -- this can avoid divide-by-zero. */
   37.54 -    if ( mcu_adv == 0 )
   37.55 -        return -EINVAL;
   37.56 +    if ( cmd->direction == SCHED_INFO_PUT )
   37.57 +    {
   37.58 +        unsigned long mcu_adv = params->mcu_adv,
   37.59 +            warp  = params->warp,
   37.60 +            warpl = params->warpl,
   37.61 +            warpu = params->warpu;
   37.62 +        
   37.63 +        struct bvt_dom_info *inf = BVT_INFO(p);
   37.64 +        
   37.65 +        /* Sanity -- this can avoid divide-by-zero. */
   37.66 +        if ( mcu_adv == 0 )
   37.67 +            return -EINVAL;
   37.68 +        
   37.69 +        spin_lock_irqsave(&schedule_lock[p->processor], flags);   
   37.70 +        inf->mcu_advance = mcu_adv;
   37.71 +        inf->warp = warp;
   37.72 +        inf->warpl = warpl;
   37.73 +        inf->warpu = warpu;
   37.74 +        spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
   37.75 +    }
   37.76 +    else if ( cmd->direction == SCHED_INFO_GET )
   37.77 +    {
   37.78 +        struct bvt_dom_info *inf = BVT_INFO(p);
   37.79  
   37.80 -    spin_lock_irq(&schedule_lock[p->processor]);   
   37.81 -    inf->mcu_advance = mcu_adv;
   37.82 -    inf->warp = warp;
   37.83 -    inf->warpl = warpl;
   37.84 -    inf->warpu = warpu;
   37.85 -    spin_unlock_irq(&schedule_lock[p->processor]); 
   37.86 -
   37.87 +        spin_lock_irqsave(&schedule_lock[p->processor], flags);   
   37.88 +        params->mcu_adv = inf->mcu_advance;
   37.89 +        params->warp    = inf->warp;
   37.90 +        params->warpl   = inf->warpl;
   37.91 +        params->warpu   = inf->warpu;
   37.92 +        spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
   37.93 +    }
   37.94 +    
   37.95      return 0;
   37.96  }
   37.97  
    38.1 --- a/xen/common/sched_rrobin.c	Fri Apr 02 13:58:27 2004 +0000
    38.2 +++ b/xen/common/sched_rrobin.c	Fri Apr 02 14:03:03 2004 +0000
    38.3 @@ -1,5 +1,5 @@
    38.4  /****************************************************************************
    38.5 - * Very stupid Round Robin Scheduler for Xen
    38.6 + * Round Robin Scheduler for Xen
    38.7   *
    38.8   * by Mark Williamson (C) 2004 Intel Research Cambridge
    38.9   */
   38.10 @@ -33,7 +33,15 @@ static task_slice_t rr_do_schedule(s_tim
   38.11  
   38.12  static int rr_ctl(struct sched_ctl_cmd *cmd)
   38.13  {
   38.14 -    rr_slice = cmd->u.rrobin.slice;
   38.15 +    if(cmd->direction == SCHED_INFO_PUT)
   38.16 +    {
   38.17 +        rr_slice = cmd->u.rrobin.slice;
   38.18 +    }
   38.19 +    else /* cmd->direction == SCHED_INFO_GET */
   38.20 +    {
   38.21 +        cmd->u.rrobin.slice = rr_slice;
   38.22 +    }
   38.23 +    
   38.24      return 0;
   38.25  }
   38.26  
    39.1 --- a/xen/common/schedule.c	Fri Apr 02 13:58:27 2004 +0000
    39.2 +++ b/xen/common/schedule.c	Fri Apr 02 14:03:03 2004 +0000
    39.3 @@ -41,7 +41,7 @@
    39.4  #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
    39.5  
    39.6  /*
    39.7 - * XXX Pull trace-related #defines out of here and into an auto-generated
    39.8 + * TODO MAW pull trace-related #defines out of here and into an auto-generated
    39.9   * header file later on!
   39.10   */
   39.11  #define TRC_SCHED_DOM_ADD             0x00010000
   39.12 @@ -68,23 +68,25 @@ static void t_timer_fn(unsigned long unu
   39.13  static void dom_timer_fn(unsigned long data);
   39.14  static void fallback_timer_fn(unsigned long unused);
   39.15  
   39.16 -/* This is global for now so that private implementations can reach it. */
   39.17 +/* This is global for now so that private implementations can reach it */
   39.18  schedule_data_t schedule_data[NR_CPUS];
   39.19  
   39.20  /*
   39.21 - * XXX It would be nice if the schedulers array could get populated
   39.22 + * TODO: It would be nice if the schedulers array could get populated
   39.23   * automagically without having to hack the code in here.
   39.24   */
   39.25 -extern struct scheduler sched_bvt_def, sched_rrobin_def;
   39.26 +extern struct scheduler sched_bvt_def, sched_rrobin_def, sched_atropos_def;
   39.27  static struct scheduler *schedulers[] = { &sched_bvt_def,
   39.28                                            &sched_rrobin_def,
   39.29 +                                          &sched_atropos_def,
   39.30                                            NULL};
   39.31  
   39.32  /* Operations for the current scheduler. */
   39.33  static struct scheduler ops;
   39.34  
   39.35 -#define SCHED_FN(fn, ...) \
   39.36 -    ((ops.fn != NULL) ? (ops.fn(__VA_ARGS__)) : (typeof(ops.fn(__VA_ARGS__)))0)
   39.37 +#define SCHED_OP(fn, ...)                                 \
   39.38 +         (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
   39.39 +          : (typeof(ops.fn(__VA_ARGS__)))0 )
   39.40  
   39.41  spinlock_t schedule_lock[NR_CPUS] __cacheline_aligned;
   39.42  
   39.43 @@ -101,7 +103,7 @@ extern kmem_cache_t *task_struct_cachep;
   39.44  
   39.45  void free_task_struct(struct task_struct *p)
   39.46  {
   39.47 -    SCHED_FN(free_task, p);
   39.48 +    SCHED_OP(free_task, p);
   39.49      kmem_cache_free(task_struct_cachep, p);
   39.50  }
   39.51  
   39.52 @@ -114,15 +116,15 @@ struct task_struct *alloc_task_struct(vo
   39.53  
   39.54      if ( (p = kmem_cache_alloc(task_struct_cachep,GFP_KERNEL)) == NULL )
   39.55          return NULL;
   39.56 -
   39.57 -    memset(p, 0, sizeof(*p));    
   39.58 +    
   39.59 +    memset(p, 0, sizeof(*p));
   39.60  
   39.61 -    if ( SCHED_FN(alloc_task, p) < 0)
   39.62 +    if ( SCHED_OP(alloc_task, p) < 0 )
   39.63      {
   39.64 -        kmem_cache_free(task_struct_cachep, p);
   39.65 +        kmem_cache_free(task_struct_cachep,p);
   39.66          return NULL;
   39.67      }
   39.68 -    
   39.69 +
   39.70      return p;
   39.71  }
   39.72  
   39.73 @@ -146,7 +148,7 @@ void sched_add_domain(struct task_struct
   39.74          schedule_data[p->processor].idle = p;
   39.75      }
   39.76  
   39.77 -    SCHED_FN(add_task, p);
   39.78 +    SCHED_OP(add_task, p);
   39.79  
   39.80      TRACE_3D(TRC_SCHED_DOM_ADD, _HIGH32(p->domain), _LOW32(p->domain), p);
   39.81  }
   39.82 @@ -160,7 +162,7 @@ int sched_rem_domain(struct task_struct 
   39.83  
   39.84      rem_ac_timer(&p->timer);
   39.85  
   39.86 -    SCHED_FN(rem_task, p);
   39.87 +    SCHED_OP(rem_task, p);
   39.88  
   39.89      TRACE_3D(TRC_SCHED_DOM_REM, _HIGH32(p->domain), _LOW32(p->domain), p);
   39.90  
   39.91 @@ -172,9 +174,9 @@ void init_idle_task(void)
   39.92      unsigned long flags;
   39.93      struct task_struct *p = current;
   39.94  
   39.95 -    if ( SCHED_FN(alloc_task, p) < 0 )
   39.96 -        panic("Failed to allocate scheduler private data for idle task");
   39.97 -    SCHED_FN(add_task, p);
   39.98 +    if ( SCHED_OP(alloc_task, p) < 0)
   39.99 +		panic("Failed to allocate scheduler private data for idle task");
  39.100 +    SCHED_OP(add_task, p);
  39.101  
  39.102      spin_lock_irqsave(&schedule_lock[p->processor], flags);
  39.103      p->has_cpu = 1;
  39.104 @@ -190,12 +192,12 @@ void __wake_up(struct task_struct *p)
  39.105  
  39.106      ASSERT(p->state != TASK_DYING);
  39.107  
  39.108 -    if ( unlikely(__task_on_runqueue(p)) )
  39.109 +    if ( unlikely(__task_on_runqueue(p)) )        
  39.110          return;
  39.111  
  39.112      p->state = TASK_RUNNING;
  39.113  
  39.114 -    SCHED_FN(wake_up, p);
  39.115 +    SCHED_OP(wake_up, p);
  39.116  
  39.117  #ifdef WAKEUP_HISTO
  39.118      p->wokenup = NOW();
  39.119 @@ -217,7 +219,7 @@ void wake_up(struct task_struct *p)
  39.120  static long do_block(void)
  39.121  {
  39.122      ASSERT(current->domain != IDLE_DOMAIN_ID);
  39.123 -    clear_bit(0, &current->shared_info->evtchn_upcall_mask);
  39.124 +    current->shared_info->vcpu_data[0].evtchn_upcall_mask = 0;
  39.125      current->state = TASK_INTERRUPTIBLE;
  39.126      TRACE_2D(TRC_SCHED_BLOCK, current->domain, current);
  39.127      __enter_scheduler();
  39.128 @@ -327,15 +329,12 @@ long do_set_timer_op(unsigned long timeo
  39.129      return 0;
  39.130  }
  39.131  
  39.132 +/** sched_id - fetch ID of current scheduler */
  39.133 +int sched_id()
  39.134 +{
  39.135 +    return ops.sched_id;
  39.136 +}
  39.137  
  39.138 -/**
  39.139 - * sched_ctl - dispatch a scheduler control operation
  39.140 - * @cmd:       the command passed in the dom0 op
  39.141 - *
  39.142 - * Given a generic scheduler control operation, call the control function for
  39.143 - * the scheduler in use, passing the appropriate control information from the
  39.144 - * union supplied.
  39.145 - */
  39.146  long sched_ctl(struct sched_ctl_cmd *cmd)
  39.147  {
  39.148      TRACE_0D(TRC_SCHED_CTL);
  39.149 @@ -343,7 +342,7 @@ long sched_ctl(struct sched_ctl_cmd *cmd
  39.150      if ( cmd->sched_id != ops.sched_id )
  39.151          return -EINVAL;
  39.152  
  39.153 -    return SCHED_FN(control, cmd);
  39.154 +    return SCHED_OP(control, cmd);
  39.155  }
  39.156  
  39.157  
  39.158 @@ -355,6 +354,9 @@ long sched_adjdom(struct sched_adjdom_cm
  39.159      if ( cmd->sched_id != ops.sched_id )
  39.160          return -EINVAL;
  39.161  
  39.162 +    if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET )
  39.163 +        return -EINVAL;
  39.164 +
  39.165      p = find_domain_by_id(cmd->domain);
  39.166  
  39.167      if( p == NULL )
  39.168 @@ -362,7 +364,7 @@ long sched_adjdom(struct sched_adjdom_cm
  39.169  
  39.170      TRACE_2D(TRC_SCHED_ADJDOM, _HIGH32(p->domain), _LOW32(p->domain));
  39.171  
  39.172 -    SCHED_FN(adjdom, p, cmd);
  39.173 +    SCHED_OP(adjdom, p, cmd);
  39.174  
  39.175      put_task_struct(p); 
  39.176      return 0;
  39.177 @@ -378,7 +380,7 @@ long sched_adjdom(struct sched_adjdom_cm
  39.178   */
  39.179  unsigned long __reschedule(struct task_struct *p)
  39.180  {
  39.181 -    int cpu = p->processor;
  39.182 +       int cpu = p->processor;
  39.183      struct task_struct *curr;
  39.184      s_time_t now, min_time;
  39.185  
  39.186 @@ -403,7 +405,7 @@ unsigned long __reschedule(struct task_s
  39.187      if ( schedule_data[cpu].s_timer.expires > min_time + TIME_SLOP )
  39.188          mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
  39.189  
  39.190 -    return SCHED_FN(reschedule, p);
  39.191 +    return SCHED_OP(reschedule, p);
  39.192  }
  39.193  
  39.194  void reschedule(struct task_struct *p)
  39.195 @@ -412,6 +414,7 @@ void reschedule(struct task_struct *p)
  39.196  
  39.197      spin_lock_irqsave(&schedule_lock[p->processor], flags);
  39.198      cpu_mask = __reschedule(p);
  39.199 +
  39.200      spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
  39.201  
  39.202  #ifdef CONFIG_SMP
  39.203 @@ -447,7 +450,6 @@ asmlinkage void __enter_scheduler(void)
  39.204      ASSERT(!in_interrupt());
  39.205      ASSERT(__task_on_runqueue(prev));
  39.206      ASSERT(prev->state != TASK_UNINTERRUPTIBLE);
  39.207 -    ASSERT(prev != NULL);
  39.208  
  39.209      if ( prev->state == TASK_INTERRUPTIBLE )
  39.210      {
  39.211 @@ -455,19 +457,16 @@ asmlinkage void __enter_scheduler(void)
  39.212          if ( signal_pending(prev) )
  39.213              prev->state = TASK_RUNNING;
  39.214          else
  39.215 -            SCHED_FN(do_block, prev);
  39.216 +            SCHED_OP(do_block, prev);
  39.217      }
  39.218  
  39.219 +    prev->cpu_time += now - prev->lastschd;
  39.220 +
  39.221      /* get policy-specific decision on scheduling... */
  39.222      next_slice = ops.do_schedule(now);
  39.223  
  39.224      r_time = next_slice.time;
  39.225 -    next   = next_slice.task;
  39.226 -
  39.227 -    if ( likely(!is_idle_task(prev)) ) 
  39.228 -        prev->cpu_time += (now - prev->lastschd);
  39.229 -
  39.230 -    /* now, switch to the new task... */
  39.231 +    next = next_slice.task;
  39.232  
  39.233      prev->has_cpu = 0;
  39.234      next->has_cpu = 1;
  39.235 @@ -511,8 +510,6 @@ asmlinkage void __enter_scheduler(void)
  39.236  
  39.237      TRACE_2D(TRC_SCHED_SWITCH, next->domain, next);
  39.238  
  39.239 -    ASSERT(next->processor == current->processor);
  39.240 -
  39.241      switch_to(prev, next);
  39.242      
  39.243      if ( unlikely(prev->state == TASK_DYING) ) 
  39.244 @@ -547,7 +544,6 @@ int idle_cpu(int cpu)
  39.245  static void s_timer_fn(unsigned long unused)
  39.246  {
  39.247      TRACE_0D(TRC_SCHED_S_TIMER_FN);
  39.248 -    
  39.249      set_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events);
  39.250      perfc_incrc(sched_irq);
  39.251  }
  39.252 @@ -559,6 +555,8 @@ static void t_timer_fn(unsigned long unu
  39.253  
  39.254      TRACE_0D(TRC_SCHED_T_TIMER_FN);
  39.255  
  39.256 +    TRACE_0D(TRC_SCHED_T_TIMER_FN);
  39.257 +
  39.258      if ( !is_idle_task(p) )
  39.259          send_guest_virq(p, VIRQ_TIMER);
  39.260  
  39.261 @@ -638,10 +636,8 @@ void __init scheduler_init(void)
  39.262      if ( ops.do_schedule == NULL)
  39.263          panic("Chosen scheduler has NULL do_schedule!");
  39.264  
  39.265 -    if ( SCHED_FN(init_scheduler) < 0 )
  39.266 +    if ( SCHED_OP(init_scheduler) < 0 )
  39.267          panic("Initialising scheduler failed!");
  39.268 -
  39.269 -    SCHED_FN(add_task, &idle0_task);
  39.270  }
  39.271  
  39.272  /*
  39.273 @@ -681,7 +677,7 @@ static void dump_rqueue(struct list_head
  39.274      list_for_each (list, queue) {
  39.275          p = list_entry(list, struct task_struct, run_list);
  39.276          printk("%3d: %llu has=%c ", loop++, p->domain, p->has_cpu ? 'T':'F');
  39.277 -        SCHED_FN(dump_runq_el, p);
  39.278 +        SCHED_OP(dump_runq_el, p);
  39.279          printk("c=0x%X%08X\n", (u32)(p->cpu_time>>32), (u32)p->cpu_time);
  39.280          printk("         l: %lx n: %lx  p: %lx\n",
  39.281                 (unsigned long)list, (unsigned long)list->next,
  39.282 @@ -697,18 +693,48 @@ void dump_runq(u_char key, void *dev_id,
  39.283      int i;
  39.284  
  39.285      printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
  39.286 -    SCHED_FN(dump_settings);
  39.287 +    SCHED_OP(dump_settings);
  39.288      printk("NOW=0x%08X%08X\n",  (u32)(now>>32), (u32)now); 
  39.289      for (i = 0; i < smp_num_cpus; i++) {
  39.290          spin_lock_irqsave(&schedule_lock[i], flags);
  39.291          printk("CPU[%02d] ", i);
  39.292 -        SCHED_FN(dump_cpu_state,i);
  39.293 +        SCHED_OP(dump_cpu_state,i);
  39.294          dump_rqueue(&schedule_data[i].runqueue, "rq"); 
  39.295          spin_unlock_irqrestore(&schedule_lock[i], flags);
  39.296      }
  39.297      return; 
  39.298  }
  39.299  
  39.300 +/* print human-readable "state", given the numeric code for that state */
  39.301 +void sched_prn_state(int state)
  39.302 +{
  39.303 +    int ret = 0;
  39.304 +    
  39.305 +    switch(state)
  39.306 +    {
  39.307 +    case TASK_RUNNING:
  39.308 +        printk("Running");
  39.309 +        break;
  39.310 +    case TASK_INTERRUPTIBLE:
  39.311 +        printk("Int sleep");
  39.312 +        break;
  39.313 +    case TASK_UNINTERRUPTIBLE:
  39.314 +        printk("UInt sleep");
  39.315 +        break;
  39.316 +    case TASK_STOPPED:
  39.317 +        printk("Stopped");
  39.318 +        break;
  39.319 +    case TASK_DYING:
  39.320 +        printk("Dying");
  39.321 +        break;
  39.322 +    default:
  39.323 +        ret = SCHED_OP(prn_state, state);
  39.324 +    }
  39.325 +
  39.326 +    if ( ret != 0 )
  39.327 +        printk("Unknown");
  39.328 +}
  39.329 +
  39.330  #if defined(WAKEUP_HISTO) || defined(BLOCKTIME_HISTO)
  39.331  void print_sched_histo(u_char key, void *dev_id, struct pt_regs *regs)
  39.332  {
    40.1 --- a/xen/drivers/Makefile	Fri Apr 02 13:58:27 2004 +0000
    40.2 +++ b/xen/drivers/Makefile	Fri Apr 02 14:03:03 2004 +0000
    40.3 @@ -3,11 +3,13 @@ default:
    40.4  	$(MAKE) -C char
    40.5  	$(MAKE) -C pci
    40.6  	$(MAKE) -C net
    40.7 -	$(MAKE) -C block
    40.8 -	$(MAKE) -C cdrom
    40.9 -	$(MAKE) -C ide
   40.10 -	$(MAKE) -C scsi
   40.11 -	$(MAKE) -C message/fusion
   40.12 +	if [ "$(nodev)" != "y" ]; then \
   40.13 +	$(MAKE) -C block ; \
   40.14 +	$(MAKE) -C cdrom ; \
   40.15 +	$(MAKE) -C ide ; \
   40.16 +	$(MAKE) -C scsi ; \
   40.17 +	$(MAKE) -C message/fusion ; \
   40.18 +	fi
   40.19  
   40.20  clean:
   40.21  	$(MAKE) -C char clean
    41.1 --- a/xen/drivers/char/console.c	Fri Apr 02 13:58:27 2004 +0000
    41.2 +++ b/xen/drivers/char/console.c	Fri Apr 02 14:03:03 2004 +0000
    41.3 @@ -277,12 +277,11 @@ static void serial_rx(unsigned char c, s
    41.4      }
    41.5      else
    41.6      {
    41.7 -        /* Flush any pending CTRL-a's. They weren't for us. */
    41.8 -        for ( ; ctrl_a_count != 0; ctrl_a_count-- )
    41.9 -            __serial_rx(CTRL_A, regs);
   41.10 -        /* Finally process the just-received character. */
   41.11 -        __serial_rx(c, regs);
   41.12 +        ctrl_a_count = 0;
   41.13      }
   41.14 +
   41.15 +    /* Finally process the just-received character. */
   41.16 +    __serial_rx(c, regs);
   41.17  }
   41.18  
   41.19  long do_console_io(int cmd, int count, char *buffer)
    42.1 --- a/xen/drivers/pci/pci.c	Fri Apr 02 13:58:27 2004 +0000
    42.2 +++ b/xen/drivers/pci/pci.c	Fri Apr 02 14:03:03 2004 +0000
    42.3 @@ -656,6 +656,7 @@ out:
    42.4  int
    42.5  pci_register_driver(struct pci_driver *drv)
    42.6  {
    42.7 +#ifndef NO_DEVICES_IN_XEN
    42.8  	struct pci_dev *dev;
    42.9  	int count = 0;
   42.10  
   42.11 @@ -665,6 +666,9 @@ pci_register_driver(struct pci_driver *d
   42.12  			count += pci_announce_device(drv, dev);
   42.13  	}
   42.14  	return count;
   42.15 +#else
   42.16 +	return 0;
   42.17 +#endif
   42.18  }
   42.19  
   42.20  /**
   42.21 @@ -680,6 +684,7 @@ pci_register_driver(struct pci_driver *d
   42.22  void
   42.23  pci_unregister_driver(struct pci_driver *drv)
   42.24  {
   42.25 +#ifndef NO_DEVICES_IN_XEN
   42.26  	struct pci_dev *dev;
   42.27  
   42.28  	list_del(&drv->node);
   42.29 @@ -690,6 +695,7 @@ pci_unregister_driver(struct pci_driver 
   42.30  			dev->driver = NULL;
   42.31  		}
   42.32  	}
   42.33 +#endif
   42.34  }
   42.35  
   42.36  #ifdef CONFIG_HOTPLUG
    43.1 --- a/xen/include/asm-i386/elf.h	Fri Apr 02 13:58:27 2004 +0000
    43.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.3 @@ -1,233 +0,0 @@
    43.4 -/*
    43.5 - *  GRUB  --  GRand Unified Bootloader
    43.6 - *  Copyright (C) 1996  Erich Boleyn  <erich@uruk.org>
    43.7 - *  Copyright (C) 2001  Free Software Foundation, Inc.
    43.8 - *
    43.9 - *  This program is free software; you can redistribute it and/or modify
   43.10 - *  it under the terms of the GNU General Public License as published by
   43.11 - *  the Free Software Foundation; either version 2 of the License, or
   43.12 - *  (at your option) any later version.
   43.13 - *
   43.14 - *  This program is distributed in the hope that it will be useful,
   43.15 - *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   43.16 - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   43.17 - *  GNU General Public License for more details.
   43.18 - *
   43.19 - *  You should have received a copy of the GNU General Public License
   43.20 - *  along with this program; if not, write to the Free Software
   43.21 - *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   43.22 - */
   43.23 -
   43.24 -/* 32-bit data types */
   43.25 -
   43.26 -typedef unsigned long Elf32_Addr;
   43.27 -typedef unsigned short Elf32_Half;
   43.28 -typedef unsigned long Elf32_Off;
   43.29 -typedef signed long Elf32_Sword;
   43.30 -typedef unsigned long Elf32_Word;
   43.31 -/* "unsigned char" already exists */
   43.32 -
   43.33 -/* ELF header */
   43.34 -typedef struct
   43.35 -{
   43.36 -  
   43.37 -#define EI_NIDENT 16
   43.38 -  
   43.39 -  /* first four characters are defined below */
   43.40 -#define EI_MAG0		0
   43.41 -#define ELFMAG0		0x7f
   43.42 -#define EI_MAG1		1
   43.43 -#define ELFMAG1		'E'
   43.44 -#define EI_MAG2		2
   43.45 -#define ELFMAG2		'L'
   43.46 -#define EI_MAG3		3
   43.47 -#define ELFMAG3		'F'
   43.48 -  
   43.49 -#define EI_CLASS	4	/* data sizes */
   43.50 -#define ELFCLASS32	1	/* i386 -- up to 32-bit data sizes present */
   43.51 -  
   43.52 -#define EI_DATA		5	/* data type and ordering */
   43.53 -#define ELFDATA2LSB	1	/* i386 -- LSB 2's complement */
   43.54 -  
   43.55 -#define EI_VERSION	6	/* version number.  "e_version" must be the same */
   43.56 -#define EV_CURRENT      1	/* current version number */
   43.57 -  
   43.58 -#define EI_PAD		7	/* from here in is just padding */
   43.59 -  
   43.60 -#define EI_BRAND	8	/* start of OS branding (This is
   43.61 -				   obviously illegal against the ELF
   43.62 -				   standard.) */
   43.63 -  
   43.64 -  unsigned char e_ident[EI_NIDENT];	/* basic identification block */
   43.65 -  
   43.66 -#define ET_EXEC		2	/* we only care about executable types */
   43.67 -  Elf32_Half e_type;		/* file types */
   43.68 -  
   43.69 -#define EM_386		3	/* i386 -- obviously use this one */
   43.70 -  Elf32_Half e_machine;	/* machine types */
   43.71 -  Elf32_Word e_version;	/* use same as "EI_VERSION" above */
   43.72 -  Elf32_Addr e_entry;		/* entry point of the program */
   43.73 -  Elf32_Off e_phoff;		/* program header table file offset */
   43.74 -  Elf32_Off e_shoff;		/* section header table file offset */
   43.75 -  Elf32_Word e_flags;		/* flags */
   43.76 -  Elf32_Half e_ehsize;		/* elf header size in bytes */
   43.77 -  Elf32_Half e_phentsize;	/* program header entry size */
   43.78 -  Elf32_Half e_phnum;		/* number of entries in program header */
   43.79 -  Elf32_Half e_shentsize;	/* section header entry size */
   43.80 -  Elf32_Half e_shnum;		/* number of entries in section header */
   43.81 -  
   43.82 -#define SHN_UNDEF       0
   43.83 -#define SHN_LORESERVE   0xff00
   43.84 -#define SHN_LOPROC      0xff00
   43.85 -#define SHN_HIPROC      0xff1f
   43.86 -#define SHN_ABS         0xfff1
   43.87 -#define SHN_COMMON      0xfff2
   43.88 -#define SHN_HIRESERVE   0xffff
   43.89 -  Elf32_Half e_shstrndx;	/* section header table index */
   43.90 -}
   43.91 -Elf32_Ehdr;
   43.92 -
   43.93 -
   43.94 -#define BOOTABLE_I386_ELF(h) \
   43.95 - ((h.e_ident[EI_MAG0] == ELFMAG0) & (h.e_ident[EI_MAG1] == ELFMAG1) \
   43.96 -  & (h.e_ident[EI_MAG2] == ELFMAG2) & (h.e_ident[EI_MAG3] == ELFMAG3) \
   43.97 -  & (h.e_ident[EI_CLASS] == ELFCLASS32) & (h.e_ident[EI_DATA] == ELFDATA2LSB) \
   43.98 -  & (h.e_ident[EI_VERSION] == EV_CURRENT) & (h.e_type == ET_EXEC) \
   43.99 -  & (h.e_machine == EM_386) & (h.e_version == EV_CURRENT))
  43.100 -
  43.101 -/* section table - ? */
  43.102 -typedef struct
  43.103 -{
  43.104 -  Elf32_Word	sh_name;		/* Section name (string tbl index) */
  43.105 -  Elf32_Word	sh_type;		/* Section type */
  43.106 -  Elf32_Word	sh_flags;		/* Section flags */
  43.107 -  Elf32_Addr	sh_addr;		/* Section virtual addr at execution */
  43.108 -  Elf32_Off	sh_offset;		/* Section file offset */
  43.109 -  Elf32_Word	sh_size;		/* Section size in bytes */
  43.110 -  Elf32_Word	sh_link;		/* Link to another section */
  43.111 -  Elf32_Word	sh_info;		/* Additional section information */
  43.112 -  Elf32_Word	sh_addralign;		/* Section alignment */
  43.113 -  Elf32_Word	sh_entsize;		/* Entry size if section holds table */
  43.114 -}
  43.115 -Elf32_Shdr;
  43.116 -
  43.117 -/* symbol table - page 4-25, figure 4-15 */
  43.118 -typedef struct
  43.119 -{
  43.120 -  Elf32_Word st_name;
  43.121 -  Elf32_Addr st_value;
  43.122 -  Elf32_Word st_size;
  43.123 -  unsigned char st_info;
  43.124 -  unsigned char st_other;
  43.125 -  Elf32_Half st_shndx;
  43.126 -}
  43.127 -Elf32_Sym;
  43.128 -
  43.129 -/* symbol type and binding attributes - page 4-26 */
  43.130 -
  43.131 -#define ELF32_ST_BIND(i)    ((i) >> 4)
  43.132 -#define ELF32_ST_TYPE(i)    ((i) & 0xf)
  43.133 -#define ELF32_ST_INFO(b,t)  (((b)<<4)+((t)&0xf))
  43.134 -
  43.135 -/* symbol binding - page 4-26, figure 4-16 */
  43.136 -
  43.137 -#define STB_LOCAL    0
  43.138 -#define STB_GLOBAL   1
  43.139 -#define STB_WEAK     2
  43.140 -#define STB_LOPROC  13
  43.141 -#define STB_HIPROC  15
  43.142 -
  43.143 -/* symbol types - page 4-28, figure 4-17 */
  43.144 -
  43.145 -#define STT_NOTYPE   0
  43.146 -#define STT_OBJECT   1
  43.147 -#define STT_FUNC     2
  43.148 -#define STT_SECTION  3
  43.149 -#define STT_FILE     4
  43.150 -#define STT_LOPROC  13
  43.151 -#define STT_HIPROC  15
  43.152 -
  43.153 -
  43.154 -/* Macros to split/combine relocation type and symbol page 4-32 */
  43.155 -
  43.156 -#define ELF32_R_SYM(__i)	((__i)>>8)
  43.157 -#define ELF32_R_TYPE(__i)	((unsigned char) (__i))
  43.158 -#define ELF32_R_INFO(__s, __t)	(((__s)<<8) + (unsigned char) (__t))
  43.159 -
  43.160 -
  43.161 -/* program header - page 5-2, figure 5-1 */
  43.162 -
  43.163 -typedef struct
  43.164 -{
  43.165 -  Elf32_Word p_type;
  43.166 -  Elf32_Off p_offset;
  43.167 -  Elf32_Addr p_vaddr;
  43.168 -  Elf32_Addr p_paddr;
  43.169 -  Elf32_Word p_filesz;
  43.170 -  Elf32_Word p_memsz;
  43.171 -  Elf32_Word p_flags;
  43.172 -  Elf32_Word p_align;
  43.173 -}
  43.174 -Elf32_Phdr;
  43.175 -
  43.176 -/* segment types - page 5-3, figure 5-2 */
  43.177 -
  43.178 -#define PT_NULL		0
  43.179 -#define PT_LOAD		1
  43.180 -#define PT_DYNAMIC	2
  43.181 -#define PT_INTERP	3
  43.182 -#define PT_NOTE		4
  43.183 -#define PT_SHLIB	5
  43.184 -#define PT_PHDR		6
  43.185 -
  43.186 -#define PT_LOPROC	0x70000000
  43.187 -#define PT_HIPROC	0x7fffffff
  43.188 -
  43.189 -/* segment permissions - page 5-6 */
  43.190 -
  43.191 -#define PF_X		0x1
  43.192 -#define PF_W		0x2
  43.193 -#define PF_R		0x4
  43.194 -#define PF_MASKPROC	0xf0000000
  43.195 -
  43.196 -
  43.197 -/* dynamic structure - page 5-15, figure 5-9 */
  43.198 -
  43.199 -typedef struct
  43.200 -{
  43.201 -  Elf32_Sword d_tag;
  43.202 -  union
  43.203 -  {
  43.204 -    Elf32_Word d_val;
  43.205 -    Elf32_Addr d_ptr;
  43.206 -  }
  43.207 -  d_un;
  43.208 -}
  43.209 -Elf32_Dyn;
  43.210 -
  43.211 -/* Dynamic array tags - page 5-16, figure 5-10.  */
  43.212 -
  43.213 -#define DT_NULL		0
  43.214 -#define DT_NEEDED	1
  43.215 -#define DT_PLTRELSZ	2
  43.216 -#define DT_PLTGOT	3
  43.217 -#define DT_HASH		4
  43.218 -#define DT_STRTAB	5
  43.219 -#define DT_SYMTAB	6
  43.220 -#define DT_RELA		7
  43.221 -#define DT_RELASZ	8
  43.222 -#define DT_RELAENT      9
  43.223 -#define DT_STRSZ	10
  43.224 -#define DT_SYMENT	11
  43.225 -#define DT_INIT		12
  43.226 -#define DT_FINI		13
  43.227 -#define DT_SONAME	14
  43.228 -#define DT_RPATH	15
  43.229 -#define DT_SYMBOLIC	16
  43.230 -#define DT_REL		17
  43.231 -#define DT_RELSZ	18
  43.232 -#define DT_RELENT	19
  43.233 -#define DT_PLTREL	20
  43.234 -#define DT_DEBUG	21
  43.235 -#define DT_TEXTREL	22
  43.236 -#define DT_JMPREL	23
    44.1 --- a/xen/include/asm-i386/irq.h	Fri Apr 02 13:58:27 2004 +0000
    44.2 +++ b/xen/include/asm-i386/irq.h	Fri Apr 02 14:03:03 2004 +0000
    44.3 @@ -192,10 +192,10 @@ extern unsigned long prof_shift;
    44.4  
    44.5  #include <xen/irq.h>
    44.6  
    44.7 -#ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */
    44.8 +#if defined(CONFIG_X86_IO_APIC)
    44.9  static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
   44.10 -	if (IO_APIC_IRQ(i))
   44.11 -		send_IPI_self(IO_APIC_VECTOR(i));
   44.12 +        if (IO_APIC_IRQ(i))
   44.13 +                send_IPI_self(IO_APIC_VECTOR(i));
   44.14  }
   44.15  #else
   44.16  static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
    45.1 --- a/xen/include/asm-i386/pdb.h	Fri Apr 02 13:58:27 2004 +0000
    45.2 +++ b/xen/include/asm-i386/pdb.h	Fri Apr 02 14:03:03 2004 +0000
    45.3 @@ -1,6 +1,7 @@
    45.4  
    45.5  /*
    45.6   * pervasive debugger
    45.7 + * www.cl.cam.ac.uk/netos/pdb
    45.8   *
    45.9   * alex ho
   45.10   * 2004
   45.11 @@ -13,18 +14,22 @@
   45.12  
   45.13  #include <asm/ptrace.h>
   45.14  #include <xen/list.h>
   45.15 +#include <hypervisor-ifs/hypervisor-if.h>                   /* for domain id */
   45.16  
   45.17  extern int pdb_initialized;
   45.18  extern int pdb_com_port;
   45.19  extern int pdb_high_bit;
   45.20 +extern int pdb_page_fault_possible;
   45.21 +extern int pdb_page_fault_scratch;
   45.22 +extern int pdb_page_fault;
   45.23  
   45.24  extern void initialize_pdb(void);
   45.25  
   45.26  /* Get/set values from generic debug interface. */
   45.27 -extern int pdb_set_values(domid_t domain, u_char *buffer, 
   45.28 -                          unsigned long addr, int length);
   45.29 -extern int pdb_get_values(domid_t domain, u_char *buffer,
   45.30 -                          unsigned long addr, int length);
   45.31 +extern int pdb_set_values(u_char *buffer, int length,
   45.32 +                          unsigned long cr3, unsigned long addr);
   45.33 +extern int pdb_get_values(u_char *buffer, int length,
   45.34 +                          unsigned long cr3, unsigned long addr);
   45.35  
   45.36  /* External entry points. */
   45.37  extern int pdb_handle_exception(int exceptionVector,
   45.38 @@ -37,10 +42,13 @@ struct pdb_breakpoint
   45.39  {
   45.40      struct list_head list;
   45.41      unsigned long address;
   45.42 +    unsigned long cr3;
   45.43 +    domid_t domain;
   45.44  };
   45.45 -extern void pdb_bkpt_add (unsigned long address);
   45.46 -extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long address);
   45.47 -extern int pdb_bkpt_remove (unsigned long address);
   45.48 +extern void pdb_bkpt_add (unsigned long cr3, unsigned long address);
   45.49 +extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3, 
   45.50 +					       unsigned long address);
   45.51 +extern int pdb_bkpt_remove (unsigned long cr3, unsigned long address);
   45.52  
   45.53  /* Conversions. */
   45.54  extern int   hex (char);
    46.1 --- a/xen/include/asm-x86_64/elf.h	Fri Apr 02 13:58:27 2004 +0000
    46.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.3 @@ -1,233 +0,0 @@
    46.4 -/*
    46.5 - *  GRUB  --  GRand Unified Bootloader
    46.6 - *  Copyright (C) 1996  Erich Boleyn  <erich@uruk.org>
    46.7 - *  Copyright (C) 2001  Free Software Foundation, Inc.
    46.8 - *
    46.9 - *  This program is free software; you can redistribute it and/or modify
   46.10 - *  it under the terms of the GNU General Public License as published by
   46.11 - *  the Free Software Foundation; either version 2 of the License, or
   46.12 - *  (at your option) any later version.
   46.13 - *
   46.14 - *  This program is distributed in the hope that it will be useful,
   46.15 - *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   46.16 - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   46.17 - *  GNU General Public License for more details.
   46.18 - *
   46.19 - *  You should have received a copy of the GNU General Public License
   46.20 - *  along with this program; if not, write to the Free Software
   46.21 - *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   46.22 - */
   46.23 -
   46.24 -/* 32-bit data types */
   46.25 -
   46.26 -typedef unsigned long Elf32_Addr;
   46.27 -typedef unsigned short Elf32_Half;
   46.28 -typedef unsigned long Elf32_Off;
   46.29 -typedef signed long Elf32_Sword;
   46.30 -typedef unsigned long Elf32_Word;
   46.31 -/* "unsigned char" already exists */
   46.32 -
   46.33 -/* ELF header */
   46.34 -typedef struct
   46.35 -{
   46.36 -  
   46.37 -#define EI_NIDENT 16
   46.38 -  
   46.39 -  /* first four characters are defined below */
   46.40 -#define EI_MAG0		0
   46.41 -#define ELFMAG0		0x7f
   46.42 -#define EI_MAG1		1
   46.43 -#define ELFMAG1		'E'
   46.44 -#define EI_MAG2		2
   46.45 -#define ELFMAG2		'L'
   46.46 -#define EI_MAG3		3
   46.47 -#define ELFMAG3		'F'
   46.48 -  
   46.49 -#define EI_CLASS	4	/* data sizes */
   46.50 -#define ELFCLASS32	1	/* i386 -- up to 32-bit data sizes present */
   46.51 -  
   46.52 -#define EI_DATA		5	/* data type and ordering */
   46.53 -#define ELFDATA2LSB	1	/* i386 -- LSB 2's complement */
   46.54 -  
   46.55 -#define EI_VERSION	6	/* version number.  "e_version" must be the same */
   46.56 -#define EV_CURRENT      1	/* current version number */
   46.57 -  
   46.58 -#define EI_PAD		7	/* from here in is just padding */
   46.59 -  
   46.60 -#define EI_BRAND	8	/* start of OS branding (This is
   46.61 -				   obviously illegal against the ELF
   46.62 -				   standard.) */
   46.63 -  
   46.64 -  unsigned char e_ident[EI_NIDENT];	/* basic identification block */
   46.65 -  
   46.66 -#define ET_EXEC		2	/* we only care about executable types */
   46.67 -  Elf32_Half e_type;		/* file types */
   46.68 -  
   46.69 -#define EM_386		3	/* i386 -- obviously use this one */
   46.70 -  Elf32_Half e_machine;	/* machine types */
   46.71 -  Elf32_Word e_version;	/* use same as "EI_VERSION" above */
   46.72 -  Elf32_Addr e_entry;		/* entry point of the program */
   46.73 -  Elf32_Off e_phoff;		/* program header table file offset */
   46.74 -  Elf32_Off e_shoff;		/* section header table file offset */
   46.75 -  Elf32_Word e_flags;		/* flags */
   46.76 -  Elf32_Half e_ehsize;		/* elf header size in bytes */
   46.77 -  Elf32_Half e_phentsize;	/* program header entry size */
   46.78 -  Elf32_Half e_phnum;		/* number of entries in program header */
   46.79 -  Elf32_Half e_shentsize;	/* section header entry size */
   46.80 -  Elf32_Half e_shnum;		/* number of entries in section header */
   46.81 -  
   46.82 -#define SHN_UNDEF       0
   46.83 -#define SHN_LORESERVE   0xff00
   46.84 -#define SHN_LOPROC      0xff00
   46.85 -#define SHN_HIPROC      0xff1f
   46.86 -#define SHN_ABS         0xfff1
   46.87 -#define SHN_COMMON      0xfff2
   46.88 -#define SHN_HIRESERVE   0xffff
   46.89 -  Elf32_Half e_shstrndx;	/* section header table index */
   46.90 -}
   46.91 -Elf32_Ehdr;
   46.92 -
   46.93 -
   46.94 -#define BOOTABLE_I386_ELF(h) \
   46.95 - ((h.e_ident[EI_MAG0] == ELFMAG0) & (h.e_ident[EI_MAG1] == ELFMAG1) \
   46.96 -  & (h.e_ident[EI_MAG2] == ELFMAG2) & (h.e_ident[EI_MAG3] == ELFMAG3) \
   46.97 -  & (h.e_ident[EI_CLASS] == ELFCLASS32) & (h.e_ident[EI_DATA] == ELFDATA2LSB) \
   46.98 -  & (h.e_ident[EI_VERSION] == EV_CURRENT) & (h.e_type == ET_EXEC) \
   46.99 -  & (h.e_machine == EM_386) & (h.e_version == EV_CURRENT))
  46.100 -
  46.101 -/* section table - ? */
  46.102 -typedef struct
  46.103 -{
  46.104 -  Elf32_Word	sh_name;		/* Section name (string tbl index) */
  46.105 -  Elf32_Word	sh_type;		/* Section type */
  46.106 -  Elf32_Word	sh_flags;		/* Section flags */
  46.107 -  Elf32_Addr	sh_addr;		/* Section virtual addr at execution */
  46.108 -  Elf32_Off	sh_offset;		/* Section file offset */
  46.109 -  Elf32_Word	sh_size;		/* Section size in bytes */
  46.110 -  Elf32_Word	sh_link;		/* Link to another section */
  46.111 -  Elf32_Word	sh_info;		/* Additional section information */
  46.112 -  Elf32_Word	sh_addralign;		/* Section alignment */
  46.113 -  Elf32_Word	sh_entsize;		/* Entry size if section holds table */
  46.114 -}
  46.115 -Elf32_Shdr;
  46.116 -
  46.117 -/* symbol table - page 4-25, figure 4-15 */
  46.118 -typedef struct
  46.119 -{
  46.120 -  Elf32_Word st_name;
  46.121 -  Elf32_Addr st_value;
  46.122 -  Elf32_Word st_size;
  46.123 -  unsigned char st_info;
  46.124 -  unsigned char st_other;
  46.125 -  Elf32_Half st_shndx;
  46.126 -}
  46.127 -Elf32_Sym;
  46.128 -
  46.129 -/* symbol type and binding attributes - page 4-26 */
  46.130 -
  46.131 -#define ELF32_ST_BIND(i)    ((i) >> 4)
  46.132 -#define ELF32_ST_TYPE(i)    ((i) & 0xf)
  46.133 -#define ELF32_ST_INFO(b,t)  (((b)<<4)+((t)&0xf))
  46.134 -
  46.135 -/* symbol binding - page 4-26, figure 4-16 */
  46.136 -
  46.137 -#define STB_LOCAL    0
  46.138 -#define STB_GLOBAL   1
  46.139 -#define STB_WEAK     2
  46.140 -#define STB_LOPROC  13
  46.141 -#define STB_HIPROC  15
  46.142 -
  46.143 -/* symbol types - page 4-28, figure 4-17 */
  46.144 -
  46.145 -#define STT_NOTYPE   0
  46.146 -#define STT_OBJECT   1
  46.147 -#define STT_FUNC     2
  46.148 -#define STT_SECTION  3
  46.149 -#define STT_FILE     4
  46.150 -#define STT_LOPROC  13
  46.151 -#define STT_HIPROC  15
  46.152 -
  46.153 -
  46.154 -/* Macros to split/combine relocation type and symbol page 4-32 */
  46.155 -
  46.156 -#define ELF32_R_SYM(__i)	((__i)>>8)
  46.157 -#define ELF32_R_TYPE(__i)	((unsigned char) (__i))
  46.158 -#define ELF32_R_INFO(__s, __t)	(((__s)<<8) + (unsigned char) (__t))
  46.159 -
  46.160 -
  46.161 -/* program header - page 5-2, figure 5-1 */
  46.162 -
  46.163 -typedef struct
  46.164 -{
  46.165 -  Elf32_Word p_type;
  46.166 -  Elf32_Off p_offset;
  46.167 -  Elf32_Addr p_vaddr;
  46.168 -  Elf32_Addr p_paddr;
  46.169 -  Elf32_Word p_filesz;
  46.170 -  Elf32_Word p_memsz;
  46.171 -  Elf32_Word p_flags;
  46.172 -  Elf32_Word p_align;
  46.173 -}
  46.174 -Elf32_Phdr;
  46.175 -
  46.176 -/* segment types - page 5-3, figure 5-2 */
  46.177 -
  46.178 -#define PT_NULL		0
  46.179 -#define PT_LOAD		1
  46.180 -#define PT_DYNAMIC	2
  46.181 -#define PT_INTERP	3
  46.182 -#define PT_NOTE		4
  46.183 -#define PT_SHLIB	5
  46.184 -#define PT_PHDR		6
  46.185 -
  46.186 -#define PT_LOPROC	0x70000000
  46.187 -#define PT_HIPROC	0x7fffffff
  46.188 -
  46.189 -/* segment permissions - page 5-6 */
  46.190 -
  46.191 -#define PF_X		0x1
  46.192 -#define PF_W		0x2
  46.193 -#define PF_R		0x4
  46.194 -#define PF_MASKPROC	0xf0000000
  46.195 -
  46.196 -
  46.197 -/* dynamic structure - page 5-15, figure 5-9 */
  46.198 -
  46.199 -typedef struct
  46.200 -{
  46.201 -  Elf32_Sword d_tag;
  46.202 -  union
  46.203 -  {
  46.204 -    Elf32_Word d_val;
  46.205 -    Elf32_Addr d_ptr;
  46.206 -  }
  46.207 -  d_un;
  46.208 -}
  46.209 -Elf32_Dyn;
  46.210 -
  46.211 -/* Dynamic array tags - page 5-16, figure 5-10.  */
  46.212 -
  46.213 -#define DT_NULL		0
  46.214 -#define DT_NEEDED	1
  46.215 -#define DT_PLTRELSZ	2
  46.216 -#define DT_PLTGOT	3
  46.217 -#define DT_HASH		4
  46.218 -#define DT_STRTAB	5
  46.219 -#define DT_SYMTAB	6
  46.220 -#define DT_RELA		7
  46.221 -#define DT_RELASZ	8
  46.222 -#define DT_RELAENT      9
  46.223 -#define DT_STRSZ	10
  46.224 -#define DT_SYMENT	11
  46.225 -#define DT_INIT		12
  46.226 -#define DT_FINI		13
  46.227 -#define DT_SONAME	14
  46.228 -#define DT_RPATH	15
  46.229 -#define DT_SYMBOLIC	16
  46.230 -#define DT_REL		17
  46.231 -#define DT_RELSZ	18
  46.232 -#define DT_RELENT	19
  46.233 -#define DT_PLTREL	20
  46.234 -#define DT_DEBUG	21
  46.235 -#define DT_TEXTREL	22
  46.236 -#define DT_JMPREL	23
    47.1 --- a/xen/include/asm-x86_64/irq.h	Fri Apr 02 13:58:27 2004 +0000
    47.2 +++ b/xen/include/asm-x86_64/irq.h	Fri Apr 02 14:03:03 2004 +0000
    47.3 @@ -124,10 +124,10 @@ extern unsigned long prof_shift;
    47.4  
    47.5  #include <xen/irq.h>
    47.6  
    47.7 -#ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */
    47.8 +#if defined(CONFIG_X86_IO_APIC)
    47.9  static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
   47.10 -	if (IO_APIC_IRQ(i))
   47.11 -		send_IPI_self(IO_APIC_VECTOR(i));
   47.12 +        if (IO_APIC_IRQ(i))
   47.13 +                send_IPI_self(IO_APIC_VECTOR(i));
   47.14  }
   47.15  #else
   47.16  static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
    48.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h	Fri Apr 02 13:58:27 2004 +0000
    48.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h	Fri Apr 02 14:03:03 2004 +0000
    48.3 @@ -18,7 +18,7 @@
    48.4   * This makes sure that old versions of dom0 tools will stop working in a
    48.5   * well-defined way (rather than crashing the machine, for instance).
    48.6   */
    48.7 -#define DOM0_INTERFACE_VERSION   0xAAAA000A
    48.8 +#define DOM0_INTERFACE_VERSION   0xAAAA000B
    48.9  
   48.10  #define MAX_CMD_LEN       256
   48.11  #define MAX_DOMAIN_NAME    16
   48.12 @@ -96,7 +96,6 @@ typedef struct dom0_getdomaininfo_st
   48.13  #define DOMSTATE_STOPPED             1
   48.14      int state;
   48.15      int hyp_events;
   48.16 -    unsigned long mcu_advance;
   48.17      unsigned int tot_pages;
   48.18      long long cpu_time;
   48.19      unsigned long shared_info_frame;  /* MFN of shared_info struct */
   48.20 @@ -214,10 +213,20 @@ typedef struct dom0_pcidev_access_st
   48.21      int          enable;
   48.22  } dom0_pcidev_access_t;
   48.23  
   48.24 +/*
   48.25 + * Get the ID of the current scheduler.
   48.26 + */
   48.27 +#define DOM0_SCHED_ID        24
   48.28 +typedef struct dom0_sched_id_st
   48.29 +{
   48.30 +    /* OUT variable */
   48.31 +    int sched_id;
   48.32 +} dom0_sched_id_t;
   48.33 +
   48.34  /* 
   48.35   * Control shadow pagetables operation
   48.36   */
   48.37 -#define DOM0_SHADOW_CONTROL   24
   48.38 +#define DOM0_SHADOW_CONTROL   25
   48.39  
   48.40  #define DOM0_SHADOW_CONTROL_OP_OFF         0
   48.41  #define DOM0_SHADOW_CONTROL_OP_ENABLE_TEST 1
   48.42 @@ -257,6 +266,7 @@ typedef struct dom0_op_st
   48.43          dom0_gettbufs_t         gettbufs;
   48.44          dom0_physinfo_t         physinfo;
   48.45          dom0_pcidev_access_t    pcidev_access;
   48.46 +        dom0_sched_id_t         sched_id;
   48.47  	dom0_shadow_control_t   shadow_control;
   48.48      } u;
   48.49  } dom0_op_t;
    49.1 --- a/xen/include/hypervisor-ifs/event_channel.h	Fri Apr 02 13:58:27 2004 +0000
    49.2 +++ b/xen/include/hypervisor-ifs/event_channel.h	Fri Apr 02 14:03:03 2004 +0000
    49.3 @@ -50,6 +50,8 @@ typedef struct evtchn_bind_pirq
    49.4  {
    49.5      /* IN parameters. */
    49.6      int pirq;
    49.7 +#define BIND_PIRQ__WILL_SHARE 1
    49.8 +    unsigned int flags; /* BIND_PIRQ__* */
    49.9      /* OUT parameters. */
   49.10      int port;
   49.11  } evtchn_bind_pirq_t;
    50.1 --- a/xen/include/hypervisor-ifs/hypervisor-if.h	Fri Apr 02 13:58:27 2004 +0000
    50.2 +++ b/xen/include/hypervisor-ifs/hypervisor-if.h	Fri Apr 02 14:03:03 2004 +0000
    50.3 @@ -56,19 +56,19 @@
    50.4   * Virtual interrupts that a guest OS may receive from the hypervisor.
    50.5   */
    50.6  
    50.7 -#define VIRQ_BLKDEV    0  /* A block device response has been queued. */
    50.8 -#define VIRQ_TIMER     1  /* A timeout has been updated. */
    50.9 -#define VIRQ_DIE       2  /* OS is about to be killed. Clean up please! */
   50.10 -#define VIRQ_DEBUG     3  /* Request guest to dump debug info (gross!) */
   50.11 -#define VIRQ_NET       4  /* There are packets for transmission. */
   50.12 -#define VIRQ_PS2       5  /* PS/2 keyboard or mouse event(s) */
   50.13 -#define VIRQ_STOP      6  /* Prepare for stopping and possible pickling */
   50.14 -#define VIRQ_EVTCHN    7  /* Event pending on an event channel */
   50.15 -#define VIRQ_VBD_UPD   8  /* Event to signal VBDs should be reprobed */
   50.16 -#define VIRQ_CONSOLE   9  /* This is only for domain-0 initial console. */
   50.17 -#define VIRQ_PHYSIRQ  10  /* Event to signal pending physical IRQs. */
   50.18 -#define VIRQ_ERROR    11  /* Catch-all virtual interrupt. */
   50.19 -#define NR_VIRQS      12
   50.20 +#define VIRQ_BLKDEV     0  /* A block device response has been queued. */
   50.21 +#define VIRQ_TIMER      1  /* A timeout has been updated. */
   50.22 +#define VIRQ_DIE        2  /* OS is about to be killed. Clean up please! */
   50.23 +#define VIRQ_DEBUG      3  /* Request guest to dump debug info (gross!) */
   50.24 +#define VIRQ_NET        4  /* There are packets for transmission. */
   50.25 +#define VIRQ_PS2        5  /* PS/2 keyboard or mouse event(s) */
   50.26 +#define VIRQ_STOP       6  /* Prepare for stopping and possible pickling */
   50.27 +#define VIRQ_EVTCHN     7  /* Event pending on an event channel */
   50.28 +#define VIRQ_VBD_UPD    8  /* Event to signal VBDs should be reprobed */
   50.29 +#define VIRQ_CONSOLE    9  /* This is only for domain-0 initial console. */
   50.30 +#define VIRQ_PHYSIRQ   10  /* Event to signal pending physical IRQs. */
   50.31 +#define VIRQ_MISDIRECT 11  /* Catch-all virtual interrupt. */
   50.32 +#define NR_VIRQS       12
   50.33  
   50.34  /*
   50.35   * MMU_XXX: specified in least 2 bits of 'ptr' field. These bits are masked
   50.36 @@ -150,6 +150,9 @@ typedef struct
   50.37  /* Event channel endpoints per domain. */
   50.38  #define NR_EVENT_CHANNELS 1024
   50.39  
   50.40 +/* No support for multi-processor guests. */
   50.41 +#define MAX_VIRT_CPUS 1
   50.42 +
   50.43  /*
   50.44   * Xen/guestos shared data -- pointer provided in start_info.
   50.45   * NB. We expect that this struct is smaller than a page.
   50.46 @@ -157,13 +160,39 @@ typedef struct
   50.47  typedef struct shared_info_st
   50.48  {
   50.49      /*
   50.50 -     * If bit 0 in evtchn_upcall_pending is transitioned 0->1, and bit 0 in 
   50.51 -     * evtchn_upcall_mask is clear, then an asynchronous upcall is scheduled. 
   50.52 -     * The upcall mask can be used to prevent unbounded reentrancy and stack 
   50.53 -     * overflow (in this way, acts as a kind of interrupt-enable flag).
   50.54 +     * Per-VCPU information goes here. This will be cleaned up more when Xen 
   50.55 +     * actually supports multi-VCPU guests.
   50.56       */
   50.57 -    unsigned long evtchn_upcall_pending;
   50.58 -    unsigned long evtchn_upcall_mask;
   50.59 +    struct {
   50.60 +        /*
   50.61 +         * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
   50.62 +         * a pending notification for a particular VCPU. It is then cleared 
   50.63 +         * by the guest OS /before/ checking for pending work, thus avoiding
   50.64 +         * a set-and-check race. Note that the mask is only accessed by Xen
   50.65 +         * on the CPU that is currently hosting the VCPU. This means that the
   50.66 +         * pending and mask flags can be updated by the guest without special
   50.67 +         * synchronisation (i.e., no need for the x86 LOCK prefix).
   50.68 +         * This may seem suboptimal because if the pending flag is set by
   50.69 +         * a different CPU then an IPI may be scheduled even when the mask
   50.70 +         * is set. However, note:
   50.71 +         *  1. The task of 'interrupt holdoff' is covered by the per-event-
   50.72 +         *     channel mask bits. A 'noisy' event that is continually being
   50.73 +         *     triggered can be masked at source at this very precise
   50.74 +         *     granularity.
   50.75 +         *  2. The main purpose of the per-VCPU mask is therefore to restrict
   50.76 +         *     reentrant execution: whether for concurrency control, or to
   50.77 +         *     prevent unbounded stack usage. Whatever the purpose, we expect
   50.78 +         *     that the mask will be asserted only for short periods at a time,
   50.79 +         *     and so the likelihood of a 'spurious' IPI is suitably small.
   50.80 +         * The mask is read before making an event upcall to the guest: a
   50.81 +         * non-zero mask therefore guarantees that the VCPU will not receive
   50.82 +         * an upcall activation. The mask is cleared when the VCPU requests
   50.83 +         * to block: this avoids wakeup-waiting races.
   50.84 +         */
   50.85 +        u8 evtchn_upcall_pending;
   50.86 +        u8 evtchn_upcall_mask;
   50.87 +        u8 pad0, pad1;
   50.88 +    } vcpu_data[MAX_VIRT_CPUS];
   50.89  
   50.90      /*
   50.91       * A domain can have up to 1024 "event channels" on which it can send
   50.92 @@ -187,23 +216,22 @@ typedef struct shared_info_st
   50.93       *  2. EXCEPTION -- notifies the domain that there has been some
   50.94       *     exceptional event associated with this channel (e.g. remote
   50.95       *     disconnect, physical IRQ error). This bit is cleared by the guest.
   50.96 +     *     A 0->1 transition of this bit will cause the PENDING bit to be set.
   50.97       *  3. MASK -- if this bit is clear then a 0->1 transition of PENDING
   50.98 -     *     or EXCEPTION will cause an asynchronous upcall to be scheduled.
   50.99 -     *     This bit is only updated by the guest. It is read-only within Xen.
  50.100 -     *     If a channel becomes pending or an exceptional event occurs while
  50.101 -     *     the channel is masked then the 'edge' is lost (i.e., when the
  50.102 -     *     channel is unmasked, the guest must manually handle pending
  50.103 -     *     notifications as no upcall will be scheduled by Xen).
  50.104 +     *     will cause an asynchronous upcall to be scheduled. This bit is only
  50.105 +     *     updated by the guest. It is read-only within Xen. If a channel
  50.106 +     *     becomes pending while the channel is masked then the 'edge' is lost
  50.107 +     *     (i.e., when the channel is unmasked, the guest must manually handle
  50.108 +     *     pending notifications as no upcall will be scheduled by Xen).
  50.109       * 
  50.110 -     * To expedite scanning of pending notifications and exceptions, any 
  50.111 -     * 0->1 transition on an unmasked channel causes a corresponding bit in
  50.112 -     * a 32-bit selector to be set. Each bit in the selector covers a 32-bit
  50.113 -     * word in the PENDING or EXCEPTION bitfield array.
  50.114 +     * To expedite scanning of pending notifications, any 0->1 pending
  50.115 +     * transition on an unmasked channel causes a corresponding bit in a
  50.116 +     * 32-bit selector to be set. Each bit in the selector covers a 32-bit
  50.117 +     * word in the PENDING bitfield array.
  50.118       */
  50.119      u32 evtchn_pending[32];
  50.120      u32 evtchn_pending_sel;
  50.121      u32 evtchn_exception[32];
  50.122 -    u32 evtchn_exception_sel;
  50.123      u32 evtchn_mask[32];
  50.124  
  50.125      /*
  50.126 @@ -248,18 +276,41 @@ typedef struct shared_info_st
  50.127  } shared_info_t;
  50.128  
  50.129  /*
  50.130 - * NB. We expect that this struct is smaller than a page.
  50.131 + * Start-of-day memory layout for the initial domain (DOM0):
   50.132 + *  1. The domain is started within a contiguous virtual-memory region.
  50.133 + *  2. The contiguous region begins and ends on an aligned 4MB boundary.
  50.134 + *  3. The region start corresponds to the load address of the OS image.
  50.135 + *     If the load address is not 4MB aligned then the address is rounded down.
   50.136 + *  4. This is the order of bootstrap elements in the initial virtual region:
  50.137 + *      a. relocated kernel image
  50.138 + *      b. initial ram disk              [mod_start, mod_len]
  50.139 + *      c. list of allocated page frames [mfn_list, nr_pages]
  50.140 + *      d. bootstrap page tables         [pt_base, CR3 (x86)]
  50.141 + *      e. start_info_t structure        [register ESI (x86)]
  50.142 + *      f. bootstrap stack               [register ESP (x86)]
  50.143 + *  5. Bootstrap elements are packed together, but each is 4kB-aligned.
  50.144 + *  6. The initial ram disk may be omitted.
  50.145 + *  7. The list of page frames forms a contiguous 'pseudo-physical' memory
  50.146 + *     layout for the domain. In particular, the bootstrap virtual-memory
  50.147 + *     region is a 1:1 mapping to the first section of the pseudo-physical map.
  50.148 + *  8. All bootstrap elements are mapped read-writeable for the guest OS. The
  50.149 + *     only exception is the bootstrap page table, which is mapped read-only.
  50.150 + *  9. There is guaranteed to be at least 512kB padding after the final
  50.151 + *     bootstrap element. If necessary, the bootstrap virtual region is
  50.152 + *     extended by an extra 4MB to ensure this.
  50.153   */
  50.154  typedef struct start_info_st {
  50.155      /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.     */
  50.156 -    unsigned long nr_pages;	  /* total pages allocated to this domain. */
  50.157 -    unsigned long shared_info;	  /* MACHINE address of shared info struct.*/
  50.158 +    unsigned long nr_pages;       /* total pages allocated to this domain. */
  50.159 +    unsigned long shared_info;    /* MACHINE address of shared info struct.*/
  50.160      unsigned long flags;          /* SIF_xxx flags.                        */
  50.161      /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).      */
  50.162 -    unsigned long pt_base;	  /* VIRTUAL address of page directory.    */
  50.163 -    unsigned long mod_start;	  /* VIRTUAL address of pre-loaded module. */
  50.164 -    unsigned long mod_len;	  /* Size (bytes) of pre-loaded module.    */
  50.165 -    unsigned char cmd_line[1];	  /* Variable-length options.              */
  50.166 +    unsigned long pt_base;        /* VIRTUAL address of page directory.    */
  50.167 +    unsigned long nr_pt_frames;   /* Number of bootstrap p.t. frames.      */
  50.168 +    unsigned long mfn_list;       /* VIRTUAL address of page-frame list.   */
  50.169 +    unsigned long mod_start;      /* VIRTUAL address of pre-loaded module. */
  50.170 +    unsigned long mod_len;        /* Size (bytes) of pre-loaded module.    */
  50.171 +    unsigned char cmd_line[1];    /* Variable-length options.              */
  50.172  } start_info_t;
  50.173  
  50.174  /* These flags are passed in the 'flags' field of start_info_t. */
    51.1 --- a/xen/include/hypervisor-ifs/physdev.h	Fri Apr 02 13:58:27 2004 +0000
    51.2 +++ b/xen/include/hypervisor-ifs/physdev.h	Fri Apr 02 14:03:03 2004 +0000
    51.3 @@ -1,70 +1,66 @@
    51.4  /* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    51.5   ****************************************************************************
    51.6 - * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge
    51.7 + * (c) 2004 - Rolf Neugebauer - Intel Research Cambridge
    51.8 + * (c) 2004 - Keir Fraser - University of Cambridge
    51.9   ****************************************************************************
   51.10 - *
   51.11 - *        File: physdev.h
   51.12 - *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
   51.13 - *        Date: Feb 2004
   51.14 - * 
   51.15   * Description: Interface for domains to access physical devices on the PCI bus
   51.16   */
   51.17  
   51.18  #ifndef __HYPERVISOR_IFS_PHYSDEV_H__
   51.19  #define __HYPERVISOR_IFS_PHYSDEV_H__
   51.20  
   51.21 -/*
   51.22 - * Commands to HYPERVISOR_physdev_op()
   51.23 - */
   51.24 -#define PHYSDEVOP_CFGREG_READ   0
   51.25 -#define PHYSDEVOP_CFGREG_WRITE  1
   51.26 -#define PHYSDEVOP_FIND_IRQ      2
   51.27 -#define PHYSDEVOP_UNMASK_IRQ    3
   51.28 +/* Commands to HYPERVISOR_physdev_op() */
   51.29 +#define PHYSDEVOP_PCI_CFGREG_READ       0
   51.30 +#define PHYSDEVOP_PCI_CFGREG_WRITE      1
   51.31 +#define PHYSDEVOP_PCI_INITIALISE_DEVICE 2
   51.32 +#define PHYSDEVOP_PCI_PROBE_ROOT_BUSES  3
   51.33 +#define PHYSDEVOP_UNMASK_IRQ            4
   51.34  
   51.35 -/* read pci config */
   51.36 -typedef struct physdevop_cfgreg_read_st
   51.37 +/* Read from PCI configuration space. */
   51.38 +typedef struct physdevop_pci_cfgreg_read_st
   51.39  {
   51.40 -    int seg;        /* IN */
   51.41      int bus;        /* IN */
   51.42      int dev;        /* IN */
   51.43      int func;       /* IN */
   51.44      int reg;        /* IN */
   51.45      int len;        /* IN */
   51.46      u32 value;      /* OUT */
   51.47 -} physdevop_cfgreg_read_t;
   51.48 +} physdevop_pci_cfgreg_read_t;
   51.49  
   51.50 -/* write pci config */
   51.51 -typedef struct physdevop_cfgred_write_st
   51.52 +/* Write to PCI configuration space. */
   51.53 +typedef struct physdevop_pci_cfgreg_write_st
   51.54  {
   51.55 -    int seg;        /* IN */
   51.56      int bus;        /* IN */
   51.57      int dev;        /* IN */
   51.58      int func;       /* IN */
   51.59      int reg;        /* IN */
   51.60      int len;        /* IN */
   51.61      u32 value;      /* IN */
   51.62 -} physdevop_cfgreg_write_t;
   51.63 +} physdevop_pci_cfgreg_write_t;
   51.64  
   51.65 -/* get the real IRQ for a device */
   51.66 -typedef struct physdevop_find_irq_st
   51.67 +/* Do final initialisation of a PCI device (e.g., last-moment IRQ routing). */
   51.68 +typedef struct physdevop_pci_initialise_device_st
   51.69  {
   51.70 -    int seg;      /* IN */
   51.71      int bus;      /* IN */
   51.72      int dev;      /* IN */
   51.73      int func;     /* IN */
   51.74 -    u32 irq;      /* OUT */
   51.75 -} physdevop_find_irq_t;
   51.76 +} physdevop_pci_initialise_device_t;
   51.77 +
   51.78 +/* Find the root buses for subsequent scanning. */
   51.79 +typedef struct physdevop_pci_probe_root_buses_st
   51.80 +{
   51.81 +    u32 busmask[256/32]; /* OUT */
   51.82 +} physdevop_pci_probe_root_buses_t;
   51.83  
   51.84  typedef struct _physdev_op_st 
   51.85  {
   51.86      unsigned long cmd;
   51.87 -
   51.88 -    /* command parameters */
   51.89      union
   51.90      {
   51.91 -        physdevop_cfgreg_read_t  cfg_read;
   51.92 -        physdevop_cfgreg_write_t cfg_write;
   51.93 -        physdevop_find_irq_t     find_irq;
   51.94 +        physdevop_pci_cfgreg_read_t       pci_cfgreg_read;
   51.95 +        physdevop_pci_cfgreg_write_t      pci_cfgreg_write;
   51.96 +        physdevop_pci_initialise_device_t pci_initialise_device;
   51.97 +        physdevop_pci_probe_root_buses_t  pci_probe_root_buses;
   51.98      } u;
   51.99  } physdev_op_t;
  51.100  
    52.1 --- a/xen/include/hypervisor-ifs/sched_ctl.h	Fri Apr 02 13:58:27 2004 +0000
    52.2 +++ b/xen/include/hypervisor-ifs/sched_ctl.h	Fri Apr 02 14:03:03 2004 +0000
    52.3 @@ -7,18 +7,24 @@
    52.4  #ifndef __SCHED_CTL_H__
    52.5  #define __SCHED_CTL_H__
    52.6  
    52.7 -/* Scheduler types. */
    52.8 +/* Scheduler types */
    52.9  #define SCHED_BVT      0
   52.10  #define SCHED_ATROPOS  1
   52.11  #define SCHED_RROBIN   2
   52.12  
   52.13 +/* these describe the intended direction used for a scheduler control or domain
   52.14 + * command */
   52.15 +#define SCHED_INFO_PUT 0
   52.16 +#define SCHED_INFO_GET 1
   52.17 +
   52.18  /*
   52.19 - * Generic scheduler control command: union of all scheduler control command
   52.20 - * structures.
   52.21 + * Generic scheduler control command - used to adjust system-wide scheduler
   52.22 + * parameters
   52.23   */
   52.24  struct sched_ctl_cmd
   52.25  {
   52.26      unsigned int sched_id;
   52.27 +    int direction;          /* are we getting or putting settings? */
   52.28      
   52.29      union
   52.30      {
   52.31 @@ -40,6 +46,7 @@ struct sched_adjdom_cmd
   52.32  {
   52.33      unsigned int sched_id;
   52.34      domid_t domain;
   52.35 +    int direction;          /* are we getting or putting settings? */
   52.36      
   52.37      union
   52.38      {
   52.39 @@ -53,6 +60,9 @@ struct sched_adjdom_cmd
   52.40  
   52.41          struct atropos_adjdom
   52.42          {
   52.43 +            u64 period;
   52.44 +            u64 slice;
   52.45 +            u64 latency;
   52.46              int xtratime;
   52.47          } atropos;
   52.48      } u;
    53.1 --- a/xen/include/xen/blkdev.h	Fri Apr 02 13:58:27 2004 +0000
    53.2 +++ b/xen/include/xen/blkdev.h	Fri Apr 02 14:03:03 2004 +0000
    53.3 @@ -26,9 +26,15 @@ typedef struct {
    53.4  
    53.5  extern kdev_t xendev_to_physdev(unsigned short xendev);
    53.6  
    53.7 +#ifndef NO_DEVICES_IN_XEN
    53.8  extern void init_blkdev_info(struct task_struct *);
    53.9  extern void unlink_blkdev_info(struct task_struct *);
   53.10  extern void destroy_blkdev_info(struct task_struct *);
   53.11 +#else
   53.12 +#define init_blkdev_info(_p)    ((void)0)
   53.13 +#define unlink_blkdev_info(_p)  ((void)0)
   53.14 +#define destroy_blkdev_info(_p) ((void)0)
   53.15 +#endif
   53.16  
   53.17  extern int unregister_blkdev(unsigned int, const char *);
   53.18  extern int invalidate_device(kdev_t, int);
    54.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    54.2 +++ b/xen/include/xen/elf.h	Fri Apr 02 14:03:03 2004 +0000
    54.3 @@ -0,0 +1,523 @@
    54.4 +/*
    54.5 + * Copyright (c) 1995, 1996 Erik Theisen.  All rights reserved.
    54.6 + *
    54.7 + * Redistribution and use in source and binary forms, with or without
    54.8 + * modification, are permitted provided that the following conditions
    54.9 + * are met:
   54.10 + * 1. Redistributions of source code must retain the above copyright
   54.11 + *    notice, this list of conditions and the following disclaimer.
   54.12 + * 2. Redistributions in binary form must reproduce the above copyright
   54.13 + *    notice, this list of conditions and the following disclaimer in the
   54.14 + *    documentation and/or other materials provided with the distribution.
   54.15 + * 3. The name of the author may not be used to endorse or promote products
   54.16 + *    derived from this software without specific prior written permission
   54.17 + *
   54.18 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   54.19 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   54.20 + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   54.21 + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   54.22 + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   54.23 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   54.24 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   54.25 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   54.26 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   54.27 + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   54.28 + */
   54.29 +
   54.30 +typedef u8	Elf_Byte;
   54.31 +
   54.32 +typedef u32	Elf32_Addr;	/* Unsigned program address */
   54.33 +typedef u32	Elf32_Off;	/* Unsigned file offset */
   54.34 +typedef s32	Elf32_Sword;	/* Signed large integer */
   54.35 +typedef u32	Elf32_Word;	/* Unsigned large integer */
   54.36 +typedef u16	Elf32_Half;	/* Unsigned medium integer */
   54.37 +
   54.38 +typedef u64	Elf64_Addr;
   54.39 +typedef u64	Elf64_Off;
   54.40 +typedef s32	Elf64_Shalf;
   54.41 +
   54.42 +typedef s32	Elf64_Sword;
   54.43 +typedef u32	Elf64_Word;
   54.44 +
   54.45 +typedef s64	Elf64_Sxword;
   54.46 +typedef u64	Elf64_Xword;
   54.47 +
   54.48 +typedef u32	Elf64_Half;
   54.49 +typedef u16	Elf64_Quarter;
   54.50 +
   54.51 +/*
   54.52 + * e_ident[] identification indexes 
   54.53 + * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html
   54.54 + */
   54.55 +#define EI_MAG0		0		/* file ID */
   54.56 +#define EI_MAG1		1		/* file ID */
   54.57 +#define EI_MAG2		2		/* file ID */
   54.58 +#define EI_MAG3		3		/* file ID */
   54.59 +#define EI_CLASS	4		/* file class */
   54.60 +#define EI_DATA		5		/* data encoding */
   54.61 +#define EI_VERSION	6		/* ELF header version */
   54.62 +#define EI_OSABI	7		/* OS/ABI ID */
   54.63 +#define EI_ABIVERSION	8		/* ABI version */ 
   54.64 +#define EI_PAD		9		/* start of pad bytes */
   54.65 +#define EI_NIDENT	16		/* Size of e_ident[] */
   54.66 +
   54.67 +/* e_ident[] magic number */
   54.68 +#define	ELFMAG0		0x7f		/* e_ident[EI_MAG0] */
   54.69 +#define	ELFMAG1		'E'		/* e_ident[EI_MAG1] */
   54.70 +#define	ELFMAG2		'L'		/* e_ident[EI_MAG2] */
   54.71 +#define	ELFMAG3		'F'		/* e_ident[EI_MAG3] */
   54.72 +#define	ELFMAG		"\177ELF"	/* magic */
   54.73 +#define	SELFMAG		4		/* size of magic */
   54.74 +
   54.75 +/* e_ident[] file class */
   54.76 +#define	ELFCLASSNONE	0		/* invalid */
   54.77 +#define	ELFCLASS32	1		/* 32-bit objs */
   54.78 +#define	ELFCLASS64	2		/* 64-bit objs */
   54.79 +#define	ELFCLASSNUM	3		/* number of classes */
   54.80 +
   54.81 +/* e_ident[] data encoding */
   54.82 +#define ELFDATANONE	0		/* invalid */
   54.83 +#define ELFDATA2LSB	1		/* Little-Endian */
   54.84 +#define ELFDATA2MSB	2		/* Big-Endian */
   54.85 +#define ELFDATANUM	3		/* number of data encode defines */
   54.86 +
   54.87 +/* e_ident[] Operating System/ABI */
   54.88 +#define ELFOSABI_SYSV		0	/* UNIX System V ABI */
   54.89 +#define ELFOSABI_HPUX		1	/* HP-UX operating system */
   54.90 +#define ELFOSABI_NETBSD		2	/* NetBSD */
   54.91 +#define ELFOSABI_LINUX		3	/* GNU/Linux */
   54.92 +#define ELFOSABI_HURD		4	/* GNU/Hurd */
   54.93 +#define ELFOSABI_86OPEN		5	/* 86Open common IA32 ABI */
   54.94 +#define ELFOSABI_SOLARIS	6	/* Solaris */
   54.95 +#define ELFOSABI_MONTEREY	7	/* Monterey */
   54.96 +#define ELFOSABI_IRIX		8	/* IRIX */
   54.97 +#define ELFOSABI_FREEBSD	9	/* FreeBSD */
   54.98 +#define ELFOSABI_TRU64		10	/* TRU64 UNIX */
   54.99 +#define ELFOSABI_MODESTO	11	/* Novell Modesto */
  54.100 +#define ELFOSABI_OPENBSD	12	/* OpenBSD */
  54.101 +#define ELFOSABI_ARM		97	/* ARM */
  54.102 +#define ELFOSABI_STANDALONE	255	/* Standalone (embedded) application */
  54.103 +
  54.104 +/* e_ident */
  54.105 +#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \
  54.106 +                      (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \
  54.107 +                      (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \
  54.108 +                      (ehdr).e_ident[EI_MAG3] == ELFMAG3)
  54.109 +
  54.110 +/* ELF Header */
  54.111 +typedef struct elfhdr {
  54.112 +	unsigned char	e_ident[EI_NIDENT]; /* ELF Identification */
  54.113 +	Elf32_Half	e_type;		/* object file type */
  54.114 +	Elf32_Half	e_machine;	/* machine */
  54.115 +	Elf32_Word	e_version;	/* object file version */
  54.116 +	Elf32_Addr	e_entry;	/* virtual entry point */
  54.117 +	Elf32_Off	e_phoff;	/* program header table offset */
  54.118 +	Elf32_Off	e_shoff;	/* section header table offset */
  54.119 +	Elf32_Word	e_flags;	/* processor-specific flags */
  54.120 +	Elf32_Half	e_ehsize;	/* ELF header size */
  54.121 +	Elf32_Half	e_phentsize;	/* program header entry size */
  54.122 +	Elf32_Half	e_phnum;	/* number of program header entries */
  54.123 +	Elf32_Half	e_shentsize;	/* section header entry size */
  54.124 +	Elf32_Half	e_shnum;	/* number of section header entries */
  54.125 +	Elf32_Half	e_shstrndx;	/* section header table's "section 
  54.126 +					   header string table" entry offset */
  54.127 +} Elf32_Ehdr;
  54.128 +
  54.129 +typedef struct {
  54.130 +	unsigned char	e_ident[EI_NIDENT];	/* Id bytes */
  54.131 +	Elf64_Quarter	e_type;			/* file type */
  54.132 +	Elf64_Quarter	e_machine;		/* machine type */
  54.133 +	Elf64_Half	e_version;		/* version number */
  54.134 +	Elf64_Addr	e_entry;		/* entry point */
  54.135 +	Elf64_Off	e_phoff;		/* Program hdr offset */
  54.136 +	Elf64_Off	e_shoff;		/* Section hdr offset */
  54.137 +	Elf64_Half	e_flags;		/* Processor flags */
  54.138 +	Elf64_Quarter	e_ehsize;		/* sizeof ehdr */
  54.139 +	Elf64_Quarter	e_phentsize;		/* Program header entry size */
  54.140 +	Elf64_Quarter	e_phnum;		/* Number of program headers */
  54.141 +	Elf64_Quarter	e_shentsize;		/* Section header entry size */
  54.142 +	Elf64_Quarter	e_shnum;		/* Number of section headers */
  54.143 +	Elf64_Quarter	e_shstrndx;		/* String table index */
  54.144 +} Elf64_Ehdr;
  54.145 +
  54.146 +/* e_type */
  54.147 +#define ET_NONE		0		/* No file type */
  54.148 +#define ET_REL		1		/* relocatable file */
  54.149 +#define ET_EXEC		2		/* executable file */
  54.150 +#define ET_DYN		3		/* shared object file */
  54.151 +#define ET_CORE		4		/* core file */
  54.152 +#define ET_NUM		5		/* number of types */
  54.153 +#define ET_LOPROC	0xff00		/* reserved range for processor */
  54.154 +#define ET_HIPROC	0xffff		/*  specific e_type */
  54.155 +
  54.156 +/* e_machine */
  54.157 +#define EM_NONE		0		/* No Machine */
  54.158 +#define EM_M32		1		/* AT&T WE 32100 */
  54.159 +#define EM_SPARC	2		/* SPARC */
  54.160 +#define EM_386		3		/* Intel 80386 */
  54.161 +#define EM_68K		4		/* Motorola 68000 */
  54.162 +#define EM_88K		5		/* Motorola 88000 */
  54.163 +#define EM_486		6		/* Intel 80486 - unused? */
  54.164 +#define EM_860		7		/* Intel 80860 */
  54.165 +#define EM_MIPS		8		/* MIPS R3000 Big-Endian only */
  54.166 +/* 
  54.167 + * Don't know if EM_MIPS_RS4_BE,
  54.168 + * EM_SPARC64, EM_PARISC,
  54.169 + * or EM_PPC are ABI compliant
  54.170 + */
  54.171 +#define EM_MIPS_RS4_BE	10		/* MIPS R4000 Big-Endian */
   54.172 +#define EM_SPARC64	11		/* SPARC v9 64-bit unofficial */
  54.173 +#define EM_PARISC	15		/* HPPA */
  54.174 +#define EM_SPARC32PLUS	18		/* Enhanced instruction set SPARC */
  54.175 +#define EM_PPC		20		/* PowerPC */
  54.176 +#define EM_ARM		40		/* Advanced RISC Machines ARM */
  54.177 +#define EM_ALPHA	41		/* DEC ALPHA */
  54.178 +#define EM_SPARCV9	43		/* SPARC version 9 */
  54.179 +#define EM_ALPHA_EXP	0x9026		/* DEC ALPHA */
  54.180 +#define EM_X86_64	62		/* AMD x86-64 architecture */
  54.181 +#define EM_VAX		75		/* DEC VAX */
  54.182 +#define EM_NUM		15		/* number of machine types */
  54.183 +
  54.184 +/* Version */
  54.185 +#define EV_NONE		0		/* Invalid */
  54.186 +#define EV_CURRENT	1		/* Current */
  54.187 +#define EV_NUM		2		/* number of versions */
  54.188 +
  54.189 +/* Section Header */
  54.190 +typedef struct {
  54.191 +	Elf32_Word	sh_name;	/* name - index into section header
  54.192 +					   string table section */
  54.193 +	Elf32_Word	sh_type;	/* type */
  54.194 +	Elf32_Word	sh_flags;	/* flags */
  54.195 +	Elf32_Addr	sh_addr;	/* address */
  54.196 +	Elf32_Off	sh_offset;	/* file offset */
  54.197 +	Elf32_Word	sh_size;	/* section size */
  54.198 +	Elf32_Word	sh_link;	/* section header table index link */
  54.199 +	Elf32_Word	sh_info;	/* extra information */
  54.200 +	Elf32_Word	sh_addralign;	/* address alignment */
  54.201 +	Elf32_Word	sh_entsize;	/* section entry size */
  54.202 +} Elf32_Shdr;
  54.203 +
  54.204 +typedef struct {
  54.205 +	Elf64_Half	sh_name;	/* section name */
  54.206 +	Elf64_Half	sh_type;	/* section type */
  54.207 +	Elf64_Xword	sh_flags;	/* section flags */
  54.208 +	Elf64_Addr	sh_addr;	/* virtual address */
  54.209 +	Elf64_Off	sh_offset;	/* file offset */
  54.210 +	Elf64_Xword	sh_size;	/* section size */
  54.211 +	Elf64_Half	sh_link;	/* link to another */
  54.212 +	Elf64_Half	sh_info;	/* misc info */
  54.213 +	Elf64_Xword	sh_addralign;	/* memory alignment */
  54.214 +	Elf64_Xword	sh_entsize;	/* table entry size */
  54.215 +} Elf64_Shdr;
  54.216 +
  54.217 +/* Special Section Indexes */
  54.218 +#define SHN_UNDEF	0		/* undefined */
  54.219 +#define SHN_LORESERVE	0xff00		/* lower bounds of reserved indexes */
  54.220 +#define SHN_LOPROC	0xff00		/* reserved range for processor */
  54.221 +#define SHN_HIPROC	0xff1f		/*   specific section indexes */
  54.222 +#define SHN_ABS		0xfff1		/* absolute value */
  54.223 +#define SHN_COMMON	0xfff2		/* common symbol */
  54.224 +#define SHN_HIRESERVE	0xffff		/* upper bounds of reserved indexes */
  54.225 +
  54.226 +/* sh_type */
  54.227 +#define SHT_NULL	0		/* inactive */
  54.228 +#define SHT_PROGBITS	1		/* program defined information */
  54.229 +#define SHT_SYMTAB	2		/* symbol table section */
  54.230 +#define SHT_STRTAB	3		/* string table section */
  54.231 +#define SHT_RELA	4		/* relocation section with addends*/
  54.232 +#define SHT_HASH	5		/* symbol hash table section */
  54.233 +#define SHT_DYNAMIC	6		/* dynamic section */
  54.234 +#define SHT_NOTE	7		/* note section */
  54.235 +#define SHT_NOBITS	8		/* no space section */
   54.236 +#define SHT_REL		9		/* relocation section without addends */
  54.237 +#define SHT_SHLIB	10		/* reserved - purpose unknown */
  54.238 +#define SHT_DYNSYM	11		/* dynamic symbol table section */
  54.239 +#define SHT_NUM		12		/* number of section types */
  54.240 +#define SHT_LOPROC	0x70000000	/* reserved range for processor */
  54.241 +#define SHT_HIPROC	0x7fffffff	/*  specific section header types */
  54.242 +#define SHT_LOUSER	0x80000000	/* reserved range for application */
  54.243 +#define SHT_HIUSER	0xffffffff	/*  specific indexes */
  54.244 +
  54.245 +/* Section names */
  54.246 +#define ELF_BSS         ".bss"		/* uninitialized data */
  54.247 +#define ELF_DATA        ".data"		/* initialized data */
  54.248 +#define ELF_DEBUG       ".debug"	/* debug */
  54.249 +#define ELF_DYNAMIC     ".dynamic"	/* dynamic linking information */
  54.250 +#define ELF_DYNSTR      ".dynstr"	/* dynamic string table */
  54.251 +#define ELF_DYNSYM      ".dynsym"	/* dynamic symbol table */
  54.252 +#define ELF_FINI        ".fini"		/* termination code */
  54.253 +#define ELF_GOT         ".got"		/* global offset table */
  54.254 +#define ELF_HASH        ".hash"		/* symbol hash table */
  54.255 +#define ELF_INIT        ".init"		/* initialization code */
  54.256 +#define ELF_REL_DATA    ".rel.data"	/* relocation data */
  54.257 +#define ELF_REL_FINI    ".rel.fini"	/* relocation termination code */
  54.258 +#define ELF_REL_INIT    ".rel.init"	/* relocation initialization code */
   54.259 +#define ELF_REL_DYN     ".rel.dyn"	/* relocation dynamic link info */
  54.260 +#define ELF_REL_RODATA  ".rel.rodata"	/* relocation read-only data */
  54.261 +#define ELF_REL_TEXT    ".rel.text"	/* relocation code */
  54.262 +#define ELF_RODATA      ".rodata"	/* read-only data */
  54.263 +#define ELF_SHSTRTAB    ".shstrtab"	/* section header string table */
  54.264 +#define ELF_STRTAB      ".strtab"	/* string table */
  54.265 +#define ELF_SYMTAB      ".symtab"	/* symbol table */
  54.266 +#define ELF_TEXT        ".text"		/* code */
  54.267 +
  54.268 +
  54.269 +/* Section Attribute Flags - sh_flags */
  54.270 +#define SHF_WRITE	0x1		/* Writable */
  54.271 +#define SHF_ALLOC	0x2		/* occupies memory */
  54.272 +#define SHF_EXECINSTR	0x4		/* executable */
  54.273 +#define SHF_MASKPROC	0xf0000000	/* reserved bits for processor */
  54.274 +					/*  specific section attributes */
  54.275 +
  54.276 +/* Symbol Table Entry */
  54.277 +typedef struct elf32_sym {
  54.278 +	Elf32_Word	st_name;	/* name - index into string table */
  54.279 +	Elf32_Addr	st_value;	/* symbol value */
  54.280 +	Elf32_Word	st_size;	/* symbol size */
  54.281 +	unsigned char	st_info;	/* type and binding */
  54.282 +	unsigned char	st_other;	/* 0 - no defined meaning */
  54.283 +	Elf32_Half	st_shndx;	/* section header index */
  54.284 +} Elf32_Sym;
  54.285 +
  54.286 +typedef struct {
  54.287 +	Elf64_Half	st_name;	/* Symbol name index in str table */
  54.288 +	Elf_Byte	st_info;	/* type / binding attrs */
  54.289 +	Elf_Byte	st_other;	/* unused */
  54.290 +	Elf64_Quarter	st_shndx;	/* section index of symbol */
  54.291 +	Elf64_Xword	st_value;	/* value of symbol */
  54.292 +	Elf64_Xword	st_size;	/* size of symbol */
  54.293 +} Elf64_Sym;
  54.294 +
  54.295 +/* Symbol table index */
  54.296 +#define STN_UNDEF	0		/* undefined */
  54.297 +
  54.298 +/* Extract symbol info - st_info: binding is the high nibble, type the low */
  54.299 +#define ELF32_ST_BIND(x)	((x) >> 4)
  54.300 +#define ELF32_ST_TYPE(x)	(((unsigned int) (x)) & 0xf)
  54.301 +#define ELF32_ST_INFO(b,t)	(((b) << 4) + ((t) & 0xf))
  54.302 +
  54.303 +#define ELF64_ST_BIND(x)	((x) >> 4)
  54.304 +#define ELF64_ST_TYPE(x)	(((unsigned int) (x)) & 0xf)
  54.305 +#define ELF64_ST_INFO(b,t)	(((b) << 4) + ((t) & 0xf))
  54.306 +
  54.307 +/* Symbol Binding - ELF32_ST_BIND - st_info */
  54.308 +#define STB_LOCAL	0		/* Local symbol */
  54.309 +#define STB_GLOBAL	1		/* Global symbol */
  54.310 +#define STB_WEAK	2		/* like global - lower precedence */
  54.311 +#define STB_NUM		3		/* number of symbol bindings */
  54.312 +#define STB_LOPROC	13		/* reserved range for processor */
  54.313 +#define STB_HIPROC	15		/*  specific symbol bindings */
  54.314 +
  54.315 +/* Symbol type - ELF32_ST_TYPE - st_info */
  54.316 +#define STT_NOTYPE	0		/* not specified */
  54.317 +#define STT_OBJECT	1		/* data object */
  54.318 +#define STT_FUNC	2		/* function */
  54.319 +#define STT_SECTION	3		/* section */
  54.320 +#define STT_FILE	4		/* file */
  54.321 +#define STT_NUM		5		/* number of symbol types */
  54.322 +#define STT_LOPROC	13		/* reserved range for processor */
  54.323 +#define STT_HIPROC	15		/*  specific symbol types */
  54.324 +
  54.325 +/* Relocation entry with implicit addend */
  54.326 +typedef struct {
  54.327 +	Elf32_Addr	r_offset;	/* offset of relocation */
  54.328 +	Elf32_Word	r_info;		/* symbol table index and type */
  54.329 +} Elf32_Rel;
  54.330 +
  54.331 +/* Relocation entry with explicit addend */
  54.332 +typedef struct {
  54.333 +	Elf32_Addr	r_offset;	/* offset of relocation */
  54.334 +	Elf32_Word	r_info;		/* symbol table index and type */
  54.335 +	Elf32_Sword	r_addend;
  54.336 +} Elf32_Rela;
  54.337 +
  54.338 +/* Extract relocation info - r_info */
  54.339 +#define ELF32_R_SYM(i)		((i) >> 8)
  54.340 +#define ELF32_R_TYPE(i)		((unsigned char) (i))
  54.341 +#define ELF32_R_INFO(s,t) 	(((s) << 8) + (unsigned char)(t))
  54.342 +
  54.343 +typedef struct {
  54.344 +	Elf64_Xword	r_offset;	/* where to do it */
  54.345 +	Elf64_Xword	r_info;		/* index & type of relocation */
  54.346 +} Elf64_Rel;
  54.347 +
  54.348 +typedef struct {
  54.349 +	Elf64_Xword	r_offset;	/* where to do it */
  54.350 +	Elf64_Xword	r_info;		/* index & type of relocation */
  54.351 +	Elf64_Sxword	r_addend;	/* adjustment value */
  54.352 +} Elf64_Rela;
  54.353 +
  54.354 +#define	ELF64_R_SYM(info)	((info) >> 32)
  54.355 +#define	ELF64_R_TYPE(info)	((info) & 0xFFFFFFFF)
  54.356 +#define ELF64_R_INFO(s,t) 	(((Elf64_Xword)(s) << 32) + (u32)(t)) /* widen s: a 32-bit s << 32 is undefined */
  54.357 +
  54.358 +/* Program Header */
  54.359 +typedef struct {
  54.360 +	Elf32_Word	p_type;		/* segment type */
  54.361 +	Elf32_Off	p_offset;	/* segment offset */
  54.362 +	Elf32_Addr	p_vaddr;	/* virtual address of segment */
  54.363 +	Elf32_Addr	p_paddr;	/* physical address - ignored? */
  54.364 +	Elf32_Word	p_filesz;	/* number of bytes in file for seg. */
  54.365 +	Elf32_Word	p_memsz;	/* number of bytes in mem. for seg. */
  54.366 +	Elf32_Word	p_flags;	/* flags */
  54.367 +	Elf32_Word	p_align;	/* memory alignment */
  54.368 +} Elf32_Phdr;
  54.369 +
  54.370 +typedef struct {
  54.371 +	Elf64_Half	p_type;		/* entry type */
  54.372 +	Elf64_Half	p_flags;	/* flags */
  54.373 +	Elf64_Off	p_offset;	/* offset */
  54.374 +	Elf64_Addr	p_vaddr;	/* virtual address */
  54.375 +	Elf64_Addr	p_paddr;	/* physical address */
  54.376 +	Elf64_Xword	p_filesz;	/* file size */
  54.377 +	Elf64_Xword	p_memsz;	/* memory size */
  54.378 +	Elf64_Xword	p_align;	/* memory & file alignment */
  54.379 +} Elf64_Phdr;
  54.380 +
  54.381 +/* Segment types - p_type */
  54.382 +#define PT_NULL		0		/* unused */
  54.383 +#define PT_LOAD		1		/* loadable segment */
  54.384 +#define PT_DYNAMIC	2		/* dynamic linking section */
  54.385 +#define PT_INTERP	3		/* the RTLD */
  54.386 +#define PT_NOTE		4		/* auxiliary information */
  54.387 +#define PT_SHLIB	5		/* reserved - purpose undefined */
  54.388 +#define PT_PHDR		6		/* program header */
  54.389 +#define PT_NUM		7		/* Number of segment types */
  54.390 +#define PT_LOPROC	0x70000000	/* reserved range for processor */
  54.391 +#define PT_HIPROC	0x7fffffff	/*  specific segment types */
  54.392 +
  54.393 +/* Segment flags - p_flags */
  54.394 +#define PF_X		0x1		/* Executable */
  54.395 +#define PF_W		0x2		/* Writable */
  54.396 +#define PF_R		0x4		/* Readable */
  54.397 +#define PF_MASKPROC	0xf0000000	/* reserved bits for processor */
  54.398 +					/*  specific segment flags */
  54.399 +
  54.400 +/* Dynamic structure */
  54.401 +typedef struct {
  54.402 +	Elf32_Sword	d_tag;		/* controls meaning of d_val */
  54.403 +	union {
  54.404 +		Elf32_Word	d_val;	/* Multiple meanings - see d_tag */
  54.405 +		Elf32_Addr	d_ptr;	/* program virtual address */
  54.406 +	} d_un;
  54.407 +} Elf32_Dyn;
  54.408 +
  54.409 +typedef struct {
  54.410 +	Elf64_Xword	d_tag;		/* controls meaning of d_val */
  54.411 +	union {
  54.412 +		Elf64_Addr	d_ptr;
  54.413 +		Elf64_Xword	d_val;
  54.414 +	} d_un;
  54.415 +} Elf64_Dyn;
  54.416 +
  54.417 +/* Dynamic Array Tags - d_tag */
  54.418 +#define DT_NULL		0		/* marks end of _DYNAMIC array */
  54.419 +#define DT_NEEDED	1		/* string table offset of needed lib */
  54.420 +#define DT_PLTRELSZ	2		/* size of relocation entries in PLT */
  54.421 +#define DT_PLTGOT	3		/* address PLT/GOT */
  54.422 +#define DT_HASH		4		/* address of symbol hash table */
  54.423 +#define DT_STRTAB	5		/* address of string table */
  54.424 +#define DT_SYMTAB	6		/* address of symbol table */
  54.425 +#define DT_RELA		7		/* address of relocation table */
  54.426 +#define DT_RELASZ	8		/* size of relocation table */
  54.427 +#define DT_RELAENT	9		/* size of relocation entry */
  54.428 +#define DT_STRSZ	10		/* size of string table */
  54.429 +#define DT_SYMENT	11		/* size of symbol table entry */
  54.430 +#define DT_INIT		12		/* address of initialization func. */
  54.431 +#define DT_FINI		13		/* address of termination function */
  54.432 +#define DT_SONAME	14		/* string table offset of shared obj */
  54.433 +#define DT_RPATH	15		/* string table offset of library
  54.434 +					   search path */
  54.435 +#define DT_SYMBOLIC	16		/* start sym search in shared obj. */
  54.436 +#define DT_REL		17		/* addr. of rel. tbl. w/o explicit addends */
  54.437 +#define DT_RELSZ	18		/* size of DT_REL relocation table */
  54.438 +#define DT_RELENT	19		/* size of DT_REL relocation entry */
  54.439 +#define DT_PLTREL	20		/* PLT referenced relocation entry */
  54.440 +#define DT_DEBUG	21		/* used for debugging */
  54.441 +#define DT_TEXTREL	22		/* Allow rel. mod. to unwritable seg */
  54.442 +#define DT_JMPREL	23		/* add. of PLT's relocation entries */
  54.443 +#define DT_BIND_NOW	24		/* Bind now regardless of env setting */
  54.444 +#define DT_NUM		25		/* Number used. */
  54.445 +#define DT_LOPROC	0x70000000	/* reserved range for processor */
  54.446 +#define DT_HIPROC	0x7fffffff	/*  specific dynamic array tags */
  54.447 +	
  54.448 +/* Standard ELF hashing function */
  54.449 +unsigned int elf_hash(const unsigned char *name);
  54.450 +
  54.451 +/*
  54.452 + * Note Definitions
  54.453 + */
  54.454 +typedef struct {
  54.455 +	Elf32_Word namesz;
  54.456 +	Elf32_Word descsz;
  54.457 +	Elf32_Word type;
  54.458 +} Elf32_Note;
  54.459 +
  54.460 +typedef struct {
  54.461 +	Elf64_Half namesz;
  54.462 +	Elf64_Half descsz;
  54.463 +	Elf64_Half type;
  54.464 +} Elf64_Note;
  54.465 +
  54.466 +
  54.467 +#if defined(ELFSIZE)
  54.468 +#define CONCAT(x,y)	__CONCAT(x,y)
  54.469 +#define ELFNAME(x)	CONCAT(elf,CONCAT(ELFSIZE,CONCAT(_,x)))
  54.470 +#define ELFNAME2(x,y)	CONCAT(x,CONCAT(_elf,CONCAT(ELFSIZE,CONCAT(_,y))))
  54.471 +#define ELFNAMEEND(x)	CONCAT(x,CONCAT(_elf,ELFSIZE))
  54.472 +#define ELFDEFNNAME(x)	CONCAT(ELF,CONCAT(ELFSIZE,CONCAT(_,x)))
  54.473 +#endif
  54.474 +
  54.475 +#if defined(ELFSIZE) && (ELFSIZE == 32)
  54.476 +#define Elf_Ehdr	Elf32_Ehdr
  54.477 +#define Elf_Phdr	Elf32_Phdr
  54.478 +#define Elf_Shdr	Elf32_Shdr
  54.479 +#define Elf_Sym		Elf32_Sym
  54.480 +#define Elf_Rel		Elf32_Rel
  54.481 +#define Elf_RelA	Elf32_Rela
  54.482 +#define Elf_Dyn		Elf32_Dyn
  54.483 +#define Elf_Word	Elf32_Word
  54.484 +#define Elf_Sword	Elf32_Sword
  54.485 +#define Elf_Addr	Elf32_Addr
  54.486 +#define Elf_Off		Elf32_Off
  54.487 +#define Elf_Nhdr	Elf32_Nhdr
  54.488 +#define Elf_Note	Elf32_Note
  54.489 +
  54.490 +#define ELF_R_SYM	ELF32_R_SYM
  54.491 +#define ELF_R_TYPE	ELF32_R_TYPE
  54.492 +#define ELF_R_INFO	ELF32_R_INFO
  54.493 +#define ELFCLASS	ELFCLASS32
  54.494 +
  54.495 +#define ELF_ST_BIND	ELF32_ST_BIND
  54.496 +#define ELF_ST_TYPE	ELF32_ST_TYPE
  54.497 +#define ELF_ST_INFO	ELF32_ST_INFO
  54.498 +
  54.499 +#define AuxInfo		Aux32Info
  54.500 +#elif defined(ELFSIZE) && (ELFSIZE == 64)
  54.501 +#define Elf_Ehdr	Elf64_Ehdr
  54.502 +#define Elf_Phdr	Elf64_Phdr
  54.503 +#define Elf_Shdr	Elf64_Shdr
  54.504 +#define Elf_Sym		Elf64_Sym
  54.505 +#define Elf_Rel		Elf64_Rel
  54.506 +#define Elf_RelA	Elf64_Rela
  54.507 +#define Elf_Dyn		Elf64_Dyn
  54.508 +#define Elf_Word	Elf64_Word
  54.509 +#define Elf_Sword	Elf64_Sword
  54.510 +#define Elf_Addr	Elf64_Addr
  54.511 +#define Elf_Off		Elf64_Off
  54.512 +#define Elf_Nhdr	Elf64_Nhdr
  54.513 +#define Elf_Note	Elf64_Note
  54.514 +
  54.515 +#define ELF_R_SYM	ELF64_R_SYM
  54.516 +#define ELF_R_TYPE	ELF64_R_TYPE
  54.517 +#define ELF_R_INFO	ELF64_R_INFO
  54.518 +#define ELFCLASS	ELFCLASS64
  54.519 +
  54.520 +#define ELF_ST_BIND	ELF64_ST_BIND
  54.521 +#define ELF_ST_TYPE	ELF64_ST_TYPE
  54.522 +#define ELF_ST_INFO	ELF64_ST_INFO
  54.523 +
  54.524 +#define AuxInfo		Aux64Info
  54.525 +#endif
  54.526 +
    55.1 --- a/xen/include/xen/event.h	Fri Apr 02 13:58:27 2004 +0000
    55.2 +++ b/xen/include/xen/event.h	Fri Apr 02 14:03:03 2004 +0000
    55.3 @@ -18,7 +18,7 @@
    55.4   */
    55.5  
    55.6  /* Schedule an asynchronous callback for the specified domain. */
    55.7 -static inline void __guest_notify(struct task_struct *p)
    55.8 +static inline void guest_schedule_to_run(struct task_struct *p)
    55.9  {
   55.10  #ifdef CONFIG_SMP
   55.11      unsigned long flags, cpu_mask;
   55.12 @@ -41,23 +41,11 @@ static inline void __guest_notify(struct
   55.13  #endif
   55.14  }
   55.15  
   55.16 -static inline void guest_notify(struct task_struct *p)
   55.17 -{
   55.18 -    /*
   55.19 -     * Upcall already pending or upcalls masked?
   55.20 -     * NB. Suitably synchronised on x86:
   55.21 -     *  We must set the pending bit before checking the mask, but this is
   55.22 -     *  guaranteed to occur because test_and_set_bit() is an ordering barrier.
   55.23 -     */
   55.24 -    if ( !test_and_set_bit(0, &p->shared_info->evtchn_upcall_pending) &&
   55.25 -         !test_bit(0, &p->shared_info->evtchn_upcall_mask) )
   55.26 -        __guest_notify(p);
   55.27 -}
   55.28 -
   55.29 -
   55.30  /*
   55.31   * EVENT-CHANNEL NOTIFICATIONS
   55.32 - * NB. As in guest_notify, evtchn_set_* is suitably synchronised on x86.
   55.33 + * NB. On x86, the atomic bit operations also act as memory barriers. There
   55.34 + * is therefore sufficiently strict ordering for this architecture -- others
   55.35 + * may require explicit memory barriers.
   55.36   */
   55.37  
   55.38  static inline void evtchn_set_pending(struct task_struct *p, int port)
   55.39 @@ -66,16 +54,17 @@ static inline void evtchn_set_pending(st
   55.40      if ( !test_and_set_bit(port,    &s->evtchn_pending[0]) &&
   55.41           !test_bit        (port,    &s->evtchn_mask[0])    &&
   55.42           !test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
   55.43 -        guest_notify(p);
   55.44 +    {
   55.45 +        /* The VCPU pending flag must be set /after/ update to evtchn-pend. */
   55.46 +        p->shared_info->vcpu_data[0].evtchn_upcall_pending = 1;
   55.47 +        guest_schedule_to_run(p);
   55.48 +    }
   55.49  }
   55.50  
   55.51  static inline void evtchn_set_exception(struct task_struct *p, int port)
   55.52  {
   55.53 -    shared_info_t *s = p->shared_info;
   55.54 -    if ( !test_and_set_bit(port,    &s->evtchn_exception[0]) &&
   55.55 -         !test_bit        (port,    &s->evtchn_mask[0])      &&
   55.56 -         !test_and_set_bit(port>>5, &s->evtchn_exception_sel) )
   55.57 -        guest_notify(p);
   55.58 +    if ( !test_and_set_bit(port, &p->shared_info->evtchn_exception[0]) )
   55.59 +        evtchn_set_pending(p, port);
   55.60  }
   55.61  
   55.62  /*
   55.63 @@ -106,7 +95,7 @@ static inline void send_guest_pirq(struc
   55.64  static inline void send_hyp_event(struct task_struct *p, int event)
   55.65  {
   55.66      if ( !test_and_set_bit(event, &p->hyp_events) )
   55.67 -        __guest_notify(p);
   55.68 +        guest_schedule_to_run(p);
   55.69  }
   55.70  
   55.71  /* Called on return from (architecture-dependent) entry.S. */
    56.1 --- a/xen/include/xen/interrupt.h	Fri Apr 02 13:58:27 2004 +0000
    56.2 +++ b/xen/include/xen/interrupt.h	Fri Apr 02 14:03:03 2004 +0000
    56.3 @@ -16,7 +16,7 @@ typedef void irqreturn_t;
    56.4  #define IRQ_NONE
    56.5  #define IRQ_HANDLED
    56.6  #define IRQ_RETVAL(x)
    56.7 -                                                                                
    56.8 +
    56.9  struct irqaction {
   56.10  	void (*handler)(int, void *, struct pt_regs *);
   56.11  	unsigned long flags;
   56.12 @@ -26,7 +26,6 @@ struct irqaction {
   56.13  	struct irqaction *next;
   56.14  };
   56.15  
   56.16 -
   56.17  enum {
   56.18  	TIMER_BH = 0,
   56.19  	SCSI_BH
    57.1 --- a/xen/include/xen/irq.h	Fri Apr 02 13:58:27 2004 +0000
    57.2 +++ b/xen/include/xen/irq.h	Fri Apr 02 14:03:03 2004 +0000
    57.3 @@ -1,5 +1,5 @@
    57.4 -#ifndef __irq_h
    57.5 -#define __irq_h
    57.6 +#ifndef __XEN_IRQ_H__
    57.7 +#define __XEN_IRQ_H__
    57.8  
    57.9  #include <xen/config.h>
   57.10  #include <xen/spinlock.h>
   57.11 @@ -14,26 +14,24 @@
   57.12  #define IRQ_REPLAY	8	/* IRQ has been replayed but not acked yet */
   57.13  #define IRQ_AUTODETECT	16	/* IRQ is being autodetected */
   57.14  #define IRQ_WAITING	32	/* IRQ not yet seen - for autodetection */
   57.15 -#define IRQ_LEVEL	64	/* IRQ level triggered */
   57.16 -#define IRQ_MASKED	128	/* IRQ masked - shouldn't be seen again */
   57.17 -#define IRQ_PER_CPU	256	/* IRQ is per CPU */
   57.18 +#define IRQ_GUEST       64      /* IRQ is handled by guest OS(es) */
   57.19  
   57.20  /*
   57.21   * Interrupt controller descriptor. This is all we need
   57.22   * to describe about the low-level hardware. 
   57.23   */
   57.24  struct hw_interrupt_type {
   57.25 -	const char * typename;
   57.26 -	unsigned int (*startup)(unsigned int irq);
   57.27 -	void (*shutdown)(unsigned int irq);
   57.28 -	void (*enable)(unsigned int irq);
   57.29 -	void (*disable)(unsigned int irq);
   57.30 -	void (*ack)(unsigned int irq);
   57.31 -	void (*end)(unsigned int irq);
   57.32 -	void (*set_affinity)(unsigned int irq, unsigned long mask);
   57.33 +    const char *typename;
   57.34 +    unsigned int (*startup)(unsigned int irq);
   57.35 +    void (*shutdown)(unsigned int irq);
   57.36 +    void (*enable)(unsigned int irq);
   57.37 +    void (*disable)(unsigned int irq);
   57.38 +    void (*ack)(unsigned int irq);
   57.39 +    void (*end)(unsigned int irq);
   57.40 +    void (*set_affinity)(unsigned int irq, unsigned long mask);
   57.41  };
   57.42  
   57.43 -typedef struct hw_interrupt_type  hw_irq_controller;
   57.44 +typedef struct hw_interrupt_type hw_irq_controller;
   57.45  
   57.46  #include <asm/irq.h>
   57.47  
   57.48 @@ -45,19 +43,23 @@ typedef struct hw_interrupt_type  hw_irq
   57.49   * Pad this out to 32 bytes for cache and indexing reasons.
   57.50   */
   57.51  typedef struct {
   57.52 -	unsigned int status;		/* IRQ status */
   57.53 -	hw_irq_controller *handler;
   57.54 -	struct irqaction *action;	/* IRQ action list */
   57.55 -	unsigned int depth;		/* nested irq disables */
   57.56 -	spinlock_t lock;
   57.57 +    unsigned int status;		/* IRQ status */
   57.58 +    hw_irq_controller *handler;
   57.59 +    struct irqaction *action;	/* IRQ action list */
   57.60 +    unsigned int depth;		/* nested irq disables */
   57.61 +    spinlock_t lock;
   57.62  } ____cacheline_aligned irq_desc_t;
   57.63  
   57.64 -extern irq_desc_t irq_desc [NR_IRQS];
   57.65 +extern irq_desc_t irq_desc[NR_IRQS];
   57.66  
   57.67 -extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
   57.68 -extern int setup_irq(unsigned int , struct irqaction * );
   57.69 +extern int setup_irq(unsigned int, struct irqaction *);
   57.70  
   57.71 -extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
   57.72 +extern hw_irq_controller no_irq_type;
   57.73  extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
   57.74  
   57.75 -#endif /* __asm_h */
   57.76 +struct task_struct;
   57.77 +extern int pirq_guest_unmask(struct task_struct *p);
   57.78 +extern int pirq_guest_bind(struct task_struct *p, int irq, int will_share);
   57.79 +extern int pirq_guest_unbind(struct task_struct *p, int irq);
   57.80 +
   57.81 +#endif /* __XEN_IRQ_H__ */
    58.1 --- a/xen/include/xen/mm.h	Fri Apr 02 13:58:27 2004 +0000
    58.2 +++ b/xen/include/xen/mm.h	Fri Apr 02 14:03:03 2004 +0000
    58.3 @@ -126,6 +126,7 @@ extern spinlock_t free_list_lock;
    58.4  extern unsigned int free_pfns;
    58.5  extern unsigned long max_page;
    58.6  void init_frametable(unsigned long nr_pages);
    58.7 +void add_to_domain_alloc_list(unsigned long ps, unsigned long pe);
    58.8  
    58.9  struct pfn_info *alloc_domain_page(struct task_struct *p);
   58.10  void free_domain_page(struct pfn_info *page);
    59.1 --- a/xen/include/xen/sched-if.h	Fri Apr 02 13:58:27 2004 +0000
    59.2 +++ b/xen/include/xen/sched-if.h	Fri Apr 02 14:03:03 2004 +0000
    59.3 @@ -40,8 +40,7 @@ struct scheduler
    59.4      void         (*free_task)      (struct task_struct *);
    59.5      void         (*rem_task)       (struct task_struct *);
    59.6      void         (*wake_up)        (struct task_struct *);
    59.7 -    /* XXX why does do_block need to return anything at all? */
    59.8 -    long         (*do_block)       (struct task_struct *);
    59.9 +    void         (*do_block)       (struct task_struct *);
   59.10      task_slice_t (*do_schedule)    (s_time_t);
   59.11      int          (*control)        (struct sched_ctl_cmd *);
   59.12      int          (*adjdom)         (struct task_struct *,
   59.13 @@ -50,6 +49,7 @@ struct scheduler
   59.14      void         (*dump_settings)  (void);
   59.15      void         (*dump_cpu_state) (int);
   59.16      void         (*dump_runq_el)   (struct task_struct *);
   59.17 +    int          (*prn_state)      (int);
   59.18      void         (*pause)          (struct task_struct *);
   59.19  };
   59.20  
    60.1 --- a/xen/include/xen/sched.h	Fri Apr 02 13:58:27 2004 +0000
    60.2 +++ b/xen/include/xen/sched.h	Fri Apr 02 14:03:03 2004 +0000
    60.3 @@ -4,6 +4,9 @@
    60.4  #include <xen/config.h>
    60.5  #include <xen/types.h>
    60.6  #include <xen/spinlock.h>
    60.7 +#include <xen/config.h>
    60.8 +#include <xen/types.h>
    60.9 +#include <xen/spinlock.h>
   60.10  #include <asm/ptrace.h>
   60.11  #include <xen/smp.h>
   60.12  #include <asm/page.h>
   60.13 @@ -161,8 +164,10 @@ struct task_struct
   60.14       * domain's event-channel spinlock. Read accesses can also synchronise on 
   60.15       * the lock, but races don't usually matter.
   60.16       */
   60.17 -    u16 pirq_to_evtchn[64];
   60.18 +#define NR_PIRQS 128 /* Put this somewhere sane! */
   60.19 +    u16 pirq_to_evtchn[NR_PIRQS];
   60.20      u16 virq_to_evtchn[NR_VIRQS];
   60.21 +    u32 pirq_mask[NR_PIRQS/32];
   60.22  
   60.23      /* Physical I/O */
   60.24      spinlock_t       pcidev_lock;
   60.25 @@ -224,10 +229,13 @@ struct task_struct *alloc_task_struct();
   60.26  
   60.27  extern struct task_struct *do_createdomain(
   60.28      domid_t dom_id, unsigned int cpu);
   60.29 -extern int setup_guestos(
   60.30 -    struct task_struct *p, dom0_createdomain_t *params, unsigned int num_vifs,
   60.31 -    char *data_start, unsigned long data_len, 
   60.32 -    char *cmdline, unsigned long initrd_len);
   60.33 +extern int construct_dom0(struct task_struct *p, 
   60.34 +                          unsigned long alloc_start,
   60.35 +                          unsigned long alloc_end,
   60.36 +                          unsigned int num_vifs,
   60.37 +                          char *image_start, unsigned long image_len, 
   60.38 +                          char *initrd_start, unsigned long initrd_len,
   60.39 +                          char *cmdline);
   60.40  extern int final_setup_guestos(struct task_struct *p, dom0_builddomain_t *);
   60.41  
   60.42  struct task_struct *find_domain_by_id(domid_t dom);
   60.43 @@ -266,6 +274,7 @@ void sched_add_domain(struct task_struct
   60.44  int  sched_rem_domain(struct task_struct *p);
   60.45  long sched_ctl(struct sched_ctl_cmd *);
   60.46  long sched_adjdom(struct sched_adjdom_cmd *);
   60.47 +int  sched_id(void); /* explicit (void): a real prototype, not an unspecified-args declaration */
   60.48  void sched_pause_sync(struct task_struct *);
   60.49  void init_idle_task(void);
   60.50  void __wake_up(struct task_struct *p);
   60.51 @@ -290,10 +299,10 @@ static inline long schedule_timeout(long
   60.52      return 0;
   60.53  }
   60.54  
   60.55 -#define signal_pending(_p) \
   60.56 -    (((_p)->hyp_events != 0) ||                                 \
   60.57 -     (test_bit(0, &(_p)->shared_info->evtchn_upcall_pending) && \
   60.58 -      !test_bit(0, &(_p)->shared_info->evtchn_upcall_mask)))
   60.59 +#define signal_pending(_p)                                      \
   60.60 +    ( (_p)->hyp_events ||                                       \
   60.61 +      ((_p)->shared_info->vcpu_data[0].evtchn_upcall_pending && \
   60.62 +       !(_p)->shared_info->vcpu_data[0].evtchn_upcall_mask) )
   60.63  
   60.64  void domain_init(void);
   60.65  
   60.66 @@ -303,6 +312,7 @@ void startup_cpu_idle_loop(void);
   60.67  void continue_cpu_idle_loop(void);
   60.68  
   60.69  void continue_nonidle_task(void);
   60.70 +void sched_prn_state(int state);
   60.71  
   60.72  /* This task_hash and task_list are protected by the tasklist_lock. */
   60.73  #define TASK_HASH_SIZE 256
    61.1 --- a/xenolinux-2.4.25-sparse/arch/xen/Makefile	Fri Apr 02 13:58:27 2004 +0000
    61.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/Makefile	Fri Apr 02 14:03:03 2004 +0000
    61.3 @@ -19,7 +19,7 @@
    61.4  override EXTRAVERSION := -xen$(EXTRAVERSION)
    61.5  
    61.6  LD=$(CROSS_COMPILE)ld -m elf_i386
    61.7 -OBJCOPY=$(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S
    61.8 +OBJCOPY=$(CROSS_COMPILE)objcopy -R .note -R .comment -S
    61.9  LDFLAGS=-e stext
   61.10  LINKFLAGS =-T $(TOPDIR)/arch/xen/vmlinux.lds $(LDFLAGS)
   61.11  
   61.12 @@ -49,9 +49,14 @@ endif
   61.13  HEAD := arch/xen/kernel/head.o arch/xen/kernel/init_task.o
   61.14  
   61.15  SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib
   61.16 -SUBDIRS += arch/xen/drivers/console arch/xen/drivers/network
   61.17 -SUBDIRS += arch/xen/drivers/evtchn arch/xen/drivers/block
   61.18 -SUBDIRS += arch/xen/drivers/balloon arch/xen/drivers/vnetif
   61.19 +SUBDIRS += arch/xen/drivers/console 
   61.20 +ifndef CONFIG_XEN_PHYSDEV_ACCESS
   61.21 +SUBDIRS += arch/xen/drivers/network
   61.22 +endif
   61.23 +SUBDIRS += arch/xen/drivers/evtchn
   61.24 +SUBDIRS += arch/xen/drivers/block
   61.25 +SUBDIRS += arch/xen/drivers/balloon
   61.26 +SUBDIRS += arch/xen/drivers/vnetif
   61.27  ifdef CONFIG_XEN_PRIVILEGED_GUEST
   61.28  SUBDIRS += arch/xen/drivers/dom0 
   61.29  endif
   61.30 @@ -60,7 +65,9 @@ CORE_FILES += arch/xen/kernel/kernel.o a
   61.31  CORE_FILES += arch/xen/drivers/evtchn/drv.o
   61.32  CORE_FILES += arch/xen/drivers/console/drv.o
   61.33  CORE_FILES += arch/xen/drivers/block/drv.o
   61.34 +ifndef CONFIG_XEN_PHYSDEV_ACCESS
   61.35  CORE_FILES += arch/xen/drivers/network/drv.o
   61.36 +endif
   61.37  CORE_FILES += arch/xen/drivers/vnetif/drv.o
   61.38  ifdef CONFIG_XEN_PRIVILEGED_GUEST
   61.39  CORE_FILES += arch/xen/drivers/dom0/drv.o
    62.1 --- a/xenolinux-2.4.25-sparse/arch/xen/boot/Makefile	Fri Apr 02 13:58:27 2004 +0000
    62.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/boot/Makefile	Fri Apr 02 14:03:03 2004 +0000
    62.3 @@ -6,17 +6,9 @@ xenolinux.gz: xenolinux
    62.4  	gzip -f -9 < $< > $@
    62.5  
    62.6  xenolinux: $(TOPDIR)/vmlinux
    62.7 -	# Guest OS header -- first 8 bytes are identifier 'XenGuest'.
    62.8 -	echo -e -n 'XenGuest' >$@ 
    62.9 -	# Guest OS header -- next 4 bytes are load address (0xC0000000).
   62.10 -	echo -e -n '\000\000\000\300' >>$@
   62.11 -	$(OBJCOPY) $< xenolinux.body
   62.12 -	# Guest OS header is immediately followed by raw OS image.
   62.13 -	# Start address must be at byte 0.
   62.14 -	cat xenolinux.body >>$@
   62.15 -	rm -f xenolinux.body
   62.16 +	$(OBJCOPY) $< $@
   62.17  
   62.18  dep:
   62.19  
   62.20  clean:
   62.21 -	rm -f xenolinux xenolinux.gz
   62.22 \ No newline at end of file
   62.23 +	rm -f xenolinux*
    63.1 --- a/xenolinux-2.4.25-sparse/arch/xen/config.in	Fri Apr 02 13:58:27 2004 +0000
    63.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/config.in	Fri Apr 02 14:03:03 2004 +0000
    63.3 @@ -13,8 +13,9 @@ define_bool CONFIG_SBUS n
    63.4  define_bool CONFIG_UID16 y
    63.5  
    63.6  mainmenu_option next_comment
    63.7 -comment 'Xenolinux'
    63.8 +comment 'Xen'
    63.9  bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST
   63.10 +bool 'Support for direct physical device access' CONFIG_XEN_PHYSDEV_ACCESS
   63.11  endmenu
   63.12  # The IBM S/390 patch needs this.
   63.13  define_bool CONFIG_NO_IDLE_HZ y
   63.14 @@ -77,14 +78,14 @@ if [ "$CONFIG_MVIAC3_2" = "y" ]; then
   63.15     define_int  CONFIG_X86_L1_CACHE_SHIFT 5
   63.16  fi
   63.17  
   63.18 -if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
   63.19 -   tristate 'BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)' CONFIG_EDD
   63.20 -fi
   63.21 +#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
   63.22 +#   tristate 'BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)' CONFIG_EDD
   63.23 +#fi
   63.24  
   63.25  choice 'High Memory Support' \
   63.26  	"off    CONFIG_NOHIGHMEM \
   63.27 -	 4GB    CONFIG_HIGHMEM4G \
   63.28 -	 64GB   CONFIG_HIGHMEM64G" off
   63.29 +	 4GB    CONFIG_HIGHMEM4G" off
   63.30 +#	 64GB   CONFIG_HIGHMEM64G" off
   63.31  if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
   63.32     define_bool CONFIG_HIGHMEM y
   63.33  fi
   63.34 @@ -93,6 +94,10 @@ if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
   63.35     define_bool CONFIG_X86_PAE y
   63.36  fi
   63.37  
   63.38 +if [ "$CONFIG_HIGHMEM" = "y" ]; then
   63.39 +   bool 'HIGHMEM I/O support' CONFIG_HIGHIO
   63.40 +fi
   63.41 +
   63.42  #bool 'Symmetric multi-processing support' CONFIG_SMP
   63.43  #if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
   63.44  #   define_bool CONFIG_HAVE_DEC_LOCK y
   63.45 @@ -104,15 +109,20 @@ comment 'General setup'
   63.46  
   63.47  bool 'Networking support' CONFIG_NET
   63.48  
   63.49 -bool 'PCI support' CONFIG_PCI
   63.50 -if [ "$CONFIG_PCI" = "y" ]; then
   63.51 -   tristate '    3c590/3c900 series (592/595/597) "Vortex/Boomerang" support' CONFIG_VORTEX
   63.52 -   tristate 'Intel(R) PRO/1000 Gigabit Ethernet support' CONFIG_E1000
   63.53 -   if [ "$CONFIG_E1000" != "n" ]; then
   63.54 -      bool '  Use Rx Polling (NAPI)' CONFIG_E1000_NAPI
   63.55 +if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
   63.56 +   bool 'PCI support' CONFIG_PCI
   63.57 +   source drivers/pci/Config.in
   63.58 +
   63.59 +   bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG
   63.60 +
   63.61 +   if [ "$CONFIG_HOTPLUG" = "y" ] ; then
   63.62 +      source drivers/pcmcia/Config.in
   63.63 +      source drivers/hotplug/Config.in
   63.64 +   else
   63.65 +      define_bool CONFIG_PCMCIA n
   63.66 +      define_bool CONFIG_HOTPLUG_PCI n
   63.67     fi
   63.68  fi
   63.69 -source drivers/pci/Config.in
   63.70  
   63.71  bool 'System V IPC' CONFIG_SYSVIPC
   63.72  bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
   63.73 @@ -129,31 +139,119 @@ bool 'Select task to kill on out of memo
   63.74  
   63.75  endmenu
   63.76  
   63.77 +if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
   63.78 +   source drivers/mtd/Config.in
   63.79 +
   63.80 +   source drivers/parport/Config.in
   63.81 +
   63.82 +   source drivers/pnp/Config.in
   63.83 +
   63.84 +   source drivers/block/Config.in
   63.85 +
   63.86 +   source drivers/md/Config.in
   63.87 +fi
   63.88 +
   63.89  if [ "$CONFIG_NET" = "y" ]; then
   63.90     source net/Config.in
   63.91  fi
   63.92  
   63.93 +if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "n" ]; then
   63.94 +   #
   63.95 +   # Block device driver configuration
   63.96 +   #
   63.97 +   mainmenu_option next_comment
   63.98 +   comment 'Block devices'
   63.99 +   tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
  63.100 +   dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
  63.101 +   tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
  63.102 +   if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
  63.103 +      int '  Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
  63.104 +   fi
  63.105 +   dep_bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
  63.106 +   bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS
  63.107 +   bool 'XenoLinux virtual block device support' CONFIG_XEN_VBD
  63.108 +   define_bool CONFIG_BLK_DEV_HD n
  63.109 +   endmenu
  63.110 +else
  63.111 +   mainmenu_option next_comment
  63.112 +   comment 'ATA/IDE/MFM/RLL support'
  63.113  
  63.114 -#
  63.115 -# Block device driver configuration
  63.116 -#
  63.117 -mainmenu_option next_comment
  63.118 -comment 'Block devices'
  63.119 -tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
  63.120 -dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
  63.121 -tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
  63.122 -if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
  63.123 -   int '  Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
  63.124 +   tristate 'ATA/IDE/MFM/RLL support' CONFIG_IDE
  63.125 +
  63.126 +   if [ "$CONFIG_IDE" != "n" ]; then
  63.127 +      source drivers/ide/Config.in
  63.128 +   else
  63.129 +      define_bool CONFIG_BLK_DEV_HD n
  63.130 +   fi
  63.131 +   endmenu
  63.132 +
  63.133 +   mainmenu_option next_comment
  63.134 +   comment 'SCSI support'
  63.135 +
  63.136 +   tristate 'SCSI support' CONFIG_SCSI
  63.137 +
  63.138 +   if [ "$CONFIG_SCSI" != "n" ]; then
  63.139 +      source drivers/scsi/Config.in
  63.140 +   fi
  63.141 +   endmenu
  63.142 +
  63.143 +   source drivers/message/fusion/Config.in
  63.144 +
  63.145 +   source drivers/ieee1394/Config.in
  63.146 +
  63.147 +   source drivers/message/i2o/Config.in
  63.148 +
  63.149 +   if [ "$CONFIG_NET" = "y" ]; then
  63.150 +      mainmenu_option next_comment
  63.151 +      comment 'Network device support'
  63.152 +
  63.153 +      bool 'Network device support' CONFIG_NETDEVICES
  63.154 +      if [ "$CONFIG_NETDEVICES" = "y" ]; then
  63.155 +         source drivers/net/Config.in
  63.156 +         if [ "$CONFIG_ATM" = "y" -o "$CONFIG_ATM" = "m" ]; then
  63.157 +            source drivers/atm/Config.in
  63.158 +         fi
  63.159 +      fi
  63.160 +      endmenu
  63.161 +   fi
  63.162 +
  63.163 +   source net/ax25/Config.in
  63.164 +
  63.165 +   source net/irda/Config.in
  63.166 +
  63.167 +   mainmenu_option next_comment
  63.168 +   comment 'ISDN subsystem'
  63.169 +   if [ "$CONFIG_NET" != "n" ]; then
  63.170 +      tristate 'ISDN support' CONFIG_ISDN
  63.171 +      if [ "$CONFIG_ISDN" != "n" ]; then
  63.172 +         source drivers/isdn/Config.in
  63.173 +      fi
  63.174 +   fi
  63.175 +   endmenu
  63.176 +
  63.177 +   if [ "$CONFIG_ISA" = "y" ]; then
  63.178 +       mainmenu_option next_comment
  63.179 +       comment 'Old CD-ROM drivers (not SCSI, not IDE)'
  63.180 +   
  63.181 +       bool 'Support non-SCSI/IDE/ATAPI CDROM drives' CONFIG_CD_NO_IDESCSI
  63.182 +       if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then
  63.183 +          source drivers/cdrom/Config.in
  63.184 +       fi
  63.185 +       endmenu
  63.186 +   fi
  63.187 +
  63.188 +   #
  63.189 +   # input before char - char/joystick depends on it. As does USB.
  63.190 +   #
  63.191 +   source drivers/input/Config.in
  63.192  fi
  63.193 -dep_bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
  63.194 -bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS
  63.195 -bool 'XenoLinux virtual block device support' CONFIG_XEN_VBD
  63.196 -#endmenu
  63.197 -define_bool CONFIG_BLK_DEV_HD n
  63.198 -endmenu
  63.199  
  63.200  source drivers/char/Config.in
  63.201  
  63.202 +if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
  63.203 +   source drivers/media/Config.in
  63.204 +fi
  63.205 +
  63.206  source fs/Config.in
  63.207  
  63.208  mainmenu_option next_comment
  63.209 @@ -164,14 +262,37 @@ bool 'Xen console support' CONFIG_XEN_CO
  63.210  if [ "$CONFIG_VT" = "y" ]; then
  63.211     bool 'VGA text console' CONFIG_VGA_CONSOLE
  63.212     bool 'Dummy console' CONFIG_DUMMY_CONSOLE 
  63.213 +   if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
  63.214 +      bool 'Video mode selection support' CONFIG_VIDEO_SELECT
  63.215 +      if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
  63.216 +         tristate 'MDA text console (dual-headed) (EXPERIMENTAL)' CONFIG_MDA_CONSOLE
  63.217 +         source drivers/video/Config.in
  63.218 +      fi
  63.219 +   fi
  63.220  fi
  63.221  endmenu
  63.222  
  63.223 +if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
  63.224 +   mainmenu_option next_comment
  63.225 +   comment 'Sound'
  63.226 +
  63.227 +   tristate 'Sound card support' CONFIG_SOUND
  63.228 +   if [ "$CONFIG_SOUND" != "n" ]; then
  63.229 +      source drivers/sound/Config.in
  63.230 +   fi
  63.231 +   endmenu
  63.232 +
  63.233 +   source drivers/usb/Config.in
  63.234 +
  63.235 +   source net/bluetooth/Config.in
  63.236 +fi
  63.237 +
  63.238  mainmenu_option next_comment
  63.239  comment 'Kernel hacking'
  63.240  
  63.241  bool 'Kernel debugging' CONFIG_DEBUG_KERNEL
  63.242  if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then
  63.243 +   bool '  Check for stack overflows' CONFIG_DEBUG_STACKOVERFLOW
  63.244     bool '  Debug high memory support' CONFIG_DEBUG_HIGHMEM
  63.245     bool '  Debug memory allocations' CONFIG_DEBUG_SLAB
  63.246     bool '  Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
    64.1 --- a/xenolinux-2.4.25-sparse/arch/xen/defconfig	Fri Apr 02 13:58:27 2004 +0000
    64.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/defconfig	Fri Apr 02 14:03:03 2004 +0000
    64.3 @@ -12,6 +12,7 @@ CONFIG_UID16=y
    64.4  #
    64.5  # support for privileged domains
    64.6  CONFIG_XEN_PRIVILEGED_GUEST=y
    64.7 +# CONFIG_XEN_PHYSDEV_ACCESS is not set
    64.8  # on-demand timer setting (taken from s390 patch set)
    64.9  CONFIG_NO_IDLE_HZ=y
   64.10  
   64.11 @@ -434,6 +435,7 @@ CONFIG_DUMMY_CONSOLE=y
   64.12  # Kernel hacking
   64.13  #
   64.14  CONFIG_DEBUG_KERNEL=y
   64.15 +# CONFIG_DEBUG_STACKOVERFLOW is not set
   64.16  # CONFIG_DEBUG_HIGHMEM is not set
   64.17  # CONFIG_DEBUG_SLAB is not set
   64.18  # CONFIG_DEBUG_IOVIRT is not set
    65.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    65.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev	Fri Apr 02 14:03:03 2004 +0000
    65.3 @@ -0,0 +1,1005 @@
    65.4 +#
    65.5 +# Automatically generated make config: don't edit
    65.6 +#
    65.7 +CONFIG_XEN=y
    65.8 +CONFIG_X86=y
    65.9 +CONFIG_ISA=y
   65.10 +# CONFIG_SBUS is not set
   65.11 +CONFIG_UID16=y
   65.12 +
   65.13 +#
   65.14 +# Xen
   65.15 +#
   65.16 +CONFIG_XEN_PRIVILEGED_GUEST=y
   65.17 +CONFIG_XEN_PHYSDEV_ACCESS=y
   65.18 +CONFIG_NO_IDLE_HZ=y
   65.19 +
   65.20 +#
   65.21 +# Code maturity level options
   65.22 +#
   65.23 +CONFIG_EXPERIMENTAL=y
   65.24 +
   65.25 +#
   65.26 +# Loadable module support
   65.27 +#
   65.28 +CONFIG_MODULES=y
   65.29 +CONFIG_MODVERSIONS=y
   65.30 +CONFIG_KMOD=y
   65.31 +
   65.32 +#
   65.33 +# Processor type and features
   65.34 +#
   65.35 +CONFIG_M686=y
   65.36 +# CONFIG_MPENTIUMIII is not set
   65.37 +# CONFIG_MPENTIUM4 is not set
   65.38 +# CONFIG_MK7 is not set
   65.39 +# CONFIG_MK8 is not set
   65.40 +# CONFIG_MVIAC3_2 is not set
   65.41 +CONFIG_X86_WP_WORKS_OK=y
   65.42 +CONFIG_X86_INVLPG=y
   65.43 +CONFIG_X86_CMPXCHG=y
   65.44 +CONFIG_X86_XADD=y
   65.45 +CONFIG_X86_BSWAP=y
   65.46 +CONFIG_X86_POPAD_OK=y
   65.47 +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
   65.48 +CONFIG_RWSEM_XCHGADD_ALGORITHM=y
   65.49 +CONFIG_X86_GOOD_APIC=y
   65.50 +CONFIG_X86_PGE=y
   65.51 +CONFIG_X86_USE_PPRO_CHECKSUM=y
   65.52 +CONFIG_X86_TSC=y
   65.53 +CONFIG_X86_L1_CACHE_SHIFT=5
   65.54 +
   65.55 +#
   65.56 +# General setup
   65.57 +#
   65.58 +CONFIG_NET=y
   65.59 +CONFIG_PCI=y
   65.60 +CONFIG_PCI_NAMES=y
   65.61 +CONFIG_HOTPLUG=y
   65.62 +
   65.63 +#
   65.64 +# PCMCIA/CardBus support
   65.65 +#
   65.66 +# CONFIG_PCMCIA is not set
   65.67 +
   65.68 +#
   65.69 +# PCI Hotplug Support
   65.70 +#
   65.71 +# CONFIG_HOTPLUG_PCI is not set
   65.72 +# CONFIG_HOTPLUG_PCI_COMPAQ is not set
   65.73 +# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
   65.74 +CONFIG_SYSVIPC=y
   65.75 +# CONFIG_BSD_PROCESS_ACCT is not set
   65.76 +CONFIG_SYSCTL=y
   65.77 +CONFIG_KCORE_ELF=y
   65.78 +# CONFIG_KCORE_AOUT is not set
   65.79 +CONFIG_BINFMT_AOUT=y
   65.80 +CONFIG_BINFMT_ELF=y
   65.81 +# CONFIG_BINFMT_MISC is not set
   65.82 +# CONFIG_OOM_KILLER is not set
   65.83 +
   65.84 +#
   65.85 +# Memory Technology Devices (MTD)
   65.86 +#
   65.87 +# CONFIG_MTD is not set
   65.88 +
   65.89 +#
   65.90 +# Parallel port support
   65.91 +#
   65.92 +CONFIG_PARPORT=y
   65.93 +CONFIG_PARPORT_PC=y
   65.94 +# CONFIG_PARPORT_PC_FIFO is not set
   65.95 +# CONFIG_PARPORT_PC_SUPERIO is not set
   65.96 +# CONFIG_PARPORT_PC_PCMCIA is not set
   65.97 +# CONFIG_PARPORT_AMIGA is not set
   65.98 +# CONFIG_PARPORT_MFC3 is not set
   65.99 +# CONFIG_PARPORT_ATARI is not set
  65.100 +# CONFIG_PARPORT_GSC is not set
  65.101 +# CONFIG_PARPORT_SUNBPP is not set
  65.102 +# CONFIG_PARPORT_IP22 is not set
  65.103 +# CONFIG_PARPORT_OTHER is not set
  65.104 +CONFIG_PARPORT_1284=y
  65.105 +
  65.106 +#
  65.107 +# Plug and Play configuration
  65.108 +#
  65.109 +CONFIG_PNP=y
  65.110 +# CONFIG_ISAPNP is not set
  65.111 +
  65.112 +#
  65.113 +# Block devices
  65.114 +#
  65.115 +CONFIG_BLK_DEV_FD=y
  65.116 +# CONFIG_BLK_DEV_XD is not set
  65.117 +# CONFIG_PARIDE is not set
  65.118 +# CONFIG_BLK_CPQ_DA is not set
  65.119 +# CONFIG_BLK_CPQ_CISS_DA is not set
  65.120 +# CONFIG_CISS_SCSI_TAPE is not set
  65.121 +# CONFIG_CISS_MONITOR_THREAD is not set
  65.122 +# CONFIG_BLK_DEV_DAC960 is not set
  65.123 +# CONFIG_BLK_DEV_UMEM is not set
  65.124 +CONFIG_BLK_DEV_LOOP=y
  65.125 +CONFIG_BLK_DEV_NBD=y
  65.126 +CONFIG_BLK_DEV_RAM=y
  65.127 +CONFIG_BLK_DEV_RAM_SIZE=4096
  65.128 +CONFIG_BLK_DEV_INITRD=y
  65.129 +# CONFIG_BLK_STATS is not set
  65.130 +
  65.131 +#
  65.132 +# Multi-device support (RAID and LVM)
  65.133 +#
  65.134 +CONFIG_MD=y
  65.135 +CONFIG_BLK_DEV_MD=y
  65.136 +CONFIG_MD_LINEAR=y
  65.137 +CONFIG_MD_RAID0=y
  65.138 +CONFIG_MD_RAID1=y
  65.139 +CONFIG_MD_RAID5=y
  65.140 +CONFIG_MD_MULTIPATH=y
  65.141 +CONFIG_BLK_DEV_LVM=y
  65.142 +
  65.143 +#
  65.144 +# Networking options
  65.145 +#
  65.146 +CONFIG_PACKET=y
  65.147 +CONFIG_PACKET_MMAP=y
  65.148 +# CONFIG_NETLINK_DEV is not set
  65.149 +CONFIG_NETFILTER=y
  65.150 +# CONFIG_NETFILTER_DEBUG is not set
  65.151 +CONFIG_FILTER=y
  65.152 +CONFIG_UNIX=y
  65.153 +CONFIG_INET=y
  65.154 +# CONFIG_IP_MULTICAST is not set
  65.155 +# CONFIG_IP_ADVANCED_ROUTER is not set
  65.156 +CONFIG_IP_PNP=y
  65.157 +CONFIG_IP_PNP_DHCP=y
  65.158 +# CONFIG_IP_PNP_BOOTP is not set
  65.159 +# CONFIG_IP_PNP_RARP is not set
  65.160 +# CONFIG_NET_IPIP is not set
  65.161 +# CONFIG_NET_IPGRE is not set
  65.162 +# CONFIG_ARPD is not set
  65.163 +# CONFIG_INET_ECN is not set
  65.164 +# CONFIG_SYN_COOKIES is not set
  65.165 +
  65.166 +#
  65.167 +#   IP: Netfilter Configuration
  65.168 +#
  65.169 +CONFIG_IP_NF_CONNTRACK=y
  65.170 +CONFIG_IP_NF_FTP=y
  65.171 +# CONFIG_IP_NF_AMANDA is not set
  65.172 +CONFIG_IP_NF_TFTP=y
  65.173 +CONFIG_IP_NF_IRC=y
  65.174 +# CONFIG_IP_NF_QUEUE is not set
  65.175 +CONFIG_IP_NF_IPTABLES=y
  65.176 +# CONFIG_IP_NF_MATCH_LIMIT is not set
  65.177 +# CONFIG_IP_NF_MATCH_MAC is not set
  65.178 +# CONFIG_IP_NF_MATCH_PKTTYPE is not set
  65.179 +# CONFIG_IP_NF_MATCH_MARK is not set
  65.180 +# CONFIG_IP_NF_MATCH_MULTIPORT is not set
  65.181 +# CONFIG_IP_NF_MATCH_TOS is not set
  65.182 +# CONFIG_IP_NF_MATCH_RECENT is not set
  65.183 +# CONFIG_IP_NF_MATCH_ECN is not set
  65.184 +# CONFIG_IP_NF_MATCH_DSCP is not set
  65.185 +# CONFIG_IP_NF_MATCH_AH_ESP is not set
  65.186 +# CONFIG_IP_NF_MATCH_LENGTH is not set
  65.187 +# CONFIG_IP_NF_MATCH_TTL is not set
  65.188 +# CONFIG_IP_NF_MATCH_TCPMSS is not set
  65.189 +# CONFIG_IP_NF_MATCH_HELPER is not set
  65.190 +CONFIG_IP_NF_MATCH_STATE=y
  65.191 +CONFIG_IP_NF_MATCH_CONNTRACK=y
  65.192 +# CONFIG_IP_NF_MATCH_UNCLEAN is not set
  65.193 +# CONFIG_IP_NF_MATCH_OWNER is not set
  65.194 +CONFIG_IP_NF_FILTER=y
  65.195 +CONFIG_IP_NF_TARGET_REJECT=y
  65.196 +# CONFIG_IP_NF_TARGET_MIRROR is not set
  65.197 +CONFIG_IP_NF_NAT=y
  65.198 +CONFIG_IP_NF_NAT_NEEDED=y
  65.199 +CONFIG_IP_NF_TARGET_MASQUERADE=y
  65.200 +CONFIG_IP_NF_TARGET_REDIRECT=y
  65.201 +# CONFIG_IP_NF_NAT_LOCAL is not set
  65.202 +# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
  65.203 +CONFIG_IP_NF_NAT_IRC=y
  65.204 +CONFIG_IP_NF_NAT_FTP=y
  65.205 +CONFIG_IP_NF_NAT_TFTP=y
  65.206 +# CONFIG_IP_NF_MANGLE is not set
  65.207 +CONFIG_IP_NF_TARGET_LOG=y
  65.208 +CONFIG_IP_NF_TARGET_ULOG=y
  65.209 +# CONFIG_IP_NF_TARGET_TCPMSS is not set
  65.210 +# CONFIG_IP_NF_ARPTABLES is not set
  65.211 +
  65.212 +#
  65.213 +#   IP: Virtual Server Configuration
  65.214 +#
  65.215 +# CONFIG_IP_VS is not set
  65.216 +# CONFIG_IPV6 is not set
  65.217 +# CONFIG_KHTTPD is not set
  65.218 +
  65.219 +#
  65.220 +#    SCTP Configuration (EXPERIMENTAL)
  65.221 +#
  65.222 +CONFIG_IPV6_SCTP__=y
  65.223 +# CONFIG_IP_SCTP is not set
  65.224 +# CONFIG_ATM is not set
  65.225 +# CONFIG_VLAN_8021Q is not set
  65.226 +
  65.227 +#
  65.228 +#  
  65.229 +#
  65.230 +# CONFIG_IPX is not set
  65.231 +# CONFIG_ATALK is not set
  65.232 +
  65.233 +#
  65.234 +# Appletalk devices
  65.235 +#
  65.236 +# CONFIG_DEV_APPLETALK is not set
  65.237 +# CONFIG_DECNET is not set
  65.238 +# CONFIG_BRIDGE is not set
  65.239 +# CONFIG_X25 is not set
  65.240 +# CONFIG_LAPB is not set
  65.241 +# CONFIG_LLC is not set
  65.242 +# CONFIG_NET_DIVERT is not set
  65.243 +# CONFIG_ECONET is not set
  65.244 +# CONFIG_WAN_ROUTER is not set
  65.245 +# CONFIG_NET_FASTROUTE is not set
  65.246 +# CONFIG_NET_HW_FLOWCONTROL is not set
  65.247 +
  65.248 +#
  65.249 +# QoS and/or fair queueing
  65.250 +#
  65.251 +# CONFIG_NET_SCHED is not set
  65.252 +
  65.253 +#
  65.254 +# Network testing
  65.255 +#
  65.256 +# CONFIG_NET_PKTGEN is not set
  65.257 +
  65.258 +#
  65.259 +# ATA/IDE/MFM/RLL support
  65.260 +#
  65.261 +CONFIG_IDE=y
  65.262 +
  65.263 +#
  65.264 +# IDE, ATA and ATAPI Block devices
  65.265 +#
  65.266 +CONFIG_BLK_DEV_IDE=y
  65.267 +
  65.268 +#
  65.269 +# Please see Documentation/ide.txt for help/info on IDE drives
  65.270 +#
  65.271 +# CONFIG_BLK_DEV_HD_IDE is not set
  65.272 +# CONFIG_BLK_DEV_HD is not set
  65.273 +CONFIG_BLK_DEV_IDEDISK=y
  65.274 +CONFIG_IDEDISK_MULTI_MODE=y
  65.275 +CONFIG_IDEDISK_STROKE=y
  65.276 +# CONFIG_BLK_DEV_IDECS is not set
  65.277 +CONFIG_BLK_DEV_IDECD=y
  65.278 +CONFIG_BLK_DEV_IDETAPE=y
  65.279 +CONFIG_BLK_DEV_IDEFLOPPY=y
  65.280 +CONFIG_BLK_DEV_IDESCSI=y
  65.281 +CONFIG_IDE_TASK_IOCTL=y
  65.282 +
  65.283 +#
  65.284 +# IDE chipset support/bugfixes
  65.285 +#
  65.286 +CONFIG_BLK_DEV_CMD640=y
  65.287 +CONFIG_BLK_DEV_CMD640_ENHANCED=y
  65.288 +# CONFIG_BLK_DEV_ISAPNP is not set
  65.289 +CONFIG_BLK_DEV_IDEPCI=y
  65.290 +CONFIG_BLK_DEV_GENERIC=y
  65.291 +CONFIG_IDEPCI_SHARE_IRQ=y
  65.292 +CONFIG_BLK_DEV_IDEDMA_PCI=y
  65.293 +CONFIG_BLK_DEV_OFFBOARD=y
  65.294 +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
  65.295 +CONFIG_IDEDMA_PCI_AUTO=y
  65.296 +# CONFIG_IDEDMA_ONLYDISK is not set
  65.297 +CONFIG_BLK_DEV_IDEDMA=y
  65.298 +# CONFIG_IDEDMA_PCI_WIP is not set
  65.299 +CONFIG_BLK_DEV_ADMA100=y
  65.300 +CONFIG_BLK_DEV_AEC62XX=y
  65.301 +CONFIG_BLK_DEV_ALI15X3=y
  65.302 +CONFIG_WDC_ALI15X3=y
  65.303 +CONFIG_BLK_DEV_AMD74XX=y
  65.304 +CONFIG_AMD74XX_OVERRIDE=y
  65.305 +CONFIG_BLK_DEV_CMD64X=y
  65.306 +CONFIG_BLK_DEV_TRIFLEX=y
  65.307 +CONFIG_BLK_DEV_CY82C693=y
  65.308 +CONFIG_BLK_DEV_CS5530=y
  65.309 +CONFIG_BLK_DEV_HPT34X=y
  65.310 +# CONFIG_HPT34X_AUTODMA is not set
  65.311 +CONFIG_BLK_DEV_HPT366=y
  65.312 +CONFIG_BLK_DEV_PIIX=y
  65.313 +CONFIG_BLK_DEV_NS87415=y
  65.314 +# CONFIG_BLK_DEV_OPTI621 is not set
  65.315 +CONFIG_BLK_DEV_PDC202XX_OLD=y
  65.316 +CONFIG_PDC202XX_BURST=y
  65.317 +CONFIG_BLK_DEV_PDC202XX_NEW=y
  65.318 +CONFIG_PDC202XX_FORCE=y
  65.319 +CONFIG_BLK_DEV_RZ1000=y
  65.320 +CONFIG_BLK_DEV_SC1200=y
  65.321 +CONFIG_BLK_DEV_SVWKS=y
  65.322 +CONFIG_BLK_DEV_SIIMAGE=y
  65.323 +CONFIG_BLK_DEV_SIS5513=y
  65.324 +CONFIG_BLK_DEV_SLC90E66=y
  65.325 +CONFIG_BLK_DEV_TRM290=y
  65.326 +CONFIG_BLK_DEV_VIA82CXXX=y
  65.327 +CONFIG_IDE_CHIPSETS=y
  65.328 +
  65.329 +#
  65.330 +# Note: most of these also require special kernel boot parameters
  65.331 +#
  65.332 +# CONFIG_BLK_DEV_4DRIVES is not set
  65.333 +# CONFIG_BLK_DEV_ALI14XX is not set
  65.334 +# CONFIG_BLK_DEV_DTC2278 is not set
  65.335 +# CONFIG_BLK_DEV_HT6560B is not set
  65.336 +# CONFIG_BLK_DEV_PDC4030 is not set
  65.337 +# CONFIG_BLK_DEV_QD65XX is not set
  65.338 +# CONFIG_BLK_DEV_UMC8672 is not set
  65.339 +CONFIG_IDEDMA_AUTO=y
  65.340 +# CONFIG_IDEDMA_IVB is not set
  65.341 +# CONFIG_DMA_NONPCI is not set
  65.342 +CONFIG_BLK_DEV_PDC202XX=y
  65.343 +# CONFIG_BLK_DEV_ATARAID is not set
  65.344 +# CONFIG_BLK_DEV_ATARAID_PDC is not set
  65.345 +# CONFIG_BLK_DEV_ATARAID_HPT is not set
  65.346 +# CONFIG_BLK_DEV_ATARAID_SII is not set
  65.347 +
  65.348 +#
  65.349 +# SCSI support
  65.350 +#
  65.351 +CONFIG_SCSI=y
  65.352 +
  65.353 +#
  65.354 +# SCSI support type (disk, tape, CD-ROM)
  65.355 +#
  65.356 +CONFIG_BLK_DEV_SD=y
  65.357 +CONFIG_SD_EXTRA_DEVS=40
  65.358 +# CONFIG_CHR_DEV_ST is not set
  65.359 +# CONFIG_CHR_DEV_OSST is not set
  65.360 +# CONFIG_BLK_DEV_SR is not set
  65.361 +CONFIG_CHR_DEV_SG=y
  65.362 +
  65.363 +#
  65.364 +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
  65.365 +#
  65.366 +# CONFIG_SCSI_DEBUG_QUEUES is not set
  65.367 +# CONFIG_SCSI_MULTI_LUN is not set
  65.368 +# CONFIG_SCSI_CONSTANTS is not set
  65.369 +# CONFIG_SCSI_LOGGING is not set
  65.370 +
  65.371 +#
  65.372 +# SCSI low-level drivers
  65.373 +#
  65.374 +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
  65.375 +# CONFIG_SCSI_7000FASST is not set
  65.376 +# CONFIG_SCSI_ACARD is not set
  65.377 +# CONFIG_SCSI_AHA152X is not set
  65.378 +# CONFIG_SCSI_AHA1542 is not set
  65.379 +# CONFIG_SCSI_AHA1740 is not set
  65.380 +CONFIG_SCSI_AACRAID=y
  65.381 +# CONFIG_SCSI_AIC7XXX is not set
  65.382 +CONFIG_SCSI_AIC79XX=y
  65.383 +CONFIG_AIC79XX_CMDS_PER_DEVICE=32
  65.384 +CONFIG_AIC79XX_RESET_DELAY_MS=15000
  65.385 +# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
  65.386 +# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
  65.387 +CONFIG_AIC79XX_DEBUG_ENABLE=y
  65.388 +CONFIG_AIC79XX_DEBUG_MASK=0
  65.389 +# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
  65.390 +# CONFIG_SCSI_AIC7XXX_OLD is not set
  65.391 +# CONFIG_SCSI_DPT_I2O is not set
  65.392 +# CONFIG_SCSI_ADVANSYS is not set
  65.393 +# CONFIG_SCSI_IN2000 is not set
  65.394 +# CONFIG_SCSI_AM53C974 is not set
  65.395 +CONFIG_SCSI_MEGARAID=y
  65.396 +# CONFIG_SCSI_MEGARAID2 is not set
  65.397 +CONFIG_SCSI_BUSLOGIC=y
  65.398 +# CONFIG_SCSI_OMIT_FLASHPOINT is not set
  65.399 +CONFIG_SCSI_CPQFCTS=y
  65.400 +# CONFIG_SCSI_DMX3191D is not set
  65.401 +CONFIG_SCSI_DTC3280=y
  65.402 +# CONFIG_SCSI_EATA is not set
  65.403 +# CONFIG_SCSI_EATA_DMA is not set
  65.404 +# CONFIG_SCSI_EATA_PIO is not set
  65.405 +# CONFIG_SCSI_FUTURE_DOMAIN is not set
  65.406 +# CONFIG_SCSI_GDTH is not set
  65.407 +# CONFIG_SCSI_GENERIC_NCR5380 is not set
  65.408 +# CONFIG_SCSI_IPS is not set
  65.409 +# CONFIG_SCSI_INITIO is not set
  65.410 +# CONFIG_SCSI_INIA100 is not set
  65.411 +# CONFIG_SCSI_PPA is not set
  65.412 +# CONFIG_SCSI_IMM is not set
  65.413 +# CONFIG_SCSI_NCR53C406A is not set
  65.414 +# CONFIG_SCSI_NCR53C7xx is not set
  65.415 +CONFIG_SCSI_SYM53C8XX_2=y
  65.416 +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
  65.417 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
  65.418 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
  65.419 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
  65.420 +# CONFIG_SCSI_PAS16 is not set
  65.421 +# CONFIG_SCSI_PCI2000 is not set
  65.422 +# CONFIG_SCSI_PCI2220I is not set
  65.423 +# CONFIG_SCSI_PSI240I is not set
  65.424 +# CONFIG_SCSI_QLOGIC_FAS is not set
  65.425 +# CONFIG_SCSI_QLOGIC_ISP is not set
  65.426 +# CONFIG_SCSI_QLOGIC_FC is not set
  65.427 +# CONFIG_SCSI_QLOGIC_1280 is not set
  65.428 +# CONFIG_SCSI_SEAGATE is not set
  65.429 +# CONFIG_SCSI_SIM710 is not set
  65.430 +# CONFIG_SCSI_SYM53C416 is not set
  65.431 +# CONFIG_SCSI_DC390T is not set
  65.432 +# CONFIG_SCSI_T128 is not set
  65.433 +# CONFIG_SCSI_U14_34F is not set
  65.434 +# CONFIG_SCSI_ULTRASTOR is not set
  65.435 +# CONFIG_SCSI_NSP32 is not set
  65.436 +# CONFIG_SCSI_DEBUG is not set
  65.437 +
  65.438 +#
  65.439 +# Fusion MPT device support
  65.440 +#
  65.441 +# CONFIG_FUSION is not set
  65.442 +# CONFIG_FUSION_BOOT is not set
  65.443 +# CONFIG_FUSION_ISENSE is not set
  65.444 +# CONFIG_FUSION_CTL is not set
  65.445 +# CONFIG_FUSION_LAN is not set
  65.446 +
  65.447 +#
  65.448 +# IEEE 1394 (FireWire) support (EXPERIMENTAL)
  65.449 +#
  65.450 +# CONFIG_IEEE1394 is not set
  65.451 +
  65.452 +#
  65.453 +# I2O device support
  65.454 +#
  65.455 +# CONFIG_I2O is not set
  65.456 +# CONFIG_I2O_PCI is not set
  65.457 +# CONFIG_I2O_BLOCK is not set
  65.458 +# CONFIG_I2O_LAN is not set
  65.459 +# CONFIG_I2O_SCSI is not set
  65.460 +# CONFIG_I2O_PROC is not set
  65.461 +
  65.462 +#
  65.463 +# Network device support
  65.464 +#
  65.465 +CONFIG_NETDEVICES=y
  65.466 +
  65.467 +#
  65.468 +# ARCnet devices
  65.469 +#
  65.470 +# CONFIG_ARCNET is not set
  65.471 +# CONFIG_DUMMY is not set
  65.472 +# CONFIG_BONDING is not set
  65.473 +# CONFIG_EQUALIZER is not set
  65.474 +# CONFIG_TUN is not set
  65.475 +# CONFIG_ETHERTAP is not set
  65.476 +
  65.477 +#
  65.478 +# Ethernet (10 or 100Mbit)
  65.479 +#
  65.480 +CONFIG_NET_ETHERNET=y
  65.481 +# CONFIG_SUNLANCE is not set
  65.482 +# CONFIG_HAPPYMEAL is not set
  65.483 +# CONFIG_SUNBMAC is not set
  65.484 +# CONFIG_SUNQE is not set
  65.485 +# CONFIG_SUNGEM is not set
  65.486 +CONFIG_NET_VENDOR_3COM=y
  65.487 +# CONFIG_EL1 is not set
  65.488 +# CONFIG_EL2 is not set
  65.489 +# CONFIG_ELPLUS is not set
  65.490 +# CONFIG_EL16 is not set
  65.491 +# CONFIG_EL3 is not set
  65.492 +# CONFIG_3C515 is not set
  65.493 +# CONFIG_ELMC is not set
  65.494 +# CONFIG_ELMC_II is not set
  65.495 +CONFIG_VORTEX=y
  65.496 +# CONFIG_TYPHOON is not set
  65.497 +# CONFIG_LANCE is not set
  65.498 +# CONFIG_NET_VENDOR_SMC is not set
  65.499 +# CONFIG_NET_VENDOR_RACAL is not set
  65.500 +# CONFIG_AT1700 is not set
  65.501 +# CONFIG_DEPCA is not set
  65.502 +# CONFIG_HP100 is not set
  65.503 +# CONFIG_NET_ISA is not set
  65.504 +CONFIG_NET_PCI=y
  65.505 +CONFIG_PCNET32=y
  65.506 +# CONFIG_AMD8111_ETH is not set
  65.507 +# CONFIG_ADAPTEC_STARFIRE is not set
  65.508 +# CONFIG_AC3200 is not set
  65.509 +# CONFIG_APRICOT is not set
  65.510 +# CONFIG_B44 is not set
  65.511 +# CONFIG_CS89x0 is not set
  65.512 +CONFIG_TULIP=y
  65.513 +# CONFIG_TULIP_MWI is not set
  65.514 +# CONFIG_TULIP_MMIO is not set
  65.515 +# CONFIG_DE4X5 is not set
  65.516 +# CONFIG_DGRS is not set
  65.517 +# CONFIG_DM9102 is not set
  65.518 +# CONFIG_EEPRO100 is not set
  65.519 +# CONFIG_EEPRO100_PIO is not set
  65.520 +# CONFIG_E100 is not set
  65.521 +# CONFIG_LNE390 is not set
  65.522 +# CONFIG_FEALNX is not set
  65.523 +# CONFIG_NATSEMI is not set
  65.524 +# CONFIG_NE2K_PCI is not set
  65.525 +# CONFIG_NE3210 is not set
  65.526 +# CONFIG_ES3210 is not set
  65.527 +# CONFIG_8139CP is not set
  65.528 +# CONFIG_8139TOO is not set
  65.529 +# CONFIG_8139TOO_PIO is not set
  65.530 +# CONFIG_8139TOO_TUNE_TWISTER is not set
  65.531 +# CONFIG_8139TOO_8129 is not set
  65.532 +# CONFIG_8139_OLD_RX_RESET is not set
  65.533 +# CONFIG_SIS900 is not set
  65.534 +# CONFIG_EPIC100 is not set
  65.535 +# CONFIG_SUNDANCE is not set
  65.536 +# CONFIG_SUNDANCE_MMIO is not set
  65.537 +# CONFIG_TLAN is not set
  65.538 +# CONFIG_VIA_RHINE is not set
  65.539 +# CONFIG_VIA_RHINE_MMIO is not set
  65.540 +# CONFIG_WINBOND_840 is not set
  65.541 +# CONFIG_NET_POCKET is not set
  65.542 +
  65.543 +#
  65.544 +# Ethernet (1000 Mbit)
  65.545 +#
  65.546 +CONFIG_ACENIC=y
  65.547 +# CONFIG_ACENIC_OMIT_TIGON_I is not set
  65.548 +# CONFIG_DL2K is not set
  65.549 +CONFIG_E1000=y
  65.550 +# CONFIG_E1000_NAPI is not set
  65.551 +# CONFIG_MYRI_SBUS is not set
  65.552 +# CONFIG_NS83820 is not set
  65.553 +# CONFIG_HAMACHI is not set
  65.554 +# CONFIG_YELLOWFIN is not set
  65.555 +# CONFIG_R8169 is not set
  65.556 +# CONFIG_SK98LIN is not set
  65.557 +CONFIG_TIGON3=y
  65.558 +# CONFIG_FDDI is not set
  65.559 +# CONFIG_HIPPI is not set
  65.560 +# CONFIG_PLIP is not set
  65.561 +# CONFIG_PPP is not set
  65.562 +# CONFIG_SLIP is not set
  65.563 +
  65.564 +#
  65.565 +# Wireless LAN (non-hamradio)
  65.566 +#
  65.567 +# CONFIG_NET_RADIO is not set
  65.568 +
  65.569 +#
  65.570 +# Token Ring devices
  65.571 +#
  65.572 +# CONFIG_TR is not set
  65.573 +# CONFIG_NET_FC is not set
  65.574 +# CONFIG_RCPCI is not set
  65.575 +# CONFIG_SHAPER is not set
  65.576 +
  65.577 +#
  65.578 +# Wan interfaces
  65.579 +#
  65.580 +# CONFIG_WAN is not set
  65.581 +
  65.582 +#
  65.583 +# Amateur Radio support
  65.584 +#
  65.585 +# CONFIG_HAMRADIO is not set
  65.586 +
  65.587 +#
  65.588 +# IrDA (infrared) support
  65.589 +#
  65.590 +# CONFIG_IRDA is not set
  65.591 +
  65.592 +#
  65.593 +# ISDN subsystem
  65.594 +#
  65.595 +# CONFIG_ISDN is not set
  65.596 +
  65.597 +#
  65.598 +# Old CD-ROM drivers (not SCSI, not IDE)
  65.599 +#
  65.600 +# CONFIG_CD_NO_IDESCSI is not set
  65.601 +
  65.602 +#
  65.603 +# Input core support
  65.604 +#
  65.605 +# CONFIG_INPUT is not set
  65.606 +# CONFIG_INPUT_KEYBDEV is not set
  65.607 +# CONFIG_INPUT_MOUSEDEV is not set
  65.608 +# CONFIG_INPUT_JOYDEV is not set
  65.609 +# CONFIG_INPUT_EVDEV is not set
  65.610 +# CONFIG_INPUT_UINPUT is not set
  65.611 +
  65.612 +#
  65.613 +# Character devices
  65.614 +#
  65.615 +CONFIG_VT=y
  65.616 +CONFIG_VT_CONSOLE=y
  65.617 +# CONFIG_SERIAL is not set
  65.618 +# CONFIG_SERIAL_EXTENDED is not set
  65.619 +# CONFIG_SERIAL_NONSTANDARD is not set
  65.620 +CONFIG_UNIX98_PTYS=y
  65.621 +CONFIG_UNIX98_PTY_COUNT=256
  65.622 +# CONFIG_PRINTER is not set
  65.623 +# CONFIG_PPDEV is not set
  65.624 +# CONFIG_TIPAR is not set
  65.625 +
  65.626 +#
  65.627 +# I2C support
  65.628 +#
  65.629 +# CONFIG_I2C is not set
  65.630 +
  65.631 +#
  65.632 +# Mice
  65.633 +#
  65.634 +# CONFIG_BUSMOUSE is not set
  65.635 +CONFIG_MOUSE=y
  65.636 +CONFIG_PSMOUSE=y
  65.637 +# CONFIG_82C710_MOUSE is not set
  65.638 +# CONFIG_PC110_PAD is not set
  65.639 +# CONFIG_MK712_MOUSE is not set
  65.640 +
  65.641 +#
  65.642 +# Joysticks
  65.643 +#
  65.644 +# CONFIG_INPUT_GAMEPORT is not set
  65.645 +
  65.646 +#
  65.647 +# Input core support is needed for gameports
  65.648 +#
  65.649 +
  65.650 +#
  65.651 +# Input core support is needed for joysticks
  65.652 +#
  65.653 +# CONFIG_QIC02_TAPE is not set
  65.654 +# CONFIG_IPMI_HANDLER is not set
  65.655 +# CONFIG_IPMI_PANIC_EVENT is not set
  65.656 +# CONFIG_IPMI_DEVICE_INTERFACE is not set
  65.657 +# CONFIG_IPMI_KCS is not set
  65.658 +# CONFIG_IPMI_WATCHDOG is not set
  65.659 +
  65.660 +#
  65.661 +# Watchdog Cards
  65.662 +#
  65.663 +# CONFIG_WATCHDOG is not set
  65.664 +# CONFIG_SCx200 is not set
  65.665 +# CONFIG_SCx200_GPIO is not set
  65.666 +# CONFIG_AMD_RNG is not set
  65.667 +# CONFIG_INTEL_RNG is not set
  65.668 +# CONFIG_HW_RANDOM is not set
  65.669 +# CONFIG_AMD_PM768 is not set
  65.670 +# CONFIG_NVRAM is not set
  65.671 +# CONFIG_RTC is not set
  65.672 +# CONFIG_DTLK is not set
  65.673 +# CONFIG_R3964 is not set
  65.674 +# CONFIG_APPLICOM is not set
  65.675 +# CONFIG_SONYPI is not set
  65.676 +
  65.677 +#
  65.678 +# Ftape, the floppy tape device driver
  65.679 +#
  65.680 +# CONFIG_FTAPE is not set
  65.681 +# CONFIG_AGP is not set
  65.682 +
  65.683 +#
  65.684 +# Direct Rendering Manager (XFree86 DRI support)
  65.685 +#
  65.686 +# CONFIG_DRM is not set
  65.687 +# CONFIG_MWAVE is not set
  65.688 +# CONFIG_OBMOUSE is not set
  65.689 +
  65.690 +#
  65.691 +# Multimedia devices
  65.692 +#
  65.693 +# CONFIG_VIDEO_DEV is not set
  65.694 +
  65.695 +#
  65.696 +# File systems
  65.697 +#
  65.698 +# CONFIG_QUOTA is not set
  65.699 +# CONFIG_QFMT_V2 is not set
  65.700 +CONFIG_AUTOFS_FS=y
  65.701 +CONFIG_AUTOFS4_FS=y
  65.702 +# CONFIG_REISERFS_FS is not set
  65.703 +# CONFIG_REISERFS_CHECK is not set
  65.704 +# CONFIG_REISERFS_PROC_INFO is not set
  65.705 +# CONFIG_ADFS_FS is not set
  65.706 +# CONFIG_ADFS_FS_RW is not set
  65.707 +# CONFIG_AFFS_FS is not set
  65.708 +# CONFIG_HFS_FS is not set
  65.709 +# CONFIG_HFSPLUS_FS is not set
  65.710 +# CONFIG_BEFS_FS is not set
  65.711 +# CONFIG_BEFS_DEBUG is not set
  65.712 +# CONFIG_BFS_FS is not set
  65.713 +CONFIG_EXT3_FS=y
  65.714 +CONFIG_JBD=y
  65.715 +# CONFIG_JBD_DEBUG is not set
  65.716 +CONFIG_FAT_FS=y
  65.717 +CONFIG_MSDOS_FS=y
  65.718 +CONFIG_UMSDOS_FS=y
  65.719 +CONFIG_VFAT_FS=y
  65.720 +# CONFIG_EFS_FS is not set
  65.721 +# CONFIG_JFFS_FS is not set
  65.722 +# CONFIG_JFFS2_FS is not set
  65.723 +# CONFIG_CRAMFS is not set
  65.724 +CONFIG_TMPFS=y
  65.725 +CONFIG_RAMFS=y
  65.726 +CONFIG_ISO9660_FS=y
  65.727 +CONFIG_JOLIET=y
  65.728 +CONFIG_ZISOFS=y
  65.729 +# CONFIG_JFS_FS is not set
  65.730 +# CONFIG_JFS_DEBUG is not set
  65.731 +# CONFIG_JFS_STATISTICS is not set
  65.732 +# CONFIG_MINIX_FS is not set
  65.733 +# CONFIG_VXFS_FS is not set
  65.734 +# CONFIG_NTFS_FS is not set
  65.735 +# CONFIG_NTFS_RW is not set
  65.736 +# CONFIG_HPFS_FS is not set
  65.737 +CONFIG_PROC_FS=y
  65.738 +# CONFIG_DEVFS_FS is not set
  65.739 +# CONFIG_DEVFS_MOUNT is not set
  65.740 +# CONFIG_DEVFS_DEBUG is not set
  65.741 +CONFIG_DEVPTS_FS=y
  65.742 +# CONFIG_QNX4FS_FS is not set
  65.743 +# CONFIG_QNX4FS_RW is not set
  65.744 +# CONFIG_ROMFS_FS is not set
  65.745 +CONFIG_EXT2_FS=y
  65.746 +# CONFIG_SYSV_FS is not set
  65.747 +# CONFIG_UDF_FS is not set
  65.748 +# CONFIG_UDF_RW is not set
  65.749 +# CONFIG_UFS_FS is not set
  65.750 +# CONFIG_UFS_FS_WRITE is not set
  65.751 +# CONFIG_XFS_FS is not set
  65.752 +# CONFIG_XFS_QUOTA is not set
  65.753 +# CONFIG_XFS_RT is not set
  65.754 +# CONFIG_XFS_TRACE is not set
  65.755 +# CONFIG_XFS_DEBUG is not set
  65.756 +
  65.757 +#
  65.758 +# Network File Systems
  65.759 +#
  65.760 +# CONFIG_CODA_FS is not set
  65.761 +# CONFIG_INTERMEZZO_FS is not set
  65.762 +CONFIG_NFS_FS=y
  65.763 +CONFIG_NFS_V3=y
  65.764 +# CONFIG_NFS_DIRECTIO is not set
  65.765 +CONFIG_ROOT_NFS=y
  65.766 +CONFIG_NFSD=y
  65.767 +CONFIG_NFSD_V3=y
  65.768 +# CONFIG_NFSD_TCP is not set
  65.769 +CONFIG_SUNRPC=y
  65.770 +CONFIG_LOCKD=y
  65.771 +CONFIG_LOCKD_V4=y
  65.772 +# CONFIG_SMB_FS is not set
  65.773 +# CONFIG_NCP_FS is not set
  65.774 +# CONFIG_NCPFS_PACKET_SIGNING is not set
  65.775 +# CONFIG_NCPFS_IOCTL_LOCKING is not set
  65.776 +# CONFIG_NCPFS_STRONG is not set
  65.777 +# CONFIG_NCPFS_NFS_NS is not set
  65.778 +# CONFIG_NCPFS_OS2_NS is not set
  65.779 +# CONFIG_NCPFS_SMALLDOS is not set
  65.780 +# CONFIG_NCPFS_NLS is not set
  65.781 +# CONFIG_NCPFS_EXTRAS is not set
  65.782 +CONFIG_ZISOFS_FS=y
  65.783 +
  65.784 +#
  65.785 +# Partition Types
  65.786 +#
  65.787 +CONFIG_PARTITION_ADVANCED=y
  65.788 +# CONFIG_ACORN_PARTITION is not set
  65.789 +# CONFIG_OSF_PARTITION is not set
  65.790 +# CONFIG_AMIGA_PARTITION is not set
  65.791 +# CONFIG_ATARI_PARTITION is not set
  65.792 +# CONFIG_MAC_PARTITION is not set
  65.793 +CONFIG_MSDOS_PARTITION=y
  65.794 +# CONFIG_BSD_DISKLABEL is not set
  65.795 +# CONFIG_MINIX_SUBPARTITION is not set
  65.796 +# CONFIG_SOLARIS_X86_PARTITION is not set
  65.797 +# CONFIG_UNIXWARE_DISKLABEL is not set
  65.798 +# CONFIG_LDM_PARTITION is not set
  65.799 +# CONFIG_SGI_PARTITION is not set
  65.800 +# CONFIG_ULTRIX_PARTITION is not set
  65.801 +# CONFIG_SUN_PARTITION is not set
  65.802 +# CONFIG_EFI_PARTITION is not set
  65.803 +# CONFIG_SMB_NLS is not set
  65.804 +CONFIG_NLS=y
  65.805 +
  65.806 +#
  65.807 +# Native Language Support
  65.808 +#
  65.809 +CONFIG_NLS_DEFAULT="iso8859-1"
  65.810 +# CONFIG_NLS_CODEPAGE_437 is not set
  65.811 +# CONFIG_NLS_CODEPAGE_737 is not set
  65.812 +# CONFIG_NLS_CODEPAGE_775 is not set
  65.813 +# CONFIG_NLS_CODEPAGE_850 is not set
  65.814 +# CONFIG_NLS_CODEPAGE_852 is not set
  65.815 +# CONFIG_NLS_CODEPAGE_855 is not set
  65.816 +# CONFIG_NLS_CODEPAGE_857 is not set
  65.817 +# CONFIG_NLS_CODEPAGE_860 is not set
  65.818 +# CONFIG_NLS_CODEPAGE_861 is not set
  65.819 +# CONFIG_NLS_CODEPAGE_862 is not set
  65.820 +# CONFIG_NLS_CODEPAGE_863 is not set
  65.821 +# CONFIG_NLS_CODEPAGE_864 is not set
  65.822 +# CONFIG_NLS_CODEPAGE_865 is not set
  65.823 +# CONFIG_NLS_CODEPAGE_866 is not set
  65.824 +# CONFIG_NLS_CODEPAGE_869 is not set
  65.825 +# CONFIG_NLS_CODEPAGE_936 is not set
  65.826 +# CONFIG_NLS_CODEPAGE_950 is not set
  65.827 +# CONFIG_NLS_CODEPAGE_932 is not set
  65.828 +# CONFIG_NLS_CODEPAGE_949 is not set
  65.829 +# CONFIG_NLS_CODEPAGE_874 is not set
  65.830 +# CONFIG_NLS_ISO8859_8 is not set
  65.831 +# CONFIG_NLS_CODEPAGE_1250 is not set
  65.832 +# CONFIG_NLS_CODEPAGE_1251 is not set
  65.833 +CONFIG_NLS_ISO8859_1=y
  65.834 +# CONFIG_NLS_ISO8859_2 is not set
  65.835 +# CONFIG_NLS_ISO8859_3 is not set
  65.836 +# CONFIG_NLS_ISO8859_4 is not set
  65.837 +# CONFIG_NLS_ISO8859_5 is not set
  65.838 +# CONFIG_NLS_ISO8859_6 is not set
  65.839 +# CONFIG_NLS_ISO8859_7 is not set
  65.840 +# CONFIG_NLS_ISO8859_9 is not set
  65.841 +# CONFIG_NLS_ISO8859_13 is not set
  65.842 +# CONFIG_NLS_ISO8859_14 is not set
  65.843 +# CONFIG_NLS_ISO8859_15 is not set
  65.844 +# CONFIG_NLS_KOI8_R is not set
  65.845 +# CONFIG_NLS_KOI8_U is not set
  65.846 +# CONFIG_NLS_UTF8 is not set
  65.847 +
  65.848 +#
  65.849 +# Console drivers
  65.850 +#
  65.851 +CONFIG_XEN_CONSOLE=y
  65.852 +CONFIG_VGA_CONSOLE=y
  65.853 +CONFIG_DUMMY_CONSOLE=y
  65.854 +# CONFIG_VIDEO_SELECT is not set
  65.855 +# CONFIG_MDA_CONSOLE is not set
  65.856 +
  65.857 +#
  65.858 +# Frame-buffer support
  65.859 +#
  65.860 +# CONFIG_FB is not set
  65.861 +
  65.862 +#
  65.863 +# Sound
  65.864 +#
  65.865 +# CONFIG_SOUND is not set
  65.866 +
  65.867 +#
  65.868 +# USB support
  65.869 +#
  65.870 +CONFIG_USB=y
  65.871 +CONFIG_USB_DEBUG=y
  65.872 +
  65.873 +#
  65.874 +# Miscellaneous USB options
  65.875 +#
  65.876 +# CONFIG_USB_DEVICEFS is not set
  65.877 +# CONFIG_USB_BANDWIDTH is not set
  65.878 +
  65.879 +#
  65.880 +# USB Host Controller Drivers
  65.881 +#
  65.882 +# CONFIG_USB_EHCI_HCD is not set
  65.883 +CONFIG_USB_UHCI=y
  65.884 +# CONFIG_USB_UHCI_ALT is not set
  65.885 +CONFIG_USB_OHCI=y
  65.886 +# CONFIG_USB_SL811HS_ALT is not set
  65.887 +# CONFIG_USB_SL811HS is not set
  65.888 +
  65.889 +#
  65.890 +# USB Device Class drivers
  65.891 +#
  65.892 +# CONFIG_USB_AUDIO is not set
  65.893 +# CONFIG_USB_EMI26 is not set
  65.894 +# CONFIG_USB_BLUETOOTH is not set
  65.895 +# CONFIG_USB_MIDI is not set
  65.896 +# CONFIG_USB_STORAGE is not set
  65.897 +# CONFIG_USB_STORAGE_DEBUG is not set
  65.898 +# CONFIG_USB_STORAGE_DATAFAB is not set
  65.899 +# CONFIG_USB_STORAGE_FREECOM is not set
  65.900 +# CONFIG_USB_STORAGE_ISD200 is not set
  65.901 +# CONFIG_USB_STORAGE_DPCM is not set
  65.902 +# CONFIG_USB_STORAGE_HP8200e is not set
  65.903 +# CONFIG_USB_STORAGE_SDDR09 is not set
  65.904 +# CONFIG_USB_STORAGE_SDDR55 is not set
  65.905 +# CONFIG_USB_STORAGE_JUMPSHOT is not set
  65.906 +# CONFIG_USB_ACM is not set
  65.907 +# CONFIG_USB_PRINTER is not set
  65.908 +
  65.909 +#
  65.910 +# USB Human Interface Devices (HID)
  65.911 +#
  65.912 +# CONFIG_USB_HID is not set
  65.913 +
  65.914 +#
  65.915 +#     Input core support is needed for USB HID input layer or HIDBP support
  65.916 +#
  65.917 +# CONFIG_USB_HIDINPUT is not set
  65.918 +# CONFIG_USB_HIDDEV is not set
  65.919 +# CONFIG_USB_KBD is not set
  65.920 +# CONFIG_USB_MOUSE is not set
  65.921 +# CONFIG_USB_AIPTEK is not set
  65.922 +# CONFIG_USB_WACOM is not set
  65.923 +# CONFIG_USB_KBTAB is not set
  65.924 +# CONFIG_USB_POWERMATE is not set
  65.925 +
  65.926 +#
  65.927 +# USB Imaging devices
  65.928 +#
  65.929 +# CONFIG_USB_DC2XX is not set
  65.930 +# CONFIG_USB_MDC800 is not set
  65.931 +# CONFIG_USB_SCANNER is not set
  65.932 +# CONFIG_USB_MICROTEK is not set
  65.933 +# CONFIG_USB_HPUSBSCSI is not set
  65.934 +
  65.935 +#
  65.936 +# USB Multimedia devices
  65.937 +#
  65.938 +
  65.939 +#
  65.940 +#   Video4Linux support is needed for USB Multimedia device support
  65.941 +#
  65.942 +
  65.943 +#
  65.944 +# USB Network adaptors
  65.945 +#
  65.946 +# CONFIG_USB_PEGASUS is not set
  65.947 +# CONFIG_USB_RTL8150 is not set
  65.948 +# CONFIG_USB_KAWETH is not set
  65.949 +# CONFIG_USB_CATC is not set
  65.950 +# CONFIG_USB_CDCETHER is not set
  65.951 +# CONFIG_USB_USBNET is not set
  65.952 +
  65.953 +#
  65.954 +# USB port drivers
  65.955 +#
  65.956 +# CONFIG_USB_USS720 is not set
  65.957 +
  65.958 +#
  65.959 +# USB Serial Converter support
  65.960 +#
  65.961 +# CONFIG_USB_SERIAL is not set
  65.962 +
  65.963 +#
  65.964 +# USB Miscellaneous drivers
  65.965 +#
  65.966 +# CONFIG_USB_RIO500 is not set
  65.967 +# CONFIG_USB_AUERSWALD is not set
  65.968 +# CONFIG_USB_TIGL is not set
  65.969 +# CONFIG_USB_BRLVGER is not set
  65.970 +# CONFIG_USB_LCD is not set
  65.971 +
  65.972 +#
  65.973 +# Support for USB gadgets
  65.974 +#
  65.975 +# CONFIG_USB_GADGET is not set
  65.976 +
  65.977 +#
  65.978 +# Bluetooth support
  65.979 +#
  65.980 +# CONFIG_BLUEZ is not set
  65.981 +
  65.982 +#
  65.983 +# Kernel hacking
  65.984 +#
  65.985 +CONFIG_DEBUG_KERNEL=y
  65.986 +# CONFIG_DEBUG_STACKOVERFLOW is not set
  65.987 +# CONFIG_DEBUG_HIGHMEM is not set
  65.988 +# CONFIG_DEBUG_SLAB is not set
  65.989 +# CONFIG_DEBUG_IOVIRT is not set
  65.990 +# CONFIG_MAGIC_SYSRQ is not set
  65.991 +# CONFIG_DEBUG_SPINLOCK is not set
  65.992 +# CONFIG_DEBUG_BUGVERBOSE is not set
  65.993 +CONFIG_KALLSYMS=y
  65.994 +# CONFIG_FRAME_POINTER is not set
  65.995 +CONFIG_LOG_BUF_SHIFT=0
  65.996 +
  65.997 +#
  65.998 +# Cryptographic options
  65.999 +#
 65.1000 +# CONFIG_CRYPTO is not set
 65.1001 +
 65.1002 +#
 65.1003 +# Library routines
 65.1004 +#
 65.1005 +# CONFIG_CRC32 is not set
 65.1006 +CONFIG_ZLIB_INFLATE=y
 65.1007 +# CONFIG_ZLIB_DEFLATE is not set
 65.1008 +# CONFIG_FW_LOADER is not set
    66.1 --- a/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c	Fri Apr 02 13:58:27 2004 +0000
    66.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c	Fri Apr 02 14:03:03 2004 +0000
    66.3 @@ -143,8 +143,6 @@ void xen_console_init(void)
    66.4      }
    66.5  
    66.6      register_console(&kcons_info);
    66.7 -
    66.8 -    evtchn_clear_error_virq();
    66.9  }
   66.10  
   66.11  
    67.1 --- a/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c	Fri Apr 02 13:58:27 2004 +0000
    67.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c	Fri Apr 02 14:03:03 2004 +0000
    67.3 @@ -40,35 +40,25 @@ static unsigned int ring_cons, ring_prod
    67.4  static DECLARE_WAIT_QUEUE_HEAD(evtchn_wait);
    67.5  static struct fasync_struct *evtchn_async_queue;
    67.6  
    67.7 -/*
    67.8 - * Pending normal notifications and pending exceptional notifications.
    67.9 - * 'Pending' means that we received an upcall but this is not yet ack'ed
   67.10 - * from userspace by writing to /dev/xen/evtchn.
   67.11 - */
   67.12 -static u32 pend_nrm[32], pend_exc[32];
   67.13 +/* Which ports is user-space bound to? */
   67.14 +static u32 bound_ports[32];
   67.15  
   67.16  static spinlock_t lock;
   67.17  
   67.18 -void evtchn_device_upcall(int port, int exception)
   67.19 +void evtchn_device_upcall(int port)
   67.20  {
   67.21      u16 port_subtype;
   67.22 +    shared_info_t *s = HYPERVISOR_shared_info;
   67.23  
   67.24      spin_lock(&lock);
   67.25  
   67.26      mask_evtchn(port);
   67.27 +    clear_evtchn(port);
   67.28  
   67.29 -    if ( likely(!exception) )
   67.30 -    {
   67.31 -        clear_evtchn(port);
   67.32 -        set_bit(port, &pend_nrm[0]);
   67.33 +    if ( likely(!synch_test_and_clear_bit(port, &s->evtchn_exception[0])) )
   67.34          port_subtype = PORT_NORMAL;
   67.35 -    }
   67.36      else
   67.37 -    {
   67.38 -        clear_evtchn_exception(port);
   67.39 -        set_bit(port, &pend_exc[0]);
   67.40          port_subtype = PORT_EXCEPTION;
   67.41 -    }
   67.42  
   67.43      if ( ring != NULL )
   67.44      {
   67.45 @@ -92,28 +82,8 @@ void evtchn_device_upcall(int port, int 
   67.46  
   67.47  static void __evtchn_reset_buffer_ring(void)
   67.48  {
   67.49 -    u32          m;
   67.50 -    unsigned int i, j;
   67.51 -
   67.52 -    /* Initialise the ring with currently outstanding notifications. */
   67.53 +    /* Initialise the ring to empty. Clear errors. */
   67.54      ring_cons = ring_prod = ring_overflow = 0;
   67.55 -
   67.56 -    for ( i = 0; i < 32; i++ )
   67.57 -    {
   67.58 -        m = pend_exc[i];
   67.59 -        while ( (j = ffs(m)) != 0 )
   67.60 -        {
   67.61 -            m &= ~(1 << --j);
   67.62 -            ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_EXCEPTION);
   67.63 -        }
   67.64 -
   67.65 -        m = pend_nrm[i];
   67.66 -        while ( (j = ffs(m)) != 0 )
   67.67 -        {
   67.68 -            m &= ~(1 << --j);
   67.69 -            ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_NORMAL);
   67.70 -        }
   67.71 -    }
   67.72  }
   67.73  
   67.74  static ssize_t evtchn_read(struct file *file, char *buf,
   67.75 @@ -232,11 +202,8 @@ static ssize_t evtchn_write(struct file 
   67.76  
   67.77      spin_lock_irq(&lock);
   67.78      for ( i = 0; i < (count/2); i++ )
   67.79 -    {
   67.80 -        clear_bit(kbuf[i]&PORTIDX_MASK, 
   67.81 -                  (kbuf[i]&PORT_EXCEPTION) ? &pend_exc[0] : &pend_nrm[0]);
   67.82 -        unmask_evtchn(kbuf[i]&PORTIDX_MASK);
   67.83 -    }
   67.84 +        if ( test_bit(kbuf[i], &bound_ports[0]) )
   67.85 +            unmask_evtchn(kbuf[i]);
   67.86      spin_unlock_irq(&lock);
   67.87  
   67.88      rc = count;
   67.89 @@ -249,14 +216,35 @@ static ssize_t evtchn_write(struct file 
   67.90  static int evtchn_ioctl(struct inode *inode, struct file *file,
   67.91                          unsigned int cmd, unsigned long arg)
   67.92  {
   67.93 -    if ( cmd != EVTCHN_RESET )
   67.94 -        return -EINVAL;
   67.95 +    int rc = 0;
   67.96 +    
   67.97 +    spin_lock_irq(&lock);
   67.98 +    
   67.99 +    switch ( cmd )
  67.100 +    {
  67.101 +    case EVTCHN_RESET:
  67.102 +        __evtchn_reset_buffer_ring();
  67.103 +        break;
  67.104 +    case EVTCHN_BIND:
  67.105 +        if ( !test_and_set_bit(arg, &bound_ports[0]) )
  67.106 +            unmask_evtchn(arg);
  67.107 +        else
  67.108 +            rc = -EINVAL;
  67.109 +        break;
  67.110 +    case EVTCHN_UNBIND:
  67.111 +        if ( test_and_clear_bit(arg, &bound_ports[0]) )
  67.112 +            mask_evtchn(arg);
  67.113 +        else
  67.114 +            rc = -EINVAL;
  67.115 +        break;
  67.116 +    default:
  67.117 +        rc = -ENOSYS;
  67.118 +        break;
  67.119 +    }
  67.120  
  67.121 -    spin_lock_irq(&lock);
  67.122 -    __evtchn_reset_buffer_ring();
  67.123      spin_unlock_irq(&lock);   
  67.124  
  67.125 -    return 0;
  67.126 +    return rc;
  67.127  }
  67.128  
  67.129  static unsigned int evtchn_poll(struct file *file, poll_table *wait)
  67.130 @@ -298,12 +286,17 @@ static int evtchn_open(struct inode *ino
  67.131  
  67.132  static int evtchn_release(struct inode *inode, struct file *filp)
  67.133  {
  67.134 +    int i;
  67.135 +
  67.136      spin_lock_irq(&lock);
  67.137      if ( ring != NULL )
  67.138      {
  67.139          free_page((unsigned long)ring);
  67.140          ring = NULL;
  67.141      }
  67.142 +    for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
  67.143 +        if ( test_and_clear_bit(i, &bound_ports[0]) )
  67.144 +            mask_evtchn(i);
  67.145      spin_unlock_irq(&lock);
  67.146  
  67.147      evtchn_dev_inuse = 0;
    68.1 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile	Fri Apr 02 13:58:27 2004 +0000
    68.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile	Fri Apr 02 14:03:03 2004 +0000
    68.3 @@ -13,7 +13,7 @@ obj-y	:= process.o semaphore.o signal.o 
    68.4  		i386_ksyms.o i387.o evtchn.o pci-dma.o
    68.5  
    68.6  ifdef CONFIG_PCI
    68.7 -obj-y	+= pci-i386.o pci-pc.o pci-irq.o
    68.8 +obj-y	+= pci-i386.o pci-pc.o
    68.9  endif
   68.10  
   68.11  include $(TOPDIR)/Rules.make
    69.1 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S	Fri Apr 02 13:58:27 2004 +0000
    69.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S	Fri Apr 02 14:03:03 2004 +0000
    69.3 @@ -80,7 +80,7 @@ processor	= 52
    69.4  
    69.5  /* Offsets into shared_info_t. */
    69.6  #define evtchn_upcall_pending /* 0 */
    69.7 -#define evtchn_upcall_mask       4
    69.8 +#define evtchn_upcall_mask       1
    69.9  
   69.10  ENOSYS = 38
   69.11  
   69.12 @@ -210,18 +210,18 @@ ENTRY(system_call)
   69.13  	movl %eax,EAX(%esp)		# save the return value
   69.14  ENTRY(ret_from_sys_call)
   69.15          movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi
   69.16 -        lock btsl $0,evtchn_upcall_mask(%esi) # make tests atomic
   69.17 +        movb $1,evtchn_upcall_mask(%esi) # make tests atomic
   69.18  ret_syscall_tests:
   69.19  	cmpl $0,need_resched(%ebx)
   69.20  	jne reschedule
   69.21  	cmpl $0,sigpending(%ebx)
   69.22  	je   safesti                    # ensure need_resched updates are seen
   69.23  signal_return:
   69.24 -	lock btrl $0,evtchn_upcall_mask(%esi) # reenable event callbacks
   69.25 +	movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks
   69.26  	movl %esp,%eax
   69.27  	xorl %edx,%edx
   69.28  	call SYMBOL_NAME(do_signal)
   69.29 -	jmp  ret_from_sys_call
   69.30 +	jmp  safesti
   69.31  
   69.32  	ALIGN
   69.33  restore_all:
   69.34 @@ -254,9 +254,9 @@ ret_from_exception:
   69.35  
   69.36  	ALIGN
   69.37  reschedule:
   69.38 -        lock btrl $0,evtchn_upcall_mask(%esi)  # reenable event callbacks
   69.39 -	call SYMBOL_NAME(schedule)             # test
   69.40 -	jmp ret_from_sys_call
   69.41 +        movb $0,evtchn_upcall_mask(%esi)  # reenable event callbacks
   69.42 +	call SYMBOL_NAME(schedule)        # test
   69.43 +	jmp  ret_from_sys_call
   69.44  
   69.45  ENTRY(divide_error)
   69.46  	pushl $0		# no error code
   69.47 @@ -317,12 +317,12 @@ 11:     push %esp
   69.48          movb CS(%esp),%cl
   69.49  	test $2,%cl          # slow return to ring 2 or 3
   69.50  	jne  ret_syscall_tests
   69.51 -safesti:lock btrl $0,evtchn_upcall_mask(%esi) # reenable event callbacks
   69.52 +safesti:movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks
   69.53  scrit:  /**** START OF CRITICAL REGION ****/
   69.54 -        testb $1,evtchn_upcall_pending(%esi)
   69.55 +        testb $0xFF,evtchn_upcall_pending(%esi)
   69.56          jnz  14f