ia64/xen-unstable

changeset 4808:487de0451d2b

bitkeeper revision 1.1389.1.36 (428075b7XUww-l4YTG96au0lbtOO_g)

Merge firebug.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
into firebug.cl.cam.ac.uk:/local/scratch/cl349/xen-unstable.bk
author cl349@firebug.cl.cam.ac.uk[cl349]
date Tue May 10 08:49:59 2005 +0000 (2005-05-10)
parents 9c9be1b0e3b1 f470118a979e
children 754aa05abf61
files .rootkeys BitKeeper/etc/ignore BitKeeper/etc/logging_ok docs/misc/sedf_scheduler_mini-HOWTO.txt freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h freebsd-5.3-xen-sparse/i386-xen/include/pmap.h freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h linux-2.4.29-xen-sparse/mkbuildtree linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile linux-2.6.11-xen-sparse/drivers/acpi/tables.c linux-2.6.11-xen-sparse/drivers/xen/Makefile linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c 
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io_apic.h linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h linux-2.6.11-xen-sparse/mkbuildtree tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c tools/libxc/Makefile tools/libxc/xc.h tools/libxc/xc_domain.c tools/libxc/xc_physdev.c tools/libxc/xc_sedf.c tools/libxutil/sxpr_parser.h tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/XendClient.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xm/main.py xen/arch/ia64/domain.c xen/arch/ia64/idle0_task.c xen/arch/ia64/xenmisc.c xen/arch/ia64/xensetup.c xen/arch/x86/Makefile xen/arch/x86/Rules.mk xen/arch/x86/acpi.c xen/arch/x86/acpi/boot.c xen/arch/x86/apic.c xen/arch/x86/audit.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/i387.c xen/arch/x86/i8259.c xen/arch/x86/idle0_task.c xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/microcode.c xen/arch/x86/mm.c xen/arch/x86/mpparse.c xen/arch/x86/mtrr/generic.c xen/arch/x86/mtrr/main.c xen/arch/x86/nmi.c xen/arch/x86/pci-irq.c xen/arch/x86/pci-pc.c xen/arch/x86/pci-x86.c xen/arch/x86/pci-x86.h xen/arch/x86/physdev.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/smpboot.c xen/arch/x86/string.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/mm.c xen/common/Makefile xen/common/bitmap.c xen/common/dom0_ops.c xen/common/domain.c xen/common/event_channel.c xen/common/keyhandler.c xen/common/page_alloc.c xen/common/physdev.c xen/common/sched_bvt.c xen/common/sched_sedf.c xen/common/schedule.c xen/common/xmalloc.c xen/drivers/Makefile xen/drivers/acpi/Makefile xen/drivers/acpi/acpi_ksyms.c xen/drivers/acpi/tables.c xen/drivers/char/console.c xen/drivers/char/serial.c xen/drivers/pci/Makefile xen/drivers/pci/compat.c xen/drivers/pci/gen-devlist.c 
xen/drivers/pci/names.c xen/drivers/pci/pci.c xen/drivers/pci/pci.ids xen/drivers/pci/quirks.c xen/drivers/pci/setup-res.c xen/include/acpi/acconfig.h xen/include/acpi/acdebug.h xen/include/acpi/acdisasm.h xen/include/acpi/acdispat.h xen/include/acpi/acevents.h xen/include/acpi/acexcep.h xen/include/acpi/acglobal.h xen/include/acpi/achware.h xen/include/acpi/acinterp.h xen/include/acpi/aclocal.h xen/include/acpi/acmacros.h xen/include/acpi/acnamesp.h xen/include/acpi/acobject.h xen/include/acpi/acoutput.h xen/include/acpi/acparser.h xen/include/acpi/acpi.h xen/include/acpi/acpi_bus.h xen/include/acpi/acpi_drivers.h xen/include/acpi/acpiosxf.h xen/include/acpi/acpixf.h xen/include/acpi/acresrc.h xen/include/acpi/acstruct.h xen/include/acpi/actables.h xen/include/acpi/actbl.h xen/include/acpi/actbl1.h xen/include/acpi/actbl2.h xen/include/acpi/actbl71.h xen/include/acpi/actypes.h xen/include/acpi/acutils.h xen/include/acpi/amlcode.h xen/include/acpi/amlresrc.h xen/include/acpi/platform/acenv.h xen/include/acpi/platform/acgcc.h xen/include/acpi/platform/aclinux.h xen/include/asm-ia64/config.h xen/include/asm-ia64/serial.h xen/include/asm-x86/apic.h xen/include/asm-x86/apicdef.h xen/include/asm-x86/config.h xen/include/asm-x86/debugger.h xen/include/asm-x86/domain.h xen/include/asm-x86/fixmap.h xen/include/asm-x86/i387.h xen/include/asm-x86/io_apic.h xen/include/asm-x86/io_ports.h xen/include/asm-x86/irq.h xen/include/asm-x86/mach-default/apm.h xen/include/asm-x86/mach-default/bios_ebda.h xen/include/asm-x86/mach-default/do_timer.h xen/include/asm-x86/mach-default/entry_arch.h xen/include/asm-x86/mach-default/io_ports.h xen/include/asm-x86/mach-default/irq_vectors.h xen/include/asm-x86/mach-default/irq_vectors_limits.h xen/include/asm-x86/mach-default/mach_apic.h xen/include/asm-x86/mach-default/mach_apicdef.h xen/include/asm-x86/mach-default/mach_ipi.h xen/include/asm-x86/mach-default/mach_mpparse.h xen/include/asm-x86/mach-default/mach_mpspec.h 
xen/include/asm-x86/mach-default/mach_reboot.h xen/include/asm-x86/mach-default/mach_time.h xen/include/asm-x86/mach-default/mach_timer.h xen/include/asm-x86/mach-default/mach_traps.h xen/include/asm-x86/mach-default/mach_wakecpu.h xen/include/asm-x86/mach-default/pci-functions.h xen/include/asm-x86/mach-default/setup_arch_post.h xen/include/asm-x86/mach-default/setup_arch_pre.h xen/include/asm-x86/mach-default/smpboot_hooks.h xen/include/asm-x86/mach_apic.h xen/include/asm-x86/mm.h xen/include/asm-x86/mpspec.h xen/include/asm-x86/mpspec_def.h xen/include/asm-x86/page.h xen/include/asm-x86/physdev.h xen/include/asm-x86/processor.h xen/include/asm-x86/shadow.h xen/include/asm-x86/smp.h xen/include/asm-x86/smpboot.h xen/include/asm-x86/system.h xen/include/asm-x86/time.h xen/include/asm-x86/x86_32/domain_page.h xen/include/asm-x86/x86_64/string.h xen/include/public/dom0_ops.h xen/include/public/physdev.h xen/include/public/sched_ctl.h xen/include/xen/acpi.h xen/include/xen/adv_sched_hist.h xen/include/xen/bitmap.h xen/include/xen/bitops.h xen/include/xen/config.h xen/include/xen/cpumask.h xen/include/xen/event.h xen/include/xen/irq.h xen/include/xen/lib.h xen/include/xen/perfc_defn.h xen/include/xen/physdev.h xen/include/xen/sched-if.h xen/include/xen/sched.h xen/include/xen/serial.h xen/include/xen/slab.h xen/include/xen/smp.h xen/include/xen/spinlock.h xen/include/xen/types.h
line diff
     1.1 --- a/.rootkeys	Tue May 03 14:42:00 2005 +0000
     1.2 +++ b/.rootkeys	Tue May 10 08:49:59 2005 +0000
     1.3 @@ -21,6 +21,7 @@ 4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc
     1.4  412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt
     1.5  420b949cy9ZGzED74Fz_DaWlK7tT4g docs/misc/crashdb.txt
     1.6  4251a1f82AexscYEiF4Iku8Gc_kWfQ docs/misc/grant-tables.txt
     1.7 +424d462b5GuApQ_NyMsRFt9LbrsWow docs/misc/sedf_scheduler_mini-HOWTO.txt
     1.8  40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html
     1.9  410a4c2bAO_m_l4RsiiPHnZ4ixHWbQ docs/misc/xend.tex
    1.10  3f9e7d564bWFB-Czjv1qdmE6o0GqNg docs/src/interface.tex
    1.11 @@ -353,7 +354,6 @@ 41979925z1MsKU1SfuuheM1IFDQ_bA linux-2.6
    1.12  4118b6a418gnL6AZsTdglC92YGqYTg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h
    1.13  42539fb5A9hsS3NFQ-2VY4y1TONZZQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
    1.14  40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io.h
    1.15 -42778a69MXZVxch4pQqYsMPS0WnNSg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io_apic.h
    1.16  40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
    1.17  40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
    1.18  40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
    1.19 @@ -745,6 +745,7 @@ 3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/lib
    1.20  3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxc/xc_private.h
    1.21  42337174PxyzzPk62raDiYCIsfStDg tools/libxc/xc_ptrace.c
    1.22  4273458duzL--nsTfT6e_q6Kfij48g tools/libxc/xc_ptrace_core.c
    1.23 +41ebbfe9U0b0kI-HgjK7VEY4EvW7_w tools/libxc/xc_sedf.c
    1.24  41dde8b0pLfAKMs_L9Uri2hnzHiCRQ tools/libxc/xc_vmx_build.c
    1.25  40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/libxutil/Makefile
    1.26  40e033325Sjqs-_4TuzeUEprP_gYFg tools/libxutil/allocate.c
    1.27 @@ -1188,7 +1189,7 @@ 421098b6lY2JzrV1oFDbrt7XQhtElg xen/arch/
    1.28  427664f5eygrc3nEhI3RKf0Y37PzyA xen/arch/ia64/xentime.c
    1.29  3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/x86/Makefile
    1.30  3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk
    1.31 -3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c
    1.32 +3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi/boot.c
    1.33  3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c
    1.34  42360b3244-Q6BpEKhR_A1YtG1wPNQ xen/arch/x86/audit.c
    1.35  3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen/arch/x86/boot/mkelf32.c
    1.36 @@ -1218,10 +1219,7 @@ 41aaf567tqrKGSTDK8OVeAbpeoccPw xen/arch/
    1.37  41aaf567a36esU-rUK7twPiv-yTFyw xen/arch/x86/mtrr/mtrr.h
    1.38  41aaf567DcTL6pqVtLZJI5cSryyA1A xen/arch/x86/mtrr/state.c
    1.39  3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/x86/nmi.c
    1.40 -3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/x86/pci-irq.c
    1.41 -3ddb79bcZ_2FxINljqNSkqa17ISyJw xen/arch/x86/pci-pc.c
    1.42 -3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/x86/pci-x86.c
    1.43 -3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/x86/pci-x86.h
    1.44 +4051bcecFeq4DE70p4zGO5setf47CA xen/arch/x86/physdev.c
    1.45  3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/x86/rwlock.c
    1.46  3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/x86/setup.c
    1.47  405b8599xI_PoEr3zZoJ2on-jdn7iw xen/arch/x86/shadow.c
    1.48 @@ -1253,6 +1251,7 @@ 40e96d3akN3Hu_J5Bk-WXD8OGscrYQ xen/arch/
    1.49  422f27c8J9DQfCpegccMid59XhSmGA xen/arch/x86/x86_emulate.c
    1.50  3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile
    1.51  3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c
    1.52 +427fa2d0J0LU2s5oKbsM0nTZ2iyd2Q xen/common/bitmap.c
    1.53  3ddb79bdLX_P6iB7ILiblRLWvebapg xen/common/dom0_ops.c
    1.54  3e6377e4i0c9GtKN65e99OtRbw3AZw xen/common/dom_mem_ops.c
    1.55  3ddb79bdYO5D8Av12NHqPeSviav7cg xen/common/domain.c
    1.56 @@ -1265,9 +1264,9 @@ 3ddb79bduhSEZI8xa7IbGQCpap5y2A xen/commo
    1.57  41a61536SZbR6cj1ukWTb0DYU-vz9w xen/common/multicall.c
    1.58  3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c
    1.59  3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c
    1.60 -4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c
    1.61  3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
    1.62  40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c
    1.63 +41ebbfe9oF1BF3cH5v7yE3eOL9uPbA xen/common/sched_sedf.c
    1.64  3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
    1.65  3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c
    1.66  3e7f358awXBC3Vw-wFRwPw18qL1khg xen/common/string.c
    1.67 @@ -1276,50 +1275,29 @@ 3ddb79bd3zgV33PHdt-cgh3sxcb1hw xen/commo
    1.68  4203fb92Qcy7mGpauBdq09J-WAqfoA xen/common/xmalloc.c
    1.69  3ddb79c0ppNeJtjC4va8j41ADCnchA xen/drivers/Makefile
    1.70  40715b2bi9gU43-cYzlmPDgreYQchw xen/drivers/acpi/Makefile
    1.71 -40715b2bDxNCz5LFV8FAXihmYJZFUQ xen/drivers/acpi/acpi_ksyms.c
    1.72  40715b2cNVOegtvyft_AHFKJYRprfA xen/drivers/acpi/tables.c
    1.73  3e4a8cb7alzQCDKS7MlioPoHBKYkdQ xen/drivers/char/Makefile
    1.74  4049e6bfNSIq7s7OV-Bd69QD0RpR2Q xen/drivers/char/console.c
    1.75  3e4a8cb7nMChlro4wvOBo76n__iCFA xen/drivers/char/serial.c
    1.76 -3ddb79beUWngyIhMHgyPtuTem4o4JA xen/drivers/pci/Makefile
    1.77 -3ddb79beU9td0Mnm0VUMklerBa37qQ xen/drivers/pci/compat.c
    1.78 -3ddb79beHkGQE58z5t5gyUCYiwOxvw xen/drivers/pci/gen-devlist.c
    1.79 -3ddb79bfoQcFKLf5P6wZlDl36alWdQ xen/drivers/pci/names.c
    1.80 -3ddb79bfyX7-pD6XdxY_mdNrJR20iw xen/drivers/pci/pci.c
    1.81 -3ddb79bf2AS7YBGwooE_Kbv7XgUqNQ xen/drivers/pci/pci.ids
    1.82 -3ddb79bf7sTn85WtP_8Nc2YEmmVExQ xen/drivers/pci/quirks.c
    1.83 -3ddb79bfJaf0bkE1Y67bnll8-kjEPg xen/drivers/pci/setup-res.c
    1.84  40715b2cFpte_UNWnBZW0Du7z9AhTQ xen/include/acpi/acconfig.h
    1.85 -40715b2cEQWX-PaxEH30qI48K1krnQ xen/include/acpi/acdebug.h
    1.86 -40715b2c_7j-oy3ZNAuqE3IFNPzArg xen/include/acpi/acdisasm.h
    1.87 -40715b2cFdcPx-2FHGM1Q-M-592xYQ xen/include/acpi/acdispat.h
    1.88 -40715b2cKqD_Lihnlmvnyl4AAl3EFw xen/include/acpi/acevents.h
    1.89  40715b2ctNvVZ058w8eM8DR9hOat_A xen/include/acpi/acexcep.h
    1.90  40715b2com8I01qcHcAw47e93XsCqQ xen/include/acpi/acglobal.h
    1.91  40715b2cS1t4uI3sMsu-c0M4qqAIrw xen/include/acpi/achware.h
    1.92 -40715b2chSz545A4Tq-y3WAtanzPMQ xen/include/acpi/acinterp.h
    1.93  40715b2cGf23lRI58NphiaDQl698-w xen/include/acpi/aclocal.h
    1.94  40715b2cdG7tCF2NMk0j1RCQQPzPXg xen/include/acpi/acmacros.h
    1.95 -40715b2ckP2XZZDkSTehu6riuMogDA xen/include/acpi/acnamesp.h
    1.96  40715b2c4AvHYn2-2YIyt3mx-Mm5tw xen/include/acpi/acobject.h
    1.97  40715b2cPUXsHzmchvXx7QHAfW0nMw xen/include/acpi/acoutput.h
    1.98 -40715b2cNbYpt9CHmCHg3RG7fBdACA xen/include/acpi/acparser.h
    1.99  40715b2cWM_6zR14U9Tp0s_q8D002A xen/include/acpi/acpi.h
   1.100  40715b2dcJDTiROgyMk9swD_veWktA xen/include/acpi/acpi_bus.h
   1.101  40715b2dRFlZK6apnH7WkUCBdyFXWA xen/include/acpi/acpi_drivers.h
   1.102  40715b2dtgZhNGAzlyBMe3kqve3mqw xen/include/acpi/acpiosxf.h
   1.103  40715b2dpW5TY7n5rzCufsDhJVWuMQ xen/include/acpi/acpixf.h
   1.104 -40715b2dp9-_D9LTjcvtf-Yopih5mQ xen/include/acpi/acresrc.h
   1.105  40715b2djvd97KbIpt4wyJgxwqCqmg xen/include/acpi/acstruct.h
   1.106 -40715b2dRIU9cQgmTjtCRNleIJDAYg xen/include/acpi/actables.h
   1.107  40715b2dy8ECRkSo9x0tRRueAjPx1g xen/include/acpi/actbl.h
   1.108  40715b2ds4J-XWn9Ix-lgBiJffNgxw xen/include/acpi/actbl1.h
   1.109  40715b2d_aMKMjKKNImJR4km52KRHA xen/include/acpi/actbl2.h
   1.110 -40715b2dxC03aemijgL-iDH_-BkKRw xen/include/acpi/actbl71.h
   1.111  40715b2d0oQUijKwEw6SDJ4LhD8c4g xen/include/acpi/actypes.h
   1.112  40715b2dBByvcAtRpN5mafyEYLcBWA xen/include/acpi/acutils.h
   1.113 -40715b2d_iPcZ0uUVGblPvjsHOwE5Q xen/include/acpi/amlcode.h
   1.114 -40715b2d3CdS6dIpZDTiCJRlDG3LCA xen/include/acpi/amlresrc.h
   1.115  40715b2dKRW7A71SNaeV6zfrEzYxPw xen/include/acpi/platform/acenv.h
   1.116  40715b2d8fYydJMcODFrV1ocLklGDg xen/include/acpi/platform/acgcc.h
   1.117  40715b2d1yZkqyAt0kgx2xEwsatuuA xen/include/acpi/platform/aclinux.h
   1.118 @@ -1369,18 +1347,39 @@ 3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/inclu
   1.119  4204e7acwXDo-5iAAiO2eQbtDeYZXA xen/include/asm-x86/init.h
   1.120  3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-x86/io.h
   1.121  3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-x86/io_apic.h
   1.122 -42605109qxLSrHVE2SRhGXmgk907iw xen/include/asm-x86/io_ports.h
   1.123  3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-x86/irq.h
   1.124  404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-x86/ldt.h
   1.125 -4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach_apic.h
   1.126 +427fa2d0m8MOSSXT13zgb-q0fGA_Dw xen/include/asm-x86/mach-default/apm.h
   1.127 +427fa2d0suK9Av7vsAXhsQxZjqpc_Q xen/include/asm-x86/mach-default/bios_ebda.h
   1.128 +427fa2d0yC3KzLozoeK3Xa3uGVfIdw xen/include/asm-x86/mach-default/do_timer.h
   1.129 +427fa2d0bWQkR1mW5OBYxn07AN-bDw xen/include/asm-x86/mach-default/entry_arch.h
   1.130 +427fa2d0-SWcuwbdSypo4953bc2JdQ xen/include/asm-x86/mach-default/io_ports.h
   1.131 +427fa2d0eyAl7LAeO-SVV4IW7lZPGQ xen/include/asm-x86/mach-default/irq_vectors.h
   1.132 +427fa2d0df7VWG4KKpnKbKR2Cbd1_w xen/include/asm-x86/mach-default/irq_vectors_limits.h
   1.133 +4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach-default/mach_apic.h
   1.134 +427fa2d0I3FWjE2tWdOhlEOJn7stcg xen/include/asm-x86/mach-default/mach_apicdef.h
   1.135 +427fa2d093fDS2gOBLcl7Yndzl7HmA xen/include/asm-x86/mach-default/mach_ipi.h
   1.136 +427fa2d0Y7bD35d-FvDAeiJDIdRw2A xen/include/asm-x86/mach-default/mach_mpparse.h
   1.137 +427fa2d0aLQgE9e1GY9ZP5jrMOC8pQ xen/include/asm-x86/mach-default/mach_mpspec.h
   1.138 +427fa2d0fJ5nNn5ydJuOaZIL6F2fjQ xen/include/asm-x86/mach-default/mach_reboot.h
   1.139 +427fa2d0VlN555TE68TjKMsrOoFXNA xen/include/asm-x86/mach-default/mach_time.h
   1.140 +427fa2d0C0jWTKYjy7WJjGKeujSpSg xen/include/asm-x86/mach-default/mach_timer.h
   1.141 +427fa2d0UXLiS1scpNrK26ZT6Oes3g xen/include/asm-x86/mach-default/mach_traps.h
   1.142 +427fa2d0OfglYyfpDTD5DII4M0uZRw xen/include/asm-x86/mach-default/mach_wakecpu.h
   1.143 +427fa2d0_OBPxdi5Qo04JWgZhz7BFA xen/include/asm-x86/mach-default/pci-functions.h
   1.144 +427fa2d0mrTtXrliqDfLuJc5LLVXaA xen/include/asm-x86/mach-default/setup_arch_post.h
   1.145 +427fa2d0Uoo7gC61Kep6Yy7Os367Hg xen/include/asm-x86/mach-default/setup_arch_pre.h
   1.146 +427fa2d1EKnA8zCq2QLHiGOUqOgszg xen/include/asm-x86/mach-default/smpboot_hooks.h
   1.147  3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-x86/mc146818rtc.h
   1.148  40ec25fd7cSvbP7Biw91zaU_g0xsEQ xen/include/asm-x86/mm.h
   1.149  3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h
   1.150 +427fa2d1eJRenftJJnRyLsHKl1ghtA xen/include/asm-x86/mpspec_def.h
   1.151  3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h
   1.152  41aaf567Mi3OishhvrCtET1y-mxQBg xen/include/asm-x86/mtrr.h
   1.153  41a61536MFhNalgbVmYGXAhQsPTZNw xen/include/asm-x86/multicall.h
   1.154  3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h
   1.155  3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-x86/pci.h
   1.156 +42422fb0FVX-TJkSvAXnbfwMf19XFA xen/include/asm-x86/physdev.h
   1.157  3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-x86/processor.h
   1.158  40cf1596bim9F9DNdV75klgRSZ6Y2A xen/include/asm-x86/regs.h
   1.159  3ddb79c2plf7ciNgoNjU-RsbUzawsw xen/include/asm-x86/rwlock.h
   1.160 @@ -1436,11 +1435,15 @@ 4266bd01Ul-pC01ZVvBkhBnv5eqzvw xen/inclu
   1.161  3ddb79c25UE59iu4JJcbRalx95mvcg xen/include/public/xen.h
   1.162  3e397e66m2tO3s-J8Jnr7Ws_tGoPTg xen/include/xen/ac_timer.h
   1.163  40715b2epYl2jBbxzz9CI2rgIca7Zg xen/include/xen/acpi.h
   1.164 +422f0995xCgnbsVhTjSncnqIABs64g xen/include/xen/adv_sched_hist.h
   1.165 +427fa2d1wyoVbvCyZRLposYjA_D_4g xen/include/xen/bitmap.h
   1.166 +427fa2d1ItcC_yWuBUkhc7adedP5ow xen/include/xen/bitops.h
   1.167  3ddb79c0c0cX_DZE209-Bb-Rx1v-Aw xen/include/xen/cache.h
   1.168  41f2cea7Yna7xc0X9fyavIjoSFFeVg xen/include/xen/compile.h.in
   1.169  3f840f12CkbYSlwMrY2S11Mpyxg7Nw xen/include/xen/compiler.h
   1.170  3ddb79c259jh8hE7vre_8NuE7nwNSA xen/include/xen/config.h
   1.171  3eb165e0eawr3R-p2ZQtSdLWtLRN_A xen/include/xen/console.h
   1.172 +427fa2d1bQCWgEQqTTh5MjG4MPEH9g xen/include/xen/cpumask.h
   1.173  3ddb79c1V44RD26YqCUm-kqIupM37A xen/include/xen/ctype.h
   1.174  3ddb79c05DdHQ0UxX_jKsXdR4QlMCA xen/include/xen/delay.h
   1.175  40f2b4a2hC3HtChu-ArD8LyojxWMjg xen/include/xen/domain.h
   1.176 @@ -1463,7 +1466,6 @@ 3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen/inclu
   1.177  3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen/include/xen/pci_ids.h
   1.178  3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xen/perfc.h
   1.179  3e54c38de9SUSYSAwxDf_DwkpAnQFA xen/include/xen/perfc_defn.h
   1.180 -42422fb0FVX-TJkSvAXnbfwMf19XFA xen/include/xen/physdev.h
   1.181  3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen/include/xen/prefetch.h
   1.182  3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xen/reboot.h
   1.183  40589969nPq3DMzv24RDb5LXE9brHw xen/include/xen/sched-if.h
     2.1 --- a/BitKeeper/etc/logging_ok	Tue May 03 14:42:00 2005 +0000
     2.2 +++ b/BitKeeper/etc/logging_ok	Tue May 10 08:49:59 2005 +0000
     2.3 @@ -60,6 +60,7 @@ kmacy@shemp.lab.netapp.com
     2.4  kraxel@bytesex.org
     2.5  laudney@eclipse.(none)
     2.6  leendert@watson.ibm.com
     2.7 +lists-xen@pimb.org
     2.8  lynx@idefix.cl.cam.ac.uk
     2.9  maf46@burn.cl.cam.ac.uk
    2.10  mafetter@fleming.research
    2.11 @@ -99,3 +100,4 @@ vh249@airwolf.cl.cam.ac.uk
    2.12  vh249@arcadians.cl.cam.ac.uk
    2.13  xen-ia64.adm@bkbits.net
    2.14  xenbk@gandalf.hpl.hp.com
    2.15 +ydroneaud@mandriva.com
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/docs/misc/sedf_scheduler_mini-HOWTO.txt	Tue May 10 08:49:59 2005 +0000
     3.3 @@ -0,0 +1,44 @@
     3.4 +sEDF scheduler
     3.5 +--------------
     3.6 +Author:
     3.7 +   Stephan.Diestelhorst@{cl.cam.ac.uk, inf.tu-dresden.de}
     3.8 +   
     3.9 +Overview:
    3.10 +  This scheduler provides weighted CPU sharing in an intuitive way and
    3.11 +  uses realtime-algorithms to ensure time guarantees.
    3.12 +
    3.13 +Usage:
    3.14 +   -add "sched=sedf" on Xen's boot command-line
    3.15 +   -create domains as usual
    3.16 +   -use "xm sedf <dom-id> <period> <slice> <latency-hint> <extra> <weight>"
    3.17 +    Where:
    3.18 +      -period/slice are the normal EDF scheduling parameters in nanosecs
    3.19 +      -latency-hint is the scaled period in case the domain is doing heavy I/O
    3.20 +         (unused by the currently compiled version)
    3.21 +      -extra is a flag (0/1), which controls whether the domain can run in
    3.22 +       extra-time
    3.23 +      -weight is mutually exclusive with period/slice and specifies another
    3.24 +       way of setting a domains cpu slice
    3.25 +
    3.26 +Examples:
    3.27 + normal EDF (20ms/5ms):
    3.28 +  xm sedf <dom-id> 20000000 5000000 0 0 0
    3.29 +  
    3.30 + best-effort domains (i.e. non-realtime):
    3.31 +  xm sedf <dom-id> 20000000 0 0 1 0
    3.32     3.33 + normal EDF (20ms/5ms) + share of extra-time:
    3.34 +  xm sedf <dom-id> 20000000 5000000 0 1 0
    3.35 +  
    3.36 + 4 domains with weights 2:3:4:2
    3.37 +  xm sedf <d1> 0 0 0 0 2
    3.38 +  xm sedf <d2> 0 0 0 0 3
    3.39 +  xm sedf <d3> 0 0 0 0 4
    3.40 +  xm sedf <d4> 0 0 0 0 2
    3.41 +  
    3.42 + 1 fully-specified (10ms/3ms) domain, 3 other domains share
    3.43 + available rest in 2:7:3 ratio:
    3.44 +  xm sedf <d1> 10000000 3000000 0 0 0
    3.45 +  xm sedf <d2> 0 0 0 0 2
    3.46 +  xm sedf <d3> 0 0 0 0 7
    3.47 +  xm sedf <d4> 0 0 0 0 3
    3.48 \ No newline at end of file
     4.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c	Tue May 03 14:42:00 2005 +0000
     4.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c	Tue May 10 08:49:59 2005 +0000
     4.3 @@ -87,6 +87,12 @@
     4.4  
     4.5  /* XEN specific defines */
     4.6  #include <machine/xen_intr.h>
     4.7 +#include <vm/vm.h>   /* needed by machine/pmap.h */
     4.8 +#include <vm/pmap.h> /* needed by machine/pmap.h */
     4.9 +#include <machine/pmap.h> /* needed by xen-os.h */
    4.10 +#include <machine/hypervisor-ifs.h>
    4.11 +#include <machine/xen-os.h> /* needed by xenfunc.h */
    4.12 +#include <machine/xenfunc.h>
    4.13  
    4.14  /*
    4.15   * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
    4.16 @@ -129,7 +135,15 @@ static uint64_t shadow_system_time;
    4.17  static uint32_t shadow_time_version;
    4.18  static struct timeval shadow_tv;
    4.19  
    4.20 +#define DEFINE_PER_CPU(type, name) \
    4.21 +    __typeof__(type) per_cpu__##name
    4.22 +
    4.23 +#define per_cpu(var, cpu)           (*((void)cpu, &per_cpu__##var))
    4.24 +
    4.25 +
    4.26  static uint64_t processed_system_time;/* System time (ns) at last processing. */
    4.27 +static DEFINE_PER_CPU(uint64_t, processed_system_time);
    4.28 +
    4.29  
    4.30  #define NS_PER_TICK (1000000000ULL/hz)
    4.31  
    4.32 @@ -202,18 +216,19 @@ static struct timecounter xen_timecounte
    4.33  static void 
    4.34  clkintr(struct clockframe *frame)
    4.35  {
    4.36 -    int64_t delta;
    4.37 +    int64_t cpu_delta, delta;
    4.38 +    int cpu = smp_processor_id();
    4.39      long ticks = 0;
    4.40  
    4.41 -
    4.42      do {
    4.43      	__get_time_values_from_xen();
    4.44 -    	delta = (int64_t)(shadow_system_time + 
    4.45 -			  xen_get_offset() * 1000 - 
    4.46 -			  processed_system_time);
    4.47 +    	delta = cpu_delta = (int64_t)shadow_system_time + 
    4.48 +		(int64_t)xen_get_offset() * 1000;
    4.49 +	delta -= processed_system_time;
    4.50 +	cpu_delta -= per_cpu(processed_system_time, cpu);
    4.51      } while (!TIME_VALUES_UP_TO_DATE);
    4.52  
    4.53 -    if (unlikely(delta < 0)) {
    4.54 +    if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) {
    4.55          printk("Timer ISR: Time went backwards: %lld\n", delta);
    4.56          return;
    4.57      }
    4.58 @@ -225,15 +240,28 @@ clkintr(struct clockframe *frame)
    4.59          delta -= NS_PER_TICK;
    4.60          processed_system_time += NS_PER_TICK;
    4.61      }
    4.62 -
    4.63 -    if (ticks > 0) {
    4.64 -	if (frame)
    4.65 -		timer_func(frame);
    4.66 -#ifdef SMP
    4.67 -	if (timer_func == hardclock && frame)
    4.68 -		forward_hardclock();
    4.69 +    /* Local CPU jiffy work. */
    4.70 +    while (cpu_delta >= NS_PER_TICK) {
    4.71 +	    cpu_delta -= NS_PER_TICK;
    4.72 +	    per_cpu(processed_system_time, cpu) += NS_PER_TICK;
    4.73 +#if 0
    4.74 +	    update_process_times(user_mode(regs));
    4.75 +	    profile_tick(CPU_PROFILING, regs);
    4.76  #endif
    4.77      }
    4.78 +    if (ticks > 0) {
    4.79 +	if (frame) timer_func(frame);
    4.80 +    }
    4.81 +    
    4.82 +    if (cpu != 0)
    4.83 +	    return;
    4.84 +    /*
    4.85 +     * Take synchronised time from Xen once a minute if we're not
    4.86 +     * synchronised ourselves, and we haven't chosen to keep an independent
    4.87 +     * time base.
    4.88 +     */
    4.89 +    
    4.90 +    /* XXX TODO */
    4.91  }
    4.92  
    4.93  #include "opt_ddb.h"
    4.94 @@ -429,7 +457,7 @@ resettodr()
    4.95   * Start clocks running.
    4.96   */
    4.97  void
    4.98 -cpu_initclocks()
    4.99 +cpu_initclocks(void)
   4.100  {
   4.101  	int diag;
   4.102  	int time_irq = bind_virq_to_irq(VIRQ_TIMER);
   4.103 @@ -445,8 +473,26 @@ cpu_initclocks()
   4.104  	/* initialize xen values */
   4.105  	__get_time_values_from_xen();
   4.106  	processed_system_time = shadow_system_time;
   4.107 +	per_cpu(processed_system_time, 0) = processed_system_time;
   4.108 +
   4.109  }
   4.110  
   4.111 +#ifdef SMP 
   4.112 +void
   4.113 +ap_cpu_initclocks(void)
   4.114 +{
   4.115 +	int irq;
   4.116 +	int cpu = smp_processor_id();
   4.117 +
   4.118 +	per_cpu(processed_system_time, cpu) = shadow_system_time;
   4.119 +	
   4.120 +	irq = bind_virq_to_irq(VIRQ_TIMER);
   4.121 +	PCPU_SET(time_irq, irq);
   4.122 +	PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr, 
   4.123 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   4.124 +}
   4.125 +#endif
   4.126 +
   4.127  void
   4.128  cpu_startprofclock(void)
   4.129  {
     5.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c	Tue May 03 14:42:00 2005 +0000
     5.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c	Tue May 10 08:49:59 2005 +0000
     5.3 @@ -79,9 +79,14 @@ evtchn_do_upcall(struct intrframe *frame
     5.4                  l2 &= ~(1 << l2i);
     5.5              
     5.6                  port = (l1i << 5) + l2i;
     5.7 +		irq = evtchn_to_irq[port];
     5.8 +#ifdef SMP		
     5.9 +		if (irq == PCPU_GET(cpuast)) 
    5.10 +			continue;
    5.11 +#endif
    5.12                  if ( (owned = mtx_owned(&sched_lock)) != 0 )
    5.13                      mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
    5.14 -                if ( (irq = evtchn_to_irq[port]) != -1 ) {
    5.15 +                if ( irq != -1 ) {
    5.16  		    struct intsrc *isrc = intr_lookup_source(irq);
    5.17  		    intr_execute_handlers(isrc, frame);
    5.18  		} else {
    5.19 @@ -584,6 +589,7 @@ ap_evtchn_init(int cpu)
    5.20          PCPU_GET(virq_to_irq)[i] = -1;
    5.21  }
    5.22  
    5.23 +
    5.24  static void 
    5.25  evtchn_init(void *dummy __unused)
    5.26  {
    5.27 @@ -591,13 +597,6 @@ evtchn_init(void *dummy __unused)
    5.28      struct xenpic *xp;
    5.29      struct xenpic_intsrc *pin;
    5.30  
    5.31 -    /*
    5.32 -     * xenpic_lock: in order to allow an interrupt to occur in a critical
    5.33 -     * 	        section, to set pcpu->ipending (etc...) properly, we
    5.34 -     *	        must be able to get the icu lock, so it can't be
    5.35 -     *	        under witness.
    5.36 -     */
    5.37 -    mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF);
    5.38  
    5.39      /* XXX -- expedience hack */
    5.40      PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]);
    5.41 @@ -657,3 +656,11 @@ evtchn_init(void *dummy __unused)
    5.42  }
    5.43  
    5.44  SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
    5.45 +    /*
    5.46 +     * xenpic_lock: in order to allow an interrupt to occur in a critical
    5.47 +     * 	        section, to set pcpu->ipending (etc...) properly, we
    5.48 +     *	        must be able to get the icu lock, so it can't be
    5.49 +     *	        under witness.
    5.50 +     */
    5.51 +
    5.52 +MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_DEF|MTX_NOWITNESS);
     6.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c	Tue May 03 14:42:00 2005 +0000
     6.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c	Tue May 10 08:49:59 2005 +0000
     6.3 @@ -78,6 +78,7 @@
     6.4  #include <sys/sched.h>
     6.5  #include <sys/sysent.h>
     6.6  #include <sys/sysctl.h>
     6.7 +#include <sys/smp.h>
     6.8  #include <sys/ucontext.h>
     6.9  #include <sys/vmmeter.h>
    6.10  #include <sys/bus.h>
    6.11 @@ -883,14 +884,6 @@ SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_
    6.12  static void
    6.13  cpu_idle_default(void)
    6.14  {
    6.15 -#if 0
    6.16 -	/*
    6.17 -	 * we must absolutely guarentee that hlt is the
    6.18 -	 * absolute next instruction after sti or we
    6.19 -	 * introduce a timing window.
    6.20 -	 */
    6.21 -	__asm __volatile("sti; hlt");
    6.22 -#endif
    6.23  	idle_block();
    6.24  	enable_intr();
    6.25  }
    6.26 @@ -1376,6 +1369,7 @@ pteinfo_t *pteinfo_list;
    6.27  unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
    6.28  int preemptable;
    6.29  int gdt_set;
    6.30 +static int ncpus;
    6.31  
    6.32  /* Linux infection */
    6.33  #define PAGE_OFFSET  KERNBASE
    6.34 @@ -1387,6 +1381,10 @@ initvalues(start_info_t *startinfo)
    6.35      int i;
    6.36      vm_paddr_t pdir_shadow_ma, KPTphys;
    6.37      vm_offset_t *pdir_shadow;
    6.38 +#ifdef SMP
    6.39 +    int j;
    6.40 +#endif
    6.41 +
    6.42  #ifdef WRITABLE_PAGETABLES
    6.43      printk("using writable pagetables\n");
    6.44      HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
    6.45 @@ -1447,18 +1445,19 @@ initvalues(start_info_t *startinfo)
    6.46  
    6.47  
    6.48  #ifdef SMP
    6.49 +#if 0
    6.50      /* allocate cpu0 private page */
    6.51      cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT));
    6.52      tmpindex++; 
    6.53 -
    6.54 +#endif
    6.55      /* allocate SMP page table */
    6.56      SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT));
    6.57 -
    6.58 +#if 0
    6.59      /* Map the private page into the SMP page table */
    6.60      SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;
    6.61 -
    6.62 +#endif
    6.63      /* map SMP page table RO */
    6.64 -    PT_SET_MA(SMPpt, vtomach(SMPpt) & ~PG_RW);
    6.65 +    PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW);
    6.66  
    6.67      /* put the page table into the page directory */
    6.68      xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), 
    6.69 @@ -1496,44 +1495,61 @@ initvalues(start_info_t *startinfo)
    6.70      tmpindex++;
    6.71  
    6.72      HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine;
    6.73 +    ncpus = HYPERVISOR_shared_info->n_vcpu; 
    6.74 +#ifdef SMP
    6.75 +    for (i = 0; i < ncpus; i++) {
    6.76 +	    int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE;
    6.77 +	    for (j = 0; j < npages; j++) {
    6.78 +		    vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
    6.79 +		    tmpindex++;
    6.80 +		    PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW | PG_M, FALSE);
    6.81 +	    }
    6.82 +    }
    6.83 +    xen_flush_queue();
    6.84 +#endif
    6.85      
    6.86      init_first = tmpindex;
    6.87      
    6.88  }
    6.89  
    6.90 +
    6.91 +trap_info_t trap_table[] = {
    6.92 +	{ 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
    6.93 +	{ 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
    6.94 +	{ 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
    6.95 +	{ 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
    6.96 +	/* This is UPL on Linux and KPL on BSD */
    6.97 +	{ 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
    6.98 +	{ 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
    6.99 +	{ 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
   6.100 +	/*
   6.101 +	 * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
   6.102 +	 *   no handler for double fault
   6.103 +	 */
   6.104 +	{ 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
   6.105 +	{10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
   6.106 +	{11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
   6.107 +	{12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
   6.108 +	{13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
   6.109 +	{14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
   6.110 +	{15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
   6.111 +	{16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
   6.112 +	{17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
   6.113 +	{18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
   6.114 +	{19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
   6.115 +	{0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
   6.116 +	{  0, 0,           0, 0 }
   6.117 +};
   6.118 +
   6.119  void
   6.120  init386(void)
   6.121  {
   6.122  	int gsel_tss, metadata_missing, off, x, error;
   6.123  	struct pcpu *pc;
   6.124  	unsigned long gdtmachpfn;
   6.125 -	trap_info_t trap_table[] = {
   6.126 -	    { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
   6.127 -	    { 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
   6.128 -	    { 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
   6.129 -	    { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
   6.130 -	    /* This is UPL on Linux and KPL on BSD */
   6.131 -	    { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
   6.132 -	    { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
   6.133 -	    { 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
   6.134 -	    /*
   6.135 -	     * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
   6.136 -	     *   no handler for double fault
   6.137 -	     */
   6.138 -	    { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
   6.139 -	    {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
   6.140 -	    {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
   6.141 -	    {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
   6.142 -	    {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
   6.143 -	    {14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
   6.144 -	    {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
   6.145 -	    {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
   6.146 -	    {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
   6.147 -	    {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
   6.148 -	    {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
   6.149 -	    {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
   6.150 -	    {  0, 0,           0, 0 }
   6.151 -        };
   6.152 +#ifdef SMP
   6.153 +	int i;
   6.154 +#endif
   6.155  	proc0.p_uarea = proc0uarea;
   6.156  	thread0.td_kstack = proc0kstack;
   6.157  	thread0.td_pcb = (struct pcb *)
   6.158 @@ -1583,26 +1599,42 @@ init386(void)
   6.159  	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); 
   6.160  #endif
   6.161  #ifdef SMP
   6.162 -	/* this correspond to the cpu private page as mapped into the SMP page 
   6.163 -	 * table in initvalues
   6.164 +	/* XXX this will blow up if there are more than 512/NGDT vcpus - will never 
   6.165 +	 * be an issue in the real world but should add an assert on general principles
   6.166 +	 * we'll likely blow up when we hit LAST_RESERVED_GDT_ENTRY, at which point we
   6.167 +	 * would need to start allocating more pages for the GDT
   6.168  	 */
   6.169  	pc = &SMP_prvspace[0].pcpu;
   6.170 -	gdt_segs[GPRIV_SEL].ssd_limit =
   6.171 -		atop(sizeof(struct privatespace) - 1);
   6.172 +	for (i = 0; i < ncpus; i++) {
   6.173 +		cpu_add(i, (i == 0));
   6.174 +
   6.175 +		gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i];
   6.176 +		gdt_segs[GPRIV_SEL].ssd_limit =
   6.177 +			atop(sizeof(struct privatespace) - 1);
   6.178 +		gdt_segs[GPROC0_SEL].ssd_base =
   6.179 +			(int) &SMP_prvspace[i].pcpu.pc_common_tss;
   6.180 +		SMP_prvspace[i].pcpu.pc_prvspace =
   6.181 +			&SMP_prvspace[i].pcpu;
   6.182 +		
   6.183 +		for (x = 0; x < NGDT; x++) {
   6.184 +			ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd);
   6.185 +		}
   6.186 +	}
   6.187  #else
   6.188  	pc = &__pcpu;
   6.189  	gdt_segs[GPRIV_SEL].ssd_limit =
   6.190  		atop(sizeof(struct pcpu) - 1);
   6.191 -#endif
   6.192  	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
   6.193  	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
   6.194  	for (x = 0; x < NGDT; x++)
   6.195  	    ssdtosd(&gdt_segs[x], &gdt[x].sd);
   6.196 +#endif
   6.197 +
   6.198  
   6.199  	PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW);
   6.200  	gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
   6.201 -	if ((error = HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1))) 
   6.202 -	    panic("set_gdt failed");
   6.203 +	PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0);
   6.204 +
   6.205  	
   6.206  	lgdt_finish();
   6.207  	gdt_set = 1;
     7.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c	Tue May 03 14:42:00 2005 +0000
     7.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c	Tue May 10 08:49:59 2005 +0000
     7.3 @@ -83,7 +83,16 @@
     7.4  #include <machine/specialreg.h>
     7.5  #include <machine/privatespace.h>
     7.6  
     7.7 +
     7.8 +/* XEN includes */
     7.9  #include <machine/xenfunc.h>
    7.10 +#include <machine/xen_intr.h>
    7.11 +
    7.12 +void Xhypervisor_callback(void);
    7.13 +void failsafe_callback(void);
    7.14 +
    7.15 +/***************/
    7.16 +
    7.17  
    7.18  #define WARMBOOT_TARGET		0
    7.19  #define WARMBOOT_OFF		(KERNBASE + 0x0467)
    7.20 @@ -94,6 +103,10 @@
    7.21  #define BIOS_RESET		(0x0f)
    7.22  #define BIOS_WARM		(0x0a)
    7.23  
    7.24 +
    7.25 +#undef POSTCODE
    7.26 +#define POSTCODE(x)
    7.27 +
    7.28  /*
    7.29   * this code MUST be enabled here and in mpboot.s.
    7.30   * it follows the very early stages of AP boot by placing values in CMOS ram.
    7.31 @@ -175,6 +188,8 @@ extern pt_entry_t *KPTphys;
    7.32  /* SMP page table page */
    7.33  extern pt_entry_t *SMPpt;
    7.34  
    7.35 +extern trap_info_t trap_table[];
    7.36 +
    7.37  struct pcb stoppcbs[MAXCPU];
    7.38  
    7.39  /* Variables needed for SMP tlb shootdown. */
    7.40 @@ -208,7 +223,9 @@ static u_int boot_address;
    7.41  
    7.42  static void	set_logical_apic_ids(void);
    7.43  static int	start_all_aps(void);
    7.44 +#if 0
    7.45  static void	install_ap_tramp(void);
    7.46 +#endif
    7.47  static int	start_ap(int apic_id);
    7.48  static void	release_aps(void *dummy);
    7.49  
    7.50 @@ -314,6 +331,7 @@ int
    7.51  cpu_mp_probe(void)
    7.52  {
    7.53  
    7.54 +	mp_ncpus = HYPERVISOR_shared_info->n_vcpu;
    7.55  	/*
    7.56  	 * Always record BSP in CPU map so that the mbuf init code works
    7.57  	 * correctly.
    7.58 @@ -342,20 +360,24 @@ cpu_mp_probe(void)
    7.59  	return (1);
    7.60  }
    7.61  
    7.62 -/*
    7.63 - * Initialize the IPI handlers and start up the AP's.
    7.64 - */
    7.65 -void
    7.66 -cpu_mp_start(void)
    7.67 +static void
    7.68 +cpu_mp_ipi_init(void)
    7.69  {
    7.70 -	int i;
    7.71 -
    7.72 -	POSTCODE(MP_START_POST);
    7.73 -
    7.74 -	/* Initialize the logical ID to APIC ID table. */
    7.75 -	for (i = 0; i < MAXCPU; i++)
    7.76 -		cpu_apic_ids[i] = -1;
    7.77 -
    7.78 +	int irq;
    7.79 +	int cpu = smp_processor_id();
    7.80 +	/* 
    7.81 +	 * these are not needed by XenFreeBSD - from Keir:
    7.82 +	 * For TLB-flush related IPIs, Xen has hypercalls 
    7.83 +	 * you should use instead. You can pass a pointer 
    7.84 +	 * to a vcpu bitmap to update_va_mapping(), and to
    7.85 +	 * MMUEXT_flush_tlb_multi and MMEXT_invlpg_multi. 
    7.86 +	 * Xen will then make sure that those vcpus get 
    7.87 +	 * flushed appropriately before returning to the
    7.88 +	 * caller.
    7.89 +	 * There is also no indication that we need to forward
    7.90 +	 * clock interrupts.
    7.91 +	 */
    7.92 +#if 0 
    7.93  	/* Install an inter-CPU IPI for TLB invalidation */
    7.94  	setidt(IPI_INVLTLB, IDTVEC(invltlb),
    7.95  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
    7.96 @@ -371,22 +393,69 @@ cpu_mp_start(void)
    7.97  	/* Install an inter-CPU IPI for forwarding statclock() */
    7.98  	setidt(IPI_STATCLOCK, IDTVEC(statclock),
    7.99  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   7.100 -	
   7.101 +#endif
   7.102 +
   7.103 +	/* 
   7.104 +	 * These can all be consolidated. For now leaving 
   7.105 +	 * as individual IPIs.
   7.106 +	 *
   7.107 +	 */
   7.108 +#if 0
   7.109  	/* Install an inter-CPU IPI for lazy pmap release */
   7.110  	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
   7.111  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   7.112 +#else
   7.113 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP);
   7.114 +	PCPU_SET(lazypmap, irq);
   7.115 +	PANIC_IF(intr_add_handler("pmap_lazyfix", irq, 
   7.116 +				  (driver_intr_t *)pmap_lazyfix_action, 
   7.117 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   7.118 +#endif
   7.119  
   7.120 +#if 0
   7.121  	/* Install an inter-CPU IPI for all-CPU rendezvous */
   7.122  	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
   7.123  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   7.124 +#else 
   7.125 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS);
   7.126 +	PCPU_SET(rendezvous, irq);
   7.127 +	PANIC_IF(intr_add_handler("smp_rendezvous", irq, 
   7.128 +				  (driver_intr_t *)smp_rendezvous_action, 
   7.129 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   7.130 +#endif
   7.131  
   7.132 +#if 0
   7.133  	/* Install an inter-CPU IPI for forcing an additional software trap */
   7.134  	setidt(IPI_AST, IDTVEC(cpuast),
   7.135  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   7.136 -
   7.137 +#else
   7.138 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST);
   7.139 +	PCPU_SET(cpuast, irq);
   7.140 +#endif
   7.141 +	/* XXX ignore for now */
   7.142 +#if 0 
   7.143  	/* Install an inter-CPU IPI for CPU stop/restart */
   7.144  	setidt(IPI_STOP, IDTVEC(cpustop),
   7.145  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   7.146 +#endif
   7.147 +
   7.148 +}
   7.149 +
   7.150 +SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL);
   7.151 +
   7.152 +/*
   7.153 + * Initialize the IPI handlers and start up the AP's.
   7.154 + */
   7.155 +void
   7.156 +cpu_mp_start(void) /* --- Start here --- */
   7.157 +{
   7.158 +	int i;
   7.159 +
   7.160 +	POSTCODE(MP_START_POST);
   7.161 +
   7.162 +	/* Initialize the logical ID to APIC ID table. */
   7.163 +	for (i = 0; i < MAXCPU; i++)
   7.164 +		cpu_apic_ids[i] = -1;
   7.165  
   7.166  
   7.167  	/* Set boot_cpu_id if needed. */
   7.168 @@ -437,35 +506,44 @@ cpu_mp_announce(void)
   7.169  void
   7.170  init_secondary(void)
   7.171  {
   7.172 -	int	gsel_tss;
   7.173 -	int	x, myid;
   7.174 +	int	myid;
   7.175 +	unsigned long gdtmachpfn;
   7.176 +	printk("MADE IT!!");
   7.177 +
   7.178  #if 0
   7.179  	u_int	cr0;
   7.180  #endif
   7.181 +	/* Steps to booting SMP on xen as gleaned from XenLinux:
   7.182 +	 * - cpu_init() - processor specific initialization
   7.183 +	 * - smp_callin() 
   7.184 +	 *    - wait 2s for BP to finish its startup sequence
   7.185 +	 *    - map_cpu_to_logical_apicid()
   7.186 +	 *    - save cpuid info
   7.187 +	 *    - set bit in callin map to let master (BP?) continue
   7.188 +	 * - local setup timer() - per cpu timer initialization
   7.189 +	 * - ldebug_setup() - bind debug IRQ to local CPU.
   7.190 +	 * - smp_intr_init() - IPI setup that we do in cpu_mp_start
   7.191 +	 * - local_irq_enable() - enable interrupts locally
   7.192 +	 * - cpu_set(id, map) - announce that we're up
   7.193 +	 * - cpu_idle() - make us schedulable
   7.194 +	 */
   7.195 +
   7.196 +
   7.197  	/* bootAP is set in start_ap() to our ID. */
   7.198  	myid = bootAP;
   7.199 -	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
   7.200 -	gdt_segs[GPROC0_SEL].ssd_base =
   7.201 -		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
   7.202 -	SMP_prvspace[myid].pcpu.pc_prvspace =
   7.203 -		&SMP_prvspace[myid].pcpu;
   7.204  
   7.205 -	for (x = 0; x < NGDT; x++) {
   7.206 -		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
   7.207 -	}
   7.208 +	gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
   7.209 +	PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0); 
   7.210  
   7.211 -#if 0
   7.212 -	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
   7.213 -	r_gdt.rd_base = (int) &gdt[myid * NGDT];
   7.214 -	lgdt(&r_gdt);			/* does magic intra-segment return */
   7.215 +	
   7.216 +	lgdt_finish();
   7.217  
   7.218 -	lidt(&r_idt);
   7.219 -	lldt(_default_ldt);
   7.220 -#endif
   7.221 +	PCPU_SET(cpuid, myid);
   7.222 +
   7.223 +
   7.224 +	set_user_ldt((struct mdproc *)_default_ldt);
   7.225  	PCPU_SET(currentldt, _default_ldt);
   7.226  
   7.227 -	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
   7.228 -	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
   7.229  	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
   7.230  	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
   7.231  	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
   7.232 @@ -557,6 +635,13 @@ init_secondary(void)
   7.233  	while (smp_started == 0)
   7.234  		ia32_pause();
   7.235  
   7.236 +	/* need to wait until now to setup the IPIs as SI_SUB_CPU is
   7.237 +	 * much earlier than SI_SUB_INTR
   7.238 +	 */  
   7.239 +	ap_evtchn_init(myid);
   7.240 +	ap_cpu_initclocks();
   7.241 +	cpu_mp_ipi_init();
   7.242 +
   7.243  	/* ok, now grab sched_lock and enter the scheduler */
   7.244  	mtx_lock_spin(&sched_lock);
   7.245  
   7.246 @@ -610,28 +695,35 @@ set_logical_apic_ids(void)
   7.247  static int
   7.248  start_all_aps(void)
   7.249  {
   7.250 -#ifndef PC98
   7.251 -	u_char mpbiosreason;
   7.252 -#endif
   7.253 -	u_long mpbioswarmvec;
   7.254  	struct pcpu *pc;
   7.255  	char *stack;
   7.256 -	uintptr_t kptbase;
   7.257 -	int i, pg, apic_id, cpu;
   7.258 +	int i, apic_id, cpu;
   7.259 +
   7.260 +	/* 
   7.261 +	 * This function corresponds most closely to 
   7.262 +	 * smp_boot_cpus in XenLinux - the sequence there 
   7.263 +	 * is:
   7.264 +	 * - check if SMP config is found - if not:
   7.265 +	 *     - clear the I/O APIC IRQs
   7.266 +	 *     - map cpu to logical apicid
   7.267 +	 *     - exit
   7.268 +	 * - smp_intr_init - IPI initialization
   7.269 +	 * - map cpu to logical apicid
   7.270 +	 * - boot each of the vcpus
   7.271 +	 * - clear and then construct the cpu sibling [logical CPUs] map.
   7.272 +	 *
   7.273 +	 */
   7.274  
   7.275  	POSTCODE(START_ALL_APS_POST);
   7.276  
   7.277  	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
   7.278 -
   7.279 +#if 0
   7.280  	/* install the AP 1st level boot code */
   7.281  	install_ap_tramp();
   7.282  
   7.283  	/* save the current value of the warm-start vector */
   7.284  	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
   7.285 -#ifndef PC98
   7.286 -	outb(CMOS_REG, BIOS_RESET);
   7.287 -	mpbiosreason = inb(CMOS_DATA);
   7.288 -#endif
   7.289 +
   7.290  
   7.291  	/* set up temporary P==V mapping for AP boot */
   7.292  	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
   7.293 @@ -640,7 +732,7 @@ start_all_aps(void)
   7.294  		PTD[i] = (pd_entry_t)(PG_V | PG_RW |
   7.295  		    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
   7.296  	invltlb();
   7.297 -
   7.298 +#endif
   7.299  	/* start each AP */
   7.300  	for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
   7.301  		if (!cpu_info[apic_id].cpu_present ||
   7.302 @@ -650,7 +742,7 @@ start_all_aps(void)
   7.303  
   7.304  		/* save APIC ID for this logical ID */
   7.305  		cpu_apic_ids[cpu] = apic_id;
   7.306 -
   7.307 +#if 0
   7.308  		/* first page of AP's private space */
   7.309  		pg = cpu * i386_btop(sizeof(struct privatespace));
   7.310  
   7.311 @@ -665,11 +757,14 @@ start_all_aps(void)
   7.312  		for (i = 0; i < KSTACK_PAGES; i++)
   7.313  			SMPpt[pg + 1 + i] = (pt_entry_t)
   7.314  			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
   7.315 +#endif
   7.316 +		pc = &SMP_prvspace[cpu].pcpu;
   7.317  
   7.318  		/* prime data page for it to use */
   7.319  		pcpu_init(pc, cpu, sizeof(struct pcpu));
   7.320  		pc->pc_apic_id = apic_id;
   7.321  
   7.322 +#if 0
   7.323  		/* setup a vector to our boot code */
   7.324  		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
   7.325  		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
   7.326 @@ -677,7 +772,7 @@ start_all_aps(void)
   7.327  		outb(CMOS_REG, BIOS_RESET);
   7.328  		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
   7.329  #endif
   7.330 -
   7.331 +#endif
   7.332  		bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
   7.333  		    PAGE_SIZE];
   7.334  		bootAP = cpu;
   7.335 @@ -700,13 +795,10 @@ start_all_aps(void)
   7.336  	/* build our map of 'other' CPUs */
   7.337  	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
   7.338  
   7.339 +#if 0
   7.340  	/* restore the warmstart vector */
   7.341  	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
   7.342 -#ifndef PC98
   7.343 -	outb(CMOS_REG, BIOS_RESET);
   7.344 -	outb(CMOS_DATA, mpbiosreason);
   7.345  #endif
   7.346 -
   7.347  	/*
   7.348  	 * Set up the idle context for the BSP.  Similar to above except
   7.349  	 * that some was done by locore, some by pmap.c and some is implicit
   7.350 @@ -739,7 +831,7 @@ extern void bootDataSeg(void);
   7.351  extern void MPentry(void);
   7.352  extern u_int MP_GDT;
   7.353  extern u_int mp_gdtbase;
   7.354 -
   7.355 +#if 0
   7.356  static void
   7.357  install_ap_tramp(void)
   7.358  {
   7.359 @@ -791,6 +883,21 @@ install_ap_tramp(void)
   7.360  	*dst16 = (u_int) boot_address & 0xffff;
   7.361  	*dst8 = ((u_int) boot_address >> 16) & 0xff;
   7.362  }
   7.363 +#endif
   7.364 +
   7.365 +static int 
   7.366 +cpu_mp_trap_init(trap_info_t *trap_ctxt)
   7.367 +{
   7.368 +
   7.369 +        trap_info_t *t = trap_table;
   7.370 +
   7.371 +        for (t = trap_table; t->address; t++) {
   7.372 +                trap_ctxt[t->vector].flags = t->flags;
   7.373 +                trap_ctxt[t->vector].cs = t->cs;
   7.374 +                trap_ctxt[t->vector].address = t->address;
   7.375 +        }
   7.376 +        return 0x80 /*SYSCALL_VECTOR*/;
   7.377 +}
   7.378  
   7.379  /*
   7.380   * This function starts the AP (application processor) identified
   7.381 @@ -802,8 +909,25 @@ install_ap_tramp(void)
   7.382  static int
   7.383  start_ap(int apic_id)
   7.384  {
   7.385 -	int vector, ms;
   7.386 -	int cpus;
   7.387 +	int vector, ms, i;
   7.388 +	int cpus, boot_error;
   7.389 +	vcpu_guest_context_t ctxt;
   7.390 +
   7.391 +	/* 
   7.392 +	 * This is the FreeBSD equivalent to do_boot_cpu(apicid) in
   7.393 +	 * smpboot.c. 
   7.394 +	 * its initialization sequence consists of:
   7.395 +	 * - fork_idle(cpu) to create separate idle context
   7.396 +	 * - initialization of idle's context to start_secondary
   7.397 +	 * - initialization of cpu ctxt to start in startup_32_smp
   7.398 +	 * - then we call HYPERVISOR_boot_vcpu with the cpu index and
   7.399 +	 *   a pointer to the context.
   7.400 +	 * - on boot success we:
   7.401 +	 *   - set ourselves in the callout_map
   7.402 +	 *   - wait up to 5 seconds for us to be set in the callin map
   7.403 +	 * - set x86_cpu_to_apicid[cpu] = apicid;
   7.404 +	 *
   7.405 +	 */
   7.406  
   7.407  	POSTCODE(START_AP_POST);
   7.408  
   7.409 @@ -813,6 +937,55 @@ start_ap(int apic_id)
   7.410  	/* used as a watchpoint to signal AP startup */
   7.411  	cpus = mp_naps;
   7.412  
   7.413 +	memset(&ctxt, 0, sizeof(ctxt));
   7.414 +
   7.415 +	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
   7.416 +	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
   7.417 +	ctxt.user_regs.fs = 0;
   7.418 +	ctxt.user_regs.gs = 0;
   7.419 +	ctxt.user_regs.ss = __KERNEL_DS;
   7.420 +	ctxt.user_regs.cs = __KERNEL_CS;
   7.421 +	ctxt.user_regs.eip = (unsigned long)init_secondary;
   7.422 +	ctxt.user_regs.esp = (unsigned long)bootSTK;
   7.423 +#ifdef notyet
   7.424 +	ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
   7.425 +#else
   7.426 +	ctxt.user_regs.eflags = (1<<9) | (1<<2);
   7.427 +#endif
   7.428 +	/* FPU is set up to default initial state. */
   7.429 +	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
   7.430 +
   7.431 +	/* Virtual IDT is empty at start-of-day. */
   7.432 +	for ( i = 0; i < 256; i++ )
   7.433 +	{
   7.434 +		ctxt.trap_ctxt[i].vector = i;
   7.435 +		ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
   7.436 +	}
   7.437 +	ctxt.fast_trap_idx = cpu_mp_trap_init(ctxt.trap_ctxt);
   7.438 +
   7.439 +	/* No LDT. */
   7.440 +	ctxt.ldt_ents = 0;
   7.441 +
   7.442 +	/* Ring 1 stack is the initial stack. */
   7.443 +	ctxt.kernel_ss = __KERNEL_DS;
   7.444 +	ctxt.kernel_sp = (unsigned long)bootSTK;
   7.445 +
   7.446 +	/* Callback handlers. */
   7.447 +	ctxt.event_callback_cs     = __KERNEL_CS;
   7.448 +	ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
   7.449 +	ctxt.failsafe_callback_cs  = __KERNEL_CS;
   7.450 +	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
   7.451 +
   7.452 +	ctxt.pt_base = (vm_paddr_t)IdlePTD;
   7.453 +
   7.454 +	boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt);
   7.455 +
   7.456 +	
   7.457 +	if (boot_error) 
   7.458 +		printk("Houston we have a problem\n");
   7.459 +	else
   7.460 +		printk("boot_vcpu succeeded\n");
   7.461 +#if 0
   7.462  	/*
   7.463  	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
   7.464  	 * and running the target CPU. OR this INIT IPI might be latched (P5
   7.465 @@ -862,6 +1035,7 @@ start_ap(int apic_id)
   7.466  	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
   7.467  	    vector, apic_id);
   7.468  	lapic_ipi_wait(-1);
   7.469 +#endif
   7.470  	DELAY(200);		/* wait ~200uS */
   7.471  
   7.472  	/* Wait up to 5 seconds for it to start. */
     8.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c	Tue May 03 14:42:00 2005 +0000
     8.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c	Tue May 10 08:49:59 2005 +0000
     8.3 @@ -1374,7 +1374,6 @@ static u_int *lazymask;
     8.4  static u_int lazyptd;
     8.5  static volatile u_int lazywait;
     8.6  
     8.7 -void pmap_lazyfix_action(void);
     8.8  
     8.9  void
    8.10  pmap_lazyfix_action(void)
     9.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Tue May 03 14:42:00 2005 +0000
     9.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Tue May 10 08:49:59 2005 +0000
     9.3 @@ -380,8 +380,6 @@ printk(const char *fmt, ...)
     9.4          (void)HYPERVISOR_console_write(buf, ret);
     9.5  }
     9.6  
     9.7 -#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
     9.8 -
     9.9  
    9.10  #define XPQUEUE_SIZE 128
    9.11  #ifdef SMP
    10.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h	Tue May 03 14:42:00 2005 +0000
    10.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h	Tue May 10 08:49:59 2005 +0000
    10.3 @@ -441,4 +441,20 @@ HYPERVISOR_vm_assist(unsigned int cmd, u
    10.4      return ret;
    10.5  }
    10.6  
    10.7 +static inline int
    10.8 +HYPERVISOR_boot_vcpu(
    10.9 +    unsigned long vcpu, vcpu_guest_context_t *ctxt)
   10.10 +{
   10.11 +    int ret;
   10.12 +    unsigned long ign1, ign2;
   10.13 +
   10.14 +    __asm__ __volatile__ (
   10.15 +        TRAP_INSTR
   10.16 +        : "=a" (ret), "=b" (ign1), "=c" (ign2)
   10.17 +	: "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
   10.18 +	: "memory");
   10.19 +
   10.20 +    return ret;
   10.21 +}
   10.22 +
   10.23  #endif /* __HYPERVISOR_H__ */
    11.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h	Tue May 03 14:42:00 2005 +0000
    11.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h	Tue May 10 08:49:59 2005 +0000
    11.3 @@ -53,7 +53,12 @@
    11.4          int     *pc_ipi_to_evtchn;                                      \
    11.5          int     *pc_virq_to_irq;                                        \
    11.6          u_int   pc_cr2;                                                 \
    11.7 -        u_int   pc_pdir                                        
    11.8 +        u_int   pc_pdir;                                                \
    11.9 +        u_int   pc_lazypmap;                                            \
   11.10 +        u_int   pc_rendezvous;                                          \
   11.11 +        u_int   pc_cpuast;                                              \
   11.12 +        u_int   pc_time_irq;                                              \
   11.13 +        uint64_t pc_processed_system_time;  
   11.14  
   11.15  #if defined(lint)
   11.16   
    12.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h	Tue May 03 14:42:00 2005 +0000
    12.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h	Tue May 10 08:49:59 2005 +0000
    12.3 @@ -343,6 +343,7 @@ void	pmap_set_pg(void);
    12.4  void	pmap_invalidate_page(pmap_t, vm_offset_t);
    12.5  void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
    12.6  void	pmap_invalidate_all(pmap_t);
    12.7 +void    pmap_lazyfix_action(void);
    12.8  
    12.9  void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
   12.10  void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
    13.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h	Tue May 03 14:42:00 2005 +0000
    13.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h	Tue May 10 08:49:59 2005 +0000
    13.3 @@ -61,6 +61,9 @@ void load_cr3(uint32_t val);
    13.4  void xen_machphys_update(unsigned long, unsigned long);
    13.5  void xen_update_descriptor(union descriptor *, union descriptor *);
    13.6  void lldt(u_short sel);
    13.7 +void ap_cpu_initclocks(void);
    13.8 +
    13.9 +
   13.10  /*
   13.11   * Invalidate a patricular VA on all cpus
   13.12   *
   13.13 @@ -79,5 +82,6 @@ invltlb(void)
   13.14  	
   13.15  }
   13.16  
   13.17 +#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
   13.18  
   13.19  #endif /* _XEN_XENFUNC_H_ */
    14.1 --- a/linux-2.4.29-xen-sparse/mkbuildtree	Tue May 03 14:42:00 2005 +0000
    14.2 +++ b/linux-2.4.29-xen-sparse/mkbuildtree	Tue May 10 08:49:59 2005 +0000
    14.3 @@ -66,7 +66,7 @@ relative_lndir ()
    14.4      (
    14.5      cd $i
    14.6      pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
    14.7 -    for j in `find . -type f -o -type l -maxdepth 1`; do
    14.8 +    for j in `find . -maxdepth 1 -type f -o -type l`; do
    14.9        ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
   14.10      done
   14.11      )
   14.12 @@ -232,7 +232,6 @@ ln -sf ../../../${LINUX_26}/arch/xen/ker
   14.13  ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c
   14.14  ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c
   14.15  ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c
   14.16 -ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/pci-dma.c
   14.17  
   14.18  cd ${AD}/arch/xen/lib
   14.19  ln -sf ../../i386/lib/checksum.S 
    15.1 --- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers	Tue May 03 14:42:00 2005 +0000
    15.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers	Tue May 10 08:49:59 2005 +0000
    15.3 @@ -49,9 +49,46 @@ source "drivers/infiniband/Kconfig"
    15.4  endif
    15.5  
    15.6  if !XEN_PHYSDEV_ACCESS
    15.7 +
    15.8 +menu "Character devices"
    15.9 +
   15.10  config UNIX98_PTYS
   15.11  	bool
   15.12  	default y
   15.13 +
   15.14 +config LEGACY_PTYS
   15.15 +	bool "Legacy (BSD) PTY support"
   15.16 +	default y
   15.17 +	---help---
   15.18 +	  A pseudo terminal (PTY) is a software device consisting of two
   15.19 +	  halves: a master and a slave. The slave device behaves identical to
   15.20 +	  a physical terminal; the master device is used by a process to
   15.21 +	  read data from and write data to the slave, thereby emulating a
   15.22 +	  terminal. Typical programs for the master side are telnet servers
   15.23 +	  and xterms.
   15.24 +
   15.25 +	  Linux has traditionally used the BSD-like names /dev/ptyxx
   15.26 +	  for masters and /dev/ttyxx for slaves of pseudo
   15.27 +	  terminals. This scheme has a number of problems, including
   15.28 +	  security.  This option enables these legacy devices; on most
   15.29 +	  systems, it is safe to say N.
   15.30 +
   15.31 +
   15.32 +config LEGACY_PTY_COUNT
   15.33 +	int "Maximum number of legacy PTY in use"
   15.34 +	depends on LEGACY_PTYS
   15.35 +	range 1 256
   15.36 +	default "256"
   15.37 +	---help---
   15.38 +	  The maximum number of legacy PTYs that can be used at any one time.
   15.39 +	  The default is 256, and should be more than enough.  Embedded
   15.40 +	  systems may want to reduce this to save memory.
   15.41 +
   15.42 +	  When not in use, each legacy PTY occupies 12 bytes on 32-bit
   15.43 +	  architectures and 24 bytes on 64-bit architectures.
   15.44 +
   15.45 +endmenu
   15.46 +
   15.47  endif
   15.48  
   15.49  endmenu
    16.1 --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue May 03 14:42:00 2005 +0000
    16.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue May 10 08:49:59 2005 +0000
    16.3 @@ -1,7 +1,7 @@
    16.4  #
    16.5  # Automatically generated make config: don't edit
    16.6  # Linux kernel version: 2.6.11-xen0
    16.7 -# Tue May  3 13:25:48 2005
    16.8 +# Wed May  4 17:11:56 2005
    16.9  #
   16.10  CONFIG_XEN=y
   16.11  CONFIG_ARCH_XEN=y
   16.12 @@ -146,7 +146,7 @@ CONFIG_PCI=y
   16.13  # CONFIG_PCI_GOMMCONFIG is not set
   16.14  # CONFIG_PCI_GODIRECT is not set
   16.15  CONFIG_PCI_GOANY=y
   16.16 -CONFIG_PCI_BIOS=y
   16.17 +# CONFIG_PCI_BIOS is not set
   16.18  CONFIG_PCI_DIRECT=y
   16.19  # CONFIG_PCIEPORTBUS is not set
   16.20  # CONFIG_PCI_MSI is not set
   16.21 @@ -176,16 +176,22 @@ CONFIG_PCMCIA_PROBE=y
   16.22  # Kernel hacking
   16.23  #
   16.24  CONFIG_DEBUG_KERNEL=y
   16.25 +CONFIG_MAGIC_SYSRQ=y
   16.26 +# CONFIG_SCHEDSTATS is not set
   16.27 +# CONFIG_DEBUG_SLAB is not set
   16.28 +# CONFIG_DEBUG_PREEMPT is not set
   16.29 +# CONFIG_DEBUG_SPINLOCK is not set
   16.30 +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   16.31 +# CONFIG_DEBUG_KOBJECT is not set
   16.32 +CONFIG_DEBUG_BUGVERBOSE=y
   16.33 +# CONFIG_DEBUG_INFO is not set
   16.34 +# CONFIG_DEBUG_FS is not set
   16.35 +# CONFIG_FRAME_POINTER is not set
   16.36  CONFIG_EARLY_PRINTK=y
   16.37  # CONFIG_DEBUG_STACKOVERFLOW is not set
   16.38 +# CONFIG_KPROBES is not set
   16.39  # CONFIG_DEBUG_STACK_USAGE is not set
   16.40 -# CONFIG_DEBUG_SLAB is not set
   16.41 -CONFIG_MAGIC_SYSRQ=y
   16.42 -# CONFIG_DEBUG_SPINLOCK is not set
   16.43  # CONFIG_DEBUG_PAGEALLOC is not set
   16.44 -# CONFIG_DEBUG_INFO is not set
   16.45 -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   16.46 -# CONFIG_FRAME_POINTER is not set
   16.47  # CONFIG_4KSTACKS is not set
   16.48  CONFIG_X86_FIND_SMP_CONFIG=y
   16.49  CONFIG_X86_MPPARSE=y
    17.1 --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Tue May 03 14:42:00 2005 +0000
    17.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Tue May 10 08:49:59 2005 +0000
    17.3 @@ -1,7 +1,7 @@
    17.4  #
    17.5  # Automatically generated make config: don't edit
    17.6  # Linux kernel version: 2.6.11-xenU
    17.7 -# Wed Apr  6 09:20:09 2005
    17.8 +# Wed May  4 17:14:10 2005
    17.9  #
   17.10  CONFIG_XEN=y
   17.11  CONFIG_ARCH_XEN=y
   17.12 @@ -132,16 +132,22 @@ CONFIG_HAVE_DEC_LOCK=y
   17.13  # Kernel hacking
   17.14  #
   17.15  CONFIG_DEBUG_KERNEL=y
   17.16 +CONFIG_MAGIC_SYSRQ=y
   17.17 +# CONFIG_SCHEDSTATS is not set
   17.18 +# CONFIG_DEBUG_SLAB is not set
   17.19 +# CONFIG_DEBUG_PREEMPT is not set
   17.20 +# CONFIG_DEBUG_SPINLOCK is not set
   17.21 +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   17.22 +# CONFIG_DEBUG_KOBJECT is not set
   17.23 +CONFIG_DEBUG_BUGVERBOSE=y
   17.24 +# CONFIG_DEBUG_INFO is not set
   17.25 +# CONFIG_DEBUG_FS is not set
   17.26 +# CONFIG_FRAME_POINTER is not set
   17.27  CONFIG_EARLY_PRINTK=y
   17.28  # CONFIG_DEBUG_STACKOVERFLOW is not set
   17.29 +# CONFIG_KPROBES is not set
   17.30  # CONFIG_DEBUG_STACK_USAGE is not set
   17.31 -# CONFIG_DEBUG_SLAB is not set
   17.32 -CONFIG_MAGIC_SYSRQ=y
   17.33 -# CONFIG_DEBUG_SPINLOCK is not set
   17.34  # CONFIG_DEBUG_PAGEALLOC is not set
   17.35 -# CONFIG_DEBUG_INFO is not set
   17.36 -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   17.37 -# CONFIG_FRAME_POINTER is not set
   17.38  # CONFIG_4KSTACKS is not set
   17.39  CONFIG_GENERIC_HARDIRQS=y
   17.40  CONFIG_GENERIC_IRQ_PROBE=y
   17.41 @@ -334,6 +340,12 @@ CONFIG_NETDEVICES=y
   17.42  # CONFIG_SHAPER is not set
   17.43  # CONFIG_NETCONSOLE is not set
   17.44  CONFIG_UNIX98_PTYS=y
   17.45 +CONFIG_LEGACY_PTYS=y
   17.46 +CONFIG_LEGACY_PTY_COUNT=256
   17.47 +
   17.48 +#
   17.49 +# Character devices
   17.50 +#
   17.51  
   17.52  #
   17.53  # File systems
    18.1 --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Tue May 03 14:42:00 2005 +0000
    18.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Tue May 10 08:49:59 2005 +0000
    18.3 @@ -316,6 +316,12 @@ CONFIG_NETDEVICES=y
    18.4  # CONFIG_SHAPER is not set
    18.5  # CONFIG_NETCONSOLE is not set
    18.6  CONFIG_UNIX98_PTYS=y
    18.7 +CONFIG_LEGACY_PTYS=y
    18.8 +CONFIG_LEGACY_PTY_COUNT=256
    18.9 +
   18.10 +#
   18.11 +# Character devices
   18.12 +#
   18.13  
   18.14  #
   18.15  # File systems
    19.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig	Tue May 03 14:42:00 2005 +0000
    19.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig	Tue May 10 08:49:59 2005 +0000
    19.3 @@ -12,6 +12,11 @@ config XENARCH
    19.4  config X86
    19.5  	bool
    19.6  	default y
    19.7 +	help
    19.8 +	  This is Linux's home port.  Linux was originally native to the Intel
    19.9 +	  386, and runs on all the later x86 processors including the Intel
   19.10 +	  486, 586, Pentiums, and various instruction-set-compatible chips by
   19.11 +	  AMD, Cyrix, and others.
   19.12  
   19.13  config MMU
   19.14  	bool
   19.15 @@ -782,13 +787,19 @@ endchoice
   19.16  config PCI_BIOS
   19.17  	bool
   19.18  	depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
   19.19 -	default n
   19.20 +	default y
   19.21  
   19.22  config PCI_DIRECT
   19.23  	bool
   19.24   	depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
   19.25  	default y
   19.26  
   19.27 +config PCI_MMCONFIG
   19.28 +	bool
   19.29 +	depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI))
   19.30 +	select ACPI_BOOT
   19.31 +	default y
   19.32 +
   19.33  source "drivers/pci/pcie/Kconfig"
   19.34  
   19.35  source "drivers/pci/Kconfig"
   19.36 @@ -856,129 +867,7 @@ endmenu
   19.37  
   19.38  endif
   19.39  
   19.40 -menu "Kernel hacking"
   19.41 -
   19.42 -config DEBUG_KERNEL
   19.43 -	bool "Kernel debugging"
   19.44 -	help
   19.45 -	  Say Y here if you are developing drivers or trying to debug and
   19.46 -	  identify kernel problems.
   19.47 -
   19.48 -config EARLY_PRINTK
   19.49 -	bool "Early printk" if EMBEDDED
   19.50 -	default y
   19.51 -	help
   19.52 -	  Write kernel log output directly into the VGA buffer or to a serial
   19.53 -	  port.
   19.54 -
   19.55 -	  This is useful for kernel debugging when your machine crashes very
   19.56 -	  early before the console code is initialized. For normal operation
   19.57 -	  it is not recommended because it looks ugly and doesn't cooperate
   19.58 -	  with klogd/syslogd or the X server. You should normally N here,
   19.59 -	  unless you want to debug such a crash.
   19.60 -
   19.61 -config DEBUG_STACKOVERFLOW
   19.62 -	bool "Check for stack overflows"
   19.63 -	depends on DEBUG_KERNEL
   19.64 -
   19.65 -config DEBUG_STACK_USAGE
   19.66 -	bool "Stack utilization instrumentation"
   19.67 -	depends on DEBUG_KERNEL
   19.68 -	help
   19.69 -	  Enables the display of the minimum amount of free stack which each
   19.70 -	  task has ever had available in the sysrq-T and sysrq-P debug output.
   19.71 -
   19.72 -	  This option will slow down process creation somewhat.
   19.73 -
   19.74 -config DEBUG_SLAB
   19.75 -	bool "Debug memory allocations"
   19.76 -	depends on DEBUG_KERNEL
   19.77 -	help
   19.78 -	  Say Y here to have the kernel do limited verification on memory
   19.79 -	  allocation as well as poisoning memory on free to catch use of freed
   19.80 -	  memory.
   19.81 -
   19.82 -config MAGIC_SYSRQ
   19.83 -	bool "Magic SysRq key"
   19.84 -	depends on DEBUG_KERNEL
   19.85 -	help
   19.86 -	  If you say Y here, you will have some control over the system even
   19.87 -	  if the system crashes for example during kernel debugging (e.g., you
   19.88 -	  will be able to flush the buffer cache to disk, reboot the system
   19.89 -	  immediately or dump some status information). This is accomplished
   19.90 -	  by pressing various keys while holding SysRq (Alt+PrintScreen). It
   19.91 -	  also works on a serial console (on PC hardware at least), if you
   19.92 -	  send a BREAK and then within 5 seconds a command keypress. The
   19.93 -	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
   19.94 -	  unless you really know what this hack does.
   19.95 -
   19.96 -config DEBUG_SPINLOCK
   19.97 -	bool "Spinlock debugging"
   19.98 -	depends on DEBUG_KERNEL
   19.99 -	help
  19.100 -	  Say Y here and build SMP to catch missing spinlock initialization
  19.101 -	  and certain other kinds of spinlock errors commonly made.  This is
  19.102 -	  best used in conjunction with the NMI watchdog so that spinlock
  19.103 -	  deadlocks are also debuggable.
  19.104 -
  19.105 -config DEBUG_PAGEALLOC
  19.106 -	bool "Page alloc debugging"
  19.107 -	depends on DEBUG_KERNEL
  19.108 -	help
  19.109 -	  Unmap pages from the kernel linear mapping after free_pages().
  19.110 -	  This results in a large slowdown, but helps to find certain types
  19.111 -	  of memory corruptions.
  19.112 -
  19.113 -config DEBUG_HIGHMEM
  19.114 -	bool "Highmem debugging"
  19.115 -	depends on DEBUG_KERNEL && HIGHMEM
  19.116 -	help
  19.117 -	  This options enables addition error checking for high memory systems.
  19.118 -	  Disable for production systems.
  19.119 -
  19.120 -config DEBUG_INFO
  19.121 -	bool "Compile the kernel with debug info"
  19.122 -	depends on DEBUG_KERNEL
  19.123 -	help
  19.124 -          If you say Y here the resulting kernel image will include
  19.125 -	  debugging info resulting in a larger kernel image.
  19.126 -	  Say Y here only if you plan to use gdb to debug the kernel.
  19.127 -	  If you don't debug the kernel, you can say N.
  19.128 -	  
  19.129 -config DEBUG_SPINLOCK_SLEEP
  19.130 -	bool "Sleep-inside-spinlock checking"
  19.131 -	help
  19.132 -	  If you say Y here, various routines which may sleep will become very
  19.133 -	  noisy if they are called with a spinlock held.	
  19.134 -
  19.135 -config FRAME_POINTER
  19.136 -	bool "Compile the kernel with frame pointers"
  19.137 -	help
  19.138 -	  If you say Y here the resulting kernel image will be slightly larger
  19.139 -	  and slower, but it will give very useful debugging information.
  19.140 -	  If you don't debug the kernel, you can say N, but we may not be able
  19.141 -	  to solve problems without frame pointers.
  19.142 -
  19.143 -config 4KSTACKS
  19.144 -	bool "Use 4Kb for kernel stacks instead of 8Kb"
  19.145 -	help
  19.146 -	  If you say Y here the kernel will use a 4Kb stacksize for the
  19.147 -	  kernel stack attached to each process/thread. This facilitates
  19.148 -	  running more threads on a system and also reduces the pressure
  19.149 -	  on the VM subsystem for higher order allocations. This option
  19.150 -	  will also use IRQ stacks to compensate for the reduced stackspace.
  19.151 -
  19.152 -config X86_FIND_SMP_CONFIG
  19.153 -	bool
  19.154 -	depends on X86_LOCAL_APIC || X86_VOYAGER
  19.155 -	default n
  19.156 -
  19.157 -config X86_MPPARSE
  19.158 -	bool
  19.159 -	depends on X86_LOCAL_APIC && !X86_VISWS
  19.160 -	default y
  19.161 -
  19.162 -endmenu
  19.163 +source "arch/i386/Kconfig.debug"
  19.164  
  19.165  #
  19.166  # Use the generic interrupt handling code in kernel/irq/:
    20.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile	Tue May 03 14:42:00 2005 +0000
    20.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile	Tue May 10 08:49:59 2005 +0000
    20.3 @@ -20,7 +20,7 @@ s-obj-y	:=
    20.4  
    20.5  obj-y				+= cpu/
    20.6  obj-y				+= timers/
    20.7 -c-obj-$(CONFIG_ACPI_BOOT)	+= acpi/
    20.8 +obj-$(CONFIG_ACPI_BOOT)		+= acpi/
    20.9  #c-obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
   20.10  c-obj-$(CONFIG_MCA)		+= mca.o
   20.11  c-obj-$(CONFIG_X86_MSR)		+= msr.o
   20.12 @@ -29,10 +29,10 @@ obj-$(CONFIG_MICROCODE)		+= microcode.o
   20.13  c-obj-$(CONFIG_APM)		+= apm.o
   20.14  obj-$(CONFIG_X86_SMP)		+= smp.o smpboot.o
   20.15  #obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
   20.16 -c-obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
   20.17 +obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
   20.18  obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o
   20.19  c-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
   20.20 -c-obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
   20.21 +obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
   20.22  c-obj-$(CONFIG_X86_NUMAQ)	+= numaq.o
   20.23  c-obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.o
   20.24  c-obj-$(CONFIG_MODULES)		+= module.o
    21.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile	Tue May 03 14:42:00 2005 +0000
    21.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile	Tue May 10 08:49:59 2005 +0000
    21.3 @@ -7,3 +7,7 @@ c-link                                  
    21.4  $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
    21.5  	@ln -fsn $(srctree)/arch/i386/kernel/acpi/$(notdir $@) $@
    21.6  
    21.7 +obj-y	+= $(c-obj-y) $(s-obj-y)
    21.8 +
    21.9 +clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
   21.10 +clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
    22.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c	Tue May 03 14:42:00 2005 +0000
    22.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c	Tue May 10 08:49:59 2005 +0000
    22.3 @@ -109,17 +109,17 @@ enum acpi_irq_model_id		acpi_irq_model =
    22.4  
    22.5  char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
    22.6  {
    22.7 -        unsigned int i,j;
    22.8 +	unsigned int i,j;
    22.9  
   22.10 -        j = PAGE_ALIGN(size) >> PAGE_SHIFT;
   22.11 -        for (i = 0; (i < FIX_ACPI_PAGES) && j ; i++, j--) {
   22.12 -                __set_fixmap_ma(FIX_ACPI_END - i,
   22.13 -                                (phys_addr & PAGE_MASK) + (i << PAGE_SHIFT),
   22.14 -                                PAGE_KERNEL);
   22.15 -        }
   22.16 +	j = PAGE_ALIGN(size) >> PAGE_SHIFT;
   22.17 +	for (i = 0; (i < FIX_ACPI_PAGES) && j ; i++, j--) {
   22.18 +		set_fixmap(FIX_ACPI_END - i,
   22.19 +			   (phys_addr & PAGE_MASK) + (i << PAGE_SHIFT));
   22.20 +	}
   22.21  
   22.22 -        return (char *) __fix_to_virt(FIX_ACPI_END) + (phys_addr & ~PAGE_MASK);
   22.23 +	return (char *) __fix_to_virt(FIX_ACPI_END) + (phys_addr & ~PAGE_MASK);
   22.24  }
   22.25 +
   22.26  #else
   22.27  #ifdef	CONFIG_X86_64
   22.28  
   22.29 @@ -523,7 +523,7 @@ acpi_scan_rsdp (
   22.30  {
   22.31  	unsigned long		offset = 0;
   22.32  	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
   22.33 -        unsigned long           vstart = isa_bus_to_virt(start);
   22.34 +	unsigned long		vstart = (unsigned long)isa_bus_to_virt(start);
   22.35  
   22.36  	/*
   22.37  	 * Scan all 16-byte boundaries of the physical memory region for the
   22.38 @@ -649,16 +649,15 @@ acpi_find_rsdp (void)
   22.39  		else if (efi.acpi)
   22.40  			return __pa(efi.acpi);
   22.41  	}
   22.42 -
   22.43  	/*
   22.44  	 * Scan memory looking for the RSDP signature. First search EBDA (low
   22.45  	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
   22.46  	 */
   22.47  	rsdp_phys = acpi_scan_rsdp (0, 0x400);
   22.48  	if (!rsdp_phys)
   22.49 -		rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF);
   22.50 +		rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
   22.51  
   22.52 -        __set_fixmap_ma(FIX_ACPI_RSDP_PAGE, rsdp_phys, PAGE_KERNEL);
   22.53 +	set_fixmap(FIX_ACPI_RSDP_PAGE, rsdp_phys);
   22.54  
   22.55  	return rsdp_phys;
   22.56  }
   22.57 @@ -673,10 +672,6 @@ acpi_parse_madt_lapic_entries(void)
   22.58  {
   22.59  	int count;
   22.60  
   22.61 -#ifdef CONFIG_XEN
   22.62 -        return 0;
   22.63 -#endif
   22.64 -
   22.65  	/* 
   22.66  	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
   22.67  	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
   22.68 @@ -872,7 +867,7 @@ acpi_boot_table_init(void)
   22.69  	}
   22.70  
   22.71  #ifdef __i386__
   22.72 -	//check_acpi_pci();
   22.73 +	check_acpi_pci();
   22.74  #endif
   22.75  
   22.76  	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
    23.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c	Tue May 03 14:42:00 2005 +0000
    23.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c	Tue May 10 08:49:59 2005 +0000
    23.3 @@ -16,8 +16,32 @@
    23.4  
    23.5  #include <linux/config.h>
    23.6  #include <linux/init.h>
    23.7 -#include <asm/apic.h>
    23.8  
    23.9 +#include <linux/mm.h>
   23.10 +#include <linux/irq.h>
   23.11 +#include <linux/delay.h>
   23.12 +#include <linux/bootmem.h>
   23.13 +#include <linux/smp_lock.h>
   23.14 +#include <linux/interrupt.h>
   23.15 +#include <linux/mc146818rtc.h>
   23.16 +#include <linux/kernel_stat.h>
   23.17 +#include <linux/sysdev.h>
   23.18 +
   23.19 +#include <asm/atomic.h>
   23.20 +#include <asm/smp.h>
   23.21 +#include <asm/mtrr.h>
   23.22 +#include <asm/mpspec.h>
   23.23 +#include <asm/desc.h>
   23.24 +#include <asm/arch_hooks.h>
   23.25 +#include <asm/hpet.h>
   23.26 +
   23.27 +#include <mach_apic.h>
   23.28 +
   23.29 +#include "io_ports.h"
   23.30 +
   23.31 +/*
   23.32 + * Debug level
   23.33 + */
   23.34  int apic_verbosity;
   23.35  
   23.36  int get_physical_broadcast(void)
   23.37 @@ -49,5 +73,11 @@ void ack_bad_irq(unsigned int irq)
   23.38   */
   23.39  int __init APIC_init_uniprocessor (void)
   23.40  {
   23.41 +#ifdef CONFIG_X86_IO_APIC
   23.42 +	if (smp_found_config)
   23.43 +		if (!skip_ioapic_setup && nr_ioapics)
   23.44 +			setup_IO_APIC();
   23.45 +#endif
   23.46 +
   23.47  	return 0;
   23.48  }
    24.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c	Tue May 03 14:42:00 2005 +0000
    24.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c	Tue May 10 08:49:59 2005 +0000
    24.3 @@ -37,19 +37,56 @@
    24.4  #include <asm/smp.h>
    24.5  #include <asm/desc.h>
    24.6  #include <asm/timer.h>
    24.7 -#include <asm/io_apic.h>
    24.8 -#include <asm/apic.h>
    24.9  
   24.10  #include <mach_apic.h>
   24.11  
   24.12  #include "io_ports.h"
   24.13  
   24.14 +#ifdef CONFIG_XEN
   24.15 +
   24.16 +#include <asm-xen/xen-public/xen.h>
   24.17 +#include <asm-xen/xen-public/physdev.h>
   24.18 +
   24.19 +/* Fake i8259 */
   24.20 +#define make_8259A_irq(_irq)     (io_apic_irqs &= ~(1UL<<(_irq)))
   24.21 +#define disable_8259A_irq(_irq)  ((void)0)
   24.22 +#define i8259A_irq_pending(_irq) (0)
   24.23 +
   24.24 +unsigned long io_apic_irqs;
   24.25 +
   24.26 +static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
   24.27 +{
   24.28 +	physdev_op_t op;
   24.29 +	int ret;
   24.30 +
   24.31 +	op.cmd = PHYSDEVOP_APIC_READ;
   24.32 +	op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
   24.33 +	op.u.apic_op.offset = reg;
   24.34 +	ret = HYPERVISOR_physdev_op(&op);
   24.35 +	if (ret)
   24.36 +		return ret;
   24.37 +	return op.u.apic_op.value;
   24.38 +}
   24.39 +
   24.40 +static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
   24.41 +{
   24.42 +	physdev_op_t op;
   24.43 +
   24.44 +	op.cmd = PHYSDEVOP_APIC_WRITE;
   24.45 +	op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
   24.46 +	op.u.apic_op.offset = reg;
   24.47 +	op.u.apic_op.value = value;
   24.48 +	HYPERVISOR_physdev_op(&op);
   24.49 +}
   24.50 +
   24.51 +#define io_apic_read(a,r)    xen_io_apic_read(a,r)
   24.52 +#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
   24.53 +
   24.54 +#endif /* CONFIG_XEN */
   24.55 +
   24.56  int (*ioapic_renumber_irq)(int ioapic, int irq);
   24.57  atomic_t irq_mis_count;
   24.58  
   24.59 -unsigned long io_apic_irqs;
   24.60 -int skip_ioapic_setup;
   24.61 -
   24.62  static DEFINE_SPINLOCK(ioapic_lock);
   24.63  
   24.64  /*
   24.65 @@ -89,24 +126,6 @@ int vector_irq[NR_VECTORS] = { [0 ... NR
   24.66  #define vector_to_irq(vector)	(vector)
   24.67  #endif
   24.68  
   24.69 -
   24.70 -#ifndef CONFIG_SMP
   24.71 -void fastcall send_IPI_self(int vector)
   24.72 -{
   24.73 -     return; 
   24.74 -}
   24.75 -#endif
   24.76 -
   24.77 -int irqbalance_disable(char *str)
   24.78 -{
   24.79 -     return 0; 
   24.80 -}
   24.81 -
   24.82 -void print_IO_APIC(void)
   24.83 -{
   24.84 -     return; 
   24.85 -}
   24.86 -
   24.87  /*
   24.88   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
   24.89   * shared ISA-space IRQs, so we have to support them. We are super
   24.90 @@ -130,6 +149,583 @@ static void add_pin_to_irq(unsigned int 
   24.91  	entry->pin = pin;
   24.92  }
   24.93  
   24.94 +#ifndef CONFIG_XEN
   24.95 +/*
   24.96 + * Reroute an IRQ to a different pin.
   24.97 + */
   24.98 +static void __init replace_pin_at_irq(unsigned int irq,
   24.99 +				      int oldapic, int oldpin,
  24.100 +				      int newapic, int newpin)
  24.101 +{
  24.102 +	struct irq_pin_list *entry = irq_2_pin + irq;
  24.103 +
  24.104 +	while (1) {
  24.105 +		if (entry->apic == oldapic && entry->pin == oldpin) {
  24.106 +			entry->apic = newapic;
  24.107 +			entry->pin = newpin;
  24.108 +		}
  24.109 +		if (!entry->next)
  24.110 +			break;
  24.111 +		entry = irq_2_pin + entry->next;
  24.112 +	}
  24.113 +}
  24.114 +
  24.115 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
  24.116 +{
  24.117 +	struct irq_pin_list *entry = irq_2_pin + irq;
  24.118 +	unsigned int pin, reg;
  24.119 +
  24.120 +	for (;;) {
  24.121 +		pin = entry->pin;
  24.122 +		if (pin == -1)
  24.123 +			break;
  24.124 +		reg = io_apic_read(entry->apic, 0x10 + pin*2);
  24.125 +		reg &= ~disable;
  24.126 +		reg |= enable;
  24.127 +		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
  24.128 +		if (!entry->next)
  24.129 +			break;
  24.130 +		entry = irq_2_pin + entry->next;
  24.131 +	}
  24.132 +}
  24.133 +
  24.134 +/* mask = 1 */
  24.135 +static void __mask_IO_APIC_irq (unsigned int irq)
  24.136 +{
  24.137 +	__modify_IO_APIC_irq(irq, 0x00010000, 0);
  24.138 +}
  24.139 +
  24.140 +/* mask = 0 */
  24.141 +static void __unmask_IO_APIC_irq (unsigned int irq)
  24.142 +{
  24.143 +	__modify_IO_APIC_irq(irq, 0, 0x00010000);
  24.144 +}
  24.145 +
  24.146 +/* mask = 1, trigger = 0 */
  24.147 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
  24.148 +{
  24.149 +	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
  24.150 +}
  24.151 +
  24.152 +/* mask = 0, trigger = 1 */
  24.153 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
  24.154 +{
  24.155 +	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
  24.156 +}
  24.157 +
  24.158 +static void mask_IO_APIC_irq (unsigned int irq)
  24.159 +{
  24.160 +	unsigned long flags;
  24.161 +
  24.162 +	spin_lock_irqsave(&ioapic_lock, flags);
  24.163 +	__mask_IO_APIC_irq(irq);
  24.164 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  24.165 +}
  24.166 +
  24.167 +static void unmask_IO_APIC_irq (unsigned int irq)
  24.168 +{
  24.169 +	unsigned long flags;
  24.170 +
  24.171 +	spin_lock_irqsave(&ioapic_lock, flags);
  24.172 +	__unmask_IO_APIC_irq(irq);
  24.173 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  24.174 +}
  24.175 +
  24.176 +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
  24.177 +{
  24.178 +	struct IO_APIC_route_entry entry;
  24.179 +	unsigned long flags;
  24.180 +	
  24.181 +	/* Check delivery_mode to be sure we're not clearing an SMI pin */
  24.182 +	spin_lock_irqsave(&ioapic_lock, flags);
  24.183 +	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
  24.184 +	*(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
  24.185 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  24.186 +	if (entry.delivery_mode == dest_SMI)
  24.187 +		return;
  24.188 +
  24.189 +	/*
  24.190 +	 * Disable it in the IO-APIC irq-routing table:
  24.191 +	 */
  24.192 +	memset(&entry, 0, sizeof(entry));
  24.193 +	entry.mask = 1;
  24.194 +	spin_lock_irqsave(&ioapic_lock, flags);
  24.195 +	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
  24.196 +	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
  24.197 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  24.198 +}
  24.199 +
  24.200 +static void clear_IO_APIC (void)
  24.201 +{
  24.202 +	int apic, pin;
  24.203 +
  24.204 +	for (apic = 0; apic < nr_ioapics; apic++)
  24.205 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
  24.206 +			clear_IO_APIC_pin(apic, pin);
  24.207 +}
  24.208 +
  24.209 +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
  24.210 +{
  24.211 +	unsigned long flags;
  24.212 +	int pin;
  24.213 +	struct irq_pin_list *entry = irq_2_pin + irq;
  24.214 +	unsigned int apicid_value;
  24.215 +	
  24.216 +	apicid_value = cpu_mask_to_apicid(cpumask);
  24.217 +	/* Prepare to do the io_apic_write */
  24.218 +	apicid_value = apicid_value << 24;
  24.219 +	spin_lock_irqsave(&ioapic_lock, flags);
  24.220 +	for (;;) {
  24.221 +		pin = entry->pin;
  24.222 +		if (pin == -1)
  24.223 +			break;
  24.224 +		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
  24.225 +		if (!entry->next)
  24.226 +			break;
  24.227 +		entry = irq_2_pin + entry->next;
  24.228 +	}
  24.229 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  24.230 +}
  24.231 +#else
  24.232 +#define clear_IO_APIC() ((void)0)
  24.233 +#endif
  24.234 +
  24.235 +#if defined(CONFIG_IRQBALANCE)
  24.236 +# include <asm/processor.h>	/* kernel_thread() */
  24.237 +# include <linux/kernel_stat.h>	/* kstat */
  24.238 +# include <linux/slab.h>		/* kmalloc() */
  24.239 +# include <linux/timer.h>	/* time_after() */
  24.240 + 
  24.241 +# ifdef CONFIG_BALANCED_IRQ_DEBUG
  24.242 +#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
  24.243 +#  define Dprintk(x...) do { TDprintk(x); } while (0)
  24.244 +# else
  24.245 +#  define TDprintk(x...) 
  24.246 +#  define Dprintk(x...) 
  24.247 +# endif
  24.248 +
  24.249 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
  24.250 +
  24.251 +#define IRQBALANCE_CHECK_ARCH -999
  24.252 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
  24.253 +static int physical_balance = 0;
  24.254 +
  24.255 +struct irq_cpu_info {
  24.256 +	unsigned long * last_irq;
  24.257 +	unsigned long * irq_delta;
  24.258 +	unsigned long irq;
  24.259 +} irq_cpu_data[NR_CPUS];
  24.260 +
  24.261 +#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
  24.262 +#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
  24.263 +#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
  24.264 +
  24.265 +#define IDLE_ENOUGH(cpu,now) \
  24.266 +		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
  24.267 +
  24.268 +#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
  24.269 +
  24.270 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
  24.271 +
  24.272 +#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
  24.273 +#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
  24.274 +#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
  24.275 +#define BALANCED_IRQ_LESS_DELTA		(HZ)
  24.276 +
  24.277 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
  24.278 +
  24.279 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
  24.280 +			unsigned long now, int direction)
  24.281 +{
  24.282 +	int search_idle = 1;
  24.283 +	int cpu = curr_cpu;
  24.284 +
  24.285 +	goto inside;
  24.286 +
  24.287 +	do {
  24.288 +		if (unlikely(cpu == curr_cpu))
  24.289 +			search_idle = 0;
  24.290 +inside:
  24.291 +		if (direction == 1) {
  24.292 +			cpu++;
  24.293 +			if (cpu >= NR_CPUS)
  24.294 +				cpu = 0;
  24.295 +		} else {
  24.296 +			cpu--;
  24.297 +			if (cpu == -1)
  24.298 +				cpu = NR_CPUS-1;
  24.299 +		}
  24.300 +	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
  24.301 +			(search_idle && !IDLE_ENOUGH(cpu,now)));
  24.302 +
  24.303 +	return cpu;
  24.304 +}
  24.305 +
  24.306 +static inline void balance_irq(int cpu, int irq)
  24.307 +{
  24.308 +	unsigned long now = jiffies;
  24.309 +	cpumask_t allowed_mask;
  24.310 +	unsigned int new_cpu;
  24.311 +		
  24.312 +	if (irqbalance_disabled)
  24.313 +		return; 
  24.314 +
  24.315 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
  24.316 +	new_cpu = move(cpu, allowed_mask, now, 1);
  24.317 +	if (cpu != new_cpu) {
  24.318 +		irq_desc_t *desc = irq_desc + irq;
  24.319 +		unsigned long flags;
  24.320 +
  24.321 +		spin_lock_irqsave(&desc->lock, flags);
  24.322 +		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
  24.323 +		spin_unlock_irqrestore(&desc->lock, flags);
  24.324 +	}
  24.325 +}
  24.326 +
  24.327 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
  24.328 +{
  24.329 +	int i, j;
  24.330 +	Dprintk("Rotating IRQs among CPUs.\n");
  24.331 +	for (i = 0; i < NR_CPUS; i++) {
  24.332 +		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
  24.333 +			if (!irq_desc[j].action)
  24.334 +				continue;
  24.335 +			/* Is it a significant load ?  */
  24.336 +			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
  24.337 +						useful_load_threshold)
  24.338 +				continue;
  24.339 +			balance_irq(i, j);
  24.340 +		}
  24.341 +	}
  24.342 +	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  24.343 +		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  24.344 +	return;
  24.345 +}
  24.346 +
  24.347 +static void do_irq_balance(void)
  24.348 +{
  24.349 +	int i, j;
  24.350 +	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
  24.351 +	unsigned long move_this_load = 0;
  24.352 +	int max_loaded = 0, min_loaded = 0;
  24.353 +	int load;
  24.354 +	unsigned long useful_load_threshold = balanced_irq_interval + 10;
  24.355 +	int selected_irq;
  24.356 +	int tmp_loaded, first_attempt = 1;
  24.357 +	unsigned long tmp_cpu_irq;
  24.358 +	unsigned long imbalance = 0;
  24.359 +	cpumask_t allowed_mask, target_cpu_mask, tmp;
  24.360 +
  24.361 +	for (i = 0; i < NR_CPUS; i++) {
  24.362 +		int package_index;
  24.363 +		CPU_IRQ(i) = 0;
  24.364 +		if (!cpu_online(i))
  24.365 +			continue;
  24.366 +		package_index = CPU_TO_PACKAGEINDEX(i);
  24.367 +		for (j = 0; j < NR_IRQS; j++) {
  24.368 +			unsigned long value_now, delta;
  24.369 +			/* Is this an active IRQ? */
  24.370 +			if (!irq_desc[j].action)
  24.371 +				continue;
  24.372 +			if ( package_index == i )
  24.373 +				IRQ_DELTA(package_index,j) = 0;
  24.374 +			/* Determine the total count per processor per IRQ */
  24.375 +			value_now = (unsigned long) kstat_cpu(i).irqs[j];
  24.376 +
  24.377 +			/* Determine the activity per processor per IRQ */
  24.378 +			delta = value_now - LAST_CPU_IRQ(i,j);
  24.379 +
  24.380 +			/* Update last_cpu_irq[][] for the next time */
  24.381 +			LAST_CPU_IRQ(i,j) = value_now;
  24.382 +
  24.383 +			/* Ignore IRQs whose rate is less than the clock */
  24.384 +			if (delta < useful_load_threshold)
  24.385 +				continue;
  24.386 +			/* update the load for the processor or package total */
  24.387 +			IRQ_DELTA(package_index,j) += delta;
  24.388 +
  24.389 +			/* Keep track of the higher numbered sibling as well */
  24.390 +			if (i != package_index)
  24.391 +				CPU_IRQ(i) += delta;
  24.392 +			/*
  24.393 +			 * We have sibling A and sibling B in the package
  24.394 +			 *
  24.395 +			 * cpu_irq[A] = load for cpu A + load for cpu B
  24.396 +			 * cpu_irq[B] = load for cpu B
  24.397 +			 */
  24.398 +			CPU_IRQ(package_index) += delta;
  24.399 +		}
  24.400 +	}
  24.401 +	/* Find the least loaded processor package */
  24.402 +	for (i = 0; i < NR_CPUS; i++) {
  24.403 +		if (!cpu_online(i))
  24.404 +			continue;
  24.405 +		if (i != CPU_TO_PACKAGEINDEX(i))
  24.406 +			continue;
  24.407 +		if (min_cpu_irq > CPU_IRQ(i)) {
  24.408 +			min_cpu_irq = CPU_IRQ(i);
  24.409 +			min_loaded = i;
  24.410 +		}
  24.411 +	}
  24.412 +	max_cpu_irq = ULONG_MAX;
  24.413 +
  24.414 +tryanothercpu:
  24.415 +	/* Look for heaviest loaded processor.
  24.416 +	 * We may come back to get the next heaviest loaded processor.
  24.417 +	 * Skip processors with trivial loads.
  24.418 +	 */
  24.419 +	tmp_cpu_irq = 0;
  24.420 +	tmp_loaded = -1;
  24.421 +	for (i = 0; i < NR_CPUS; i++) {
  24.422 +		if (!cpu_online(i))
  24.423 +			continue;
  24.424 +		if (i != CPU_TO_PACKAGEINDEX(i))
  24.425 +			continue;
  24.426 +		if (max_cpu_irq <= CPU_IRQ(i)) 
  24.427 +			continue;
  24.428 +		if (tmp_cpu_irq < CPU_IRQ(i)) {
  24.429 +			tmp_cpu_irq = CPU_IRQ(i);
  24.430 +			tmp_loaded = i;
  24.431 +		}
  24.432 +	}
  24.433 +
  24.434 +	if (tmp_loaded == -1) {
  24.435 + 	 /* In the case of small number of heavy interrupt sources, 
  24.436 +	  * loading some of the cpus too much. We use Ingo's original 
  24.437 +	  * approach to rotate them around.
  24.438 +	  */
  24.439 +		if (!first_attempt && imbalance >= useful_load_threshold) {
  24.440 +			rotate_irqs_among_cpus(useful_load_threshold);
  24.441 +			return;
  24.442 +		}
  24.443 +		goto not_worth_the_effort;
  24.444 +	}
  24.445 +	
  24.446 +	first_attempt = 0;		/* heaviest search */
  24.447 +	max_cpu_irq = tmp_cpu_irq;	/* load */
  24.448 +	max_loaded = tmp_loaded;	/* processor */
  24.449 +	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
  24.450 +	
  24.451 +	Dprintk("max_loaded cpu = %d\n", max_loaded);
  24.452 +	Dprintk("min_loaded cpu = %d\n", min_loaded);
  24.453 +	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
  24.454 +	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
  24.455 +	Dprintk("load imbalance = %lu\n", imbalance);
  24.456 +
  24.457 +	/* if imbalance is less than approx 10% of max load, then
  24.458 +	 * observe diminishing returns action. - quit
  24.459 +	 */
  24.460 +	if (imbalance < (max_cpu_irq >> 3)) {
  24.461 +		Dprintk("Imbalance too trivial\n");
  24.462 +		goto not_worth_the_effort;
  24.463 +	}
  24.464 +
  24.465 +tryanotherirq:
  24.466 +	/* if we select an IRQ to move that can't go where we want, then
  24.467 +	 * see if there is another one to try.
  24.468 +	 */
  24.469 +	move_this_load = 0;
  24.470 +	selected_irq = -1;
  24.471 +	for (j = 0; j < NR_IRQS; j++) {
  24.472 +		/* Is this an active IRQ? */
  24.473 +		if (!irq_desc[j].action)
  24.474 +			continue;
  24.475 +		if (imbalance <= IRQ_DELTA(max_loaded,j))
  24.476 +			continue;
  24.477 +		/* Try to find the IRQ that is closest to the imbalance
  24.478 +		 * without going over.
  24.479 +		 */
  24.480 +		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
  24.481 +			move_this_load = IRQ_DELTA(max_loaded,j);
  24.482 +			selected_irq = j;
  24.483 +		}
  24.484 +	}
  24.485 +	if (selected_irq == -1) {
  24.486 +		goto tryanothercpu;
  24.487 +	}
  24.488 +
  24.489 +	imbalance = move_this_load;
  24.490 +	
  24.491 +	/* For physical_balance case, we accumlated both load
  24.492 +	 * values in the one of the siblings cpu_irq[],
  24.493 +	 * to use the same code for physical and logical processors
  24.494 +	 * as much as possible. 
  24.495 +	 *
  24.496 +	 * NOTE: the cpu_irq[] array holds the sum of the load for
  24.497 +	 * sibling A and sibling B in the slot for the lowest numbered
  24.498 +	 * sibling (A), _AND_ the load for sibling B in the slot for
  24.499 +	 * the higher numbered sibling.
  24.500 +	 *
  24.501 +	 * We seek the least loaded sibling by making the comparison
  24.502 +	 * (A+B)/2 vs B
  24.503 +	 */
  24.504 +	load = CPU_IRQ(min_loaded) >> 1;
  24.505 +	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
  24.506 +		if (load > CPU_IRQ(j)) {
  24.507 +			/* This won't change cpu_sibling_map[min_loaded] */
  24.508 +			load = CPU_IRQ(j);
  24.509 +			min_loaded = j;
  24.510 +		}
  24.511 +	}
  24.512 +
  24.513 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
  24.514 +	target_cpu_mask = cpumask_of_cpu(min_loaded);
  24.515 +	cpus_and(tmp, target_cpu_mask, allowed_mask);
  24.516 +
  24.517 +	if (!cpus_empty(tmp)) {
  24.518 +		irq_desc_t *desc = irq_desc + selected_irq;
  24.519 +		unsigned long flags;
  24.520 +
  24.521 +		Dprintk("irq = %d moved to cpu = %d\n",
  24.522 +				selected_irq, min_loaded);
  24.523 +		/* mark for change destination */
  24.524 +		spin_lock_irqsave(&desc->lock, flags);
  24.525 +		pending_irq_balance_cpumask[selected_irq] =
  24.526 +					cpumask_of_cpu(min_loaded);
  24.527 +		spin_unlock_irqrestore(&desc->lock, flags);
  24.528 +		/* Since we made a change, come back sooner to 
  24.529 +		 * check for more variation.
  24.530 +		 */
  24.531 +		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  24.532 +			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  24.533 +		return;
  24.534 +	}
  24.535 +	goto tryanotherirq;
  24.536 +
  24.537 +not_worth_the_effort:
  24.538 +	/*
  24.539 +	 * if we did not find an IRQ to move, then adjust the time interval
  24.540 +	 * upward
  24.541 +	 */
  24.542 +	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
  24.543 +		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
  24.544 +	Dprintk("IRQ worth rotating not found\n");
  24.545 +	return;
  24.546 +}
  24.547 +
  24.548 +static int balanced_irq(void *unused)
  24.549 +{
  24.550 +	int i;
  24.551 +	unsigned long prev_balance_time = jiffies;
  24.552 +	long time_remaining = balanced_irq_interval;
  24.553 +
  24.554 +	daemonize("kirqd");
  24.555 +	
  24.556 +	/* push everything to CPU 0 to give us a starting point.  */
  24.557 +	for (i = 0 ; i < NR_IRQS ; i++) {
  24.558 +		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
  24.559 +	}
  24.560 +
  24.561 +	for ( ; ; ) {
  24.562 +		set_current_state(TASK_INTERRUPTIBLE);
  24.563 +		time_remaining = schedule_timeout(time_remaining);
  24.564 +		try_to_freeze(PF_FREEZE);
  24.565 +		if (time_after(jiffies,
  24.566 +				prev_balance_time+balanced_irq_interval)) {
  24.567 +			do_irq_balance();
  24.568 +			prev_balance_time = jiffies;
  24.569 +			time_remaining = balanced_irq_interval;
  24.570 +		}
  24.571 +	}
  24.572 +	return 0;
  24.573 +}
  24.574 +
  24.575 +static int __init balanced_irq_init(void)
  24.576 +{
  24.577 +	int i;
  24.578 +	struct cpuinfo_x86 *c;
  24.579 +	cpumask_t tmp;
  24.580 +
  24.581 +	cpus_shift_right(tmp, cpu_online_map, 2);
  24.582 +        c = &boot_cpu_data;
  24.583 +	/* When not overwritten by the command line ask subarchitecture. */
  24.584 +	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
  24.585 +		irqbalance_disabled = NO_BALANCE_IRQ;
  24.586 +	if (irqbalance_disabled)
  24.587 +		return 0;
  24.588 +	
  24.589 +	 /* disable irqbalance completely if there is only one processor online */
  24.590 +	if (num_online_cpus() < 2) {
  24.591 +		irqbalance_disabled = 1;
  24.592 +		return 0;
  24.593 +	}
  24.594 +	/*
  24.595 +	 * Enable physical balance only if more than 1 physical processor
  24.596 +	 * is present
  24.597 +	 */
  24.598 +	if (smp_num_siblings > 1 && !cpus_empty(tmp))
  24.599 +		physical_balance = 1;
  24.600 +
  24.601 +	for (i = 0; i < NR_CPUS; i++) {
  24.602 +		if (!cpu_online(i))
  24.603 +			continue;
  24.604 +		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  24.605 +		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  24.606 +		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
  24.607 +			printk(KERN_ERR "balanced_irq_init: out of memory");
  24.608 +			goto failed;
  24.609 +		}
  24.610 +		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
  24.611 +		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
  24.612 +	}
  24.613 +	
  24.614 +	printk(KERN_INFO "Starting balanced_irq\n");
  24.615 +	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
  24.616 +		return 0;
  24.617 +	else 
  24.618 +		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
  24.619 +failed:
  24.620 +	for (i = 0; i < NR_CPUS; i++) {
  24.621 +		if(irq_cpu_data[i].irq_delta)
  24.622 +			kfree(irq_cpu_data[i].irq_delta);
  24.623 +		if(irq_cpu_data[i].last_irq)
  24.624 +			kfree(irq_cpu_data[i].last_irq);
  24.625 +	}
  24.626 +	return 0;
  24.627 +}
  24.628 +
  24.629 +int __init irqbalance_disable(char *str)
  24.630 +{
  24.631 +	irqbalance_disabled = 1;
  24.632 +	return 0;
  24.633 +}
  24.634 +
  24.635 +__setup("noirqbalance", irqbalance_disable);
  24.636 +
  24.637 +static inline void move_irq(int irq)
  24.638 +{
  24.639 +	/* note - we hold the desc->lock */
  24.640 +	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
  24.641 +		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
  24.642 +		cpus_clear(pending_irq_balance_cpumask[irq]);
  24.643 +	}
  24.644 +}
  24.645 +
  24.646 +late_initcall(balanced_irq_init);
  24.647 +
  24.648 +#else /* !CONFIG_IRQBALANCE */
  24.649 +static inline void move_irq(int irq) { }
  24.650 +#endif /* CONFIG_IRQBALANCE */
  24.651 +
  24.652 +#ifndef CONFIG_SMP
  24.653 +void fastcall send_IPI_self(int vector)
  24.654 +{
  24.655 +#ifndef CONFIG_XEN
  24.656 +	unsigned int cfg;
  24.657 +
  24.658 +	/*
  24.659 +	 * Wait for idle.
  24.660 +	 */
  24.661 +	apic_wait_icr_idle();
  24.662 +	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
  24.663 +	/*
  24.664 +	 * Send the IPI. The write to APIC_ICR fires this off.
  24.665 +	 */
  24.666 +	apic_write_around(APIC_ICR, cfg);
  24.667 +#endif
  24.668 +}
  24.669 +#endif /* !CONFIG_SMP */
  24.670 +
  24.671  /*
  24.672   * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  24.673   * specific CPU-side IRQs.
  24.674 @@ -138,6 +734,88 @@ static void add_pin_to_irq(unsigned int 
  24.675  #define MAX_PIRQS 8
  24.676  int pirq_entries [MAX_PIRQS];
  24.677  int pirqs_enabled;
  24.678 +int skip_ioapic_setup;
  24.679 +
  24.680 +static int __init ioapic_setup(char *str)
  24.681 +{
  24.682 +	skip_ioapic_setup = 1;
  24.683 +	return 1;
  24.684 +}
  24.685 +
  24.686 +__setup("noapic", ioapic_setup);
  24.687 +
  24.688 +static int __init ioapic_pirq_setup(char *str)
  24.689 +{
  24.690 +	int i, max;
  24.691 +	int ints[MAX_PIRQS+1];
  24.692 +
  24.693 +	get_options(str, ARRAY_SIZE(ints), ints);
  24.694 +
  24.695 +	for (i = 0; i < MAX_PIRQS; i++)
  24.696 +		pirq_entries[i] = -1;
  24.697 +
  24.698 +	pirqs_enabled = 1;
  24.699 +	apic_printk(APIC_VERBOSE, KERN_INFO
  24.700 +			"PIRQ redirection, working around broken MP-BIOS.\n");
  24.701 +	max = MAX_PIRQS;
  24.702 +	if (ints[0] < MAX_PIRQS)
  24.703 +		max = ints[0];
  24.704 +
  24.705 +	for (i = 0; i < max; i++) {
  24.706 +		apic_printk(APIC_VERBOSE, KERN_DEBUG
  24.707 +				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
  24.708 +		/*
  24.709 +		 * PIRQs are mapped upside down, usually.
  24.710 +		 */
  24.711 +		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
  24.712 +	}
  24.713 +	return 1;
  24.714 +}
  24.715 +
  24.716 +__setup("pirq=", ioapic_pirq_setup);
  24.717 +
  24.718 +/*
  24.719 + * Find the IRQ entry number of a certain pin.
  24.720 + */
  24.721 +static int find_irq_entry(int apic, int pin, int type)
  24.722 +{
  24.723 +	int i;
  24.724 +
  24.725 +	for (i = 0; i < mp_irq_entries; i++)
  24.726 +		if (mp_irqs[i].mpc_irqtype == type &&
  24.727 +		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
  24.728 +		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
  24.729 +		    mp_irqs[i].mpc_dstirq == pin)
  24.730 +			return i;
  24.731 +
  24.732 +	return -1;
  24.733 +}
  24.734 +
  24.735 +#ifndef CONFIG_XEN
  24.736 +/*
  24.737 + * Find the pin to which IRQ[irq] (ISA) is connected
  24.738 + */
  24.739 +static int find_isa_irq_pin(int irq, int type)
  24.740 +{
  24.741 +	int i;
  24.742 +
  24.743 +	for (i = 0; i < mp_irq_entries; i++) {
  24.744 +		int lbus = mp_irqs[i].mpc_srcbus;
  24.745 +
  24.746 +		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
  24.747 +		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
  24.748 +		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
  24.749 +		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
  24.750 +		    ) &&
  24.751 +		    (mp_irqs[i].mpc_irqtype == type) &&
  24.752 +		    (mp_irqs[i].mpc_srcbusirq == irq))
  24.753 +
  24.754 +			return mp_irqs[i].mpc_dstirq;
  24.755 +	}
  24.756 +	return -1;
  24.757 +}
  24.758 +#endif
  24.759 +
  24.760  /*
  24.761   * Find a specific PCI IRQ entry.
  24.762   * Not an __init, possibly needed by modules
  24.763 @@ -184,6 +862,236 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
  24.764  	return best_guess;
  24.765  }
  24.766  
  24.767 +#ifndef CONFIG_XEN
  24.768 +/*
  24.769 + * This function currently is only a helper for the i386 smp boot process where 
  24.770 + * we need to reprogram the ioredtbls to cater for the cpus which have come online
  24.771 + * so mask in all cases should simply be TARGET_CPUS
  24.772 + */
  24.773 +void __init setup_ioapic_dest(void)
  24.774 +{
  24.775 +	int pin, ioapic, irq, irq_entry;
  24.776 +
  24.777 +	if (skip_ioapic_setup == 1)
  24.778 +		return;
  24.779 +
  24.780 +	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
  24.781 +		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
  24.782 +			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
  24.783 +			if (irq_entry == -1)
  24.784 +				continue;
  24.785 +			irq = pin_2_irq(irq_entry, ioapic, pin);
  24.786 +			set_ioapic_affinity_irq(irq, TARGET_CPUS);
  24.787 +		}
  24.788 +
  24.789 +	}
  24.790 +}
  24.791 +#endif /* !CONFIG_XEN */
  24.792 +
  24.793 +/*
  24.794 + * EISA Edge/Level control register, ELCR
  24.795 + */
  24.796 +static int EISA_ELCR(unsigned int irq)
  24.797 +{
  24.798 +	if (irq < 16) {
  24.799 +		unsigned int port = 0x4d0 + (irq >> 3);
  24.800 +		return (inb(port) >> (irq & 7)) & 1;
  24.801 +	}
  24.802 +	apic_printk(APIC_VERBOSE, KERN_INFO
  24.803 +			"Broken MPtable reports ISA irq %d\n", irq);
  24.804 +	return 0;
  24.805 +}
  24.806 +
  24.807 +/* EISA interrupts are always polarity zero and can be edge or level
  24.808 + * trigger depending on the ELCR value.  If an interrupt is listed as
  24.809 + * EISA conforming in the MP table, that means its trigger type must
  24.810 + * be read in from the ELCR */
  24.811 +
  24.812 +#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
  24.813 +#define default_EISA_polarity(idx)	(0)
  24.814 +
  24.815 +/* ISA interrupts are always polarity zero edge triggered,
  24.816 + * when listed as conforming in the MP table. */
  24.817 +
  24.818 +#define default_ISA_trigger(idx)	(0)
  24.819 +#define default_ISA_polarity(idx)	(0)
  24.820 +
  24.821 +/* PCI interrupts are always polarity one level triggered,
  24.822 + * when listed as conforming in the MP table. */
  24.823 +
  24.824 +#define default_PCI_trigger(idx)	(1)
  24.825 +#define default_PCI_polarity(idx)	(1)
  24.826 +
  24.827 +/* MCA interrupts are always polarity zero level triggered,
  24.828 + * when listed as conforming in the MP table. */
  24.829 +
  24.830 +#define default_MCA_trigger(idx)	(1)
  24.831 +#define default_MCA_polarity(idx)	(0)
  24.832 +
  24.833 +/* NEC98 interrupts are always polarity zero edge triggered,
  24.834 + * when listed as conforming in the MP table. */
  24.835 +
  24.836 +#define default_NEC98_trigger(idx)     (0)
  24.837 +#define default_NEC98_polarity(idx)    (0)
  24.838 +
  24.839 +static int __init MPBIOS_polarity(int idx)
  24.840 +{
  24.841 +	int bus = mp_irqs[idx].mpc_srcbus;
  24.842 +	int polarity;
  24.843 +
  24.844 +	/*
  24.845 +	 * Determine IRQ line polarity (high active or low active):
  24.846 +	 */
  24.847 +	switch (mp_irqs[idx].mpc_irqflag & 3)
  24.848 +	{
  24.849 +		case 0: /* conforms, ie. bus-type dependent polarity */
  24.850 +		{
  24.851 +			switch (mp_bus_id_to_type[bus])
  24.852 +			{
  24.853 +				case MP_BUS_ISA: /* ISA pin */
  24.854 +				{
  24.855 +					polarity = default_ISA_polarity(idx);
  24.856 +					break;
  24.857 +				}
  24.858 +				case MP_BUS_EISA: /* EISA pin */
  24.859 +				{
  24.860 +					polarity = default_EISA_polarity(idx);
  24.861 +					break;
  24.862 +				}
  24.863 +				case MP_BUS_PCI: /* PCI pin */
  24.864 +				{
  24.865 +					polarity = default_PCI_polarity(idx);
  24.866 +					break;
  24.867 +				}
  24.868 +				case MP_BUS_MCA: /* MCA pin */
  24.869 +				{
  24.870 +					polarity = default_MCA_polarity(idx);
  24.871 +					break;
  24.872 +				}
  24.873 +				case MP_BUS_NEC98: /* NEC 98 pin */
  24.874 +				{
  24.875 +					polarity = default_NEC98_polarity(idx);
  24.876 +					break;
  24.877 +				}
  24.878 +				default:
  24.879 +				{
  24.880 +					printk(KERN_WARNING "broken BIOS!!\n");
  24.881 +					polarity = 1;
  24.882 +					break;
  24.883 +				}
  24.884 +			}
  24.885 +			break;
  24.886 +		}
  24.887 +		case 1: /* high active */
  24.888 +		{
  24.889 +			polarity = 0;
  24.890 +			break;
  24.891 +		}
  24.892 +		case 2: /* reserved */
  24.893 +		{
  24.894 +			printk(KERN_WARNING "broken BIOS!!\n");
  24.895 +			polarity = 1;
  24.896 +			break;
  24.897 +		}
  24.898 +		case 3: /* low active */
  24.899 +		{
  24.900 +			polarity = 1;
  24.901 +			break;
  24.902 +		}
  24.903 +		default: /* invalid */
  24.904 +		{
  24.905 +			printk(KERN_WARNING "broken BIOS!!\n");
  24.906 +			polarity = 1;
  24.907 +			break;
  24.908 +		}
  24.909 +	}
  24.910 +	return polarity;
  24.911 +}
  24.912 +
  24.913 +static int MPBIOS_trigger(int idx)
  24.914 +{
  24.915 +	int bus = mp_irqs[idx].mpc_srcbus;
  24.916 +	int trigger;
  24.917 +
  24.918 +	/*
  24.919 +	 * Determine IRQ trigger mode (edge or level sensitive):
  24.920 +	 */
  24.921 +	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
  24.922 +	{
  24.923 +		case 0: /* conforms, ie. bus-type dependent */
  24.924 +		{
  24.925 +			switch (mp_bus_id_to_type[bus])
  24.926 +			{
  24.927 +				case MP_BUS_ISA: /* ISA pin */
  24.928 +				{
  24.929 +					trigger = default_ISA_trigger(idx);
  24.930 +					break;
  24.931 +				}
  24.932 +				case MP_BUS_EISA: /* EISA pin */
  24.933 +				{
  24.934 +					trigger = default_EISA_trigger(idx);
  24.935 +					break;
  24.936 +				}
  24.937 +				case MP_BUS_PCI: /* PCI pin */
  24.938 +				{
  24.939 +					trigger = default_PCI_trigger(idx);
  24.940 +					break;
  24.941 +				}
  24.942 +				case MP_BUS_MCA: /* MCA pin */
  24.943 +				{
  24.944 +					trigger = default_MCA_trigger(idx);
  24.945 +					break;
  24.946 +				}
  24.947 +				case MP_BUS_NEC98: /* NEC 98 pin */
  24.948 +				{
  24.949 +					trigger = default_NEC98_trigger(idx);
  24.950 +					break;
  24.951 +				}
  24.952 +				default:
  24.953 +				{
  24.954 +					printk(KERN_WARNING "broken BIOS!!\n");
  24.955 +					trigger = 1;
  24.956 +					break;
  24.957 +				}
  24.958 +			}
  24.959 +			break;
  24.960 +		}
  24.961 +		case 1: /* edge */
  24.962 +		{
  24.963 +			trigger = 0;
  24.964 +			break;
  24.965 +		}
  24.966 +		case 2: /* reserved */
  24.967 +		{
  24.968 +			printk(KERN_WARNING "broken BIOS!!\n");
  24.969 +			trigger = 1;
  24.970 +			break;
  24.971 +		}
  24.972 +		case 3: /* level */
  24.973 +		{
  24.974 +			trigger = 1;
  24.975 +			break;
  24.976 +		}
  24.977 +		default: /* invalid */
  24.978 +		{
  24.979 +			printk(KERN_WARNING "broken BIOS!!\n");
  24.980 +			trigger = 0;
  24.981 +			break;
  24.982 +		}
  24.983 +	}
  24.984 +	return trigger;
  24.985 +}
  24.986 +
  24.987 +static inline int irq_polarity(int idx)
  24.988 +{
  24.989 +	return MPBIOS_polarity(idx);
  24.990 +}
  24.991 +
  24.992 +static inline int irq_trigger(int idx)
  24.993 +{
  24.994 +	return MPBIOS_trigger(idx);
  24.995 +}
  24.996 +
  24.997  static int pin_2_irq(int idx, int apic, int pin)
  24.998  {
  24.999  	int irq, i;
 24.1000 @@ -250,26 +1158,41 @@ static int pin_2_irq(int idx, int apic, 
 24.1001  	return irq;
 24.1002  }
 24.1003  
 24.1004 +static inline int IO_APIC_irq_trigger(int irq)
 24.1005 +{
 24.1006 +	int apic, idx, pin;
 24.1007 +
 24.1008 +	for (apic = 0; apic < nr_ioapics; apic++) {
 24.1009 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 24.1010 +			idx = find_irq_entry(apic,pin,mp_INT);
 24.1011 +			if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
 24.1012 +				return irq_trigger(idx);
 24.1013 +		}
 24.1014 +	}
 24.1015 +	/*
 24.1016 +	 * nonexistent IRQs are edge default
 24.1017 +	 */
 24.1018 +	return 0;
 24.1019 +}
 24.1020 +
 24.1021  /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 24.1022 -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
 24.1023 +u8 irq_vector[NR_IRQ_VECTORS]; /* = { FIRST_DEVICE_VECTOR , 0 }; */
 24.1024  
 24.1025  int assign_irq_vector(int irq)
 24.1026  {
 24.1027  	static int current_vector = FIRST_DEVICE_VECTOR;
 24.1028 -        physdev_op_t op;
 24.1029 -        int ret;
 24.1030 +	physdev_op_t op;
 24.1031  
 24.1032  	BUG_ON(irq >= NR_IRQ_VECTORS);
 24.1033  	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
 24.1034  		return IO_APIC_VECTOR(irq);
 24.1035  
 24.1036 -        op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
 24.1037 -        op.u.irq_op.irq = irq;
 24.1038 -        ret = HYPERVISOR_physdev_op(&op);
 24.1039 -        if (ret)
 24.1040 -            return -ENOSPC;
 24.1041 +	op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
 24.1042 +	op.u.irq_op.irq = irq;
 24.1043 +	if (HYPERVISOR_physdev_op(&op))
 24.1044 +		return -ENOSPC;
 24.1045 +	current_vector = op.u.irq_op.vector;
 24.1046  
 24.1047 -        current_vector = op.u.irq_op.vector;
 24.1048  	vector_irq[current_vector] = irq;
 24.1049  	if (irq != AUTO_ASSIGN)
 24.1050  		IO_APIC_VECTOR(irq) = current_vector;
 24.1051 @@ -277,12 +1200,1268 @@ int assign_irq_vector(int irq)
 24.1052  	return current_vector;
 24.1053  }
 24.1054  
 24.1055 +#ifndef CONFIG_XEN
 24.1056 +static struct hw_interrupt_type ioapic_level_type;
 24.1057 +static struct hw_interrupt_type ioapic_edge_type;
 24.1058 +
 24.1059 +#define IOAPIC_AUTO	-1
 24.1060 +#define IOAPIC_EDGE	0
 24.1061 +#define IOAPIC_LEVEL	1
 24.1062 +
 24.1063 +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 24.1064 +{
 24.1065 +	if (use_pci_vector() && !platform_legacy_irq(irq)) {
 24.1066 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 24.1067 +				trigger == IOAPIC_LEVEL)
 24.1068 +			irq_desc[vector].handler = &ioapic_level_type;
 24.1069 +		else
 24.1070 +			irq_desc[vector].handler = &ioapic_edge_type;
 24.1071 +		set_intr_gate(vector, interrupt[vector]);
 24.1072 +	} else	{
 24.1073 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 24.1074 +				trigger == IOAPIC_LEVEL)
 24.1075 +			irq_desc[irq].handler = &ioapic_level_type;
 24.1076 +		else
 24.1077 +			irq_desc[irq].handler = &ioapic_edge_type;
 24.1078 +		set_intr_gate(vector, interrupt[irq]);
 24.1079 +	}
 24.1080 +}
 24.1081 +#else
 24.1082 +#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
 24.1083 +#endif
 24.1084 +
 24.1085 +void __init setup_IO_APIC_irqs(void)
 24.1086 +{
 24.1087 +	struct IO_APIC_route_entry entry;
 24.1088 +	int apic, pin, idx, irq, first_notcon = 1, vector;
 24.1089 +	unsigned long flags;
 24.1090 +
 24.1091 +	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 24.1092 +
 24.1093 +	for (apic = 0; apic < nr_ioapics; apic++) {
 24.1094 +	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 24.1095 +
 24.1096 +		/*
 24.1097 +		 * add it to the IO-APIC irq-routing table:
 24.1098 +		 */
 24.1099 +		memset(&entry,0,sizeof(entry));
 24.1100 +
 24.1101 +		entry.delivery_mode = INT_DELIVERY_MODE;
 24.1102 +		entry.dest_mode = INT_DEST_MODE;
 24.1103 +		entry.mask = 0;				/* enable IRQ */
 24.1104 +		entry.dest.logical.logical_dest = 
 24.1105 +					cpu_mask_to_apicid(TARGET_CPUS);
 24.1106 +
 24.1107 +		idx = find_irq_entry(apic,pin,mp_INT);
 24.1108 +		if (idx == -1) {
 24.1109 +			if (first_notcon) {
 24.1110 +				apic_printk(APIC_VERBOSE, KERN_DEBUG
 24.1111 +						" IO-APIC (apicid-pin) %d-%d",
 24.1112 +						mp_ioapics[apic].mpc_apicid,
 24.1113 +						pin);
 24.1114 +				first_notcon = 0;
 24.1115 +			} else
 24.1116 +				apic_printk(APIC_VERBOSE, ", %d-%d",
 24.1117 +					mp_ioapics[apic].mpc_apicid, pin);
 24.1118 +			continue;
 24.1119 +		}
 24.1120 +
 24.1121 +		entry.trigger = irq_trigger(idx);
 24.1122 +		entry.polarity = irq_polarity(idx);
 24.1123 +
 24.1124 +		if (irq_trigger(idx)) {
 24.1125 +			entry.trigger = 1;
 24.1126 +			entry.mask = 1;
 24.1127 +		}
 24.1128 +
 24.1129 +		irq = pin_2_irq(idx, apic, pin);
 24.1130 +		/*
 24.1131 +		 * skip adding the timer int on secondary nodes, which causes
 24.1132 +		 * a small but painful rift in the time-space continuum
 24.1133 +		 */
 24.1134 +		if (multi_timer_check(apic, irq))
 24.1135 +			continue;
 24.1136 +		else
 24.1137 +			add_pin_to_irq(irq, apic, pin);
 24.1138 +
 24.1139 +		if (/*!apic &&*/ !IO_APIC_IRQ(irq))
 24.1140 +			continue;
 24.1141 +
 24.1142 +		if (IO_APIC_IRQ(irq)) {
 24.1143 +			vector = assign_irq_vector(irq);
 24.1144 +			entry.vector = vector;
 24.1145 +			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
 24.1146 +		
 24.1147 +			if (!apic && (irq < 16))
 24.1148 +				disable_8259A_irq(irq);
 24.1149 +		}
 24.1150 +		spin_lock_irqsave(&ioapic_lock, flags);
 24.1151 +		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
 24.1152 +		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
 24.1153 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1154 +	}
 24.1155 +	}
 24.1156 +
 24.1157 +	if (!first_notcon)
 24.1158 +		apic_printk(APIC_VERBOSE, " not connected.\n");
 24.1159 +}
 24.1160 +
 24.1161 +/*
 24.1162 + * Set up the 8259A-master output pin:
 24.1163 + */
 24.1164 +#ifndef CONFIG_XEN
 24.1165 +void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
 24.1166 +{
 24.1167 +	struct IO_APIC_route_entry entry;
 24.1168 +	unsigned long flags;
 24.1169 +
 24.1170 +	memset(&entry,0,sizeof(entry));
 24.1171 +
 24.1172 +	disable_8259A_irq(0);
 24.1173 +
 24.1174 +	/* mask LVT0 */
 24.1175 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 24.1176 +
 24.1177 +	/*
 24.1178 +	 * We use logical delivery to get the timer IRQ
 24.1179 +	 * to the first CPU.
 24.1180 +	 */
 24.1181 +	entry.dest_mode = INT_DEST_MODE;
 24.1182 +	entry.mask = 0;					/* unmask IRQ now */
 24.1183 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 24.1184 +	entry.delivery_mode = INT_DELIVERY_MODE;
 24.1185 +	entry.polarity = 0;
 24.1186 +	entry.trigger = 0;
 24.1187 +	entry.vector = vector;
 24.1188 +
 24.1189 +	/*
 24.1190 +	 * The timer IRQ doesn't have to know that behind the
 24.1191 +	 * scene we have a 8259A-master in AEOI mode ...
 24.1192 +	 */
 24.1193 +	irq_desc[0].handler = &ioapic_edge_type;
 24.1194 +
 24.1195 +	/*
 24.1196 +	 * Add it to the IO-APIC irq-routing table:
 24.1197 +	 */
 24.1198 +	spin_lock_irqsave(&ioapic_lock, flags);
 24.1199 +	io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
 24.1200 +	io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
 24.1201 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1202 +
 24.1203 +	enable_8259A_irq(0);
 24.1204 +}
 24.1205 +
 24.1206 +static inline void UNEXPECTED_IO_APIC(void)
 24.1207 +{
 24.1208 +}
 24.1209 +
 24.1210 +void __init print_IO_APIC(void)
 24.1211 +{
 24.1212 +	int apic, i;
 24.1213 +	union IO_APIC_reg_00 reg_00;
 24.1214 +	union IO_APIC_reg_01 reg_01;
 24.1215 +	union IO_APIC_reg_02 reg_02;
 24.1216 +	union IO_APIC_reg_03 reg_03;
 24.1217 +	unsigned long flags;
 24.1218 +
 24.1219 +	if (apic_verbosity == APIC_QUIET)
 24.1220 +		return;
 24.1221 +
 24.1222 + 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 24.1223 +	for (i = 0; i < nr_ioapics; i++)
 24.1224 +		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
 24.1225 +		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
 24.1226 +
 24.1227 +	/*
 24.1228 +	 * We are a bit conservative about what we expect.  We have to
 24.1229 +	 * know about every hardware change ASAP.
 24.1230 +	 */
 24.1231 +	printk(KERN_INFO "testing the IO APIC.......................\n");
 24.1232 +
 24.1233 +	for (apic = 0; apic < nr_ioapics; apic++) {
 24.1234 +
 24.1235 +	spin_lock_irqsave(&ioapic_lock, flags);
 24.1236 +	reg_00.raw = io_apic_read(apic, 0);
 24.1237 +	reg_01.raw = io_apic_read(apic, 1);
 24.1238 +	if (reg_01.bits.version >= 0x10)
 24.1239 +		reg_02.raw = io_apic_read(apic, 2);
 24.1240 +	if (reg_01.bits.version >= 0x20)
 24.1241 +		reg_03.raw = io_apic_read(apic, 3);
 24.1242 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1243 +
 24.1244 +	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
 24.1245 +	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 24.1246 +	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 24.1247 +	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 24.1248 +	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 24.1249 +	if (reg_00.bits.ID >= get_physical_broadcast())
 24.1250 +		UNEXPECTED_IO_APIC();
 24.1251 +	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
 24.1252 +		UNEXPECTED_IO_APIC();
 24.1253 +
 24.1254 +	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
 24.1255 +	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
 24.1256 +	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
 24.1257 +		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
 24.1258 +		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
 24.1259 +		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
 24.1260 +		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
 24.1261 +		(reg_01.bits.entries != 0x2E) &&
 24.1262 +		(reg_01.bits.entries != 0x3F)
 24.1263 +	)
 24.1264 +		UNEXPECTED_IO_APIC();
 24.1265 +
 24.1266 +	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 24.1267 +	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 24.1268 +	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
 24.1269 +		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
 24.1270 +		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
 24.1271 +		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
 24.1272 +		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
 24.1273 +	)
 24.1274 +		UNEXPECTED_IO_APIC();
 24.1275 +	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
 24.1276 +		UNEXPECTED_IO_APIC();
 24.1277 +
 24.1278 +	/*
 24.1279 +	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
 24.1280 +	 * but the value of reg_02 is read as the previous read register
 24.1281 +	 * value, so ignore it if reg_02 == reg_01.
 24.1282 +	 */
 24.1283 +	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 24.1284 +		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 24.1285 +		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 24.1286 +		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
 24.1287 +			UNEXPECTED_IO_APIC();
 24.1288 +	}
 24.1289 +
 24.1290 +	/*
 24.1291 +	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
 24.1292 +	 * or reg_03, but the value of reg_0[23] is read as the previous read
 24.1293 +	 * register value, so ignore it if reg_03 == reg_0[12].
 24.1294 +	 */
 24.1295 +	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
 24.1296 +	    reg_03.raw != reg_01.raw) {
 24.1297 +		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
 24.1298 +		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
 24.1299 +		if (reg_03.bits.__reserved_1)
 24.1300 +			UNEXPECTED_IO_APIC();
 24.1301 +	}
 24.1302 +
 24.1303 +	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 24.1304 +
 24.1305 +	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
 24.1306 +			  " Stat Dest Deli Vect:   \n");
 24.1307 +
 24.1308 +	for (i = 0; i <= reg_01.bits.entries; i++) {
 24.1309 +		struct IO_APIC_route_entry entry;
 24.1310 +
 24.1311 +		spin_lock_irqsave(&ioapic_lock, flags);
 24.1312 +		*(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
 24.1313 +		*(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
 24.1314 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1315 +
 24.1316 +		printk(KERN_DEBUG " %02x %03X %02X  ",
 24.1317 +			i,
 24.1318 +			entry.dest.logical.logical_dest,
 24.1319 +			entry.dest.physical.physical_dest
 24.1320 +		);
 24.1321 +
 24.1322 +		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
 24.1323 +			entry.mask,
 24.1324 +			entry.trigger,
 24.1325 +			entry.irr,
 24.1326 +			entry.polarity,
 24.1327 +			entry.delivery_status,
 24.1328 +			entry.dest_mode,
 24.1329 +			entry.delivery_mode,
 24.1330 +			entry.vector
 24.1331 +		);
 24.1332 +	}
 24.1333 +	}
 24.1334 +	if (use_pci_vector())
 24.1335 +		printk(KERN_INFO "Using vector-based indexing\n");
 24.1336 +	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 24.1337 +	for (i = 0; i < NR_IRQS; i++) {
 24.1338 +		struct irq_pin_list *entry = irq_2_pin + i;
 24.1339 +		if (entry->pin < 0)
 24.1340 +			continue;
 24.1341 + 		if (use_pci_vector() && !platform_legacy_irq(i))
 24.1342 +			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
 24.1343 +		else
 24.1344 +			printk(KERN_DEBUG "IRQ%d ", i);
 24.1345 +		for (;;) {
 24.1346 +			printk("-> %d:%d", entry->apic, entry->pin);
 24.1347 +			if (!entry->next)
 24.1348 +				break;
 24.1349 +			entry = irq_2_pin + entry->next;
 24.1350 +		}
 24.1351 +		printk("\n");
 24.1352 +	}
 24.1353 +
 24.1354 +	printk(KERN_INFO ".................................... done.\n");
 24.1355 +
 24.1356 +	return;
 24.1357 +}
 24.1358 +
 24.1359 +static void print_APIC_bitfield (int base)
 24.1360 +{
 24.1361 +	unsigned int v;
 24.1362 +	int i, j;
 24.1363 +
 24.1364 +	if (apic_verbosity == APIC_QUIET)
 24.1365 +		return;
 24.1366 +
 24.1367 +	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
 24.1368 +	for (i = 0; i < 8; i++) {
 24.1369 +		v = apic_read(base + i*0x10);
 24.1370 +		for (j = 0; j < 32; j++) {
 24.1371 +			if (v & (1<<j))
 24.1372 +				printk("1");
 24.1373 +			else
 24.1374 +				printk("0");
 24.1375 +		}
 24.1376 +		printk("\n");
 24.1377 +	}
 24.1378 +}
 24.1379 +
 24.1380 +void /*__init*/ print_local_APIC(void * dummy)
 24.1381 +{
 24.1382 +	unsigned int v, ver, maxlvt;
 24.1383 +
 24.1384 +	if (apic_verbosity == APIC_QUIET)
 24.1385 +		return;
 24.1386 +
 24.1387 +	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 24.1388 +		smp_processor_id(), hard_smp_processor_id());
 24.1389 +	v = apic_read(APIC_ID);
 24.1390 +	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
 24.1391 +	v = apic_read(APIC_LVR);
 24.1392 +	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 24.1393 +	ver = GET_APIC_VERSION(v);
 24.1394 +	maxlvt = get_maxlvt();
 24.1395 +
 24.1396 +	v = apic_read(APIC_TASKPRI);
 24.1397 +	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 24.1398 +
 24.1399 +	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
 24.1400 +		v = apic_read(APIC_ARBPRI);
 24.1401 +		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
 24.1402 +			v & APIC_ARBPRI_MASK);
 24.1403 +		v = apic_read(APIC_PROCPRI);
 24.1404 +		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 24.1405 +	}
 24.1406 +
 24.1407 +	v = apic_read(APIC_EOI);
 24.1408 +	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
 24.1409 +	v = apic_read(APIC_RRR);
 24.1410 +	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
 24.1411 +	v = apic_read(APIC_LDR);
 24.1412 +	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
 24.1413 +	v = apic_read(APIC_DFR);
 24.1414 +	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
 24.1415 +	v = apic_read(APIC_SPIV);
 24.1416 +	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 24.1417 +
 24.1418 +	printk(KERN_DEBUG "... APIC ISR field:\n");
 24.1419 +	print_APIC_bitfield(APIC_ISR);
 24.1420 +	printk(KERN_DEBUG "... APIC TMR field:\n");
 24.1421 +	print_APIC_bitfield(APIC_TMR);
 24.1422 +	printk(KERN_DEBUG "... APIC IRR field:\n");
 24.1423 +	print_APIC_bitfield(APIC_IRR);
 24.1424 +
 24.1425 +	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
 24.1426 +		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 24.1427 +			apic_write(APIC_ESR, 0);
 24.1428 +		v = apic_read(APIC_ESR);
 24.1429 +		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 24.1430 +	}
 24.1431 +
 24.1432 +	v = apic_read(APIC_ICR);
 24.1433 +	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 24.1434 +	v = apic_read(APIC_ICR2);
 24.1435 +	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 24.1436 +
 24.1437 +	v = apic_read(APIC_LVTT);
 24.1438 +	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
 24.1439 +
 24.1440 +	if (maxlvt > 3) {                       /* PC is LVT#4. */
 24.1441 +		v = apic_read(APIC_LVTPC);
 24.1442 +		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
 24.1443 +	}
 24.1444 +	v = apic_read(APIC_LVT0);
 24.1445 +	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
 24.1446 +	v = apic_read(APIC_LVT1);
 24.1447 +	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
 24.1448 +
 24.1449 +	if (maxlvt > 2) {			/* ERR is LVT#3. */
 24.1450 +		v = apic_read(APIC_LVTERR);
 24.1451 +		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
 24.1452 +	}
 24.1453 +
 24.1454 +	v = apic_read(APIC_TMICT);
 24.1455 +	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
 24.1456 +	v = apic_read(APIC_TMCCT);
 24.1457 +	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 24.1458 +	v = apic_read(APIC_TDCR);
 24.1459 +	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
 24.1460 +	printk("\n");
 24.1461 +}
 24.1462 +
 24.1463 +void print_all_local_APICs (void)
 24.1464 +{
 24.1465 +	on_each_cpu(print_local_APIC, NULL, 1, 1);
 24.1466 +}
 24.1467 +
 24.1468 +void /*__init*/ print_PIC(void)
 24.1469 +{
 24.1470 +	extern spinlock_t i8259A_lock;
 24.1471 +	unsigned int v;
 24.1472 +	unsigned long flags;
 24.1473 +
 24.1474 +	if (apic_verbosity == APIC_QUIET)
 24.1475 +		return;
 24.1476 +
 24.1477 +	printk(KERN_DEBUG "\nprinting PIC contents\n");
 24.1478 +
 24.1479 +	spin_lock_irqsave(&i8259A_lock, flags);
 24.1480 +
 24.1481 +	v = inb(0xa1) << 8 | inb(0x21);
 24.1482 +	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
 24.1483 +
 24.1484 +	v = inb(0xa0) << 8 | inb(0x20);
 24.1485 +	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
 24.1486 +
 24.1487 +	outb(0x0b,0xa0);
 24.1488 +	outb(0x0b,0x20);
 24.1489 +	v = inb(0xa0) << 8 | inb(0x20);
 24.1490 +	outb(0x0a,0xa0);
 24.1491 +	outb(0x0a,0x20);
 24.1492 +
 24.1493 +	spin_unlock_irqrestore(&i8259A_lock, flags);
 24.1494 +
 24.1495 +	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
 24.1496 +
 24.1497 +	v = inb(0x4d1) << 8 | inb(0x4d0);
 24.1498 +	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 24.1499 +}
 24.1500 +#else
 24.1501 +void __init print_IO_APIC(void) { }
 24.1502 +#endif /* !CONFIG_XEN */
 24.1503 +
 24.1504 +static void __init enable_IO_APIC(void)
 24.1505 +{
 24.1506 +	union IO_APIC_reg_01 reg_01;
 24.1507 +	int i;
 24.1508 +	unsigned long flags;
 24.1509 +
 24.1510 +	for (i = 0; i < PIN_MAP_SIZE; i++) {
 24.1511 +		irq_2_pin[i].pin = -1;
 24.1512 +		irq_2_pin[i].next = 0;
 24.1513 +	}
 24.1514 +	if (!pirqs_enabled)
 24.1515 +		for (i = 0; i < MAX_PIRQS; i++)
 24.1516 +			pirq_entries[i] = -1;
 24.1517 +
 24.1518 +	/*
 24.1519 +	 * The number of IO-APIC IRQ registers (== #pins):
 24.1520 +	 */
 24.1521 +	for (i = 0; i < nr_ioapics; i++) {
 24.1522 +		spin_lock_irqsave(&ioapic_lock, flags);
 24.1523 +		reg_01.raw = io_apic_read(i, 1);
 24.1524 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1525 +		nr_ioapic_registers[i] = reg_01.bits.entries+1;
 24.1526 +	}
 24.1527 +
 24.1528 +	/*
 24.1529 +	 * Do not trust the IO-APIC being empty at bootup
 24.1530 +	 */
 24.1531 +	clear_IO_APIC();
 24.1532 +}
 24.1533 +
 24.1534 +/*
 24.1535 + * Not an __init, needed by the reboot code
 24.1536 + */
 24.1537 +void disable_IO_APIC(void)
 24.1538 +{
 24.1539 +	/*
 24.1540 +	 * Clear the IO-APIC before rebooting:
 24.1541 +	 */
 24.1542 +	clear_IO_APIC();
 24.1543 +
 24.1544 +#ifndef CONFIG_XEN
 24.1545 +	disconnect_bsp_APIC();
 24.1546 +#endif
 24.1547 +}
 24.1548 +
 24.1549 +/*
 24.1550 + * function to set the IO-APIC physical IDs based on the
 24.1551 + * values stored in the MPC table.
 24.1552 + *
 24.1553 + * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
 24.1554 + */
 24.1555 +
 24.1556 +#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
 24.1557 +static void __init setup_ioapic_ids_from_mpc(void)
 24.1558 +{
 24.1559 +	union IO_APIC_reg_00 reg_00;
 24.1560 +	physid_mask_t phys_id_present_map;
 24.1561 +	int apic;
 24.1562 +	int i;
 24.1563 +	unsigned char old_id;
 24.1564 +	unsigned long flags;
 24.1565 +
 24.1566 +	/*
 24.1567 +	 * This is broken; anything with a real cpu count has to
 24.1568 +	 * circumvent this idiocy regardless.
 24.1569 +	 */
 24.1570 +	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
 24.1571 +
 24.1572 +	/*
 24.1573 +	 * Set the IOAPIC ID to the value stored in the MPC table.
 24.1574 +	 */
 24.1575 +	for (apic = 0; apic < nr_ioapics; apic++) {
 24.1576 +
 24.1577 +		/* Read the register 0 value */
 24.1578 +		spin_lock_irqsave(&ioapic_lock, flags);
 24.1579 +		reg_00.raw = io_apic_read(apic, 0);
 24.1580 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1581 +		
 24.1582 +		old_id = mp_ioapics[apic].mpc_apicid;
 24.1583 +
 24.1584 +		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
 24.1585 +			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
 24.1586 +				apic, mp_ioapics[apic].mpc_apicid);
 24.1587 +			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 24.1588 +				reg_00.bits.ID);
 24.1589 +			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
 24.1590 +		}
 24.1591 +
 24.1592 +		/* Don't check I/O APIC IDs for some xAPIC systems.  They have
 24.1593 +		 * no meaning without the serial APIC bus. */
 24.1594 +		if (NO_IOAPIC_CHECK)
 24.1595 +			continue;
 24.1596 +		/*
 24.1597 +		 * Sanity check, is the ID really free? Every APIC in a
 24.1598 +		 * system must have a unique ID or we get lots of nice
 24.1599 +		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 24.1600 +		 */
 24.1601 +		if (check_apicid_used(phys_id_present_map,
 24.1602 +					mp_ioapics[apic].mpc_apicid)) {
 24.1603 +			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
 24.1604 +				apic, mp_ioapics[apic].mpc_apicid);
 24.1605 +			for (i = 0; i < get_physical_broadcast(); i++)
 24.1606 +				if (!physid_isset(i, phys_id_present_map))
 24.1607 +					break;
 24.1608 +			if (i >= get_physical_broadcast())
 24.1609 +				panic("Max APIC ID exceeded!\n");
 24.1610 +			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 24.1611 +				i);
 24.1612 +			physid_set(i, phys_id_present_map);
 24.1613 +			mp_ioapics[apic].mpc_apicid = i;
 24.1614 +		} else {
 24.1615 +			physid_mask_t tmp;
 24.1616 +			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
 24.1617 +			apic_printk(APIC_VERBOSE, "Setting %d in the "
 24.1618 +					"phys_id_present_map\n",
 24.1619 +					mp_ioapics[apic].mpc_apicid);
 24.1620 +			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 24.1621 +		}
 24.1622 +
 24.1623 +
 24.1624 +		/*
 24.1625 +		 * We need to adjust the IRQ routing table
 24.1626 +		 * if the ID changed.
 24.1627 +		 */
 24.1628 +		if (old_id != mp_ioapics[apic].mpc_apicid)
 24.1629 +			for (i = 0; i < mp_irq_entries; i++)
 24.1630 +				if (mp_irqs[i].mpc_dstapic == old_id)
 24.1631 +					mp_irqs[i].mpc_dstapic
 24.1632 +						= mp_ioapics[apic].mpc_apicid;
 24.1633 +
 24.1634 +		/*
 24.1635 +		 * Read the right value from the MPC table and
 24.1636 +		 * write it into the ID register.
 24.1637 +	 	 */
 24.1638 +		apic_printk(APIC_VERBOSE, KERN_INFO
 24.1639 +			"...changing IO-APIC physical APIC ID to %d ...",
 24.1640 +			mp_ioapics[apic].mpc_apicid);
 24.1641 +
 24.1642 +		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
 24.1643 +		spin_lock_irqsave(&ioapic_lock, flags);
 24.1644 +		io_apic_write(apic, 0, reg_00.raw);
 24.1645 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1646 +
 24.1647 +		/*
 24.1648 +		 * Sanity check
 24.1649 +		 */
 24.1650 +		spin_lock_irqsave(&ioapic_lock, flags);
 24.1651 +		reg_00.raw = io_apic_read(apic, 0);
 24.1652 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1653 +		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
 24.1654 +			printk("could not set ID!\n");
 24.1655 +		else
 24.1656 +			apic_printk(APIC_VERBOSE, " ok.\n");
 24.1657 +	}
 24.1658 +}
 24.1659 +#else
 24.1660 +static void __init setup_ioapic_ids_from_mpc(void) { }
 24.1661 +#endif
 24.1662 +
 24.1663 +#ifndef CONFIG_XEN
 24.1664 +/*
 24.1665 + * There is a nasty bug in some older SMP boards, their mptable lies
 24.1666 + * about the timer IRQ. We do the following to work around the situation:
 24.1667 + *
 24.1668 + *	- timer IRQ defaults to IO-APIC IRQ
 24.1669 + *	- if this function detects that timer IRQs are defunct, then we fall
 24.1670 + *	  back to ISA timer IRQs
 24.1671 + */
 24.1672 +static int __init timer_irq_works(void)
 24.1673 +{
 24.1674 +	unsigned long t1 = jiffies;
 24.1675 +
 24.1676 +	local_irq_enable();
 24.1677 +	/* Let ten ticks pass... */
 24.1678 +	mdelay((10 * 1000) / HZ);
 24.1679 +
 24.1680 +	/*
 24.1681 +	 * Expect a few ticks at least, to be sure some possible
 24.1682 +	 * glue logic does not lock up after one or two first
 24.1683 +	 * ticks in a non-ExtINT mode.  Also the local APIC
 24.1684 +	 * might have cached one ExtINT interrupt.  Finally, at
 24.1685 +	 * least one tick may be lost due to delays.
 24.1686 +	 */
 24.1687 +	if (jiffies - t1 > 4)
 24.1688 +		return 1;
 24.1689 +
 24.1690 +	return 0;
 24.1691 +}
 24.1692 +
 24.1693 +/*
 24.1694 + * In the SMP+IOAPIC case it might happen that there are an unspecified
 24.1695 + * number of pending IRQ events unhandled. These cases are very rare,
 24.1696 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
 24.1697 + * better to do it this way as thus we do not have to be aware of
 24.1698 + * 'pending' interrupts in the IRQ path, except at this point.
 24.1699 + */
 24.1700 +/*
 24.1701 + * Edge triggered needs to resend any interrupt
 24.1702 + * that was delayed but this is now handled in the device
 24.1703 + * independent code.
 24.1704 + */
 24.1705 +
 24.1706 +/*
 24.1707 + * Starting up a edge-triggered IO-APIC interrupt is
 24.1708 + * nasty - we need to make sure that we get the edge.
 24.1709 + * If it is already asserted for some reason, we need
 24.1710 + * return 1 to indicate that is was pending.
 24.1711 + *
 24.1712 + * This is not complete - we should be able to fake
 24.1713 + * an edge even if it isn't on the 8259A...
 24.1714 + */
 24.1715 +static unsigned int startup_edge_ioapic_irq(unsigned int irq)
 24.1716 +{
 24.1717 +	int was_pending = 0;
 24.1718 +	unsigned long flags;
 24.1719 +
 24.1720 +	spin_lock_irqsave(&ioapic_lock, flags);
 24.1721 +	if (irq < 16) {
 24.1722 +		disable_8259A_irq(irq);
 24.1723 +		if (i8259A_irq_pending(irq))
 24.1724 +			was_pending = 1;
 24.1725 +	}
 24.1726 +	__unmask_IO_APIC_irq(irq);
 24.1727 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 24.1728 +
 24.1729 +	return was_pending;
 24.1730 +}
 24.1731 +
 24.1732 +/*
 24.1733 + * Once we have recorded IRQ_PENDING already, we can mask the
 24.1734 + * interrupt for real. This prevents IRQ storms from unhandled
 24.1735 + * devices.
 24.1736 + */
 24.1737 +static void ack_edge_ioapic_irq(unsigned int irq)
 24.1738 +{
 24.1739 +	move_irq(irq);
 24.1740 +	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
 24.1741 +					== (IRQ_PENDING | IRQ_DISABLED))
 24.1742 +		mask_IO_APIC_irq(irq);
 24.1743 +	ack_APIC_irq();
 24.1744 +}
 24.1745 +
 24.1746 +/*
 24.1747 + * Level triggered interrupts can just be masked,
 24.1748 + * and shutting down and starting up the interrupt
 24.1749 + * is the same as enabling and disabling them -- except
 24.1750 + * with a startup need to return a "was pending" value.
 24.1751 + *
 24.1752 + * Level triggered interrupts are special because we
 24.1753 + * do not touch any IO-APIC register while handling
 24.1754 + * them. We ack the APIC in the end-IRQ handler, not
 24.1755 + * in the start-IRQ-handler. Protection against reentrance
 24.1756 + * from the same interrupt is still provided, both by the
 24.1757 + * generic IRQ layer and by the fact that an unacked local
 24.1758 + * APIC does not accept IRQs.
 24.1759 + */
 24.1760 +static unsigned int startup_level_ioapic_irq (unsigned int irq)
 24.1761 +{
 24.1762 +	unmask_IO_APIC_irq(irq);
 24.1763 +
 24.1764 +	return 0; /* don't check for pending */
 24.1765 +}
 24.1766 +
 24.1767 +static void end_level_ioapic_irq (unsigned int irq)
 24.1768 +{
 24.1769 +	unsigned long v;
 24.1770 +	int i;
 24.1771 +
 24.1772 +	move_irq(irq);
 24.1773 +/*
 24.1774 + * It appears there is an erratum which affects at least version 0x11
 24.1775 + * of I/O APIC (that's the 82093AA and cores integrated into various
 24.1776 + * chipsets).  Under certain conditions a level-triggered interrupt is
 24.1777 + * erroneously delivered as edge-triggered one but the respective IRR
 24.1778 + * bit gets set nevertheless.  As a result the I/O unit expects an EOI
 24.1779 + * message but it will never arrive and further interrupts are blocked
 24.1780 + * from the source.  The exact reason is so far unknown, but the
 24.1781 + * phenomenon was observed when two consecutive interrupt requests
 24.1782 + * from a given source get delivered to the same CPU and the source is
 24.1783 + * temporarily disabled in between.
 24.1784 + *
 24.1785 + * A workaround is to simulate an EOI message manually.  We achieve it
 24.1786 + * by setting the trigger mode to edge and then to level when the edge
 24.1787 + * trigger mode gets detected in the TMR of a local APIC for a
 24.1788 + * level-triggered interrupt.  We mask the source for the time of the
 24.1789 + * operation to prevent an edge-triggered interrupt escaping meanwhile.
 24.1790 + * The idea is from Manfred Spraul.  --macro
 24.1791 + */
 24.1792 +	i = IO_APIC_VECTOR(irq);
 24.1793 +
 24.1794 +	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 24.1795 +
 24.1796 +	ack_APIC_irq();
 24.1797 +
 24.1798 +	if (!(v & (1 << (i & 0x1f)))) {
 24.1799 +		atomic_inc(&irq_mis_count);
 24.1800 +		spin_lock(&ioapic_lock);
 24.1801 +		__mask_and_edge_IO_APIC_irq(irq);
 24.1802 +		__unmask_and_level_IO_APIC_irq(irq);
 24.1803 +		spin_unlock(&ioapic_lock);
 24.1804 +	}
 24.1805 +}
 24.1806 +
 24.1807 +#ifdef CONFIG_PCI_MSI
 24.1808 +static unsigned int startup_edge_ioapic_vector(unsigned int vector)
 24.1809 +{
 24.1810 +	int irq = vector_to_irq(vector);
 24.1811 +
 24.1812 +	return startup_edge_ioapic_irq(irq);
 24.1813 +}
 24.1814 +
 24.1815 +static void ack_edge_ioapic_vector(unsigned int vector)
 24.1816 +{
 24.1817 +	int irq = vector_to_irq(vector);
 24.1818 +
 24.1819 +	ack_edge_ioapic_irq(irq);
 24.1820 +}
 24.1821 +
 24.1822 +static unsigned int startup_level_ioapic_vector (unsigned int vector)
 24.1823 +{
 24.1824 +	int irq = vector_to_irq(vector);
 24.1825 +
 24.1826 +	return startup_level_ioapic_irq (irq);
 24.1827 +}
 24.1828 +
 24.1829 +static void end_level_ioapic_vector (unsigned int vector)
 24.1830 +{
 24.1831 +	int irq = vector_to_irq(vector);
 24.1832 +
 24.1833 +	end_level_ioapic_irq(irq);
 24.1834 +}
 24.1835 +
 24.1836 +static void mask_IO_APIC_vector (unsigned int vector)
 24.1837 +{
 24.1838 +	int irq = vector_to_irq(vector);
 24.1839 +
 24.1840 +	mask_IO_APIC_irq(irq);
 24.1841 +}
 24.1842 +
 24.1843 +static void unmask_IO_APIC_vector (unsigned int vector)
 24.1844 +{
 24.1845 +	int irq = vector_to_irq(vector);
 24.1846 +
 24.1847 +	unmask_IO_APIC_irq(irq);
 24.1848 +}
 24.1849 +
 24.1850 +static void set_ioapic_affinity_vector (unsigned int vector,
 24.1851 +					cpumask_t cpu_mask)
 24.1852 +{
 24.1853 +	int irq = vector_to_irq(vector);
 24.1854 +
 24.1855 +	set_ioapic_affinity_irq(irq, cpu_mask);
 24.1856 +}
 24.1857 +#endif
 24.1858 +
 24.1859 +/*
 24.1860 + * Level and edge triggered IO-APIC interrupts need different handling,
 24.1861 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be
 24.1862 + * handled with the level-triggered descriptor, but that one has slightly
 24.1863 + * more overhead. Level-triggered interrupts cannot be handled with the
 24.1864 + * edge-triggered handler, without risking IRQ storms and other ugly
 24.1865 + * races.
 24.1866 + */
 24.1867 +static struct hw_interrupt_type ioapic_edge_type = {
 24.1868 +	.typename 	= "IO-APIC-edge",
 24.1869 +	.startup 	= startup_edge_ioapic,
 24.1870 +	.shutdown 	= shutdown_edge_ioapic,
 24.1871 +	.enable 	= enable_edge_ioapic,
 24.1872 +	.disable 	= disable_edge_ioapic,
 24.1873 +	.ack 		= ack_edge_ioapic,
 24.1874 +	.end 		= end_edge_ioapic,
 24.1875 +	.set_affinity 	= set_ioapic_affinity,
 24.1876 +};
 24.1877 +
 24.1878 +static struct hw_interrupt_type ioapic_level_type = {
 24.1879 +	.typename 	= "IO-APIC-level",
 24.1880 +	.startup 	= startup_level_ioapic,
 24.1881 +	.shutdown 	= shutdown_level_ioapic,
 24.1882 +	.enable 	= enable_level_ioapic,
 24.1883 +	.disable 	= disable_level_ioapic,
 24.1884 +	.ack 		= mask_and_ack_level_ioapic,
 24.1885 +	.end 		= end_level_ioapic,
 24.1886 +	.set_affinity 	= set_ioapic_affinity,
 24.1887 +};
 24.1888 +#endif /* !CONFIG_XEN */
 24.1889 +
 24.1890 +static inline void init_IO_APIC_traps(void)
 24.1891 +{
 24.1892 +	int irq;
 24.1893 +
 24.1894 +	/*
 24.1895 +	 * NOTE! The local APIC isn't very good at handling
 24.1896 +	 * multiple interrupts at the same interrupt level.
 24.1897 +	 * As the interrupt level is determined by taking the
 24.1898 +	 * vector number and shifting that right by 4, we
 24.1899 +	 * want to spread these out a bit so that they don't
 24.1900 +	 * all fall in the same interrupt level.
 24.1901 +	 *
 24.1902 +	 * Also, we've got to be careful not to trash gate
 24.1903 +	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 24.1904 +	 */
 24.1905 +	for (irq = 0; irq < NR_IRQS ; irq++) {
 24.1906 +		int tmp = irq;
 24.1907 +		if (use_pci_vector()) {
 24.1908 +			if (!platform_legacy_irq(tmp))
 24.1909 +				if ((tmp = vector_to_irq(tmp)) == -1)
 24.1910 +					continue;
 24.1911 +		}
 24.1912 +		if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
 24.1913 +			/*
 24.1914 +			 * Hmm.. We don't have an entry for this,
 24.1915 +			 * so default to an old-fashioned 8259
 24.1916 +			 * interrupt if we can..
 24.1917 +			 */
 24.1918 +			if (irq < 16)
 24.1919 +				make_8259A_irq(irq);
 24.1920 +#ifndef CONFIG_XEN
 24.1921 +			else
 24.1922 +				/* Strange. Oh, well.. */
 24.1923 +				irq_desc[irq].handler = &no_irq_type;
 24.1924 +#endif
 24.1925 +		}
 24.1926 +	}
 24.1927 +}
 24.1928 +
 24.1929 +#ifndef CONFIG_XEN
 24.1930 +static void enable_lapic_irq (unsigned int irq)
 24.1931 +{
 24.1932 +	unsigned long v;
 24.1933 +
 24.1934 +	v = apic_read(APIC_LVT0);
 24.1935 +	apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
 24.1936 +}
 24.1937 +
 24.1938 +static void disable_lapic_irq (unsigned int irq)
 24.1939 +{
 24.1940 +	unsigned long v;
 24.1941 +
 24.1942 +	v = apic_read(APIC_LVT0);
 24.1943 +	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
 24.1944 +}
 24.1945 +
 24.1946 +static void ack_lapic_irq (unsigned int irq)
 24.1947 +{
 24.1948 +	ack_APIC_irq();
 24.1949 +}
 24.1950 +
 24.1951 +static void end_lapic_irq (unsigned int i) { /* nothing */ }
 24.1952 +
 24.1953 +static struct hw_interrupt_type lapic_irq_type = {
 24.1954 +	.typename 	= "local-APIC-edge",
 24.1955 +	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
 24.1956 +	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
 24.1957 +	.enable 	= enable_lapic_irq,
 24.1958 +	.disable 	= disable_lapic_irq,
 24.1959 +	.ack 		= ack_lapic_irq,
 24.1960 +	.end 		= end_lapic_irq
 24.1961 +};
 24.1962 +
 24.1963 +static void setup_nmi (void)
 24.1964 +{
 24.1965 +	/*
 24.1966 + 	 * Dirty trick to enable the NMI watchdog ...
 24.1967 +	 * We put the 8259A master into AEOI mode and
 24.1968 +	 * unmask on all local APICs LVT0 as NMI.
 24.1969 +	 *
 24.1970 +	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
 24.1971 +	 * is from Maciej W. Rozycki - so we do not have to EOI from
 24.1972 +	 * the NMI handler or the timer interrupt.
 24.1973 +	 */ 
 24.1974 +	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 24.1975 +
 24.1976 +	on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
 24.1977 +
 24.1978 +	apic_printk(APIC_VERBOSE, " done.\n");
 24.1979 +}
 24.1980 +
 24.1981 +/*
 24.1982 + * This looks a bit hackish but it's about the only one way of sending
 24.1983 + * a few INTA cycles to 8259As and any associated glue logic.  ICR does
 24.1984 + * not support the ExtINT mode, unfortunately.  We need to send these
 24.1985 + * cycles as some i82489DX-based boards have glue logic that keeps the
 24.1986 + * 8259A interrupt line asserted until INTA.  --macro
 24.1987 + */
 24.1988 +static inline void unlock_ExtINT_logic(void)
 24.1989 +{
 24.1990 +	int pin, i;
 24.1991 +	struct IO_APIC_route_entry entry0, entry1;
 24.1992 +	unsigned char save_control, save_freq_select;
 24.1993 +	unsigned long flags;
 24.1994 +
 24.1995 +	pin = find_isa_irq_pin(8, mp_INT);
 24.1996 +	if (pin == -1)
 24.1997 +		return;
 24.1998 +
 24.1999 +	spin_lock_irqsave(&ioapic_lock, flags);
 24.2000 +	*(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
 24.2001 +	*(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
 24.2002 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 24.2003 +	clear_IO_APIC_pin(0, pin);
 24.2004 +
 24.2005 +	memset(&entry1, 0, sizeof(entry1));
 24.2006 +
 24.2007 +	entry1.dest_mode = 0;			/* physical delivery */
 24.2008 +	entry1.mask = 0;			/* unmask IRQ now */
 24.2009 +	entry1.dest.physical.physical_dest = hard_smp_processor_id();
 24.2010 +	entry1.delivery_mode = dest_ExtINT;
 24.2011 +	entry1.polarity = entry0.polarity;
 24.2012 +	entry1.trigger = 0;
 24.2013 +	entry1.vector = 0;
 24.2014 +
 24.2015 +	spin_lock_irqsave(&ioapic_lock, flags);
 24.2016 +	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
 24.2017 +	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
 24.2018 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 24.2019 +
 24.2020 +	save_control = CMOS_READ(RTC_CONTROL);
 24.2021 +	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
 24.2022 +	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
 24.2023 +		   RTC_FREQ_SELECT);
 24.2024 +	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
 24.2025 +
 24.2026 +	i = 100;
 24.2027 +	while (i-- > 0) {
 24.2028 +		mdelay(10);
 24.2029 +		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
 24.2030 +			i -= 10;
 24.2031 +	}
 24.2032 +
 24.2033 +	CMOS_WRITE(save_control, RTC_CONTROL);
 24.2034 +	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
 24.2035 +	clear_IO_APIC_pin(0, pin);
 24.2036 +
 24.2037 +	spin_lock_irqsave(&ioapic_lock, flags);
 24.2038 +	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
 24.2039 +	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
 24.2040 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 24.2041 +}
 24.2042 +
 24.2043 +/*
 24.2044 + * This code may look a bit paranoid, but it's supposed to cooperate with
 24.2045 + * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
 24.2046 + * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
 24.2047 + * fanatically on his truly buggy board.
 24.2048 + */
 24.2049 +static inline void check_timer(void)
 24.2050 +{
 24.2051 +	int pin1, pin2;
 24.2052 +	int vector;
 24.2053 +
 24.2054 +	/*
 24.2055 +	 * get/set the timer IRQ vector:
 24.2056 +	 */
 24.2057 +	disable_8259A_irq(0);
 24.2058 +	vector = assign_irq_vector(0);
 24.2059 +	set_intr_gate(vector, interrupt[0]);
 24.2060 +
 24.2061 +	/*
 24.2062 +	 * Subtle, code in do_timer_interrupt() expects an AEOI
 24.2063 +	 * mode for the 8259A whenever interrupts are routed
 24.2064 +	 * through I/O APICs.  Also IRQ0 has to be enabled in
 24.2065 +	 * the 8259A which implies the virtual wire has to be
 24.2066 +	 * disabled in the local APIC.
 24.2067 +	 */
 24.2068 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 24.2069 +	init_8259A(1);
 24.2070 +	timer_ack = 1;
 24.2071 +	enable_8259A_irq(0);
 24.2072 +
 24.2073 +	pin1 = find_isa_irq_pin(0, mp_INT);
 24.2074 +	pin2 = find_isa_irq_pin(0, mp_ExtINT);
 24.2075 +
 24.2076 +	printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
 24.2077 +
 24.2078 +	if (pin1 != -1) {
 24.2079 +		/*
 24.2080 +		 * Ok, does IRQ0 through the IOAPIC work?
 24.2081 +		 */
 24.2082 +		unmask_IO_APIC_irq(0);
 24.2083 +		if (timer_irq_works()) {
 24.2084 +			if (nmi_watchdog == NMI_IO_APIC) {
 24.2085 +				disable_8259A_irq(0);
 24.2086 +				setup_nmi();
 24.2087 +				enable_8259A_irq(0);
 24.2088 +				check_nmi_watchdog();
 24.2089 +			}
 24.2090 +			return;
 24.2091 +		}
 24.2092 +		clear_IO_APIC_pin(0, pin1);
 24.2093 +		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
 24.2094 +	}
 24.2095 +
 24.2096 +	printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
 24.2097 +	if (pin2 != -1) {
 24.2098 +		printk("\n..... (found pin %d) ...", pin2);
 24.2099 +		/*
 24.2100 +		 * legacy devices should be connected to IO APIC #0
 24.2101 +		 */
 24.2102 +		setup_ExtINT_IRQ0_pin(pin2, vector);
 24.2103 +		if (timer_irq_works()) {
 24.2104 +			printk("works.\n");
 24.2105 +			if (pin1 != -1)
 24.2106 +				replace_pin_at_irq(0, 0, pin1, 0, pin2);
 24.2107 +			else
 24.2108 +				add_pin_to_irq(0, 0, pin2);
 24.2109 +			if (nmi_watchdog == NMI_IO_APIC) {
 24.2110 +				setup_nmi();
 24.2111 +				check_nmi_watchdog();
 24.2112 +			}
 24.2113 +			return;
 24.2114 +		}
 24.2115 +		/*
 24.2116 +		 * Cleanup, just in case ...
 24.2117 +		 */
 24.2118 +		clear_IO_APIC_pin(0, pin2);
 24.2119 +	}
 24.2120 +	printk(" failed.\n");
 24.2121 +
 24.2122 +	if (nmi_watchdog == NMI_IO_APIC) {
 24.2123 +		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
 24.2124 +		nmi_watchdog = 0;
 24.2125 +	}
 24.2126 +
 24.2127 +	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 24.2128 +
 24.2129 +	disable_8259A_irq(0);
 24.2130 +	irq_desc[0].handler = &lapic_irq_type;
 24.2131 +	apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
 24.2132 +	enable_8259A_irq(0);
 24.2133 +
 24.2134 +	if (timer_irq_works()) {
 24.2135 +		printk(" works.\n");
 24.2136 +		return;
 24.2137 +	}
 24.2138 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
 24.2139 +	printk(" failed.\n");
 24.2140 +
 24.2141 +	printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
 24.2142 +
 24.2143 +	timer_ack = 0;
 24.2144 +	init_8259A(0);
 24.2145 +	make_8259A_irq(0);
 24.2146 +	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
 24.2147 +
 24.2148 +	unlock_ExtINT_logic();
 24.2149 +
 24.2150 +	if (timer_irq_works()) {
 24.2151 +		printk(" works.\n");
 24.2152 +		return;
 24.2153 +	}
 24.2154 +	printk(" failed :(.\n");
 24.2155 +	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 24.2156 +		"report.  Then try booting with the 'noapic' option");
 24.2157 +}
 24.2158 +#else
 24.2159 +#define check_timer() ((void)0)
 24.2160 +#endif
 24.2161 +
 24.2162 +/*
 24.2163 + *
 24.2164 + * IRQ's that are handled by the PIC in the MPS IOAPIC case.
 24.2165 + * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
 24.2166 + *   Linux doesn't really care, as it's not actually used
 24.2167 + *   for any interrupt handling anyway.
 24.2168 + */
 24.2169 +#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 24.2170 +
 24.2171 +void __init setup_IO_APIC(void)
 24.2172 +{
 24.2173 +	enable_IO_APIC();
 24.2174 +
 24.2175 +	if (acpi_ioapic)
 24.2176 +		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
 24.2177 +	else
 24.2178 +		io_apic_irqs = ~PIC_IRQS;
 24.2179 +
 24.2180 +	printk("ENABLING IO-APIC IRQs\n");
 24.2181 +
 24.2182 +	/*
 24.2183 +	 * Set up IO-APIC IRQ routing.
 24.2184 +	 */
 24.2185 +	if (!acpi_ioapic)
 24.2186 +		setup_ioapic_ids_from_mpc();
 24.2187 +#ifndef CONFIG_XEN
 24.2188 +	sync_Arb_IDs();
 24.2189 +#endif
 24.2190 +	setup_IO_APIC_irqs();
 24.2191 +	init_IO_APIC_traps();
 24.2192 +	check_timer();
 24.2193 +	if (!acpi_ioapic)
 24.2194 +		print_IO_APIC();
 24.2195 +}
 24.2196 +
 24.2197 +/*
 24.2198 + *	Called after all the initialization is done. If we didnt find any
 24.2199 + *	APIC bugs then we can allow the modify fast path
 24.2200 + */
 24.2201 + 
 24.2202 +static int __init io_apic_bug_finalize(void)
 24.2203 +{
 24.2204 +	if(sis_apic_bug == -1)
 24.2205 +		sis_apic_bug = 0;
 24.2206 +	return 0;
 24.2207 +}
 24.2208 +
 24.2209 +late_initcall(io_apic_bug_finalize);
 24.2210 +
 24.2211 +struct sysfs_ioapic_data {
 24.2212 +	struct sys_device dev;
 24.2213 +	struct IO_APIC_route_entry entry[0];
 24.2214 +};
 24.2215 +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
 24.2216 +
 24.2217 +static int ioapic_suspend(struct sys_device *dev, u32 state)
 24.2218 +{
 24.2219 +	struct IO_APIC_route_entry *entry;
 24.2220 +	struct sysfs_ioapic_data *data;
 24.2221 +	unsigned long flags;
 24.2222 +	int i;
 24.2223 +	
 24.2224 +	data = container_of(dev, struct sysfs_ioapic_data, dev);
 24.2225 +	entry = data->entry;
 24.2226 +	spin_lock_irqsave(&ioapic_lock, flags);
 24.2227 +	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
 24.2228 +		*(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
 24.2229 +		*(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
 24.2230 +	}
 24.2231 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 24.2232 +
 24.2233 +	return 0;
 24.2234 +}
 24.2235 +
 24.2236 +static int ioapic_resume(struct sys_device *dev)
 24.2237 +{
 24.2238 +	struct IO_APIC_route_entry *entry;
 24.2239 +	struct sysfs_ioapic_data *data;
 24.2240 +	unsigned long flags;
 24.2241 +	union IO_APIC_reg_00 reg_00;
 24.2242 +	int i;
 24.2243 +	
 24.2244 +	data = container_of(dev, struct sysfs_ioapic_data, dev);
 24.2245 +	entry = data->entry;
 24.2246 +
 24.2247 +	spin_lock_irqsave(&ioapic_lock, flags);
 24.2248 +	reg_00.raw = io_apic_read(dev->id, 0);
 24.2249 +	if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
 24.2250 +		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
 24.2251 +		io_apic_write(dev->id, 0, reg_00.raw);
 24.2252 +	}
 24.2253 +	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
 24.2254 +		io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
 24.2255 +		io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
 24.2256 +	}
 24.2257 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 24.2258 +
 24.2259 +	return 0;
 24.2260 +}
 24.2261 +
 24.2262 +static struct sysdev_class ioapic_sysdev_class = {
 24.2263 +	set_kset_name("ioapic"),
 24.2264 +	.suspend = ioapic_suspend,
 24.2265 +	.resume = ioapic_resume,
 24.2266 +};
 24.2267 +
 24.2268 +static int __init ioapic_init_sysfs(void)
 24.2269 +{
 24.2270 +	struct sys_device * dev;
 24.2271 +	int i, size, error = 0;
 24.2272 +
 24.2273 +	error = sysdev_class_register(&ioapic_sysdev_class);
 24.2274 +	if (error)
 24.2275 +		return error;
 24.2276 +
 24.2277 +	for (i = 0; i < nr_ioapics; i++ ) {
 24.2278 +		size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
 24.2279 +			* sizeof(struct IO_APIC_route_entry);
 24.2280 +		mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
 24.2281 +		if (!mp_ioapic_data[i]) {
 24.2282 +			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
 24.2283 +			continue;
 24.2284 +		}
 24.2285 +		memset(mp_ioapic_data[i], 0, size);
 24.2286 +		dev = &mp_ioapic_data[i]->dev;
 24.2287 +		dev->id = i; 
 24.2288 +		dev->cls = &ioapic_sysdev_class;
 24.2289 +		error = sysdev_register(dev);
 24.2290 +		if (error) {
 24.2291 +			kfree(mp_ioapic_data[i]);
 24.2292 +			mp_ioapic_data[i] = NULL;
 24.2293 +			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
 24.2294 +			continue;
 24.2295 +		}
 24.2296 +	}
 24.2297 +
 24.2298 +	return 0;
 24.2299 +}
 24.2300 +
 24.2301 +device_initcall(ioapic_init_sysfs);
 24.2302 +
 24.2303 +/* --------------------------------------------------------------------------
 24.2304 +                          ACPI-based IOAPIC Configuration
 24.2305 +   -------------------------------------------------------------------------- */
 24.2306 +
 24.2307  #ifdef CONFIG_ACPI_BOOT
 24.2308 +
 24.2309  int __init io_apic_get_unique_id (int ioapic, int apic_id)
 24.2310  {
 24.2311 +#ifndef CONFIG_XEN
 24.2312  	union IO_APIC_reg_00 reg_00;
 24.2313  	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
 24.2314 +	physid_mask_t tmp;
 24.2315  	unsigned long flags;
 24.2316 +	int i = 0;
 24.2317  
 24.2318  	/*
 24.2319  	 * The P4 platform supports up to 256 APIC IDs on two separate APIC 
 24.2320 @@ -306,8 +2485,45 @@ int __init io_apic_get_unique_id (int io
 24.2321  		apic_id = reg_00.bits.ID;
 24.2322  	}
 24.2323  
 24.2324 +	/*
 24.2325 +	 * Every APIC in a system must have a unique ID or we get lots of nice 
 24.2326 +	 * 'stuck on smp_invalidate_needed IPI wait' messages.
 24.2327 +	 */
 24.2328 +	if (check_apicid_used(apic_id_map, apic_id)) {
 24.2329 +
 24.2330 +		for (i = 0; i < get_physical_broadcast(); i++) {
 24.2331 +			if (!check_apicid_used(apic_id_map, i))
 24.2332 +				break;
 24.2333 +		}
 24.2334 +
 24.2335 +		if (i == get_physical_broadcast())
 24.2336 +			panic("Max apic_id exceeded!\n");
 24.2337 +
 24.2338 +		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
 24.2339 +			"trying %d\n", ioapic, apic_id, i);
 24.2340 +
 24.2341 +		apic_id = i;
 24.2342 +	} 
 24.2343 +
 24.2344 +	tmp = apicid_to_cpu_present(apic_id);
 24.2345 +	physids_or(apic_id_map, apic_id_map, tmp);
 24.2346 +
 24.2347 +	if (reg_00.bits.ID != apic_id) {
 24.2348 +		reg_00.bits.ID = apic_id;
 24.2349 +
 24.2350 +		spin_lock_irqsave(&ioapic_lock, flags);
 24.2351 +		io_apic_write(ioapic, 0, reg_00.raw);
 24.2352 +		reg_00.raw = io_apic_read(ioapic, 0);
 24.2353 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 24.2354 +
 24.2355 +		/* Sanity check */
 24.2356 +		if (reg_00.bits.ID != apic_id)
 24.2357 +			panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
 24.2358 +	}
 24.2359 +
 24.2360  	apic_printk(APIC_VERBOSE, KERN_INFO
 24.2361  			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 24.2362 +#endif /* !CONFIG_XEN */
 24.2363  
 24.2364  	return apic_id;
 24.2365  }
 24.2366 @@ -338,6 +2554,7 @@ int __init io_apic_get_redir_entries (in
 24.2367  	return reg_01.bits.entries;
 24.2368  }
 24.2369  
 24.2370 +
 24.2371  int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
 24.2372  {
 24.2373  	struct IO_APIC_route_entry entry;
 24.2374 @@ -377,12 +2594,10 @@ int io_apic_set_pci_routing (int ioapic,
 24.2375  		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
 24.2376  		edge_level, active_high_low);
 24.2377  
 24.2378 -#ifndef CONFIG_XEN
 24.2379  	ioapic_register_intr(irq, entry.vector, edge_level);
 24.2380  
 24.2381  	if (!ioapic && (irq < 16))
 24.2382  		disable_8259A_irq(irq);
 24.2383 -#endif
 24.2384  
 24.2385  	spin_lock_irqsave(&ioapic_lock, flags);
 24.2386  	io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
 24.2387 @@ -391,4 +2606,5 @@ int io_apic_set_pci_routing (int ioapic,
 24.2388  
 24.2389  	return 0;
 24.2390  }
 24.2391 +
 24.2392  #endif /*CONFIG_ACPI_BOOT*/
    25.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c	Tue May 03 14:42:00 2005 +0000
    25.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c	Tue May 10 08:49:59 2005 +0000
    25.3 @@ -109,7 +109,7 @@ static int MP_valid_apicid(int apicid, i
    25.4  {
    25.5  	return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
    25.6  }
    25.7 -#else
    25.8 +#elif !defined(CONFIG_XEN)
    25.9  static int MP_valid_apicid(int apicid, int version)
   25.10  {
   25.11  	if (version >= 0x14)
   25.12 @@ -119,6 +119,7 @@ static int MP_valid_apicid(int apicid, i
   25.13  }
   25.14  #endif
   25.15  
   25.16 +#ifndef CONFIG_XEN
   25.17  void __init MP_processor_info (struct mpc_config_processor *m)
   25.18  {
   25.19   	int ver, apicid;
   25.20 @@ -217,6 +218,12 @@ void __init MP_processor_info (struct mp
   25.21  	apic_version[m->mpc_apicid] = ver;
   25.22  	bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
   25.23  }
   25.24 +#else
   25.25 +void __init MP_processor_info (struct mpc_config_processor *m)
   25.26 +{
   25.27 +	num_processors++;
   25.28 +}
   25.29 +#endif /* CONFIG_XEN */
   25.30  
   25.31  static void __init MP_bus_info (struct mpc_config_bus *m)
   25.32  {
   25.33 @@ -690,7 +697,7 @@ void __init get_smp_config (void)
   25.34  		 * Read the physical hardware table.  Anything here will
   25.35  		 * override the defaults.
   25.36  		 */
   25.37 -		if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
   25.38 +		if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
   25.39  			smp_found_config = 0;
   25.40  			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
   25.41  			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
   25.42 @@ -743,7 +750,6 @@ static int __init smp_scan_config (unsig
   25.43  			smp_found_config = 1;
   25.44  			printk(KERN_INFO "found SMP MP-table at %08lx\n",
   25.45  						virt_to_phys(mpf));
   25.46 -			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
   25.47  			if (mpf->mpf_physptr) {
   25.48  				/*
   25.49  				 * We cannot access to MPC table to compute
   25.50 @@ -817,12 +823,14 @@ void __init find_smp_config (void)
   25.51  void __init mp_register_lapic_address (
   25.52  	u64			address)
   25.53  {
   25.54 +#ifndef CONFIG_XEN
   25.55  	mp_lapic_addr = (unsigned long) address;
   25.56  
   25.57  	if (boot_cpu_physical_apicid == -1U)
   25.58  		boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
   25.59  
   25.60  	Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
   25.61 +#endif
   25.62  }
   25.63  
   25.64  
   25.65 @@ -842,6 +850,7 @@ void __init mp_register_lapic (
   25.66  	if (id == boot_cpu_physical_apicid)
   25.67  		boot_cpu = 1;
   25.68  
   25.69 +#ifndef CONFIG_XEN
   25.70  	processor.mpc_type = MP_PROCESSOR;
   25.71  	processor.mpc_apicid = id;
   25.72  	processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
   25.73 @@ -852,6 +861,7 @@ void __init mp_register_lapic (
   25.74  	processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
   25.75  	processor.mpc_reserved[0] = 0;
   25.76  	processor.mpc_reserved[1] = 0;
   25.77 +#endif
   25.78  
   25.79  	MP_processor_info(&processor);
   25.80  }
    26.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c	Tue May 03 14:42:00 2005 +0000
    26.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c	Tue May 10 08:49:59 2005 +0000
    26.3 @@ -779,7 +779,7 @@ static void __init parse_cmdline_early (
    26.4  			noexec_setup(from + 7);
    26.5  
    26.6  
    26.7 -#ifdef  CONFIG_X86_SMP
    26.8 +#ifdef  CONFIG_X86_MPPARSE
    26.9  		/*
   26.10  		 * If the BIOS enumerates physical processors before logical,
   26.11  		 * maxcpus=N at enumeration-time can be used to disable HT.
   26.12 @@ -1133,12 +1133,6 @@ static unsigned long __init setup_memory
   26.13  	 */
   26.14  	acpi_reserve_bootmem();
   26.15  #endif
   26.16 -#ifdef CONFIG_X86_FIND_SMP_CONFIG
   26.17 -	/*
   26.18 -	 * Find and reserve possible boot-time SMP configuration:
   26.19 -	 */
   26.20 -	find_smp_config();
   26.21 -#endif
   26.22  
   26.23  #ifdef CONFIG_BLK_DEV_INITRD
   26.24  	if (xen_start_info.mod_start) {
   26.25 @@ -1503,6 +1497,13 @@ void __init setup_arch(char **cmdline_p)
   26.26  #endif
   26.27  	paging_init();
   26.28  
   26.29 +#ifdef CONFIG_X86_FIND_SMP_CONFIG
   26.30 +	/*
   26.31 +	 * Find and reserve possible boot-time SMP configuration:
   26.32 +	 */
   26.33 +	find_smp_config();
   26.34 +#endif
   26.35 +
   26.36  	/* Make sure we have a correctly sized P->M table. */
   26.37  	if (max_pfn != xen_start_info.nr_pages) {
   26.38  		phys_to_machine_mapping = alloc_bootmem_low_pages(
   26.39 @@ -1566,6 +1567,10 @@ void __init setup_arch(char **cmdline_p)
   26.40  	if (efi_enabled)
   26.41  		efi_map_memmap();
   26.42  
   26.43 +	op.cmd             = PHYSDEVOP_SET_IOPL;
   26.44 +	op.u.set_iopl.iopl = current->thread.io_pl = 1;
   26.45 +	HYPERVISOR_physdev_op(&op);
   26.46 +
   26.47  	/*
   26.48  	 * Parse the ACPI tables for possible boot-time SMP configuration.
   26.49  	 */
   26.50 @@ -1583,10 +1588,6 @@ void __init setup_arch(char **cmdline_p)
   26.51  
   26.52  	register_memory();
   26.53  
   26.54 -	op.cmd             = PHYSDEVOP_SET_IOPL;
   26.55 -	op.u.set_iopl.iopl = current->thread.io_pl = 1;
   26.56 -	HYPERVISOR_physdev_op(&op);
   26.57 -
   26.58  	if (xen_start_info.flags & SIF_INITDOMAIN) {
   26.59  		if (!(xen_start_info.flags & SIF_PRIVILEGED))
   26.60  			panic("Xen granted us console access "
    27.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c	Tue May 03 14:42:00 2005 +0000
    27.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c	Tue May 10 08:49:59 2005 +0000
    27.3 @@ -361,6 +361,7 @@ static void __init pagetable_init (void)
    27.4  	make_page_writable(old_pgd);
    27.5  	__flush_tlb_all();
    27.6  	free_bootmem(__pa(old_pgd), PAGE_SIZE);
    27.7 +	init_mm.context.pinned = 1;
    27.8  
    27.9  	kernel_physical_mapping_init(pgd_base);
   27.10  	remap_numa_kva();
   27.11 @@ -562,7 +563,7 @@ void __init paging_init(void)
   27.12  	zone_sizes_init();
   27.13  
   27.14  	/* Switch to the real shared_info page, and clear the dummy page. */
   27.15 -	set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info);
   27.16 +	set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
   27.17  	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   27.18  	memset(empty_zero_page, 0, sizeof(empty_zero_page));
   27.19  
   27.20 @@ -570,10 +571,11 @@ void __init paging_init(void)
   27.21  	/* Setup mapping of lower 1st MB */
   27.22  	for (i = 0; i < NR_FIX_ISAMAPS; i++)
   27.23  		if (xen_start_info.flags & SIF_PRIVILEGED)
   27.24 -			set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
   27.25 +			set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
   27.26  		else
   27.27 -			set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i,
   27.28 -					 virt_to_machine(empty_zero_page));
   27.29 +			__set_fixmap(FIX_ISAMAP_BEGIN - i,
   27.30 +				     virt_to_machine(empty_zero_page),
   27.31 +				     PAGE_KERNEL_RO);
   27.32  #endif
   27.33  }
   27.34  
    28.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c	Tue May 03 14:42:00 2005 +0000
    28.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c	Tue May 10 08:49:59 2005 +0000
    28.3 @@ -256,7 +256,7 @@ void __init *bt_ioremap(unsigned long ph
    28.4  	 */
    28.5  	idx = FIX_BTMAP_BEGIN;
    28.6  	while (nrpages > 0) {
    28.7 -		set_fixmap_ma(idx, phys_addr);
    28.8 +		set_fixmap(idx, phys_addr);
    28.9  		phys_addr += PAGE_SIZE;
   28.10  		--idx;
   28.11  		--nrpages;
    29.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue May 03 14:42:00 2005 +0000
    29.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue May 10 08:49:59 2005 +0000
    29.3 @@ -22,6 +22,7 @@
    29.4  #include <asm/tlb.h>
    29.5  #include <asm/tlbflush.h>
    29.6  #include <asm/io.h>
    29.7 +#include <asm/mmu_context.h>
    29.8  
    29.9  #include <asm-xen/foreign_page.h>
   29.10  
   29.11 @@ -176,18 +177,18 @@ void __set_fixmap (enum fixed_addresses 
   29.12  		BUG();
   29.13  		return;
   29.14  	}
   29.15 -	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
   29.16 -}
   29.17 -
   29.18 -void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
   29.19 -{
   29.20 -	unsigned long address = __fix_to_virt(idx);
   29.21 -
   29.22 -	if (idx >= __end_of_fixed_addresses) {
   29.23 -		BUG();
   29.24 -		return;
   29.25 +	switch (idx) {
   29.26 +	case FIX_WP_TEST:
   29.27 +	case FIX_VSYSCALL:
   29.28 +#ifdef CONFIG_X86_F00F_BUG
   29.29 +	case FIX_F00F_IDT:
   29.30 +#endif
   29.31 +		set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
   29.32 +		break;
   29.33 +	default:
   29.34 +		set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
   29.35 +		break;
   29.36  	}
   29.37 -	set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
   29.38  }
   29.39  
   29.40  pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
   29.41 @@ -480,7 +481,6 @@ void mm_unpin(struct mm_struct *mm)
   29.42  
   29.43  void _arch_exit_mmap(struct mm_struct *mm)
   29.44  {
   29.45 -    unsigned int cpu = smp_processor_id();
   29.46      struct task_struct *tsk = current;
   29.47  
   29.48      task_lock(tsk);
   29.49 @@ -494,9 +494,7 @@ void _arch_exit_mmap(struct mm_struct *m
   29.50          tsk->active_mm = &init_mm;
   29.51          atomic_inc(&init_mm.mm_count);
   29.52  
   29.53 -        cpu_set(cpu, init_mm.cpu_vm_mask);
   29.54 -        load_cr3(swapper_pg_dir);
   29.55 -        cpu_clear(cpu, mm->cpu_vm_mask);
   29.56 +        switch_mm(mm, &init_mm, tsk);
   29.57  
   29.58          atomic_dec(&mm->mm_count);
   29.59          BUG_ON(atomic_read(&mm->mm_count) == 0);
    30.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile	Tue May 03 14:42:00 2005 +0000
    30.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile	Tue May 10 08:49:59 2005 +0000
    30.3 @@ -11,7 +11,8 @@ c-obj-$(CONFIG_PCI_DIRECT)	+= direct.o
    30.4  c-pci-y				:= fixup.o
    30.5  c-pci-$(CONFIG_ACPI_PCI)	+= acpi.o
    30.6  c-pci-y				+= legacy.o
    30.7 -c-pci-y				+= irq.o
    30.8 +# Make sure irq.o gets linked in after legacy.o
    30.9 +l-pci-y				+= irq.o
   30.10  
   30.11  c-pci-$(CONFIG_X86_VISWS)	:= visws.o fixup.o
   30.12  pci-$(CONFIG_X86_VISWS)		:=
   30.13 @@ -26,6 +27,6 @@ c-link	:=
   30.14  $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
   30.15  	@ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
   30.16  
   30.17 -obj-y	+= $(c-obj-y)
   30.18 +obj-y	+= $(c-obj-y) $(l-pci-y)
   30.19  
   30.20  clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
    31.1 --- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	Tue May 03 14:42:00 2005 +0000
    31.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	Tue May 10 08:49:59 2005 +0000
    31.3 @@ -20,10 +20,6 @@
    31.4  #include <asm-xen/linux-public/privcmd.h>
    31.5  #include <asm-xen/gnttab.h>
    31.6  
    31.7 -#ifndef set_fixmap_ma
    31.8 -#define set_fixmap_ma set_fixmap
    31.9 -#endif
   31.10 -
   31.11  #if 1
   31.12  #define ASSERT(_p) \
   31.13      if ( !(_p) ) { printk(KERN_ALERT"Assertion '%s': line %d, file %s\n", \
   31.14 @@ -339,7 +335,7 @@ int gnttab_resume(void)
   31.15      BUG_ON(setup.status != 0);
   31.16  
   31.17      for ( i = 0; i < NR_GRANT_FRAMES; i++ )
   31.18 -        set_fixmap_ma(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
   31.19 +        set_fixmap(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
   31.20  
   31.21      return 0;
   31.22  }
    32.1 --- a/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c	Tue May 03 14:42:00 2005 +0000
    32.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c	Tue May 10 08:49:59 2005 +0000
    32.3 @@ -129,11 +129,7 @@ static void __do_suspend(void)
    32.4      memcpy(&xen_start_info, &suspend_record->resume_info,
    32.5             sizeof(xen_start_info));
    32.6  
    32.7 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    32.8 -    set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info);
    32.9 -#else
   32.10      set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
   32.11 -#endif
   32.12  
   32.13      HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   32.14  
    33.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c	Tue May 03 14:42:00 2005 +0000
    33.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c	Tue May 10 08:49:59 2005 +0000
    33.3 @@ -361,7 +361,14 @@ void __set_fixmap (enum fixed_addresses 
    33.4  		printk("Invalid __set_fixmap\n");
    33.5  		return;
    33.6  	}
    33.7 -	set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL);
    33.8 +	switch (idx) {
    33.9 +	case VSYSCALL_FIRST_PAGE:
   33.10 +		set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL);
   33.11 +		break;
   33.12 +	default:
   33.13 +		set_pte_phys_ma(address, phys, prot);
   33.14 +		break;
   33.15 +	}
   33.16  }
   33.17  
   33.18  
   33.19 @@ -414,18 +421,6 @@ void __set_fixmap_user (enum fixed_addre
   33.20  
   33.21  }
   33.22  
   33.23 -
   33.24 -void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
   33.25 -{ 
   33.26 -	unsigned long address = __fix_to_virt(idx);
   33.27 -
   33.28 -	if (idx >= __end_of_fixed_addresses) {
   33.29 -		printk("Invalid __set_fixmap\n");
   33.30 -		return;
   33.31 -	}
   33.32 -	set_pte_phys_ma(address, phys, prot);
   33.33 -}
   33.34 -
   33.35  unsigned long __initdata table_start, table_end, tables_reserved; 
   33.36  
   33.37  #if 0
   33.38 @@ -632,8 +627,7 @@ void __init paging_init(void)
   33.39  		free_area_init(zones_size);
   33.40  	}
   33.41  
   33.42 -        __set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info, 
   33.43 -                        __pgprot(_KERNPG_TABLE));
   33.44 +        set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
   33.45          HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   33.46  
   33.47          memset(empty_zero_page, 0, sizeof(empty_zero_page));
   33.48 @@ -642,11 +636,11 @@ void __init paging_init(void)
   33.49          /* Setup mapping of lower 1st MB */
   33.50          for (i = 0; i < NR_FIX_ISAMAPS; i++)
   33.51                  if (xen_start_info.flags & SIF_PRIVILEGED)
   33.52 -                        __set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE,
   33.53 -                                __pgprot(_KERNPG_TABLE));
   33.54 +                        set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
   33.55                  else
   33.56 -                        set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i,
   33.57 -                                         virt_to_machine(empty_zero_page));
   33.58 +                        __set_fixmap(FIX_ISAMAP_BEGIN - i,
   33.59 +				     virt_to_machine(empty_zero_page),
   33.60 +				     PAGE_KERNEL_RO);
   33.61  #endif
   33.62  
   33.63  }
    34.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c	Tue May 03 14:42:00 2005 +0000
    34.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c	Tue May 10 08:49:59 2005 +0000
    34.3 @@ -273,7 +273,7 @@ void __init *bt_ioremap(unsigned long ph
    34.4  	 */
    34.5  	idx = FIX_BTMAP_BEGIN;
    34.6  	while (nrpages > 0) {
    34.7 -		set_fixmap_ma(idx, phys_addr);
    34.8 +		set_fixmap(idx, phys_addr);
    34.9  		phys_addr += PAGE_SIZE;
   34.10  		--idx;
   34.11  		--nrpages;
    35.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile	Tue May 03 14:42:00 2005 +0000
    35.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile	Tue May 10 08:49:59 2005 +0000
    35.3 @@ -12,7 +12,7 @@ c-obj-y		:= i386.o
    35.4  c-obj-y		+= fixup.o
    35.5  c-obj-$(CONFIG_ACPI_PCI)	+= acpi.o
    35.6  c-obj-y			+= legacy.o common.o
    35.7 -c-xen-obj-$(CONFIG_PCI_DIRECT)+= direct.o
    35.8 +c-obj-$(CONFIG_PCI_DIRECT)+= direct.o
    35.9  c-xen-obj-y		+= irq.o
   35.10  # mmconfig has a 64bit special
   35.11  c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
    36.1 --- a/linux-2.6.11-xen-sparse/drivers/acpi/tables.c	Tue May 03 14:42:00 2005 +0000
    36.2 +++ b/linux-2.6.11-xen-sparse/drivers/acpi/tables.c	Tue May 10 08:49:59 2005 +0000
    36.3 @@ -581,8 +581,8 @@ acpi_table_init (void)
    36.4  		return -ENODEV;
    36.5  	}
    36.6  
    36.7 -        rsdp = (struct acpi_table_rsdp *) (__fix_to_virt(FIX_ACPI_RSDP_PAGE)
    36.8 -                                           + (rsdp_phys & ~PAGE_MASK));
    36.9 +	rsdp = (struct acpi_table_rsdp *) (__fix_to_virt(FIX_ACPI_RSDP_PAGE) +
   36.10 +					   (rsdp_phys & ~PAGE_MASK));
   36.11  	if (!rsdp) {
   36.12  		printk(KERN_WARNING PREFIX "Unable to map RSDP\n");
   36.13  		return -ENODEV;
    37.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/Makefile	Tue May 03 14:42:00 2005 +0000
    37.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/Makefile	Tue May 10 08:49:59 2005 +0000
    37.3 @@ -3,8 +3,8 @@
    37.4  obj-y	+= console/
    37.5  obj-y	+= evtchn/
    37.6  obj-y	+= balloon/
    37.7 +obj-y	+= privcmd/
    37.8  
    37.9 -obj-$(CONFIG_XEN_PRIVILEGED_GUEST)	+= privcmd/
   37.10  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
   37.11  obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
   37.12  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= blkfront/
    38.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Tue May 03 14:42:00 2005 +0000
    38.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Tue May 10 08:49:59 2005 +0000
    38.3 @@ -50,6 +50,7 @@
    38.4  #include <asm-xen/xen-public/io/netif.h>
    38.5  #include <asm-xen/balloon.h>
    38.6  #include <asm/page.h>
    38.7 +#include <asm/uaccess.h>
    38.8  
    38.9  #ifndef __GFP_NOWARN
   38.10  #define __GFP_NOWARN 0
    39.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c	Tue May 03 14:42:00 2005 +0000
    39.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c	Tue May 10 08:49:59 2005 +0000
    39.3 @@ -219,9 +219,6 @@ static struct file_operations privcmd_fi
    39.4  
    39.5  static int __init privcmd_init(void)
    39.6  {
    39.7 -    if ( !(xen_start_info.flags & SIF_PRIVILEGED) )
    39.8 -        return 0;
    39.9 -
   39.10      privcmd_intf = create_xen_proc_entry("privcmd", 0400);
   39.11      if ( privcmd_intf != NULL )
   39.12          privcmd_intf->proc_fops = &privcmd_file_ops;
    40.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Tue May 03 14:42:00 2005 +0000
    40.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Tue May 10 08:49:59 2005 +0000
    40.3 @@ -80,7 +80,7 @@ enum fixed_addresses {
    40.4  #ifdef CONFIG_ACPI_BOOT
    40.5  	FIX_ACPI_BEGIN,
    40.6  	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
    40.7 -        FIX_ACPI_RSDP_PAGE,
    40.8 +	FIX_ACPI_RSDP_PAGE,
    40.9  #endif
   40.10  #ifdef CONFIG_PCI_MMCONFIG
   40.11  	FIX_PCIE_MCFG,
   40.12 @@ -104,15 +104,9 @@ enum fixed_addresses {
   40.13  
   40.14  extern void __set_fixmap (enum fixed_addresses idx,
   40.15  					unsigned long phys, pgprot_t flags);
   40.16 -extern void __set_fixmap_ma (enum fixed_addresses idx,
   40.17 -					unsigned long mach, pgprot_t flags);
   40.18  
   40.19  #define set_fixmap(idx, phys) \
   40.20  		__set_fixmap(idx, phys, PAGE_KERNEL)
   40.21 -#define set_fixmap_ma(idx, phys) \
   40.22 -		__set_fixmap_ma(idx, phys, PAGE_KERNEL)
   40.23 -#define set_fixmap_ma_ro(idx, phys) \
   40.24 -		__set_fixmap_ma(idx, phys, PAGE_KERNEL_RO)
   40.25  /*
   40.26   * Some hardware wants to get fixmapped without caching.
   40.27   */
    41.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io_apic.h	Tue May 03 14:42:00 2005 +0000
    41.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.3 @@ -1,230 +0,0 @@
    41.4 -#ifndef __ASM_IO_APIC_H
    41.5 -#define __ASM_IO_APIC_H
    41.6 -
    41.7 -#include <linux/config.h>
    41.8 -#include <asm/types.h>
    41.9 -#include <asm/mpspec.h>
   41.10 -
   41.11 -#include <asm-xen/xen-public/xen.h>
   41.12 -#include <asm-xen/xen-public/physdev.h>
   41.13 -
   41.14 -/*
   41.15 - * Intel IO-APIC support for SMP and UP systems.
   41.16 - *
   41.17 - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
   41.18 - */
   41.19 -
   41.20 -#ifdef CONFIG_X86_IO_APIC
   41.21 -
   41.22 -#ifdef CONFIG_PCI_MSI
   41.23 -static inline int use_pci_vector(void)	{return 1;}
   41.24 -static inline void disable_edge_ioapic_vector(unsigned int vector) { }
   41.25 -static inline void mask_and_ack_level_ioapic_vector(unsigned int vector) { }
   41.26 -static inline void end_edge_ioapic_vector (unsigned int vector) { }
   41.27 -#define startup_level_ioapic	startup_level_ioapic_vector
   41.28 -#define shutdown_level_ioapic	mask_IO_APIC_vector
   41.29 -#define enable_level_ioapic	unmask_IO_APIC_vector
   41.30 -#define disable_level_ioapic	mask_IO_APIC_vector
   41.31 -#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_vector
   41.32 -#define end_level_ioapic	end_level_ioapic_vector
   41.33 -#define set_ioapic_affinity	set_ioapic_affinity_vector
   41.34 -
   41.35 -#define startup_edge_ioapic 	startup_edge_ioapic_vector
   41.36 -#define shutdown_edge_ioapic 	disable_edge_ioapic_vector
   41.37 -#define enable_edge_ioapic 	unmask_IO_APIC_vector
   41.38 -#define disable_edge_ioapic 	disable_edge_ioapic_vector
   41.39 -#define ack_edge_ioapic 	ack_edge_ioapic_vector
   41.40 -#define end_edge_ioapic 	end_edge_ioapic_vector
   41.41 -#else
   41.42 -static inline int use_pci_vector(void)	{return 0;}
   41.43 -static inline void disable_edge_ioapic_irq(unsigned int irq) { }
   41.44 -static inline void mask_and_ack_level_ioapic_irq(unsigned int irq) { }
   41.45 -static inline void end_edge_ioapic_irq (unsigned int irq) { }
   41.46 -#define startup_level_ioapic	startup_level_ioapic_irq
   41.47 -#define shutdown_level_ioapic	mask_IO_APIC_irq
   41.48 -#define enable_level_ioapic	unmask_IO_APIC_irq
   41.49 -#define disable_level_ioapic	mask_IO_APIC_irq
   41.50 -#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_irq
   41.51 -#define end_level_ioapic	end_level_ioapic_irq
   41.52 -#define set_ioapic_affinity	set_ioapic_affinity_irq
   41.53 -
   41.54 -#define startup_edge_ioapic 	startup_edge_ioapic_irq
   41.55 -#define shutdown_edge_ioapic 	disable_edge_ioapic_irq
   41.56 -#define enable_edge_ioapic 	unmask_IO_APIC_irq
   41.57 -#define disable_edge_ioapic 	disable_edge_ioapic_irq
   41.58 -#define ack_edge_ioapic 	ack_edge_ioapic_irq
   41.59 -#define end_edge_ioapic 	end_edge_ioapic_irq
   41.60 -#endif
   41.61 -
   41.62 -#define IO_APIC_BASE(idx) \
   41.63 -		((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
   41.64 -		+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
   41.65 -
   41.66 -/*
   41.67 - * The structure of the IO-APIC:
   41.68 - */
   41.69 -union IO_APIC_reg_00 {
   41.70 -	u32	raw;
   41.71 -	struct {
   41.72 -		u32	__reserved_2	: 14,
   41.73 -			LTS		:  1,
   41.74 -			delivery_type	:  1,
   41.75 -			__reserved_1	:  8,
   41.76 -			ID		:  8;
   41.77 -	} __attribute__ ((packed)) bits;
   41.78 -};
   41.79 -
   41.80 -union IO_APIC_reg_01 {
   41.81 -	u32	raw;
   41.82 -	struct {
   41.83 -		u32	version		:  8,
   41.84 -			__reserved_2	:  7,
   41.85 -			PRQ		:  1,
   41.86 -			entries		:  8,
   41.87 -			__reserved_1	:  8;
   41.88 -	} __attribute__ ((packed)) bits;
   41.89 -};
   41.90 -
   41.91 -union IO_APIC_reg_02 {
   41.92 -	u32	raw;
   41.93 -	struct {
   41.94 -		u32	__reserved_2	: 24,
   41.95 -			arbitration	:  4,
   41.96 -			__reserved_1	:  4;
   41.97 -	} __attribute__ ((packed)) bits;
   41.98 -};
   41.99 -
  41.100 -union IO_APIC_reg_03 {
  41.101 -	u32	raw;
  41.102 -	struct {
  41.103 -		u32	boot_DT		:  1,
  41.104 -			__reserved_1	: 31;
  41.105 -	} __attribute__ ((packed)) bits;
  41.106 -};
  41.107 -
  41.108 -/*
  41.109 - * # of IO-APICs and # of IRQ routing registers
  41.110 - */
  41.111 -extern int nr_ioapics;
  41.112 -extern int nr_ioapic_registers[MAX_IO_APICS];
  41.113 -
  41.114 -enum ioapic_irq_destination_types {
  41.115 -	dest_Fixed = 0,
  41.116 -	dest_LowestPrio = 1,
  41.117 -	dest_SMI = 2,
  41.118 -	dest__reserved_1 = 3,
  41.119 -	dest_NMI = 4,
  41.120 -	dest_INIT = 5,
  41.121 -	dest__reserved_2 = 6,
  41.122 -	dest_ExtINT = 7
  41.123 -};
  41.124 -
  41.125 -struct IO_APIC_route_entry {
  41.126 -	__u32	vector		:  8,
  41.127 -		delivery_mode	:  3,	/* 000: FIXED
  41.128 -					 * 001: lowest prio
  41.129 -					 * 111: ExtINT
  41.130 -					 */
  41.131 -		dest_mode	:  1,	/* 0: physical, 1: logical */
  41.132 -		delivery_status	:  1,
  41.133 -		polarity	:  1,
  41.134 -		irr		:  1,
  41.135 -		trigger		:  1,	/* 0: edge, 1: level */
  41.136 -		mask		:  1,	/* 0: enabled, 1: disabled */
  41.137 -		__reserved_2	: 15;
  41.138 -
  41.139 -	union {		struct { __u32
  41.140 -					__reserved_1	: 24,
  41.141 -					physical_dest	:  4,
  41.142 -					__reserved_2	:  4;
  41.143 -			} physical;
  41.144 -
  41.145 -			struct { __u32
  41.146 -					__reserved_1	: 24,
  41.147 -					logical_dest	:  8;
  41.148 -			} logical;
  41.149 -	} dest;
  41.150 -
  41.151 -} __attribute__ ((packed));
  41.152 -
  41.153 -/*
  41.154 - * MP-BIOS irq configuration table structures:
  41.155 - */
  41.156 -
  41.157 -/* I/O APIC entries */
  41.158 -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
  41.159 -
  41.160 -/* # of MP IRQ source entries */
  41.161 -extern int mp_irq_entries;
  41.162 -
  41.163 -/* MP IRQ source entries */
  41.164 -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
  41.165 -
  41.166 -/* non-0 if default (table-less) MP configuration */
  41.167 -extern int mpc_default_type;
  41.168 -
  41.169 -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
  41.170 -{
  41.171 -        physdev_op_t op;
  41.172 -        int ret;
  41.173 -
  41.174 -        op.cmd = PHYSDEVOP_APIC_READ;
  41.175 -        op.u.apic_op.apic = apic;
  41.176 -        op.u.apic_op.offset = reg;
  41.177 -        ret = HYPERVISOR_physdev_op(&op);
  41.178 -        if (ret)
  41.179 -                return ret;
  41.180 -        return op.u.apic_op.value;
  41.181 -}
  41.182 -
  41.183 -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
  41.184 -{
  41.185 -        physdev_op_t op;
  41.186 -        int ret;
  41.187 -
  41.188 -        op.cmd = PHYSDEVOP_APIC_WRITE;
  41.189 -        op.u.apic_op.apic = apic;
  41.190 -        op.u.apic_op.offset = reg;
  41.191 -        op.u.apic_op.value = value;
  41.192 -        ret = HYPERVISOR_physdev_op(&op);
  41.193 -}
  41.194 -
  41.195 -/*
  41.196 - * Re-write a value: to be used for read-modify-write
  41.197 - * cycles where the read already set up the index register.
  41.198 - *
  41.199 - * Older SiS APIC requires we rewrite the index regiser
  41.200 - */
  41.201 -extern int sis_apic_bug;
  41.202 -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
  41.203 -{
  41.204 -	if (sis_apic_bug)
  41.205 -		*IO_APIC_BASE(apic) = reg;
  41.206 -	*(IO_APIC_BASE(apic)+4) = value;
  41.207 -}
  41.208 -
  41.209 -/* 1 if "noapic" boot option passed */
  41.210 -extern int skip_ioapic_setup;
  41.211 -
  41.212 -/*
  41.213 - * If we use the IO-APIC for IRQ routing, disable automatic
  41.214 - * assignment of PCI IRQ's.
  41.215 - */
  41.216 -#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
  41.217 -
  41.218 -#ifdef CONFIG_ACPI_BOOT
  41.219 -extern int io_apic_get_unique_id (int ioapic, int apic_id);
  41.220 -extern int io_apic_get_version (int ioapic);
  41.221 -extern int io_apic_get_redir_entries (int ioapic);
  41.222 -extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low);
  41.223 -#endif /*CONFIG_ACPI_BOOT*/
  41.224 -
  41.225 -extern int (*ioapic_renumber_irq)(int ioapic, int irq);
  41.226 -
  41.227 -#else  /* !CONFIG_X86_IO_APIC */
  41.228 -#define io_apic_assign_pci_irqs 0
  41.229 -#endif
  41.230 -
  41.231 -extern int assign_irq_vector(int irq);
  41.232 -
  41.233 -#endif
    42.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h	Tue May 03 14:42:00 2005 +0000
    42.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h	Tue May 10 08:49:59 2005 +0000
    42.3 @@ -60,17 +60,8 @@ enum fixed_addresses {
    42.4  extern void __set_fixmap (enum fixed_addresses idx,
    42.5  					unsigned long phys, pgprot_t flags);
    42.6  
    42.7 -extern void __set_fixmap_ma (enum fixed_addresses idx,
    42.8 -					unsigned long mach, pgprot_t flags);
    42.9 -
   42.10  #define set_fixmap(idx, phys) \
   42.11  		__set_fixmap(idx, phys, PAGE_KERNEL)
   42.12 -
   42.13 -#define set_fixmap_ma(idx, phys) \
   42.14 -		__set_fixmap_ma(idx, phys, PAGE_KERNEL)
   42.15 -#define set_fixmap_ma_ro(idx, phys) \
   42.16 -		__set_fixmap_ma(idx, phys, PAGE_KERNEL_RO)
   42.17 -
   42.18  /*
   42.19   * Some hardware wants to get fixmapped without caching.
   42.20   */
    43.1 --- a/linux-2.6.11-xen-sparse/mkbuildtree	Tue May 03 14:42:00 2005 +0000
    43.2 +++ b/linux-2.6.11-xen-sparse/mkbuildtree	Tue May 10 08:49:59 2005 +0000
    43.3 @@ -66,7 +66,7 @@ relative_lndir ()
    43.4      (
    43.5      cd $i
    43.6      pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
    43.7 -    for j in `find . -type f -o -type l -maxdepth 1`; do
    43.8 +    for j in `find . -maxdepth 1 -type f -o -type l`; do
    43.9        ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
   43.10      done
   43.11      )
    44.1 --- a/tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c	Tue May 03 14:42:00 2005 +0000
    44.2 +++ b/tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c	Tue May 10 08:49:59 2005 +0000
    44.3 @@ -338,7 +338,9 @@ regsets_store_inferior_registers ()
    44.4  	    }
    44.5  	  else
    44.6  	    {
    44.7 +#ifdef DEBUG
    44.8  	      perror ("Warning: ptrace(regsets_store_inferior_registers)");
    44.9 +#endif
   44.10  	    }
   44.11  	}
   44.12        regset ++;
   44.13 @@ -364,7 +366,6 @@ linux_fetch_registers (int regno)
   44.14  void
   44.15  linux_store_registers (int regno)
   44.16  {
   44.17 -    printf("store %d\n", regno);
   44.18    if (use_regsets_p)
   44.19      {
   44.20        if (regsets_store_inferior_registers () == 0)
    45.1 --- a/tools/libxc/Makefile	Tue May 03 14:42:00 2005 +0000
    45.2 +++ b/tools/libxc/Makefile	Tue May 10 08:49:59 2005 +0000
    45.3 @@ -16,6 +16,7 @@ vpath %c       $(XEN_LIBXUTIL)
    45.4  INCLUDES += -I $(XEN_LIBXUTIL)
    45.5  
    45.6  SRCS     :=
    45.7 +SRCS     += xc_sedf.c
    45.8  SRCS     += xc_bvtsched.c
    45.9  SRCS     += xc_core.c
   45.10  SRCS     += xc_domain.c
    46.1 --- a/tools/libxc/xc.h	Tue May 03 14:42:00 2005 +0000
    46.2 +++ b/tools/libxc/xc.h	Tue May 10 08:49:59 2005 +0000
    46.3 @@ -296,6 +296,14 @@ int xc_bvtsched_domain_get(int xc_handle
    46.4                             long long *warpl,
    46.5                             long long *warpu);
    46.6  
    46.7 +int xc_sedf_domain_set(int xc_handle,
    46.8 +                          u32 domid,
    46.9 +                          u64 period, u64 slice, u64 latency, u16 extratime, u16 weight);
   46.10 +
   46.11 +int xc_sedf_domain_get(int xc_handle,
   46.12 +                          u32 domid,
   46.13 +                          u64* period, u64 *slice, u64 *latency, u16 *extratime, u16* weight);
   46.14 +
   46.15  typedef evtchn_status_t xc_evtchn_status_t;
   46.16  
   46.17  /*
    47.1 --- a/tools/libxc/xc_domain.c	Tue May 03 14:42:00 2005 +0000
    47.2 +++ b/tools/libxc/xc_domain.c	Tue May 10 08:49:59 2005 +0000
    47.3 @@ -43,7 +43,7 @@ int xc_domain_create(int xc_handle,
    47.4          goto fail;
    47.5      }
    47.6  
    47.7 -    return err;
    47.8 +    return 0;
    47.9  
   47.10   fail:
   47.11      errno_saved = errno;
   47.12 @@ -103,6 +103,7 @@ int xc_domain_getinfo(int xc_handle,
   47.13      unsigned int nr_doms;
   47.14      u32 next_domid = first_domid;
   47.15      dom0_op_t op;
   47.16 +    int rc = 0; 
   47.17  
   47.18      for ( nr_doms = 0; nr_doms < max_doms; nr_doms++ )
   47.19      {
   47.20 @@ -110,7 +111,7 @@ int xc_domain_getinfo(int xc_handle,
   47.21          op.u.getdomaininfo.domain = (domid_t)next_domid;
   47.22          op.u.getdomaininfo.exec_domain = 0; // FIX ME?!?
   47.23          op.u.getdomaininfo.ctxt = NULL; /* no exec context info, thanks. */
   47.24 -        if ( do_dom0_op(xc_handle, &op) < 0 )
   47.25 +        if ( (rc = do_dom0_op(xc_handle, &op)) < 0 )
   47.26              break;
   47.27          info->domid   = (u16)op.u.getdomaininfo.domain;
   47.28  
   47.29 @@ -137,6 +138,8 @@ int xc_domain_getinfo(int xc_handle,
   47.30          info++;
   47.31      }
   47.32  
   47.33 +    if(!nr_doms) return rc; 
   47.34 +
   47.35      return nr_doms;
   47.36  }
   47.37  
    48.1 --- a/tools/libxc/xc_physdev.c	Tue May 03 14:42:00 2005 +0000
    48.2 +++ b/tools/libxc/xc_physdev.c	Tue May 10 08:49:59 2005 +0000
    48.3 @@ -16,14 +16,6 @@ int xc_physdev_pci_access_modify(int xc_
    48.4                                   int func,
    48.5                                   int enable)
    48.6  {
    48.7 -    dom0_op_t op;
    48.8 -
    48.9 -    op.cmd = DOM0_PCIDEV_ACCESS;
   48.10 -    op.u.pcidev_access.domain = (domid_t)domid;
   48.11 -    op.u.pcidev_access.bus    = bus;
   48.12 -    op.u.pcidev_access.dev    = dev;
   48.13 -    op.u.pcidev_access.func   = func;
   48.14 -    op.u.pcidev_access.enable = enable;
   48.15 -
   48.16 -    return do_dom0_op(xc_handle, &op);
   48.17 +    errno = ENOSYS;
   48.18 +    return -1;
   48.19  }
    49.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.2 +++ b/tools/libxc/xc_sedf.c	Tue May 10 08:49:59 2005 +0000
    49.3 @@ -0,0 +1,51 @@
    49.4 +/******************************************************************************
    49.5 + * xc_sedf.c
    49.6 + * 
    49.7 + * API for manipulating parameters of the Simple EDF scheduler.
    49.8 + * 
    49.9 + * changes by Stephan Diestelhorst
   49.10 + * based on code
   49.11 + * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge.
   49.12 + */
   49.13 +
   49.14 +#include "xc_private.h"
   49.15 +
   49.16 +int xc_sedf_domain_set(int xc_handle,
   49.17 +                          u32 domid, u64 period, u64 slice,u64 latency, u16 extratime,u16 weight)
   49.18 +{
   49.19 +    dom0_op_t op;
   49.20 +    struct sedf_adjdom *p = &op.u.adjustdom.u.sedf;
   49.21 +
   49.22 +    op.cmd = DOM0_ADJUSTDOM;
   49.23 +    op.u.adjustdom.domain  = (domid_t)domid;
   49.24 +    op.u.adjustdom.sched_id = SCHED_SEDF;
   49.25 +    op.u.adjustdom.direction = SCHED_INFO_PUT;
   49.26 +
   49.27 +    p->period    = period;
   49.28 +    p->slice     = slice;
   49.29 +    p->latency   = latency;
   49.30 +    p->extratime = extratime;
   49.31 +    p->weight    = weight;
   49.32 +    return do_dom0_op(xc_handle, &op);
   49.33 +}
   49.34 +
   49.35 +int xc_sedf_domain_get(int xc_handle, u32 domid, u64 *period, u64 *slice, u64* latency, u16* extratime, u16* weight)
   49.36 +{
   49.37 +    dom0_op_t op;
   49.38 +    int ret;
   49.39 +    struct sedf_adjdom *p = &op.u.adjustdom.u.sedf;
   49.40 +
   49.41 +    op.cmd = DOM0_ADJUSTDOM;    
   49.42 +    op.u.adjustdom.domain = (domid_t)domid;
   49.43 +    op.u.adjustdom.sched_id = SCHED_SEDF;
   49.44 +    op.u.adjustdom.direction = SCHED_INFO_GET;
   49.45 +
   49.46 +    ret = do_dom0_op(xc_handle, &op);
   49.47 +
   49.48 +    *period    = p->period;
   49.49 +    *slice     = p->slice;
   49.50 +    *latency   = p->latency;
   49.51 +    *extratime = p->extratime;
   49.52 +    *weight    = p->weight;
   49.53 +    return ret;
   49.54 +}
    50.1 --- a/tools/libxutil/sxpr_parser.h	Tue May 03 14:42:00 2005 +0000
    50.2 +++ b/tools/libxutil/sxpr_parser.h	Tue May 10 08:49:59 2005 +0000
    50.3 @@ -28,7 +28,7 @@
    50.4  /** Size of a parser input buffer.
    50.5   * Tokens read must fit into this size (including trailing null).
    50.6   */
    50.7 -#define PARSER_BUF_SIZE 1024
    50.8 +#define PARSER_BUF_SIZE 4096
    50.9  
   50.10  struct Parser;
   50.11  typedef int ParserStateFn(struct Parser *, char c);
    51.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Tue May 03 14:42:00 2005 +0000
    51.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Tue May 10 08:49:59 2005 +0000
    51.3 @@ -827,6 +827,52 @@ static PyObject *pyxc_physinfo(PyObject 
    51.4                           "cpu_khz",     info.cpu_khz);
    51.5  }
    51.6  
    51.7 +static PyObject *pyxc_sedf_domain_set(PyObject *self,
    51.8 +                                         PyObject *args,
    51.9 +                                         PyObject *kwds)
   51.10 +{
   51.11 +    XcObject *xc = (XcObject *)self;
   51.12 +    u32 domid;
   51.13 +    u64 period, slice, latency;
   51.14 +    u16 extratime, weight;
   51.15 +    static char *kwd_list[] = { "dom", "period", "slice", "latency", "extratime", "weight",NULL };
   51.16 +    
   51.17 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLLhh", kwd_list, &domid,
   51.18 +                                     &period, &slice, &latency, &extratime, &weight) )
   51.19 +        return NULL;
   51.20 +   if ( xc_sedf_domain_set(xc->xc_handle, domid, period, slice, latency, extratime,weight) != 0 )
   51.21 +        return PyErr_SetFromErrno(xc_error);
   51.22 +
   51.23 +    Py_INCREF(zero);
   51.24 +    return zero;
   51.25 +}
   51.26 +
   51.27 +static PyObject *pyxc_sedf_domain_get(PyObject *self,
   51.28 +                                         PyObject *args,
   51.29 +                                         PyObject *kwds)
   51.30 +{
   51.31 +    XcObject *xc = (XcObject *)self;
   51.32 +    u32 domid;
   51.33 +    u64 period, slice,latency;
   51.34 +    u16 weight, extratime;
   51.35 +    
   51.36 +    static char *kwd_list[] = { "dom", NULL };
   51.37 +
   51.38 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &domid) )
   51.39 +        return NULL;
   51.40 +    
   51.41 +    if ( xc_sedf_domain_get( xc->xc_handle, domid, &period,
   51.42 +                                &slice,&latency,&extratime,&weight) )
   51.43 +        return PyErr_SetFromErrno(xc_error);
   51.44 +
   51.45 +    return Py_BuildValue("{s:i,s:L,s:L,s:L,s:i}",
   51.46 +                         "domain",    domid,
   51.47 +                         "period",    period,
   51.48 +                         "slice",     slice,
   51.49 +			 "latency",   latency,
   51.50 +			 "extratime", extratime);
   51.51 +}
   51.52 +
   51.53  static PyObject *pyxc_shadow_control(PyObject *self,
   51.54                                       PyObject *args,
   51.55                                       PyObject *kwds)
   51.56 @@ -1027,6 +1073,30 @@ static PyMethodDef pyxc_methods[] = {
   51.57        " warpu  [long]: Unwarp requirement.\n"
   51.58        " warpl  [long]: Warp limit,\n"
   51.59      },
   51.60 +    
   51.61 +    { "sedf_domain_set",
   51.62 +      (PyCFunction)pyxc_sedf_domain_set,
   51.63 +      METH_KEYWORDS, "\n"
   51.64 +      "Set the scheduling parameters for a domain when running with Atropos.\n"
   51.65 +      " dom       [int]:  domain to set\n"
   51.66 +      " period    [long]: domain's scheduling period\n"
   51.67 +      " slice     [long]: domain's slice per period\n"
   51.68 +      " latency   [long]: domain's wakeup latency hint\n"
   51.69 +      " extratime [int]:  domain aware of extratime?\n"
   51.70 +      "Returns: [int] 0 on success; -1 on error.\n" },
   51.71 +
   51.72 +    { "sedf_domain_get",
   51.73 +      (PyCFunction)pyxc_sedf_domain_get,
   51.74 +      METH_KEYWORDS, "\n"
   51.75 +      "Get the current scheduling parameters for a domain when running with\n"
   51.76 +      "the Atropos scheduler."
   51.77 +      " dom       [int]: domain to query\n"
   51.78 +      "Returns:   [dict]\n"
   51.79 +      " domain    [int]: domain ID\n"
   51.80 +      " period    [long]: scheduler period\n"
   51.81 +      " slice     [long]: CPU reservation per period\n"
   51.82 +      " latency   [long]: domain's wakeup latency hint\n"
   51.83 +      " extratime [int]:  domain aware of extratime?\n"},
   51.84  
   51.85      { "evtchn_alloc_unbound", 
   51.86        (PyCFunction)pyxc_evtchn_alloc_unbound,
    52.1 --- a/tools/python/xen/xend/XendClient.py	Tue May 03 14:42:00 2005 +0000
    52.2 +++ b/tools/python/xen/xend/XendClient.py	Tue May 10 08:49:59 2005 +0000
    52.3 @@ -260,6 +260,15 @@ class Xend:
    52.4                                'warpl'    : warpl,
    52.5                                'warpu'    : warpu })
    52.6  
    52.7 +    def xend_domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
    52.8 +        return self.xendPost(self.domainurl(id),
    52.9 +                             {'op'        : 'cpu_sedf_set',
   52.10 +                              'period'    : period,
   52.11 +                              'slice'     : slice,
   52.12 +			      'latency'   : latency,
   52.13 +			      'extratime' : extratime,
   52.14 +			      'weight'    : weight })
   52.15 +
   52.16      def xend_domain_maxmem_set(self, id, memory):
   52.17          return self.xendPost(self.domainurl(id),
   52.18                               { 'op'     : 'maxmem_set',
    53.1 --- a/tools/python/xen/xend/XendDomain.py	Tue May 03 14:42:00 2005 +0000
    53.2 +++ b/tools/python/xen/xend/XendDomain.py	Tue May 10 08:49:59 2005 +0000
    53.3 @@ -644,6 +644,24 @@ class XendDomain:
    53.4          except Exception, ex:
    53.5              raise XendError(str(ex))
    53.6      
    53.7 +    
    53.8 +    def domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
    53.9 +        """Set Simple EDF scheduler parameters for a domain.
   53.10 +        """
   53.11 +	dominfo = self.domain_lookup(id)
   53.12 +        try:
   53.13 +            return xc.sedf_domain_set(dominfo.dom, period, slice, latency, extratime, weight)
   53.14 +        except Exception, ex:
   53.15 +            raise XendError(str(ex))
   53.16 +
   53.17 +    def domain_cpu_sedf_get(self, id):
   53.18 +        """Get Atropos scheduler parameters for a domain.
   53.19 +        """
   53.20 +        dominfo = self.domain_lookup(id)
   53.21 +        try:
   53.22 +            return xc.sedf_domain_get(dominfo.dom)
   53.23 +        except Exception, ex:
   53.24 +            raise XendError(str(ex))
   53.25      def domain_device_create(self, id, devconfig):
   53.26          """Create a new device for a domain.
   53.27  
    54.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Tue May 03 14:42:00 2005 +0000
    54.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Tue May 10 08:49:59 2005 +0000
    54.3 @@ -107,6 +107,18 @@ class SrvDomain(SrvDir):
    54.4          val = fn(req.args, {'dom': self.dom.id})
    54.5          return val
    54.6      
    54.7 +    
    54.8 +    def op_cpu_sedf_set(self, op, req):
    54.9 +        fn = FormFn(self.xd.domain_cpu_sedf_set,
   54.10 +                    [['dom', 'str'],
   54.11 +                     ['period', 'int'],
   54.12 +                     ['slice', 'int'],
   54.13 +		     ['latency', 'int'],
   54.14 +		     ['extratime', 'int'],
   54.15 +		     ['weight', 'int']])
   54.16 +        val = fn(req.args, {'dom': self.dom.id})
   54.17 +        return val
   54.18 +
   54.19      def op_maxmem_set(self, op, req):
   54.20          fn = FormFn(self.xd.domain_maxmem_set,
   54.21                      [['dom', 'str'],
    55.1 --- a/tools/python/xen/xm/main.py	Tue May 03 14:42:00 2005 +0000
    55.2 +++ b/tools/python/xen/xm/main.py	Tue May 10 08:49:59 2005 +0000
    55.3 @@ -591,6 +591,23 @@ class ProgBvtslice(Prog):
    55.4  
    55.5  xm.prog(ProgBvtslice)
    55.6  
    55.7 +class ProgSedf(Prog):
    55.8 +    group = 'scheduler'
    55.9 +    name= "sedf"
   55.10 +    info = """Set simple EDF parameters."""
   55.11 +
   55.12 +    def help(self, args):
   55.13 +        print args[0], "DOM PERIOD SLICE LATENCY EXTRATIME WEIGHT"
   55.14 +        print "\nSet simple EDF parameters."
   55.15 +
   55.16 +    def main(self, args):
   55.17 +	if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0])
   55.18 +	dom = args[1]
   55.19 +	v = map(int, args[2:7])
   55.20 +	server.xend_domain_cpu_sedf_set(dom, *v)
   55.21 +
   55.22 +xm.prog(ProgSedf)
   55.23 +
   55.24  class ProgInfo(Prog):
   55.25      group = 'host'
   55.26      name = "info"
    56.1 --- a/xen/arch/ia64/domain.c	Tue May 03 14:42:00 2005 +0000
    56.2 +++ b/xen/arch/ia64/domain.c	Tue May 10 08:49:59 2005 +0000
    56.3 @@ -621,14 +621,12 @@ int construct_dom0(struct domain *d,
    56.4  	unsigned long pkern_entry;
    56.5  	unsigned long pkern_end;
    56.6  
    56.7 -	extern void physdev_init_dom0(struct domain *);
    56.8 -
    56.9  //printf("construct_dom0: starting\n");
   56.10  	/* Sanity! */
   56.11  #ifndef CLONE_DOMAIN0
   56.12  	if ( d != dom0 ) 
   56.13  	    BUG();
   56.14 -	if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
   56.15 +	if ( test_bit(DF_CONSTRUCTED, &d->flags) ) 
   56.16  	    BUG();
   56.17  #endif
   56.18  
   56.19 @@ -755,13 +753,7 @@ int construct_dom0(struct domain *d,
   56.20  #endif
   56.21  	console_endboot(strstr(cmdline, "tty0") != NULL);
   56.22  
   56.23 -	/* DOM0 gets access to everything. */
   56.24 -#ifdef CLONE_DOMAIN0
   56.25 -if (d == dom0)
   56.26 -#endif
   56.27 -	physdev_init_dom0(d);
   56.28 -
   56.29 -	set_bit(DF_CONSTRUCTED, &d->d_flags);
   56.30 +	set_bit(DF_CONSTRUCTED, &d->flags);
   56.31  
   56.32  	new_thread(ed, pkern_entry, 0, 0);
   56.33  	// FIXME: Hack for keyboard input
   56.34 @@ -791,7 +783,7 @@ int construct_domU(struct domain *d,
   56.35  	unsigned long pkern_entry;
   56.36  
   56.37  #ifndef DOMU_AUTO_RESTART
   56.38 -	if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) BUG();
   56.39 +	if ( test_bit(DF_CONSTRUCTED, &d->flags) ) BUG();
   56.40  #endif
   56.41  
   56.42  	printk("*** LOADING DOMAIN %d ***\n",d->id);
   56.43 @@ -824,7 +816,7 @@ int construct_domU(struct domain *d,
   56.44  	loaddomainelfimage(d,image_start);
   56.45  	printk("loaddomainelfimage returns\n");
   56.46  
   56.47 -	set_bit(DF_CONSTRUCTED, &d->d_flags);
   56.48 +	set_bit(DF_CONSTRUCTED, &d->flags);
   56.49  
   56.50  	printk("calling new_thread, entry=%p\n",pkern_entry);
   56.51  #ifdef DOMU_AUTO_RESTART
    57.1 --- a/xen/arch/ia64/idle0_task.c	Tue May 03 14:42:00 2005 +0000
    57.2 +++ b/xen/arch/ia64/idle0_task.c	Tue May 10 08:49:59 2005 +0000
    57.3 @@ -22,7 +22,7 @@
    57.4  #define IDLE0_DOMAIN(_t)             \
    57.5  {                                    \
    57.6      id:          IDLE_DOMAIN_ID,     \
    57.7 -    d_flags:     1<<DF_IDLETASK,     \
    57.8 +    flags:     1<<DF_IDLETASK,       \
    57.9      refcnt:      ATOMIC_INIT(1)      \
   57.10  }
   57.11  
    58.1 --- a/xen/arch/ia64/xenmisc.c	Tue May 03 14:42:00 2005 +0000
    58.2 +++ b/xen/arch/ia64/xenmisc.c	Tue May 10 08:49:59 2005 +0000
    58.3 @@ -133,73 +133,6 @@ void free_page_type(struct pfn_info *pag
    58.4  }
    58.5  
    58.6  ///////////////////////////////
    58.7 -// from arch/x86/pci.c
    58.8 -///////////////////////////////
    58.9 -
   58.10 -int
   58.11 -pcibios_prep_mwi (struct pci_dev *dev)
   58.12 -{
   58.13 -	dummy();
   58.14 -}
   58.15 -
   58.16 -///////////////////////////////
   58.17 -// from arch/x86/pci-irq.c
   58.18 -///////////////////////////////
   58.19 -
   58.20 -void pcibios_enable_irq(struct pci_dev *dev)
   58.21 -{
   58.22 -	dummy();
   58.23 -}
   58.24 -
   58.25 -///////////////////////////////
   58.26 -// from arch/ia64/pci-pc.c
   58.27 -///////////////////////////////
   58.28 -
   58.29 -#include <xen/pci.h>
   58.30 -
   58.31 -int pcibios_enable_device(struct pci_dev *dev, int mask)
   58.32 -{
   58.33 -	dummy();
   58.34 -	return 0;
   58.35 -}
   58.36 -
   58.37 -int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
   58.38 -int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
   58.39 -
   58.40 -//struct pci_fixup pcibios_fixups[] = { { 0 } };
   58.41 -struct pci_fixup pcibios_fixups[] = { { 0 } };
   58.42 -
   58.43 -void
   58.44 -pcibios_align_resource(void *data, struct resource *res,
   58.45 -		       unsigned long size, unsigned long align)
   58.46 -{
   58.47 -	dummy();
   58.48 -}
   58.49 -
   58.50 -void
   58.51 -pcibios_update_resource(struct pci_dev *dev, struct resource *root,
   58.52 -			struct resource *res, int resource)
   58.53 -{
   58.54 -	dummy();
   58.55 -}
   58.56 -
   58.57 -void __devinit  pcibios_fixup_bus(struct pci_bus *b)
   58.58 -{
   58.59 -	dummy();
   58.60 -}
   58.61 -
   58.62 -void __init pcibios_init(void)
   58.63 -{
   58.64 -	dummy();
   58.65 -}
   58.66 -
   58.67 -char * __devinit  pcibios_setup(char *str)
   58.68 -{
   58.69 -	dummy();
   58.70 -	return 0;
   58.71 -}
   58.72 -
   58.73 -///////////////////////////////
   58.74  // from arch/ia64/traps.c
   58.75  ///////////////////////////////
   58.76  
   58.77 @@ -217,33 +150,6 @@ void dump_pageframe_info(struct domain *
   58.78  }
   58.79  
   58.80  ///////////////////////////////
   58.81 -// from common/physdev.c
   58.82 -///////////////////////////////
   58.83 -void
   58.84 -physdev_init_dom0(struct domain *d)
   58.85 -{
   58.86 -}
   58.87 -
   58.88 -int
   58.89 -physdev_pci_access_modify(domid_t id, int bus, int dev, int func, int enable)
   58.90 -{
   58.91 -	return -EINVAL;
   58.92 -}
   58.93 -
   58.94 -void physdev_modify_ioport_access_range(struct domain *d, int enable,
   58.95 -	int port, int num)
   58.96 -{
   58.97 -	printk("physdev_modify_ioport_access_range not implemented\n");
   58.98 -	dummy();
   58.99 -}
  58.100 -
  58.101 -void physdev_destroy_state(struct domain *d)
  58.102 -{
  58.103 -	printk("physdev_destroy_state not implemented\n");
  58.104 -	dummy();
  58.105 -}
  58.106 -
  58.107 -///////////////////////////////
  58.108  // called from arch/ia64/head.S
  58.109  ///////////////////////////////
  58.110  
  58.111 @@ -334,7 +240,7 @@ int id = ((struct exec_domain *)current)
  58.112  if (!cnt[id]--) { printk("%x",id); cnt[id] = 50; }
  58.113  if (!i--) { printk("+",id); cnt[id] = 100; }
  58.114  }
  58.115 -	clear_bit(EDF_RUNNING, &prev->ed_flags);
  58.116 +	clear_bit(EDF_RUNNING, &prev->flags);
  58.117  	//if (!is_idle_task(next->domain) )
  58.118  		//send_guest_virq(next, VIRQ_TIMER);
  58.119  	load_region_regs(current);
  58.120 @@ -364,7 +270,7 @@ loop:
  58.121  	printf(buf);
  58.122  	if (regs) show_registers(regs);
  58.123  	domain_pause_by_systemcontroller(current->domain);
  58.124 -	set_bit(DF_CRASHED, ed->domain->d_flags);
  58.125 +	set_bit(DF_CRASHED, ed->domain->flags);
  58.126  	if (ed->domain->id == 0) {
  58.127  		int i = 1000000000L;
  58.128  		// if domain0 crashes, just periodically print out panic
    59.1 --- a/xen/arch/ia64/xensetup.c	Tue May 03 14:42:00 2005 +0000
    59.2 +++ b/xen/arch/ia64/xensetup.c	Tue May 10 08:49:59 2005 +0000
    59.3 @@ -69,9 +69,6 @@ unsigned char opt_pdb[10] = "none";
    59.4  unsigned int opt_tbuf_size = 10;
    59.5  /* opt_sched: scheduler - default to Borrowed Virtual Time */
    59.6  char opt_sched[10] = "bvt";
    59.7 -/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */
    59.8 -/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */
    59.9 -char opt_physdev_dom0_hide[200] = "";
   59.10  /* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
   59.11  /*                                    level- or edge-triggered.         */
   59.12  /* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
   59.13 @@ -270,7 +267,7 @@ printk("About to call init_idle_task()\n
   59.14      if ( dom0 == NULL )
   59.15          panic("Error creating domain 0\n");
   59.16  
   59.17 -    set_bit(DF_PRIVILEGED, &dom0->d_flags);
   59.18 +    set_bit(DF_PRIVILEGED, &dom0->flags);
   59.19  
   59.20      /*
   59.21       * We're going to setup domain0 using the module(s) that we stashed safely
    60.1 --- a/xen/arch/x86/Makefile	Tue May 03 14:42:00 2005 +0000
    60.2 +++ b/xen/arch/x86/Makefile	Tue May 10 08:49:59 2005 +0000
    60.3 @@ -3,6 +3,7 @@ include $(BASEDIR)/Rules.mk
    60.4  
    60.5  OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S))
    60.6  OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c))
    60.7 +OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c))
    60.8  OBJS += $(patsubst %.c,%.o,$(wildcard mtrr/*.c))
    60.9  
   60.10  OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
   60.11 @@ -11,8 +12,6 @@ ifneq ($(crash_debug),y)
   60.12  OBJS := $(patsubst cdb%.o,,$(OBJS))
   60.13  endif
   60.14  
   60.15 -OBJS := $(filter-out pci%,$(OBJS))
   60.16 -
   60.17  default: $(TARGET)
   60.18  
   60.19  $(TARGET): $(TARGET)-syms boot/mkelf32
    61.1 --- a/xen/arch/x86/Rules.mk	Tue May 03 14:42:00 2005 +0000
    61.2 +++ b/xen/arch/x86/Rules.mk	Tue May 10 08:49:59 2005 +0000
    61.3 @@ -2,8 +2,8 @@
    61.4  # x86-specific definitions
    61.5  
    61.6  CFLAGS  += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
    61.7 -CFLAGS  += -iwithprefix include -Wall -Werror -pipe
    61.8 -CFLAGS  += -I$(BASEDIR)/include -Wno-pointer-arith -Wredundant-decls
    61.9 +CFLAGS  += -iwithprefix include -Wall -Werror -Wno-pointer-arith -pipe
   61.10 +CFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-x86/mach-default
   61.11  
   61.12  ifeq ($(optimize),y)
   61.13  CFLAGS  += -O3 -fomit-frame-pointer
    62.1 --- a/xen/arch/x86/acpi.c	Tue May 03 14:42:00 2005 +0000
    62.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    62.3 @@ -1,721 +0,0 @@
    62.4 -/*
    62.5 - *  acpi.c - Architecture-Specific Low-Level ACPI Support
    62.6 - *
    62.7 - *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
    62.8 - *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
    62.9 - *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
   62.10 - *
   62.11 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   62.12 - *
   62.13 - *  This program is free software; you can redistribute it and/or modify
   62.14 - *  it under the terms of the GNU General Public License as published by
   62.15 - *  the Free Software Foundation; either version 2 of the License, or
   62.16 - *  (at your option) any later version.
   62.17 - *
   62.18 - *  This program is distributed in the hope that it will be useful,
   62.19 - *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   62.20 - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   62.21 - *  GNU General Public License for more details.
   62.22 - *
   62.23 - *  You should have received a copy of the GNU General Public License
   62.24 - *  along with this program; if not, write to the Free Software
   62.25 - *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   62.26 - *
   62.27 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   62.28 - */
   62.29 -
   62.30 -#include <xen/config.h>
   62.31 -#include <xen/kernel.h>
   62.32 -#include <xen/init.h>
   62.33 -#include <xen/types.h>
   62.34 -#include <xen/slab.h>
   62.35 -#include <xen/pci.h>
   62.36 -#include <xen/irq.h>
   62.37 -#include <xen/acpi.h>
   62.38 -#include <asm/mpspec.h>
   62.39 -#include <asm/io.h>
   62.40 -#include <asm/apic.h>
   62.41 -#include <asm/apicdef.h>
   62.42 -#include <asm/page.h>
   62.43 -#include <asm/io_apic.h>
   62.44 -#include <asm/acpi.h>
   62.45 -#include <asm/smpboot.h>
   62.46 -
   62.47 -
   62.48 -#define PREFIX			"ACPI: "
   62.49 -
   62.50 -int acpi_lapic;
   62.51 -int acpi_ioapic;
   62.52 -int acpi_strict;
   62.53 -
   62.54 -acpi_interrupt_flags acpi_sci_flags __initdata;
   62.55 -int acpi_sci_override_gsi __initdata;
   62.56 -/* --------------------------------------------------------------------------
   62.57 -                              Boot-time Configuration
   62.58 -   -------------------------------------------------------------------------- */
   62.59 -
   62.60 -int acpi_noirq __initdata = 0;  /* skip ACPI IRQ initialization */
   62.61 -int acpi_ht __initdata = 1;     /* enable HT */
   62.62 -
   62.63 -enum acpi_irq_model_id		acpi_irq_model;
   62.64 -
   62.65 -
   62.66 -/*
   62.67 - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
   62.68 - * to map the target physical address. The problem is that set_fixmap()
   62.69 - * provides a single page, and it is possible that the page is not
   62.70 - * sufficient.
   62.71 - * By using this area, we can map up to MAX_IO_APICS pages temporarily,
   62.72 - * i.e. until the next __va_range() call.
   62.73 - *
   62.74 - * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
   62.75 - * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
   62.76 - * count idx down while incrementing the phys address.
   62.77 - */
   62.78 -char *__acpi_map_table(unsigned long phys, unsigned long size)
   62.79 -{
   62.80 -	unsigned long base, offset, mapped_size;
   62.81 -	int idx;
   62.82 -
   62.83 -	if (phys + size < 8*1024*1024) 
   62.84 -		return __va(phys); 
   62.85 -
   62.86 -	offset = phys & (PAGE_SIZE - 1);
   62.87 -	mapped_size = PAGE_SIZE - offset;
   62.88 -	set_fixmap(FIX_ACPI_END, phys);
   62.89 -	base = fix_to_virt(FIX_ACPI_END);
   62.90 -
   62.91 -	/*
   62.92 -	 * Most cases can be covered by the below.
   62.93 -	 */
   62.94 -	idx = FIX_ACPI_END;
   62.95 -	while (mapped_size < size) {
   62.96 -		if (--idx < FIX_ACPI_BEGIN)
   62.97 -			return 0;	/* cannot handle this */
   62.98 -		phys += PAGE_SIZE;
   62.99 -		set_fixmap(idx, phys);
  62.100 -		mapped_size += PAGE_SIZE;
  62.101 -	}
  62.102 -
  62.103 -	return ((char *) base + offset);
  62.104 -}
  62.105 -
  62.106 -
  62.107 -#ifdef CONFIG_X86_LOCAL_APIC
  62.108 -
  62.109 -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
  62.110 -
  62.111 -
  62.112 -static int __init
  62.113 -acpi_parse_madt (
  62.114 -	unsigned long		phys_addr,
  62.115 -	unsigned long		size)
  62.116 -{
  62.117 -	struct acpi_table_madt	*madt = NULL;
  62.118 -
  62.119 -	if (!phys_addr || !size)
  62.120 -		return -EINVAL;
  62.121 -
  62.122 -	madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
  62.123 -	if (!madt) {
  62.124 -		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
  62.125 -		return -ENODEV;
  62.126 -	}
  62.127 -
  62.128 -	if (madt->lapic_address)
  62.129 -		acpi_lapic_addr = (u64) madt->lapic_address;
  62.130 -
  62.131 -	printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n",
  62.132 -		madt->lapic_address);
  62.133 -
  62.134 -	detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id);
  62.135 -
  62.136 -	return 0;
  62.137 -}
  62.138 -
  62.139 -
  62.140 -static int __init
  62.141 -acpi_parse_lapic (
  62.142 -	acpi_table_entry_header *header)
  62.143 -{
  62.144 -	struct acpi_table_lapic	*processor = NULL;
  62.145 -
  62.146 -	processor = (struct acpi_table_lapic*) header;
  62.147 -	if (!processor)
  62.148 -		return -EINVAL;
  62.149 -
  62.150 -	acpi_table_print_madt_entry(header);
  62.151 -
  62.152 -	mp_register_lapic (
  62.153 -		processor->id,					   /* APIC ID */
  62.154 -		processor->flags.enabled);			  /* Enabled? */
  62.155 -
  62.156 -	return 0;
  62.157 -}
  62.158 -
  62.159 -
  62.160 -static int __init
  62.161 -acpi_parse_lapic_addr_ovr (
  62.162 -	acpi_table_entry_header *header)
  62.163 -{
  62.164 -	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
  62.165 -
  62.166 -	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
  62.167 -	if (!lapic_addr_ovr)
  62.168 -		return -EINVAL;
  62.169 -
  62.170 -	acpi_lapic_addr = lapic_addr_ovr->address;
  62.171 -
  62.172 -	return 0;
  62.173 -}
  62.174 -
  62.175 -static int __init
  62.176 -acpi_parse_lapic_nmi (
  62.177 -	acpi_table_entry_header *header)
  62.178 -{
  62.179 -	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
  62.180 -
  62.181 -	lapic_nmi = (struct acpi_table_lapic_nmi*) header;
  62.182 -	if (!lapic_nmi)
  62.183 -		return -EINVAL;
  62.184 -
  62.185 -	acpi_table_print_madt_entry(header);
  62.186 -
  62.187 -	if (lapic_nmi->lint != 1)
  62.188 -		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
  62.189 -
  62.190 -	return 0;
  62.191 -}
  62.192 -
  62.193 -#endif /*CONFIG_X86_LOCAL_APIC*/
  62.194 -
  62.195 -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
  62.196 -
  62.197 -static int __init
  62.198 -acpi_parse_ioapic (
  62.199 -	acpi_table_entry_header *header)
  62.200 -{
  62.201 -	struct acpi_table_ioapic *ioapic = NULL;
  62.202 -
  62.203 -	ioapic = (struct acpi_table_ioapic*) header;
  62.204 -	if (!ioapic)
  62.205 -		return -EINVAL;
  62.206 - 
  62.207 -	acpi_table_print_madt_entry(header);
  62.208 -
  62.209 -	mp_register_ioapic (
  62.210 -		ioapic->id,
  62.211 -		ioapic->address,
  62.212 -		ioapic->global_irq_base);
  62.213 - 
  62.214 -	return 0;
  62.215 -}
  62.216 -
  62.217 -/*
  62.218 - * Parse Interrupt Source Override for the ACPI SCI
  62.219 - */
  62.220 -static void
  62.221 -acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
  62.222 -{
  62.223 -	if (trigger == 0)	/* compatible SCI trigger is level */
  62.224 -		trigger = 3;
  62.225 -
  62.226 -	if (polarity == 0)	/* compatible SCI polarity is low */
  62.227 -		polarity = 3;
  62.228 -
  62.229 -	/* Command-line over-ride via acpi_sci= */
  62.230 -	if (acpi_sci_flags.trigger)
  62.231 -		trigger = acpi_sci_flags.trigger;
  62.232 -
  62.233 -	if (acpi_sci_flags.polarity)
  62.234 -		polarity = acpi_sci_flags.polarity;
  62.235 -
  62.236 -	/*
  62.237 - 	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
  62.238 -	 * If GSI is < 16, this will update its flags,
  62.239 -	 * else it will create a new mp_irqs[] entry.
  62.240 -	 */
  62.241 -	mp_override_legacy_irq(gsi, polarity, trigger, gsi);
  62.242 -
  62.243 -	/*
  62.244 -	 * stash over-ride to indicate we've been here
  62.245 -	 * and for later update of acpi_fadt
  62.246 -	 */
  62.247 -	acpi_sci_override_gsi = gsi;
  62.248 -	return;
  62.249 -}
  62.250 -
  62.251 -static int __init
  62.252 -acpi_parse_fadt(unsigned long phys, unsigned long size)
  62.253 -{
  62.254 -        struct fadt_descriptor_rev2 *fadt =0;
  62.255 -
  62.256 -        fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
  62.257 -        if (!fadt) {
  62.258 -                printk(KERN_WARNING PREFIX "Unable to map FADT\n");
  62.259 -                return 0;
  62.260 -        }
  62.261 -
  62.262 -#ifdef  CONFIG_ACPI_INTERPRETER
  62.263 -        /* initialize sci_int early for INT_SRC_OVR MADT parsing */
  62.264 -        acpi_fadt.sci_int = fadt->sci_int;
  62.265 -#endif
  62.266 -
  62.267 -        return 0;
  62.268 -}
  62.269 -
  62.270 -
  62.271 -static int __init
  62.272 -acpi_parse_int_src_ovr (
  62.273 -	acpi_table_entry_header *header)
  62.274 -{
  62.275 -	struct acpi_table_int_src_ovr *intsrc = NULL;
  62.276 -
  62.277 -	intsrc = (struct acpi_table_int_src_ovr*) header;
  62.278 -	if (!intsrc)
  62.279 -		return -EINVAL;
  62.280 -
  62.281 -	acpi_table_print_madt_entry(header);
  62.282 -
  62.283 -	if (intsrc->bus_irq == acpi_fadt.sci_int) {
  62.284 -		acpi_sci_ioapic_setup(intsrc->global_irq,
  62.285 -			intsrc->flags.polarity, intsrc->flags.trigger);
  62.286 -		return 0;
  62.287 -	}
  62.288 -
  62.289 -	mp_override_legacy_irq (
  62.290 -		intsrc->bus_irq,
  62.291 -		intsrc->flags.polarity,
  62.292 -		intsrc->flags.trigger,
  62.293 -		intsrc->global_irq);
  62.294 -
  62.295 -	return 0;
  62.296 -}
  62.297 -
  62.298 -
  62.299 -static int __init
  62.300 -acpi_parse_nmi_src (
  62.301 -	acpi_table_entry_header *header)
  62.302 -{
  62.303 -	struct acpi_table_nmi_src *nmi_src = NULL;
  62.304 -
  62.305 -	nmi_src = (struct acpi_table_nmi_src*) header;
  62.306 -	if (!nmi_src)
  62.307 -		return -EINVAL;
  62.308 -
  62.309 -	acpi_table_print_madt_entry(header);
  62.310 -
  62.311 -	/* TBD: Support nimsrc entries? */
  62.312 -
  62.313 -	return 0;
  62.314 -}
  62.315 -
  62.316 -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
  62.317 -
  62.318 -
  62.319 -static unsigned long __init
  62.320 -acpi_scan_rsdp (
  62.321 -	unsigned long		start,
  62.322 -	unsigned long		length)
  62.323 -{
  62.324 -	unsigned long		offset = 0;
  62.325 -	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
  62.326 -
  62.327 -	/*
  62.328 -	 * Scan all 16-byte boundaries of the physical memory region for the
  62.329 -	 * RSDP signature.
  62.330 -	 */
  62.331 -	for (offset = 0; offset < length; offset += 16) {
  62.332 -		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
  62.333 -			continue;
  62.334 -		return (start + offset);
  62.335 -	}
  62.336 -
  62.337 -	return 0;
  62.338 -}
  62.339 -
  62.340 -
  62.341 -unsigned long __init
  62.342 -acpi_find_rsdp (void)
  62.343 -{
  62.344 -	unsigned long		rsdp_phys = 0;
  62.345 -
  62.346 -	/*
  62.347 -	 * Scan memory looking for the RSDP signature. First search EBDA (low
  62.348 -	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
  62.349 -	 */
  62.350 -	rsdp_phys = acpi_scan_rsdp (0, 0x400);
  62.351 -	if (!rsdp_phys)
  62.352 -		rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
  62.353 -
  62.354 -	return rsdp_phys;
  62.355 -}
  62.356 -
  62.357 -
  62.358 -/*
  62.359 - * acpi_boot_init()
  62.360 - *  called from setup_arch(), always.
  62.361 - *	1. maps ACPI tables for later use
  62.362 - *	2. enumerates lapics
  62.363 - *	3. enumerates io-apics
  62.364 - *
  62.365 - * side effects:
  62.366 - * 	acpi_lapic = 1 if LAPIC found
  62.367 - *	acpi_ioapic = 1 if IOAPIC found
  62.368 - *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
  62.369 - *	if acpi_blacklisted() disable_acpi()
  62.370 - *	acpi_irq_model=...
  62.371 - *	...
  62.372 - *
  62.373 - * return value: (currently ignored)
  62.374 - *	0: success
  62.375 - *	!0: failure
  62.376 - */
  62.377 -int __init
  62.378 -acpi_boot_init (void)
  62.379 -{
  62.380 -	int			result = 0;
  62.381 -
  62.382 -	if (acpi_disabled && !acpi_ht)
  62.383 -		return(1);
  62.384 -
  62.385 -	/*
  62.386 -	 * The default interrupt routing model is PIC (8259).  This gets
  62.387 -	 * overriden if IOAPICs are enumerated (below).
  62.388 -	 */
  62.389 -	acpi_irq_model = ACPI_IRQ_MODEL_PIC;
  62.390 -
  62.391 -	/* 
  62.392 -	 * Initialize the ACPI boot-time table parser.
  62.393 -	 */
  62.394 -	result = acpi_table_init();
  62.395 -	if (result) {
  62.396 -		disable_acpi();
  62.397 -		return result;
  62.398 -	}
  62.399 -
  62.400 -	result = acpi_blacklisted();
  62.401 -	if (result) {
  62.402 -		printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
  62.403 -		disable_acpi();
  62.404 -		return result;
  62.405 -	}
  62.406 -
  62.407 -#ifdef CONFIG_X86_LOCAL_APIC
  62.408 -
  62.409 -	/* 
  62.410 -	 * MADT
  62.411 -	 * ----
  62.412 -	 * Parse the Multiple APIC Description Table (MADT), if exists.
  62.413 -	 * Note that this table provides platform SMP configuration 
  62.414 -	 * information -- the successor to MPS tables.
  62.415 -	 */
  62.416 -
  62.417 -	result = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
  62.418 -	if (!result) {
  62.419 -		return 0;
  62.420 -	}
  62.421 -	else if (result < 0) {
  62.422 -		printk(KERN_ERR PREFIX "Error parsing MADT\n");
  62.423 -		return result;
  62.424 -	}
  62.425 -	else if (result > 1) 
  62.426 -		printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n");
  62.427 -
  62.428 -	/* 
  62.429 -	 * Local APIC
  62.430 -	 * ----------
  62.431 -	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
  62.432 -	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
  62.433 -	 */
  62.434 -
  62.435 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr);
  62.436 -	if (result < 0) {
  62.437 -		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
  62.438 -		return result;
  62.439 -	}
  62.440 -
  62.441 -	mp_register_lapic_address(acpi_lapic_addr);
  62.442 -
  62.443 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic);
  62.444 -	if (!result) { 
  62.445 -		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
  62.446 -		/* TBD: Cleanup to allow fallback to MPS */
  62.447 -		return -ENODEV;
  62.448 -	}
  62.449 -	else if (result < 0) {
  62.450 -		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
  62.451 -		/* TBD: Cleanup to allow fallback to MPS */
  62.452 -		return result;
  62.453 -	}
  62.454 -
  62.455 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi);
  62.456 -	if (result < 0) {
  62.457 -		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
  62.458 -		/* TBD: Cleanup to allow fallback to MPS */
  62.459 -		return result;
  62.460 -	}
  62.461 -
  62.462 -	acpi_lapic = 1;
  62.463 -
  62.464 -#endif /*CONFIG_X86_LOCAL_APIC*/
  62.465 -
  62.466 -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
  62.467 -
  62.468 -	/* 
  62.469 -	 * I/O APIC 
  62.470 -	 * --------
  62.471 -	 */
  62.472 -
  62.473 -	/*
  62.474 -	 * ACPI interpreter is required to complete interrupt setup,
  62.475 -	 * so if it is off, don't enumerate the io-apics with ACPI.
  62.476 -	 * If MPS is present, it will handle them,
  62.477 -	 * otherwise the system will stay in PIC mode
  62.478 -	 */
  62.479 -	if (acpi_disabled || acpi_noirq) {
  62.480 -		return 1;
  62.481 -	}
  62.482 -
  62.483 -	/*
  62.484 -	 * if "noapic" boot option, don't look for IO-APICs
  62.485 -	 */
  62.486 -	if (ioapic_setup_disabled()) {
  62.487 -		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
  62.488 -			"due to 'noapic' option.\n");
  62.489 -		return 1;
  62.490 -        }
  62.491 -
  62.492 -
  62.493 -	result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic);
  62.494 -	if (!result) { 
  62.495 -		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
  62.496 -		return -ENODEV;
  62.497 -	}
  62.498 -	else if (result < 0) {
  62.499 -		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
  62.500 -		return result;
  62.501 -	}
  62.502 -
  62.503 -	/* Build a default routing table for legacy (ISA) interrupts. */
  62.504 -	mp_config_acpi_legacy_irqs();
  62.505 -
  62.506 -	/* Record sci_int for use when looking for MADT sci_int override */
  62.507 -	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
  62.508 -
  62.509 -	result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr);
  62.510 -	if (result < 0) {
  62.511 -		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
  62.512 -		/* TBD: Cleanup to allow fallback to MPS */
  62.513 -		return result;
  62.514 -	}
  62.515 -
  62.516 -	/*
  62.517 -	 * If BIOS did not supply an INT_SRC_OVR for the SCI
  62.518 -	 * pretend we got one so we can set the SCI flags.
  62.519 -	 */
  62.520 -	if (!acpi_sci_override_gsi)
  62.521 -		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
  62.522 -
  62.523 -	result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src);
  62.524 -	if (result < 0) {
  62.525 -		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
  62.526 -		/* TBD: Cleanup to allow fallback to MPS */
  62.527 -		return result;
  62.528 -	}
  62.529 -
  62.530 -	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
  62.531 -
  62.532 -	acpi_irq_balance_set(NULL);
  62.533 -
  62.534 -	acpi_ioapic = 1;
  62.535 -
  62.536 -	if (acpi_lapic && acpi_ioapic)
  62.537 -		smp_found_config = 1;
  62.538 -
  62.539 -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
  62.540 -
  62.541 -	return 0;
  62.542 -}
  62.543 -
  62.544 -
  62.545 -#ifdef	CONFIG_ACPI_BUS
  62.546 -/*
  62.547 - * acpi_pic_sci_set_trigger()
  62.548 - *
  62.549 - * use ELCR to set PIC-mode trigger type for SCI
  62.550 - *
  62.551 - * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
  62.552 - * it may require Edge Trigger -- use "acpi_sci=edge"
  62.553 - *
  62.554 - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
  62.555 - * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
  62.556 - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
  62.557 - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
  62.558 - */
  62.559 -
  62.560 -void __init
  62.561 -acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
  62.562 -{
  62.563 -	unsigned char mask = 1 << (irq & 7);
  62.564 -	unsigned int port = 0x4d0 + (irq >> 3);
  62.565 -	unsigned char val = inb(port);
  62.566 -
  62.567 -
  62.568 -	printk(PREFIX "IRQ%d SCI:", irq);
  62.569 -	if (!(val & mask)) {
  62.570 -		printk(" Edge");
  62.571 -
  62.572 -		if (trigger == 3) {
  62.573 -			printk(" set to Level");
  62.574 -			outb(val | mask, port);
  62.575 -		}
  62.576 -	} else {
  62.577 -		printk(" Level");
  62.578 -
  62.579 -		if (trigger == 1) {
  62.580 -			printk(" set to Edge");
  62.581 -			outb(val & ~mask, port);
  62.582 -		}
  62.583 -	}
  62.584 -	printk(" Trigger.\n");
  62.585 -}
  62.586 -
  62.587 -#endif /* CONFIG_ACPI_BUS */
  62.588 -
  62.589 -
  62.590 -/* --------------------------------------------------------------------------
  62.591 -                              Low-Level Sleep Support
  62.592 -   -------------------------------------------------------------------------- */
  62.593 -
  62.594 -#ifdef CONFIG_ACPI_SLEEP
  62.595 -
  62.596 -#define DEBUG
  62.597 -
  62.598 -#ifdef DEBUG
  62.599 -#include <xen/serial.h>
  62.600 -#endif
  62.601 -
  62.602 -/* address in low memory of the wakeup routine. */
  62.603 -unsigned long acpi_wakeup_address = 0;
  62.604 -
  62.605 -/* new page directory that we will be using */
  62.606 -static pmd_t *pmd;
  62.607 -
  62.608 -/* saved page directory */
  62.609 -static pmd_t saved_pmd;
  62.610 -
  62.611 -/* page which we'll use for the new page directory */
  62.612 -static pte_t *ptep;
  62.613 -
  62.614 -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
  62.615 -
  62.616 -/*
  62.617 - * acpi_create_identity_pmd
  62.618 - *
  62.619 - * Create a new, identity mapped pmd.
  62.620 - *
  62.621 - * Do this by creating new page directory, and marking all the pages as R/W
  62.622 - * Then set it as the new Page Middle Directory.
  62.623 - * And, of course, flush the TLB so it takes effect.
  62.624 - *
  62.625 - * We save the address of the old one, for later restoration.
  62.626 - */
  62.627 -static void acpi_create_identity_pmd (void)
  62.628 -{
  62.629 -	pgd_t *pgd;
  62.630 -	int i;
  62.631 -
  62.632 -	ptep = (pte_t*)__get_free_page(GFP_KERNEL);
  62.633 -
  62.634 -	/* fill page with low mapping */
  62.635 -	for (i = 0; i < PTRS_PER_PTE; i++)
  62.636 -		set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED));
  62.637 -
  62.638 -	pgd = pgd_offset(current->active_mm, 0);
  62.639 -	pmd = pmd_alloc(current->mm,pgd, 0);
  62.640 -
  62.641 -	/* save the old pmd */
  62.642 -	saved_pmd = *pmd;
  62.643 -
  62.644 -	/* set the new one */
  62.645 -	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep)));
  62.646 -
  62.647 -	/* flush the TLB */
  62.648 -	local_flush_tlb();
  62.649 -}
  62.650 -
  62.651 -/*
  62.652 - * acpi_restore_pmd
  62.653 - *
  62.654 - * Restore the old pmd saved by acpi_create_identity_pmd and
  62.655 - * free the page that said function alloc'd
  62.656 - */
  62.657 -static void acpi_restore_pmd (void)
  62.658 -{
  62.659 -	set_pmd(pmd, saved_pmd);
  62.660 -	local_flush_tlb();
  62.661 -	free_page((unsigned long)ptep);
  62.662 -}
  62.663 -
  62.664 -/**
  62.665 - * acpi_save_state_mem - save kernel state
  62.666 - *
  62.667 - * Create an identity mapped page table and copy the wakeup routine to
  62.668 - * low memory.
  62.669 - */
  62.670 -int acpi_save_state_mem (void)
  62.671 -{
  62.672 -	acpi_create_identity_pmd();
  62.673 -	acpi_copy_wakeup_routine(acpi_wakeup_address);
  62.674 -
  62.675 -	return 0;
  62.676 -}
  62.677 -
  62.678 -/**
  62.679 - * acpi_save_state_disk - save kernel state to disk
  62.680 - *
  62.681 - */
  62.682 -int acpi_save_state_disk (void)
  62.683 -{
  62.684 -	return 1;
  62.685 -}
  62.686 -
  62.687 -/*
  62.688 - * acpi_restore_state
  62.689 - */
  62.690 -void acpi_restore_state_mem (void)
  62.691 -{
  62.692 -	acpi_restore_pmd();
  62.693 -}
  62.694 -
  62.695 -/**
  62.696 - * acpi_reserve_bootmem - do _very_ early ACPI initialisation
  62.697 - *
  62.698 - * We allocate a page in low memory for the wakeup
  62.699 - * routine for when we come back from a sleep state. The
  62.700 - * runtime allocator allows specification of <16M pages, but not
  62.701 - * <1M pages.
  62.702 - */
  62.703 -void __init acpi_reserve_bootmem(void)
  62.704 -{
  62.705 -	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
  62.706 -	if (!acpi_wakeup_address)
  62.707 -		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
  62.708 -}
  62.709 -
  62.710 -void do_suspend_lowlevel_s4bios(int resume)
  62.711 -{
  62.712 -	if (!resume) {
  62.713 -		save_processor_context();
  62.714 -		acpi_save_register_state((unsigned long)&&acpi_sleep_done);
  62.715 -		acpi_enter_sleep_state_s4bios();
  62.716 -		return;
  62.717 -	}
  62.718 -acpi_sleep_done:
  62.719 -	restore_processor_context();
  62.720 -}
  62.721 -
  62.722 -
  62.723 -#endif /*CONFIG_ACPI_SLEEP*/
  62.724 -
    63.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    63.2 +++ b/xen/arch/x86/acpi/boot.c	Tue May 10 08:49:59 2005 +0000
    63.3 @@ -0,0 +1,912 @@
    63.4 +/*
    63.5 + *  boot.c - Architecture-Specific Low-Level ACPI Boot Support
    63.6 + *
    63.7 + *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
    63.8 + *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
    63.9 + *
   63.10 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   63.11 + *
   63.12 + *  This program is free software; you can redistribute it and/or modify
   63.13 + *  it under the terms of the GNU General Public License as published by
   63.14 + *  the Free Software Foundation; either version 2 of the License, or
   63.15 + *  (at your option) any later version.
   63.16 + *
   63.17 + *  This program is distributed in the hope that it will be useful,
   63.18 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   63.19 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   63.20 + *  GNU General Public License for more details.
   63.21 + *
   63.22 + *  You should have received a copy of the GNU General Public License
   63.23 + *  along with this program; if not, write to the Free Software
   63.24 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   63.25 + *
   63.26 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   63.27 + */
   63.28 +
   63.29 +#include <xen/config.h>
   63.30 +#include <xen/errno.h>
   63.31 +#include <xen/init.h>
   63.32 +#include <xen/acpi.h>
   63.33 +#include <xen/irq.h>
   63.34 +#include <xen/sched.h>
   63.35 +#include <asm/page.h>
   63.36 +#include <asm/apic.h>
   63.37 +#include <asm/io_apic.h>
   63.38 +#include <asm/apic.h>
   63.39 +#include <asm/io.h>
   63.40 +#include <asm/irq.h>
   63.41 +#include <asm/mpspec.h>
   63.42 +
   63.43 +int sbf_port; /* XXX XEN */
   63.44 +
   63.45 +#ifdef	CONFIG_X86_64
   63.46 +
   63.47 +static inline void  acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
   63.48 +extern void __init clustered_apic_check(void);
   63.49 +static inline int ioapic_setup_disabled(void) { return 0; }
   63.50 +#include <asm/proto.h>
   63.51 +
   63.52 +#else	/* X86 */
   63.53 +
   63.54 +#ifdef	CONFIG_X86_LOCAL_APIC
   63.55 +#include <mach_apic.h>
   63.56 +#include <mach_mpparse.h>
   63.57 +#endif	/* CONFIG_X86_LOCAL_APIC */
   63.58 +
   63.59 +#endif	/* X86 */
   63.60 +
   63.61 +#define BAD_MADT_ENTRY(entry, end) (					    \
   63.62 +		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
   63.63 +		((acpi_table_entry_header *)entry)->length != sizeof(*entry))
   63.64 +
   63.65 +#define PREFIX			"ACPI: "
   63.66 +
   63.67 +#ifdef CONFIG_ACPI_PCI
   63.68 +int acpi_noirq __initdata;	/* skip ACPI IRQ initialization */
   63.69 +int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
   63.70 +#else
   63.71 +int acpi_noirq __initdata = 1;
   63.72 +int acpi_pci_disabled __initdata = 1;
   63.73 +#endif
   63.74 +int acpi_ht __initdata = 1;	/* enable HT */
   63.75 +
   63.76 +int acpi_lapic;
   63.77 +int acpi_ioapic;
   63.78 +int acpi_strict;
   63.79 +EXPORT_SYMBOL(acpi_strict);
   63.80 +
   63.81 +acpi_interrupt_flags acpi_sci_flags __initdata;
   63.82 +int acpi_sci_override_gsi __initdata;
   63.83 +int acpi_skip_timer_override __initdata;
   63.84 +
   63.85 +#ifdef CONFIG_X86_LOCAL_APIC
   63.86 +static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
   63.87 +#endif
   63.88 +
   63.89 +#ifndef __HAVE_ARCH_CMPXCHG
   63.90 +#warning ACPI uses CMPXCHG, i486 and later hardware
   63.91 +#endif
   63.92 +
   63.93 +#define MAX_MADT_ENTRIES	256
   63.94 +u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
   63.95 +			{ [0 ... MAX_MADT_ENTRIES-1] = 0xff };
   63.96 +EXPORT_SYMBOL(x86_acpiid_to_apicid);
   63.97 +
   63.98 +/* --------------------------------------------------------------------------
   63.99 +                              Boot-time Configuration
  63.100 +   -------------------------------------------------------------------------- */
  63.101 +
  63.102 +/*
  63.103 + * The default interrupt routing model is PIC (8259).  This gets
  63.104 + * overriden if IOAPICs are enumerated (below).
  63.105 + */
  63.106 +enum acpi_irq_model_id		acpi_irq_model = ACPI_IRQ_MODEL_PIC;
  63.107 +
  63.108 +#ifdef	CONFIG_X86_64
  63.109 +
  63.110 +/* rely on all ACPI tables being in the direct mapping */
  63.111 +char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
  63.112 +{
  63.113 +	if (!phys_addr || !size)
  63.114 +	return NULL;
  63.115 +
  63.116 +	if (phys_addr < (end_pfn_map << PAGE_SHIFT))
  63.117 +		return __va(phys_addr);
  63.118 +
  63.119 +	return NULL;
  63.120 +}
  63.121 +
  63.122 +#else
  63.123 +
  63.124 +/*
  63.125 + * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
  63.126 + * to map the target physical address. The problem is that set_fixmap()
  63.127 + * provides a single page, and it is possible that the page is not
  63.128 + * sufficient.
  63.129 + * By using this area, we can map up to MAX_IO_APICS pages temporarily,
  63.130 + * i.e. until the next __va_range() call.
  63.131 + *
  63.132 + * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
  63.133 + * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
  63.134 + * count idx down while incrementing the phys address.
  63.135 + */
  63.136 +char *__acpi_map_table(unsigned long phys, unsigned long size)
  63.137 +{
  63.138 +	unsigned long base, offset, mapped_size;
  63.139 +	int idx;
  63.140 +
  63.141 +	if (phys + size < 8*1024*1024) 
  63.142 +		return __va(phys); 
  63.143 +
  63.144 +	offset = phys & (PAGE_SIZE - 1);
  63.145 +	mapped_size = PAGE_SIZE - offset;
  63.146 +	set_fixmap(FIX_ACPI_END, phys);
  63.147 +	base = fix_to_virt(FIX_ACPI_END);
  63.148 +
  63.149 +	/*
  63.150 +	 * Most cases can be covered by the below.
  63.151 +	 */
  63.152 +	idx = FIX_ACPI_END;
  63.153 +	while (mapped_size < size) {
  63.154 +		if (--idx < FIX_ACPI_BEGIN)
  63.155 +			return NULL;	/* cannot handle this */
  63.156 +		phys += PAGE_SIZE;
  63.157 +		set_fixmap(idx, phys);
  63.158 +		mapped_size += PAGE_SIZE;
  63.159 +	}
  63.160 +
  63.161 +	return ((unsigned char *) base + offset);
  63.162 +}
  63.163 +#endif
  63.164 +
  63.165 +#ifdef CONFIG_PCI_MMCONFIG
  63.166 +static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
  63.167 +{
  63.168 +	struct acpi_table_mcfg *mcfg;
  63.169 +
  63.170 +	if (!phys_addr || !size)
  63.171 +		return -EINVAL;
  63.172 +
  63.173 +	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
  63.174 +	if (!mcfg) {
  63.175 +		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
  63.176 +		return -ENODEV;
  63.177 +	}
  63.178 +
  63.179 +	if (mcfg->base_reserved) {
  63.180 +		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
  63.181 +		return -ENODEV;
  63.182 +	}
  63.183 +
  63.184 +	pci_mmcfg_base_addr = mcfg->base_address;
  63.185 +
  63.186 +	return 0;
  63.187 +}
  63.188 +#else
  63.189 +#define	acpi_parse_mcfg NULL
  63.190 +#endif /* !CONFIG_PCI_MMCONFIG */
  63.191 +
  63.192 +#ifdef CONFIG_X86_LOCAL_APIC
  63.193 +static int __init
  63.194 +acpi_parse_madt (
  63.195 +	unsigned long		phys_addr,
  63.196 +	unsigned long		size)
  63.197 +{
  63.198 +	struct acpi_table_madt	*madt = NULL;
  63.199 +
  63.200 +	if (!phys_addr || !size)
  63.201 +		return -EINVAL;
  63.202 +
  63.203 +	madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
  63.204 +	if (!madt) {
  63.205 +		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
  63.206 +		return -ENODEV;
  63.207 +	}
  63.208 +
  63.209 +	if (madt->lapic_address) {
  63.210 +		acpi_lapic_addr = (u64) madt->lapic_address;
  63.211 +
  63.212 +		printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
  63.213 +			madt->lapic_address);
  63.214 +	}
  63.215 +
  63.216 +	acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
  63.217 +	
  63.218 +	return 0;
  63.219 +}
  63.220 +
  63.221 +
  63.222 +static int __init
  63.223 +acpi_parse_lapic (
  63.224 +	acpi_table_entry_header *header, const unsigned long end)
  63.225 +{
  63.226 +	struct acpi_table_lapic	*processor = NULL;
  63.227 +
  63.228 +	processor = (struct acpi_table_lapic*) header;
  63.229 +
  63.230 +	if (BAD_MADT_ENTRY(processor, end))
  63.231 +		return -EINVAL;
  63.232 +
  63.233 +	acpi_table_print_madt_entry(header);
  63.234 +
  63.235 +	/* no utility in registering a disabled processor */
  63.236 +	if (processor->flags.enabled == 0)
  63.237 +		return 0;
  63.238 +
  63.239 +	x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
  63.240 +
  63.241 +	mp_register_lapic (
  63.242 +		processor->id,					   /* APIC ID */
  63.243 +		processor->flags.enabled);			  /* Enabled? */
  63.244 +
  63.245 +	return 0;
  63.246 +}
  63.247 +
  63.248 +static int __init
  63.249 +acpi_parse_lapic_addr_ovr (
  63.250 +	acpi_table_entry_header *header, const unsigned long end)
  63.251 +{
  63.252 +	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
  63.253 +
  63.254 +	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
  63.255 +
  63.256 +	if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
  63.257 +		return -EINVAL;
  63.258 +
  63.259 +	acpi_lapic_addr = lapic_addr_ovr->address;
  63.260 +
  63.261 +	return 0;
  63.262 +}
  63.263 +
  63.264 +static int __init
  63.265 +acpi_parse_lapic_nmi (
  63.266 +	acpi_table_entry_header *header, const unsigned long end)
  63.267 +{
  63.268 +	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
  63.269 +
  63.270 +	lapic_nmi = (struct acpi_table_lapic_nmi*) header;
  63.271 +
  63.272 +	if (BAD_MADT_ENTRY(lapic_nmi, end))
  63.273 +		return -EINVAL;
  63.274 +
  63.275 +	acpi_table_print_madt_entry(header);
  63.276 +
  63.277 +	if (lapic_nmi->lint != 1)
  63.278 +		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
  63.279 +
  63.280 +	return 0;
  63.281 +}
  63.282 +
  63.283 +
  63.284 +#endif /*CONFIG_X86_LOCAL_APIC*/
  63.285 +
  63.286 +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
  63.287 +
  63.288 +static int __init
  63.289 +acpi_parse_ioapic (
  63.290 +	acpi_table_entry_header *header, const unsigned long end)
  63.291 +{
  63.292 +	struct acpi_table_ioapic *ioapic = NULL;
  63.293 +
  63.294 +	ioapic = (struct acpi_table_ioapic*) header;
  63.295 +
  63.296 +	if (BAD_MADT_ENTRY(ioapic, end))
  63.297 +		return -EINVAL;
  63.298 + 
  63.299 +	acpi_table_print_madt_entry(header);
  63.300 +
  63.301 +	mp_register_ioapic (
  63.302 +		ioapic->id,
  63.303 +		ioapic->address,
  63.304 +		ioapic->global_irq_base);
  63.305 + 
  63.306 +	return 0;
  63.307 +}
  63.308 +
  63.309 +/*
  63.310 + * Parse Interrupt Source Override for the ACPI SCI
  63.311 + */
  63.312 +static void
  63.313 +acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
  63.314 +{
  63.315 +	if (trigger == 0)	/* compatible SCI trigger is level */
  63.316 +		trigger = 3;
  63.317 +
  63.318 +	if (polarity == 0)	/* compatible SCI polarity is low */
  63.319 +		polarity = 3;
  63.320 +
  63.321 +	/* Command-line over-ride via acpi_sci= */
  63.322 +	if (acpi_sci_flags.trigger)
  63.323 +		trigger = acpi_sci_flags.trigger;
  63.324 +
  63.325 +	if (acpi_sci_flags.polarity)
  63.326 +		polarity = acpi_sci_flags.polarity;
  63.327 +
  63.328 +	/*
  63.329 + 	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
  63.330 +	 * If GSI is < 16, this will update its flags,
  63.331 +	 * else it will create a new mp_irqs[] entry.
  63.332 +	 */
  63.333 +	mp_override_legacy_irq(gsi, polarity, trigger, gsi);
  63.334 +
  63.335 +	/*
  63.336 +	 * stash over-ride to indicate we've been here
  63.337 +	 * and for later update of acpi_fadt
  63.338 +	 */
  63.339 +	acpi_sci_override_gsi = gsi;
  63.340 +	return;
  63.341 +}
  63.342 +
  63.343 +static int __init
  63.344 +acpi_parse_int_src_ovr (
  63.345 +	acpi_table_entry_header *header, const unsigned long end)
  63.346 +{
  63.347 +	struct acpi_table_int_src_ovr *intsrc = NULL;
  63.348 +
  63.349 +	intsrc = (struct acpi_table_int_src_ovr*) header;
  63.350 +
  63.351 +	if (BAD_MADT_ENTRY(intsrc, end))
  63.352 +		return -EINVAL;
  63.353 +
  63.354 +	acpi_table_print_madt_entry(header);
  63.355 +
  63.356 +	if (intsrc->bus_irq == acpi_fadt.sci_int) {
  63.357 +		acpi_sci_ioapic_setup(intsrc->global_irq,
  63.358 +			intsrc->flags.polarity, intsrc->flags.trigger);
  63.359 +		return 0;
  63.360 +	}
  63.361 +
  63.362 +	if (acpi_skip_timer_override &&
  63.363 +		intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
  63.364 +			printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
  63.365 +			return 0;
  63.366 +	}
  63.367 +
  63.368 +	mp_override_legacy_irq (
  63.369 +		intsrc->bus_irq,
  63.370 +		intsrc->flags.polarity,
  63.371 +		intsrc->flags.trigger,
  63.372 +		intsrc->global_irq);
  63.373 +
  63.374 +	return 0;
  63.375 +}
  63.376 +
  63.377 +
  63.378 +static int __init
  63.379 +acpi_parse_nmi_src (
  63.380 +	acpi_table_entry_header *header, const unsigned long end)
  63.381 +{
  63.382 +	struct acpi_table_nmi_src *nmi_src = NULL;
  63.383 +
  63.384 +	nmi_src = (struct acpi_table_nmi_src*) header;
  63.385 +
  63.386 +	if (BAD_MADT_ENTRY(nmi_src, end))
  63.387 +		return -EINVAL;
  63.388 +
  63.389 +	acpi_table_print_madt_entry(header);
  63.390 +
  63.391 +	/* TBD: Support nimsrc entries? */
  63.392 +
  63.393 +	return 0;
  63.394 +}
  63.395 +
  63.396 +#endif /* CONFIG_X86_IO_APIC */
  63.397 +
  63.398 +#ifdef	CONFIG_ACPI_BUS
  63.399 +
  63.400 +/*
  63.401 + * acpi_pic_sci_set_trigger()
  63.402 + * 
  63.403 + * use ELCR to set PIC-mode trigger type for SCI
  63.404 + *
  63.405 + * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
  63.406 + * it may require Edge Trigger -- use "acpi_sci=edge"
  63.407 + *
  63.408 + * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
  63.409 + * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
  63.410 + * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
  63.411 + * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
  63.412 + */
  63.413 +
  63.414 +void __init
  63.415 +acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
  63.416 +{
  63.417 +	unsigned int mask = 1 << irq;
  63.418 +	unsigned int old, new;
  63.419 +
  63.420 +	/* Real old ELCR mask */
  63.421 +	old = inb(0x4d0) | (inb(0x4d1) << 8);
  63.422 +
  63.423 +	/*
  63.424 +	 * If we use ACPI to set PCI irq's, then we should clear ELCR
  63.425 +	 * since we will set it correctly as we enable the PCI irq
  63.426 +	 * routing.
  63.427 +	 */
  63.428 +	new = acpi_noirq ? old : 0;
  63.429 +
  63.430 +	/*
  63.431 +	 * Update SCI information in the ELCR, it isn't in the PCI
  63.432 +	 * routing tables..
  63.433 +	 */
  63.434 +	switch (trigger) {
  63.435 +	case 1:	/* Edge - clear */
  63.436 +		new &= ~mask;
  63.437 +		break;
  63.438 +	case 3: /* Level - set */
  63.439 +		new |= mask;
  63.440 +		break;
  63.441 +	}
  63.442 +
  63.443 +	if (old == new)
  63.444 +		return;
  63.445 +
  63.446 +	printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
  63.447 +	outb(new, 0x4d0);
  63.448 +	outb(new >> 8, 0x4d1);
  63.449 +}
  63.450 +
  63.451 +
  63.452 +#endif /* CONFIG_ACPI_BUS */
  63.453 +
  63.454 +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  63.455 +{
  63.456 +#ifdef CONFIG_X86_IO_APIC
  63.457 +	if (use_pci_vector() && !platform_legacy_irq(gsi))
  63.458 + 		*irq = IO_APIC_VECTOR(gsi);
  63.459 +	else
  63.460 +#endif
  63.461 +		*irq = gsi;
  63.462 +	return 0;
  63.463 +}
  63.464 +
  63.465 +unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
  63.466 +{
  63.467 +	unsigned int irq;
  63.468 +	unsigned int plat_gsi = gsi;
  63.469 +
  63.470 +#ifdef CONFIG_PCI
  63.471 +	/*
  63.472 +	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
  63.473 +	 */
  63.474 +	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
  63.475 +		extern void eisa_set_level_irq(unsigned int irq);
  63.476 +
  63.477 +		if (edge_level == ACPI_LEVEL_SENSITIVE)
  63.478 +				eisa_set_level_irq(gsi);
  63.479 +	}
  63.480 +#endif
  63.481 +
  63.482 +#ifdef CONFIG_X86_IO_APIC
  63.483 +	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
  63.484 +		plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
  63.485 +	}
  63.486 +#endif
  63.487 +	acpi_gsi_to_irq(plat_gsi, &irq);
  63.488 +	return irq;
  63.489 +}
  63.490 +EXPORT_SYMBOL(acpi_register_gsi);
  63.491 +
  63.492 +/*
  63.493 + *  ACPI based hotplug support for CPU
  63.494 + */
  63.495 +#ifdef CONFIG_ACPI_HOTPLUG_CPU
  63.496 +int
  63.497 +acpi_map_lsapic(acpi_handle handle, int *pcpu)
  63.498 +{
  63.499 +	/* TBD */
  63.500 +	return -EINVAL;
  63.501 +}
  63.502 +EXPORT_SYMBOL(acpi_map_lsapic);
  63.503 +
  63.504 +
  63.505 +int
  63.506 +acpi_unmap_lsapic(int cpu)
  63.507 +{
  63.508 +	/* TBD */
  63.509 +	return -EINVAL;
  63.510 +}
  63.511 +EXPORT_SYMBOL(acpi_unmap_lsapic);
  63.512 +#endif /* CONFIG_ACPI_HOTPLUG_CPU */
  63.513 +
  63.514 +static unsigned long __init
  63.515 +acpi_scan_rsdp (
  63.516 +	unsigned long		start,
  63.517 +	unsigned long		length)
  63.518 +{
  63.519 +	unsigned long		offset = 0;
  63.520 +	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
  63.521 +
  63.522 +	/*
  63.523 +	 * Scan all 16-byte boundaries of the physical memory region for the
  63.524 +	 * RSDP signature.
  63.525 +	 */
  63.526 +	for (offset = 0; offset < length; offset += 16) {
  63.527 +		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
  63.528 +			continue;
  63.529 +		return (start + offset);
  63.530 +	}
  63.531 +
  63.532 +	return 0;
  63.533 +}
  63.534 +
  63.535 +static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
  63.536 +{
  63.537 +	struct acpi_table_sbf *sb;
  63.538 +
  63.539 +	if (!phys_addr || !size)
  63.540 +	return -EINVAL;
  63.541 +
  63.542 +	sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
  63.543 +	if (!sb) {
  63.544 +		printk(KERN_WARNING PREFIX "Unable to map SBF\n");
  63.545 +		return -ENODEV;
  63.546 +	}
  63.547 +
  63.548 +	sbf_port = sb->sbf_cmos; /* Save CMOS port */
  63.549 +
  63.550 +	return 0;
  63.551 +}
  63.552 +
  63.553 +
  63.554 +#ifdef CONFIG_HPET_TIMER
  63.555 +
  63.556 +static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
  63.557 +{
  63.558 +	struct acpi_table_hpet *hpet_tbl;
  63.559 +
  63.560 +	if (!phys || !size)
  63.561 +		return -EINVAL;
  63.562 +
  63.563 +	hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
  63.564 +	if (!hpet_tbl) {
  63.565 +		printk(KERN_WARNING PREFIX "Unable to map HPET\n");
  63.566 +		return -ENODEV;
  63.567 +	}
  63.568 +
  63.569 +	if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
  63.570 +		printk(KERN_WARNING PREFIX "HPET timers must be located in "
  63.571 +		       "memory.\n");
  63.572 +		return -1;
  63.573 +	}
  63.574 +
  63.575 +#ifdef	CONFIG_X86_64
  63.576 +        vxtime.hpet_address = hpet_tbl->addr.addrl |
  63.577 +                ((long) hpet_tbl->addr.addrh << 32);
  63.578 +
  63.579 +        printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
  63.580 +               hpet_tbl->id, vxtime.hpet_address);
  63.581 +#else	/* X86 */
  63.582 +	{
  63.583 +		extern unsigned long hpet_address;
  63.584 +
  63.585 +		hpet_address = hpet_tbl->addr.addrl;
  63.586 +		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
  63.587 +			hpet_tbl->id, hpet_address);
  63.588 +	}
  63.589 +#endif	/* X86 */
  63.590 +
  63.591 +	return 0;
  63.592 +}
  63.593 +#else
  63.594 +#define	acpi_parse_hpet	NULL
  63.595 +#endif
  63.596 +
  63.597 +#ifdef CONFIG_X86_PM_TIMER
  63.598 +extern u32 pmtmr_ioport;
  63.599 +#endif
  63.600 +
  63.601 +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
  63.602 +{
  63.603 +	struct fadt_descriptor_rev2 *fadt = NULL;
  63.604 +
  63.605 +	fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
  63.606 +	if(!fadt) {
  63.607 +		printk(KERN_WARNING PREFIX "Unable to map FADT\n");
  63.608 +		return 0;
  63.609 +	}
  63.610 +
  63.611 +#ifdef	CONFIG_ACPI_INTERPRETER
  63.612 +	/* initialize sci_int early for INT_SRC_OVR MADT parsing */
  63.613 +	acpi_fadt.sci_int = fadt->sci_int;
  63.614 +#endif
  63.615 +
  63.616 +#ifdef CONFIG_X86_PM_TIMER
  63.617 +	/* detect the location of the ACPI PM Timer */
  63.618 +	if (fadt->revision >= FADT2_REVISION_ID) {
  63.619 +		/* FADT rev. 2 */
  63.620 +		if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
  63.621 +			return 0;
  63.622 +
  63.623 +		pmtmr_ioport = fadt->xpm_tmr_blk.address;
  63.624 +	} else {
  63.625 +		/* FADT rev. 1 */
  63.626 +		pmtmr_ioport = fadt->V1_pm_tmr_blk;
  63.627 +	}
  63.628 +	if (pmtmr_ioport)
  63.629 +		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
  63.630 +#endif
  63.631 +	return 0;
  63.632 +}
  63.633 +
  63.634 +
  63.635 +unsigned long __init
  63.636 +acpi_find_rsdp (void)
  63.637 +{
  63.638 +	unsigned long		rsdp_phys = 0;
  63.639 +
  63.640 +#if 0
  63.641 +	if (efi_enabled) {
  63.642 +		if (efi.acpi20)
  63.643 +			return __pa(efi.acpi20);
  63.644 +		else if (efi.acpi)
  63.645 +			return __pa(efi.acpi);
  63.646 +	}
  63.647 +#endif
  63.648 +	/*
  63.649 +	 * Scan memory looking for the RSDP signature. First search EBDA (low
  63.650 +	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
  63.651 +	 */
  63.652 +	rsdp_phys = acpi_scan_rsdp (0, 0x400);
  63.653 +	if (!rsdp_phys)
  63.654 +		rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF);
  63.655 +
  63.656 +	return rsdp_phys;
  63.657 +}
  63.658 +
  63.659 +#ifdef	CONFIG_X86_LOCAL_APIC
  63.660 +/*
  63.661 + * Parse LAPIC entries in MADT
  63.662 + * returns 0 on success, < 0 on error
  63.663 + */
  63.664 +static int __init
  63.665 +acpi_parse_madt_lapic_entries(void)
  63.666 +{
  63.667 +	int count;
  63.668 +
  63.669 +	/* 
  63.670 +	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
  63.671 +	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
  63.672 +	 */
  63.673 +
  63.674 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
  63.675 +	if (count < 0) {
  63.676 +		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
  63.677 +		return count;
  63.678 +	}
  63.679 +
  63.680 +	mp_register_lapic_address(acpi_lapic_addr);
  63.681 +
  63.682 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
  63.683 +				       MAX_APICS);
  63.684 +	if (!count) { 
  63.685 +		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
  63.686 +		/* TBD: Cleanup to allow fallback to MPS */
  63.687 +		return -ENODEV;
  63.688 +	}
  63.689 +	else if (count < 0) {
  63.690 +		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
  63.691 +		/* TBD: Cleanup to allow fallback to MPS */
  63.692 +		return count;
  63.693 +	}
  63.694 +
  63.695 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
  63.696 +	if (count < 0) {
  63.697 +		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
  63.698 +		/* TBD: Cleanup to allow fallback to MPS */
  63.699 +		return count;
  63.700 +	}
  63.701 +	return 0;
  63.702 +}
  63.703 +#endif /* CONFIG_X86_LOCAL_APIC */
  63.704 +
  63.705 +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
  63.706 +/*
  63.707 + * Parse IOAPIC related entries in MADT
  63.708 + * returns 0 on success, < 0 on error
  63.709 + */
  63.710 +static int __init
  63.711 +acpi_parse_madt_ioapic_entries(void)
  63.712 +{
  63.713 +	int count;
  63.714 +
  63.715 +	/*
  63.716 +	 * ACPI interpreter is required to complete interrupt setup,
  63.717 +	 * so if it is off, don't enumerate the io-apics with ACPI.
  63.718 +	 * If MPS is present, it will handle them,
  63.719 +	 * otherwise the system will stay in PIC mode
  63.720 +	 */
  63.721 +	if (acpi_disabled || acpi_noirq) {
  63.722 +		return -ENODEV;
  63.723 +        }
  63.724 +
  63.725 +	/*
  63.726 + 	 * if "noapic" boot option, don't look for IO-APICs
  63.727 +	 */
  63.728 +	if (skip_ioapic_setup) {
  63.729 +		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
  63.730 +			"due to 'noapic' option.\n");
  63.731 +		return -ENODEV;
  63.732 +	}
  63.733 +
  63.734 +	count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
  63.735 +	if (!count) {
  63.736 +		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
  63.737 +		return -ENODEV;
  63.738 +	}
  63.739 +	else if (count < 0) {
  63.740 +		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
  63.741 +		return count;
  63.742 +	}
  63.743 +
  63.744 +	count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
  63.745 +	if (count < 0) {
  63.746 +		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
  63.747 +		/* TBD: Cleanup to allow fallback to MPS */
  63.748 +		return count;
  63.749 +	}
  63.750 +
  63.751 +	/*
  63.752 +	 * If BIOS did not supply an INT_SRC_OVR for the SCI
  63.753 +	 * pretend we got one so we can set the SCI flags.
  63.754 +	 */
  63.755 +	if (!acpi_sci_override_gsi)
  63.756 +		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
  63.757 +
  63.758 +	/* Fill in identity legacy mapings where no override */
  63.759 +	mp_config_acpi_legacy_irqs();
  63.760 +
  63.761 +	count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
  63.762 +	if (count < 0) {
  63.763 +		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
  63.764 +		/* TBD: Cleanup to allow fallback to MPS */
  63.765 +		return count;
  63.766 +	}
  63.767 +
  63.768 +	return 0;
  63.769 +}
  63.770 +#else
  63.771 +static inline int acpi_parse_madt_ioapic_entries(void)
  63.772 +{
  63.773 +	return -1;
  63.774 +}
  63.775 +#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */
  63.776 +
  63.777 +
  63.778 +static void __init
  63.779 +acpi_process_madt(void)
  63.780 +{
  63.781 +#ifdef CONFIG_X86_LOCAL_APIC
  63.782 +	int count, error;
  63.783 +
  63.784 +	count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
  63.785 +	if (count >= 1) {
  63.786 +
  63.787 +		/*
  63.788 +		 * Parse MADT LAPIC entries
  63.789 +		 */
  63.790 +		error = acpi_parse_madt_lapic_entries();
  63.791 +		if (!error) {
  63.792 +			acpi_lapic = 1;
  63.793 +
  63.794 +			/*
  63.795 +			 * Parse MADT IO-APIC entries
  63.796 +			 */
  63.797 +			error = acpi_parse_madt_ioapic_entries();
  63.798 +			if (!error) {
  63.799 +				acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
  63.800 +				acpi_irq_balance_set(NULL);
  63.801 +				acpi_ioapic = 1;
  63.802 +
  63.803 +				smp_found_config = 1;
  63.804 +				clustered_apic_check();
  63.805 +			}
  63.806 +		}
  63.807 +		if (error == -EINVAL) {
  63.808 +			/*
  63.809 +			 * Dell Precision Workstation 410, 610 come here.
  63.810 +			 */
  63.811 +			printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n");
  63.812 +			disable_acpi();
  63.813 +		}
  63.814 +	}
  63.815 +#endif
  63.816 +	return;
  63.817 +}
  63.818 +
  63.819 +/*
  63.820 + * acpi_boot_table_init() and acpi_boot_init()
  63.821 + *  called from setup_arch(), always.
  63.822 + *	1. checksums all tables
  63.823 + *	2. enumerates lapics
  63.824 + *	3. enumerates io-apics
  63.825 + *
  63.826 + * acpi_table_init() is separate to allow reading SRAT without
  63.827 + * other side effects.
  63.828 + *
  63.829 + * side effects of acpi_boot_init:
  63.830 + *	acpi_lapic = 1 if LAPIC found
  63.831 + *	acpi_ioapic = 1 if IOAPIC found
  63.832 + *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
  63.833 + *	if acpi_blacklisted() acpi_disabled = 1;
  63.834 + *	acpi_irq_model=...
  63.835 + *	...
  63.836 + *
  63.837 + * return value: (currently ignored)
  63.838 + *	0: success
  63.839 + *	!0: failure
  63.840 + */
  63.841 +
  63.842 +int __init
  63.843 +acpi_boot_table_init(void)
  63.844 +{
  63.845 +	int error;
  63.846 +
  63.847 +	/*
  63.848 +	 * If acpi_disabled, bail out
  63.849 +	 * One exception: acpi=ht continues far enough to enumerate LAPICs
  63.850 +	 */
  63.851 +	if (acpi_disabled && !acpi_ht)
  63.852 +		 return 1;
  63.853 +
  63.854 +	/* 
  63.855 +	 * Initialize the ACPI boot-time table parser.
  63.856 +	 */
  63.857 +	error = acpi_table_init();
  63.858 +	if (error) {
  63.859 +		disable_acpi();
  63.860 +		return error;
  63.861 +	}
  63.862 +
  63.863 +#if 0 /*def __i386__*/
  63.864 +	check_acpi_pci();
  63.865 +#endif
  63.866 +
  63.867 +	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  63.868 +
  63.869 +	/*
  63.870 +	 * blacklist may disable ACPI entirely
  63.871 +	 */
  63.872 +	error = acpi_blacklisted();
  63.873 +	if (error) {
  63.874 +		extern int acpi_force;
  63.875 +
  63.876 +		if (acpi_force) {
  63.877 +			printk(KERN_WARNING PREFIX "acpi=force override\n");
  63.878 +		} else {
  63.879 +			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
  63.880 +			disable_acpi();
  63.881 +			return error;
  63.882 +		}
  63.883 +	}
  63.884 +
  63.885 +	return 0;
  63.886 +}
  63.887 +
  63.888 +
  63.889 +int __init acpi_boot_init(void)
  63.890 +{
  63.891 +	/*
  63.892 +	 * If acpi_disabled, bail out
  63.893 +	 * One exception: acpi=ht continues far enough to enumerate LAPICs
  63.894 +	 */
  63.895 +	if (acpi_disabled && !acpi_ht)
  63.896 +		 return 1;
  63.897 +
  63.898 +	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  63.899 +
  63.900 +	/*
  63.901 +	 * set sci_int and PM timer address
  63.902 +	 */
  63.903 +	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
  63.904 +
  63.905 +	/*
  63.906 +	 * Process the Multiple APIC Description Table (MADT), if present
  63.907 +	 */
  63.908 +	acpi_process_madt();
  63.909 +
  63.910 +	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
  63.911 +	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
  63.912 +
  63.913 +	return 0;
  63.914 +}
  63.915 +
    64.1 --- a/xen/arch/x86/apic.c	Tue May 03 14:42:00 2005 +0000
    64.2 +++ b/xen/arch/x86/apic.c	Tue May 10 08:49:59 2005 +0000
    64.3 @@ -34,14 +34,27 @@
    64.4  #include <asm/hardirq.h>
    64.5  #include <asm/apic.h>
    64.6  #include <asm/io_apic.h>
    64.7 -#include <asm/mach_apic.h>
    64.8 -#include <asm/io_ports.h>
    64.9 +#include <mach_apic.h>
   64.10 +#include <io_ports.h>
   64.11  
   64.12  /* Using APIC to generate smp_local_timer_interrupt? */
   64.13  int using_apic_timer = 0;
   64.14  
   64.15 +int apic_verbosity;
   64.16 +
   64.17  static int enabled_via_apicbase;
   64.18  
   64.19 +int get_physical_broadcast(void)
   64.20 +{
   64.21 +    unsigned int lvr, version;
   64.22 +    lvr = apic_read(APIC_LVR);
   64.23 +    version = GET_APIC_VERSION(lvr);
   64.24 +    if (!APIC_INTEGRATED(version) || version >= 0x14)
   64.25 +        return 0xff;
   64.26 +    else
   64.27 +        return 0xf;
   64.28 +}
   64.29 +
   64.30  int get_maxlvt(void)
   64.31  {
   64.32      unsigned int v, ver, maxlvt;
   64.33 @@ -907,7 +920,7 @@ int __init APIC_init_uniprocessor (void)
   64.34  #ifdef CONFIG_SMP
   64.35      cpu_online_map = 1;
   64.36  #endif
   64.37 -    phys_cpu_present_map = 1;
   64.38 +    phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
   64.39      apic_write_around(APIC_ID, boot_cpu_physical_apicid);
   64.40  
   64.41      setup_local_APIC();
    65.1 --- a/xen/arch/x86/audit.c	Tue May 03 14:42:00 2005 +0000
    65.2 +++ b/xen/arch/x86/audit.c	Tue May 10 08:49:59 2005 +0000
    65.3 @@ -49,7 +49,8 @@ static int l1, l2, oos_count, page_count
    65.4  int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
    65.5  {
    65.6      int errors = 0;
    65.7 -    int shadow_enabled = shadow_mode_enabled(d) ? 1 : 0;
    65.8 +    int shadow_refcounts = !!shadow_mode_refcounts(d);
    65.9 +    int shadow_enabled = !!shadow_mode_enabled(d);
   65.10      int l2limit;
   65.11  
   65.12      void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
   65.13 @@ -119,7 +120,7 @@ int audit_adjust_pgtables(struct domain 
   65.14              page->count_info += dir;
   65.15      }
   65.16  
   65.17 -    void adjust_l2_page(unsigned long mfn)
   65.18 +    void adjust_l2_page(unsigned long mfn, int shadow)
   65.19      {
   65.20          unsigned long *pt = map_domain_mem(mfn << PAGE_SHIFT);
   65.21          int i;
   65.22 @@ -133,7 +134,7 @@ int audit_adjust_pgtables(struct domain 
   65.23  
   65.24                  if ( noisy )
   65.25                  {
   65.26 -                    if ( shadow_enabled )
   65.27 +                    if ( shadow )
   65.28                      {
   65.29                          if ( page_get_owner(l1page) != NULL )
   65.30                          {
   65.31 @@ -145,6 +146,17 @@ int audit_adjust_pgtables(struct domain 
   65.32                              errors++;
   65.33                              continue;
   65.34                          }
   65.35 +
   65.36 +                        u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
   65.37 +
   65.38 +                        if ( page_type != PGT_l1_shadow )
   65.39 +                        {
   65.40 +                            printk("Audit %d: [Shadow L2 mfn=%lx i=%x] "
   65.41 +                                   "Expected Shadow L1 t=%x mfn=%lx\n",
   65.42 +                                   d->id, mfn, i,
   65.43 +                                   l1page->u.inuse.type_info, l1mfn);
   65.44 +                            errors++;
   65.45 +                        }
   65.46                      }
   65.47                      else
   65.48                      {
   65.49 @@ -154,7 +166,9 @@ int audit_adjust_pgtables(struct domain 
   65.50                                     "belonging to other dom %p (id=%d)\n",
   65.51                                     l1mfn,
   65.52                                     page_get_owner(l1page),
   65.53 -                                   page_get_owner(l1page)->id);
   65.54 +                                   (page_get_owner(l1page)
   65.55 +                                    ? page_get_owner(l1page)->id
   65.56 +                                    : -1));
   65.57                              errors++;
   65.58                              continue;
   65.59                          }
   65.60 @@ -179,7 +193,7 @@ int audit_adjust_pgtables(struct domain 
   65.61                      }
   65.62                  }
   65.63  
   65.64 -                adjust(l1page, !shadow_enabled);
   65.65 +                adjust(l1page, !shadow);
   65.66              }
   65.67          }
   65.68  
   65.69 @@ -280,7 +294,7 @@ int audit_adjust_pgtables(struct domain 
   65.70                              errors++;
   65.71                          }
   65.72  
   65.73 -                        if ( shadow_enabled &&
   65.74 +                        if ( shadow_refcounts &&
   65.75                               page_is_page_table(gpage) &&
   65.76                               ! page_out_of_sync(gpage) )
   65.77                          {
   65.78 @@ -336,19 +350,21 @@ int audit_adjust_pgtables(struct domain 
   65.79                      break;
   65.80                  case PGT_l1_shadow:
   65.81                      adjust(pfn_to_page(gmfn), 0);
   65.82 -                    adjust_l1_page(smfn);
   65.83 +                    if ( shadow_refcounts )
   65.84 +                        adjust_l1_page(smfn);
   65.85                      if ( page->u.inuse.type_info & PGT_pinned )
   65.86                          adjust(page, 0);
   65.87                      break;
   65.88                  case PGT_hl2_shadow:
   65.89                      adjust(pfn_to_page(gmfn), 0);
   65.90 -                    adjust_hl2_page(smfn);
   65.91 +                    if ( shadow_refcounts )
   65.92 +                        adjust_hl2_page(smfn);
   65.93                      if ( page->u.inuse.type_info & PGT_pinned )
   65.94                          adjust(page, 0);
   65.95                      break;
   65.96                  case PGT_l2_shadow:
   65.97                      adjust(pfn_to_page(gmfn), 0);
   65.98 -                    adjust_l2_page(smfn);
   65.99 +                    adjust_l2_page(smfn, 1);
  65.100                      if ( page->u.inuse.type_info & PGT_pinned )
  65.101                          adjust(page, 0);
  65.102                      break;
  65.103 @@ -391,45 +407,43 @@ int audit_adjust_pgtables(struct domain 
  65.104          struct exec_domain *ed;
  65.105  
  65.106          for_each_exec_domain(d, ed)
  65.107 -            {
  65.108 -                if ( !shadow_enabled )
  65.109 -                {
  65.110 -                    if ( pagetable_val(ed->arch.guest_table) )
  65.111 -                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
  65.112 -                                            >> PAGE_SHIFT], 1);
  65.113 -                }
  65.114 -                else
  65.115 -                {
  65.116 -                    if ( pagetable_val(ed->arch.guest_table) )
  65.117 -                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
  65.118 -                                            >> PAGE_SHIFT], 0);
  65.119 -                    if ( pagetable_val(ed->arch.shadow_table) )
  65.120 -                        adjust(&frame_table[pagetable_val(ed->arch.shadow_table)
  65.121 -                                            >> PAGE_SHIFT], 0);
  65.122 -                    if ( ed->arch.monitor_shadow_ref )
  65.123 -                        adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
  65.124 -                }
  65.125 -            }
  65.126 +        {
  65.127 +            if ( pagetable_val(ed->arch.guest_table) )
  65.128 +                adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1);
  65.129 +            if ( pagetable_val(ed->arch.shadow_table) )
  65.130 +                adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0);
  65.131 +            if ( ed->arch.monitor_shadow_ref )
  65.132 +                adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
  65.133 +        }
  65.134      }
  65.135  
  65.136      void adjust_guest_pages()
  65.137      {
  65.138          struct list_head *list_ent = d->page_list.next;
  65.139          struct pfn_info *page;
  65.140 -        unsigned long mfn;
  65.141 +        unsigned long mfn, snapshot_mfn;
  65.142  
  65.143          while ( list_ent != &d->page_list )
  65.144          {
  65.145              u32 page_type;
  65.146  
  65.147              page = list_entry(list_ent, struct pfn_info, list);
  65.148 -            mfn = page_to_pfn(page);
  65.149 +            snapshot_mfn = mfn = page_to_pfn(page);
  65.150              page_type = page->u.inuse.type_info & PGT_type_mask;
  65.151  
  65.152              BUG_ON(page_get_owner(page) != d);
  65.153  
  65.154              page_count++;
  65.155  
  65.156 +            if ( shadow_enabled && !shadow_refcounts &&
  65.157 +                 page_out_of_sync(page) )
  65.158 +            {
  65.159 +                unsigned long gpfn = __mfn_to_gpfn(d, mfn);
  65.160 +                ASSERT( VALID_M2P(gpfn) );
  65.161 +                snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
  65.162 +                ASSERT( snapshot_mfn );
  65.163 +            }
  65.164 +
  65.165              switch ( page_type )
  65.166              {
  65.167              case PGT_l2_page_table:
  65.168 @@ -437,7 +451,7 @@ int audit_adjust_pgtables(struct domain 
  65.169  
  65.170                  if ( noisy )
  65.171                  {
  65.172 -                    if ( shadow_enabled )
  65.173 +                    if ( shadow_refcounts )
  65.174                      {
  65.175                          printk("Audit %d: found an L2 guest page "
  65.176                                 "mfn=%lx t=%08x c=%08x while in shadow mode\n",
  65.177 @@ -446,19 +460,22 @@ int audit_adjust_pgtables(struct domain 
  65.178                          errors++;
  65.179                      }
  65.180  
  65.181 -                    if ( (page->u.inuse.type_info & PGT_validated) !=
  65.182 -                         PGT_validated )
  65.183 +                    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
  65.184                      {
  65.185 -                        printk("Audit %d: L2 mfn=%lx not validated %08x\n",
  65.186 -                               d->id, mfn, page->u.inuse.type_info);
  65.187 -                        errors++;
  65.188 -                    }
  65.189 +                        if ( (page->u.inuse.type_info & PGT_validated) !=
  65.190 +                             PGT_validated )
  65.191 +                        {
  65.192 +                            printk("Audit %d: L2 mfn=%lx not validated %08x\n",
  65.193 +                                   d->id, mfn, page->u.inuse.type_info);
  65.194 +                            errors++;
  65.195 +                        }
  65.196  
  65.197 -                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  65.198 -                    {
  65.199 -                        printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
  65.200 -                               d->id, mfn, page->u.inuse.type_info);
  65.201 -                        errors++;
  65.202 +                        if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  65.203 +                        {
  65.204 +                            printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
  65.205 +                                   d->id, mfn, page->u.inuse.type_info);
  65.206 +                            errors++;
  65.207 +                        }
  65.208                      }
  65.209                  }
  65.210  
  65.211 @@ -466,7 +483,7 @@ int audit_adjust_pgtables(struct domain 
  65.212                      adjust(page, 1);
  65.213  
  65.214                  if ( page->u.inuse.type_info & PGT_validated )
  65.215 -                    adjust_l2_page(mfn);
  65.216 +                    adjust_l2_page(snapshot_mfn, 0);
  65.217  
  65.218                  break;
  65.219  
  65.220 @@ -475,7 +492,7 @@ int audit_adjust_pgtables(struct domain 
  65.221  
  65.222                  if ( noisy )
  65.223                  {
  65.224 -                    if ( shadow_enabled )
  65.225 +                    if ( shadow_refcounts )
  65.226                      {
  65.227                          printk("found an L1 guest page mfn=%lx t=%08x c=%08x "
  65.228                                 "while in shadow mode\n",
  65.229 @@ -483,21 +500,24 @@ int audit_adjust_pgtables(struct domain 
  65.230                          errors++;
  65.231                      }
  65.232  
  65.233 -                    if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
  65.234 +                    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
  65.235                      {
  65.236 -                        printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
  65.237 -                               d->id, mfn, page->u.inuse.type_info);
  65.238 -                        errors++;
  65.239 -                    }
  65.240 -
  65.241 -                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  65.242 -                    {
  65.243 -                        if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
  65.244 +                        if ( (page->u.inuse.type_info & PGT_validated) !=
  65.245 +                             PGT_validated )
  65.246                          {
  65.247 -                            printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
  65.248 +                            printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
  65.249                                     d->id, mfn, page->u.inuse.type_info);
  65.250                              errors++;
  65.251                          }
  65.252 +
  65.253 +                        if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  65.254 +                        {
  65.255 +                            if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
  65.256 +                            {
  65.257 +                                printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
  65.258 +                                       d->id, mfn, page->u.inuse.type_info);
  65.259 +                            }
  65.260 +                        }
  65.261                      }
  65.262                  }
  65.263                  
  65.264 @@ -505,7 +525,7 @@ int audit_adjust_pgtables(struct domain 
  65.265                      adjust(page, 1);
  65.266  
  65.267                  if ( page->u.inuse.type_info & PGT_validated )
  65.268 -                    adjust_l1_page(mfn);
  65.269 +                    adjust_l1_page(snapshot_mfn);
  65.270  
  65.271                  break;
  65.272  
  65.273 @@ -520,7 +540,7 @@ int audit_adjust_pgtables(struct domain 
  65.274                  break;
  65.275  
  65.276              case PGT_writable_page:
  65.277 -                if ( shadow_enabled )
  65.278 +                if ( shadow_refcounts )
  65.279                  {
  65.280                      // In shadow mode, writable pages can get pinned by
  65.281                      // paravirtualized guests that think they are pinning
  65.282 @@ -589,6 +609,8 @@ void audit_pagelist(struct domain *d)
  65.283  
  65.284  void _audit_domain(struct domain *d, int flags)
  65.285  {
  65.286 +    int shadow_refcounts = !!shadow_mode_refcounts(d);
  65.287 +
  65.288      void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn,
  65.289                               unsigned long mfn)
  65.290      {
  65.291 @@ -608,8 +630,29 @@ void _audit_domain(struct domain *d, int
  65.292          unmap_domain_mem(pt);           
  65.293      }
  65.294  
  65.295 +    void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn)
  65.296 +    {
  65.297 +        int i;
  65.298 +        active_grant_entry_t *act = d->grant_table->active;
  65.299 +
  65.300 +        spin_lock(&d->grant_table->lock);
  65.301 +
  65.302 +        for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
  65.303 +        {
  65.304 +            if ( act[i].pin && (act[i].frame == xmfn) )
  65.305 +            {
  65.306 +                printk("     found active grant table entry i=%d dom=%d pin=%d\n",
  65.307 +                       i, act[i].domid, act[i].pin);
  65.308 +            }
  65.309 +        }
  65.310 +
  65.311 +        spin_unlock(&d->grant_table->lock);
  65.312 +    }
  65.313 +
  65.314      void scan_for_pfn(struct domain *d, unsigned long xmfn)
  65.315      {
  65.316 +        scan_for_pfn_in_grant_table(d, xmfn);
  65.317 +
  65.318          if ( !shadow_mode_enabled(d) )
  65.319          {
  65.320              struct list_head *list_ent = d->page_list.next;
  65.321 @@ -688,7 +731,7 @@ void _audit_domain(struct domain *d, int
  65.322  
  65.323      // Maybe we should just be using BIGLOCK?
  65.324      //
  65.325 -    if ( !(flags & AUDIT_ALREADY_LOCKED) )
  65.326 +    if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
  65.327          shadow_lock(d);
  65.328  
  65.329      spin_lock(&d->page_alloc_lock);
  65.330 @@ -716,7 +759,7 @@ void _audit_domain(struct domain *d, int
  65.331              errors++;
  65.332          }
  65.333  
  65.334 -        if ( shadow_mode_enabled(d) &&
  65.335 +        if ( shadow_mode_refcounts(d) &&
  65.336               (page_type == PGT_writable_page) &&
  65.337               !(page->u.inuse.type_info & PGT_validated) )
  65.338          {
  65.339 @@ -764,7 +807,9 @@ void _audit_domain(struct domain *d, int
  65.340                         mfn);
  65.341                  errors++;
  65.342              }
  65.343 -            if ( page_type != PGT_writable_page )
  65.344 +            if ( shadow_refcounts
  65.345 +                 ? (page_type != PGT_writable_page)
  65.346 +                 : !(page_type && (page_type <= PGT_l4_page_table)) )
  65.347              {
  65.348                  printk("out of sync page mfn=%lx has strange type "
  65.349                         "t=%08x c=%08x\n",
  65.350 @@ -821,7 +866,7 @@ void _audit_domain(struct domain *d, int
  65.351                         d->id, page->u.inuse.type_info, 
  65.352                         page->tlbflush_timestamp,
  65.353                         page->count_info, mfn);
  65.354 -                errors++;
  65.355 +                //errors++;
  65.356              }
  65.357              break;
  65.358          default:
  65.359 @@ -835,7 +880,7 @@ void _audit_domain(struct domain *d, int
  65.360                     page->count_info,
  65.361                     page->u.inuse.type_info, 
  65.362                     page->tlbflush_timestamp, mfn );
  65.363 -            errors++;
  65.364 +            //errors++;
  65.365              scan_for_pfn_remote(mfn);
  65.366          }
  65.367  
  65.368 @@ -870,6 +915,8 @@ void _audit_domain(struct domain *d, int
  65.369                                 d->id, page_to_pfn(page),
  65.370                                 page->u.inuse.type_info,
  65.371                                 page->count_info);
  65.372 +                        printk("a->gpfn_and_flags=%p\n",
  65.373 +                               (void *)a->gpfn_and_flags);
  65.374                          errors++;
  65.375                      }
  65.376                      break;
  65.377 @@ -905,7 +952,7 @@ void _audit_domain(struct domain *d, int
  65.378                 "pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n",
  65.379                 d->id, page_count, oos_count, l1, l2, ctot, ttot);
  65.380  
  65.381 -    if ( !(flags & AUDIT_ALREADY_LOCKED) )
  65.382 +    if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
  65.383          shadow_unlock(d);
  65.384  
  65.385      if ( d != current->domain )
    66.1 --- a/xen/arch/x86/dom0_ops.c	Tue May 03 14:42:00 2005 +0000
    66.2 +++ b/xen/arch/x86/dom0_ops.c	Tue May 10 08:49:59 2005 +0000
    66.3 @@ -397,7 +397,7 @@ void arch_getdomaininfo_ctxt(
    66.4  #endif
    66.5  
    66.6      c->flags = 0;
    66.7 -    if ( test_bit(EDF_DONEFPUINIT, &ed->ed_flags) )
    66.8 +    if ( test_bit(EDF_DONEFPUINIT, &ed->flags) )
    66.9          c->flags |= VGCF_I387_VALID;
   66.10      if ( KERNEL_MODE(ed, &ed->arch.guest_context.user_regs) )
   66.11          c->flags |= VGCF_IN_KERNEL;
    67.1 --- a/xen/arch/x86/domain.c	Tue May 03 14:42:00 2005 +0000
    67.2 +++ b/xen/arch/x86/domain.c	Tue May 10 08:49:59 2005 +0000
    67.3 @@ -37,6 +37,7 @@
    67.4  #include <asm/vmx.h>
    67.5  #include <asm/vmx_vmcs.h>
    67.6  #include <asm/msr.h>
    67.7 +#include <asm/physdev.h>
    67.8  #include <xen/kernel.h>
    67.9  #include <public/io/ioreq.h>
   67.10  #include <xen/multicall.h>
   67.11 @@ -251,7 +252,7 @@ void arch_do_createdomain(struct exec_do
   67.12  
   67.13          d->shared_info = (void *)alloc_xenheap_page();
   67.14          memset(d->shared_info, 0, PAGE_SIZE);
   67.15 -        ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
   67.16 +        ed->vcpu_info = &d->shared_info->vcpu_data[ed->id];
   67.17          SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
   67.18          machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
   67.19                                 PAGE_SHIFT] = INVALID_M2P_ENTRY;
   67.20 @@ -293,7 +294,7 @@ void arch_do_boot_vcpu(struct exec_domai
   67.21      struct domain *d = ed->domain;
   67.22      ed->arch.schedule_tail = d->exec_domain[0]->arch.schedule_tail;
   67.23      ed->arch.perdomain_ptes = 
   67.24 -        d->arch.mm_perdomain_pt + (ed->eid << PDPT_VCPU_SHIFT);
   67.25 +        d->arch.mm_perdomain_pt + (ed->id << PDPT_VCPU_SHIFT);
   67.26      ed->arch.flags = TF_kernel_mode;
   67.27  }
   67.28  
   67.29 @@ -363,7 +364,8 @@ static int vmx_final_setup_guest(
   67.30  
   67.31          /* Put the domain in shadow mode even though we're going to be using
   67.32           * the shared 1:1 page table initially. It shouldn't hurt */
   67.33 -        shadow_mode_enable(ed->domain, SHM_enable|SHM_translate|SHM_external);
   67.34 +        shadow_mode_enable(ed->domain,
   67.35 +                           SHM_enable|SHM_refcounts|SHM_translate|SHM_external);
   67.36      }
   67.37  
   67.38      return 0;
   67.39 @@ -396,9 +398,9 @@ int arch_set_info_guest(
   67.40                  return -EINVAL;
   67.41      }
   67.42  
   67.43 -    clear_bit(EDF_DONEFPUINIT, &ed->ed_flags);
   67.44 +    clear_bit(EDF_DONEFPUINIT, &ed->flags);
   67.45      if ( c->flags & VGCF_I387_VALID )
   67.46 -        set_bit(EDF_DONEFPUINIT, &ed->ed_flags);
   67.47 +        set_bit(EDF_DONEFPUINIT, &ed->flags);
   67.48  
   67.49      ed->arch.flags &= ~TF_kernel_mode;
   67.50      if ( c->flags & VGCF_IN_KERNEL )
   67.51 @@ -414,7 +416,7 @@ int arch_set_info_guest(
   67.52      if ( !IS_PRIV(d) )
   67.53          ed->arch.guest_context.user_regs.eflags &= 0xffffcfff;
   67.54  
   67.55 -    if ( test_bit(EDF_DONEINIT, &ed->ed_flags) )
   67.56 +    if ( test_bit(EDF_DONEINIT, &ed->flags) )
   67.57          return 0;
   67.58  
   67.59      if ( (rc = (int)set_fast_trap(ed, c->fast_trap_idx)) != 0 )
   67.60 @@ -425,13 +427,13 @@ int arch_set_info_guest(
   67.61      for ( i = 0; i < 8; i++ )
   67.62          (void)set_debugreg(ed, i, c->debugreg[i]);
   67.63  
   67.64 -    if ( ed->eid == 0 )
   67.65 +    if ( ed->id == 0 )
   67.66          d->vm_assist = c->vm_assist;
   67.67  
   67.68      phys_basetab = c->pt_base;
   67.69      ed->arch.guest_table = mk_pagetable(phys_basetab);
   67.70  
   67.71 -    if ( shadow_mode_enabled(d) )
   67.72 +    if ( shadow_mode_refcounts(d) )
   67.73      {
   67.74          if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
   67.75              return -EINVAL;
   67.76 @@ -477,7 +479,7 @@ int arch_set_info_guest(
   67.77      update_pagetables(ed);
   67.78      
   67.79      /* Don't redo final setup */
   67.80 -    set_bit(EDF_DONEINIT, &ed->ed_flags);
   67.81 +    set_bit(EDF_DONEINIT, &ed->flags);
   67.82  
   67.83      return 0;
   67.84  }
   67.85 @@ -795,7 +797,7 @@ void context_switch(struct exec_domain *
   67.86       * 'prev' (after this point, a dying domain's info structure may be freed
   67.87       * without warning). 
   67.88       */
   67.89 -    clear_bit(EDF_RUNNING, &prev->ed_flags);
   67.90 +    clear_bit(EDF_RUNNING, &prev->flags);
   67.91  
   67.92      schedule_tail(next);
   67.93      BUG();
   67.94 @@ -968,6 +970,8 @@ void domain_relinquish_resources(struct 
   67.95  
   67.96      BUG_ON(d->cpuset != 0);
   67.97  
   67.98 +    physdev_destroy_state(d);
   67.99 +
  67.100      ptwr_destroy(d);
  67.101  
  67.102      /* Release device mappings of other domains */
  67.103 @@ -978,17 +982,21 @@ void domain_relinquish_resources(struct 
  67.104      {
  67.105          if ( pagetable_val(ed->arch.guest_table) != 0 )
  67.106          {
  67.107 -            (shadow_mode_enabled(d) ? put_page : put_page_and_type)
  67.108 -                (&frame_table[pagetable_val(
  67.109 -                    ed->arch.guest_table) >> PAGE_SHIFT]);
  67.110 +            if ( shadow_mode_refcounts(d) )
  67.111 +                put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
  67.112 +            else
  67.113 +                put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
  67.114 +
  67.115              ed->arch.guest_table = mk_pagetable(0);
  67.116          }
  67.117  
  67.118          if ( pagetable_val(ed->arch.guest_table_user) != 0 )
  67.119          {
  67.120 -            (shadow_mode_enabled(d) ? put_page : put_page_and_type)
  67.121 -                (&frame_table[pagetable_val(
  67.122 -                    ed->arch.guest_table_user) >> PAGE_SHIFT]);
  67.123 +            if ( shadow_mode_refcounts(d) )
  67.124 +                put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
  67.125 +            else
  67.126 +                put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
  67.127 +
  67.128              ed->arch.guest_table_user = mk_pagetable(0);
  67.129          }
  67.130  
    68.1 --- a/xen/arch/x86/domain_build.c	Tue May 03 14:42:00 2005 +0000
    68.2 +++ b/xen/arch/x86/domain_build.c	Tue May 10 08:49:59 2005 +0000
    68.3 @@ -114,7 +114,7 @@ int construct_dom0(struct domain *d,
    68.4      /* Sanity! */
    68.5      if ( d->id != 0 ) 
    68.6          BUG();
    68.7 -    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
    68.8 +    if ( test_bit(DF_CONSTRUCTED, &d->flags) ) 
    68.9          BUG();
   68.10  
   68.11      memset(&dsi, 0, sizeof(struct domain_setup_info));
   68.12 @@ -540,14 +540,14 @@ int construct_dom0(struct domain *d,
   68.13      /* DOM0 gets access to everything. */
   68.14      physdev_init_dom0(d);
   68.15  
   68.16 -    set_bit(DF_CONSTRUCTED, &d->d_flags);
   68.17 +    set_bit(DF_CONSTRUCTED, &d->flags);
   68.18  
   68.19      new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
   68.20  
   68.21      if ( opt_dom0_shadow || opt_dom0_translate )
   68.22      {
   68.23          shadow_mode_enable(d, (opt_dom0_translate
   68.24 -                               ? SHM_enable | SHM_translate
   68.25 +                               ? SHM_enable | SHM_refcounts | SHM_translate
   68.26                                 : SHM_enable));
   68.27          if ( opt_dom0_translate )
   68.28          {
   68.29 @@ -570,7 +570,7 @@ int construct_dom0(struct domain *d,
   68.30              idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table),
   68.31                                                  __PAGE_HYPERVISOR);
   68.32              translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
   68.33 -                                pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT);
   68.34 +                                pagetable_get_pfn(ed->arch.guest_table));
   68.35              idle_pg_table[1] = root_empty();
   68.36              local_flush_tlb();
   68.37          }
    69.1 --- a/xen/arch/x86/i387.c	Tue May 03 14:42:00 2005 +0000
    69.2 +++ b/xen/arch/x86/i387.c	Tue May 10 08:49:59 2005 +0000
    69.3 @@ -18,7 +18,7 @@ void init_fpu(void)
    69.4      __asm__ __volatile__ ( "fninit" );
    69.5      if ( cpu_has_xmm )
    69.6          load_mxcsr(0x1f80);
    69.7 -    set_bit(EDF_DONEFPUINIT, &current->ed_flags);
    69.8 +    set_bit(EDF_DONEFPUINIT, &current->flags);
    69.9  }
   69.10  
   69.11  void save_init_fpu(struct exec_domain *tsk)
   69.12 @@ -28,7 +28,7 @@ void save_init_fpu(struct exec_domain *t
   69.13       * This causes us to set the real flag, so we'll need
   69.14       * to temporarily clear it while saving f-p state.
   69.15       */
   69.16 -    if ( test_bit(EDF_GUEST_STTS, &tsk->ed_flags) )
   69.17 +    if ( test_bit(EDF_GUEST_STTS, &tsk->flags) )
   69.18          clts();
   69.19  
   69.20      if ( cpu_has_fxsr )
   69.21 @@ -40,7 +40,7 @@ void save_init_fpu(struct exec_domain *t
   69.22              "fnsave %0 ; fwait"
   69.23              : "=m" (tsk->arch.guest_context.fpu_ctxt) );
   69.24  
   69.25 -    clear_bit(EDF_USEDFPU, &tsk->ed_flags);
   69.26 +    clear_bit(EDF_USEDFPU, &tsk->flags);
   69.27      stts();
   69.28  }
   69.29  
    70.1 --- a/xen/arch/x86/i8259.c	Tue May 03 14:42:00 2005 +0000
    70.2 +++ b/xen/arch/x86/i8259.c	Tue May 10 08:49:59 2005 +0000
    70.3 @@ -45,7 +45,7 @@ BUILD_COMMON_IRQ()
    70.4  
    70.5  /*
    70.6   * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
    70.7 - * (these are usually mapped to vectors 0x30-0x3f)
    70.8 + * (these are usually mapped to vectors 0x20-0x2f)
    70.9   */
   70.10  BUILD_16_IRQS(0x0)
   70.11  
   70.12 @@ -58,7 +58,7 @@ BUILD_16_IRQS(0x0)
   70.13   * of these. Plus, more powerful systems might have more than 64
   70.14   * IO-APIC registers.
   70.15   *
   70.16 - * (these are usually mapped into the 0x30-0xff vector range)
   70.17 + * (these are usually mapped into the 0x20-0xff vector range)
   70.18   */
   70.19  BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
   70.20  BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
   70.21 @@ -101,7 +101,7 @@ BUILD_SMP_INTERRUPT(spurious_interrupt,S
   70.22  	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
   70.23  	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
   70.24  
   70.25 -    void *interrupt[NR_IRQS] = {
   70.26 +    void (*interrupt[NR_IRQS])(void) = {
   70.27  	IRQLIST_16(0x0),
   70.28  
   70.29  #ifdef CONFIG_X86_IO_APIC
   70.30 @@ -341,7 +341,7 @@ void __init init_8259A(int auto_eoi)
   70.31       * outb_p - this has to work on a wide range of PC hardware.
   70.32       */
   70.33      outb_p(0x11, 0x20);	/* ICW1: select 8259A-1 init */
   70.34 -    outb_p(0x30 + 0, 0x21);	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
   70.35 +    outb_p(0x20 + 0, 0x21);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
   70.36      outb_p(0x04, 0x21);	/* 8259A-1 (the master) has a slave on IR2 */
   70.37      if (auto_eoi)
   70.38          outb_p(0x03, 0x21);	/* master does Auto EOI */
   70.39 @@ -349,7 +349,7 @@ void __init init_8259A(int auto_eoi)
   70.40          outb_p(0x01, 0x21);	/* master expects normal EOI */
   70.41  
   70.42      outb_p(0x11, 0xA0);	/* ICW1: select 8259A-2 init */
   70.43 -    outb_p(0x30 + 8, 0xA1);	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
   70.44 +    outb_p(0x20 + 8, 0xA1);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
   70.45      outb_p(0x02, 0xA1);	/* 8259A-2 is a slave on master's IR2 */
   70.46      outb_p(0x01, 0xA1);	/* (slave's support for AEOI in flat mode
   70.47                             is to be investigated) */
    71.1 --- a/xen/arch/x86/idle0_task.c	Tue May 03 14:42:00 2005 +0000
    71.2 +++ b/xen/arch/x86/idle0_task.c	Tue May 10 08:49:59 2005 +0000
    71.3 @@ -5,7 +5,7 @@
    71.4  
    71.5  struct domain idle0_domain = {
    71.6      id:          IDLE_DOMAIN_ID,
    71.7 -    d_flags:     1<<DF_IDLETASK,
    71.8 +    flags:       1<<DF_IDLETASK,
    71.9      refcnt:      ATOMIC_INIT(1)
   71.10  };
   71.11  
    72.1 --- a/xen/arch/x86/io_apic.c	Tue May 03 14:42:00 2005 +0000
    72.2 +++ b/xen/arch/x86/io_apic.c	Tue May 10 08:49:59 2005 +0000
    72.3 @@ -25,26 +25,20 @@
    72.4  #include <xen/irq.h>
    72.5  #include <xen/delay.h>
    72.6  #include <xen/sched.h>
    72.7 -#include <xen/config.h>
    72.8 +#include <xen/acpi.h>
    72.9 +#include <asm/io.h>
   72.10  #include <asm/mc146818rtc.h>
   72.11 -#include <asm/io.h>
   72.12 -#include <asm/mpspec.h>
   72.13 -#include <asm/io_apic.h>
   72.14  #include <asm/smp.h>
   72.15  #include <asm/desc.h>
   72.16 -#include <asm/smpboot.h>
   72.17 -
   72.18 -#ifdef CONFIG_X86_IO_APIC
   72.19 -
   72.20 -#undef APIC_LOCKUP_DEBUG
   72.21 +#include <mach_apic.h>
   72.22 +#include <io_ports.h>
   72.23  
   72.24 -#define APIC_LOCKUP_DEBUG
   72.25 +int (*ioapic_renumber_irq)(int ioapic, int irq);
   72.26 +atomic_t irq_mis_count;
   72.27  
   72.28 -static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
   72.29 +static DEFINE_SPINLOCK(ioapic_lock);
   72.30  
   72.31 -unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
   72.32 -unsigned char int_delivery_mode = dest_LowestPrio;
   72.33 -
   72.34 +int skip_ioapic_setup;
   72.35  
   72.36  /*
   72.37   * # of IRQ routing registers
   72.38 @@ -69,12 +63,20 @@ static struct irq_pin_list {
   72.39  	int apic, pin, next;
   72.40  } irq_2_pin[PIN_MAP_SIZE];
   72.41  
   72.42 +int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
   72.43 +#ifdef CONFIG_PCI_MSI
   72.44 +#define vector_to_irq(vector) 	\
   72.45 +	(platform_legacy_irq(vector) ? vector : vector_irq[vector])
   72.46 +#else
   72.47 +#define vector_to_irq(vector)	(vector)
   72.48 +#endif
   72.49 +
   72.50  /*
   72.51   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
   72.52   * shared ISA-space IRQs, so we have to support them. We are super
   72.53   * fast in the common case, and fast for shared ISA-space IRQs.
   72.54   */
   72.55 -static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
   72.56 +static void add_pin_to_irq(unsigned int irq, int apic, int pin)
   72.57  {
   72.58  	static int first_free_entry = NR_IRQS;
   72.59  	struct irq_pin_list *entry = irq_2_pin + irq;
   72.60 @@ -112,36 +114,48 @@ static void __init replace_pin_at_irq(un
   72.61  	}
   72.62  }
   72.63  
   72.64 -#define __DO_ACTION(R, ACTION, FINAL)					\
   72.65 -									\
   72.66 -{									\
   72.67 -	int pin;							\
   72.68 -	struct irq_pin_list *entry = irq_2_pin + irq;			\
   72.69 -									\
   72.70 -	for (;;) {							\
   72.71 -		unsigned int reg;					\
   72.72 -		pin = entry->pin;					\
   72.73 -		if (pin == -1)						\
   72.74 -			break;						\
   72.75 -		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
   72.76 -		reg ACTION;						\
   72.77 -		io_apic_write(entry->apic, 0x10 + R + pin*2, reg);	\
   72.78 -		if (!entry->next)					\
   72.79 -			break;						\
   72.80 -		entry = irq_2_pin + entry->next;			\
   72.81 -	}								\
   72.82 -	FINAL;								\
   72.83 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
   72.84 +{
   72.85 +	struct irq_pin_list *entry = irq_2_pin + irq;
   72.86 +	unsigned int pin, reg;
   72.87 +
   72.88 +	for (;;) {
   72.89 +		pin = entry->pin;
   72.90 +		if (pin == -1)
   72.91 +			break;
   72.92 +		reg = io_apic_read(entry->apic, 0x10 + pin*2);
   72.93 +		reg &= ~disable;
   72.94 +		reg |= enable;
   72.95 +		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
   72.96 +		if (!entry->next)
   72.97 +			break;
   72.98 +		entry = irq_2_pin + entry->next;
   72.99 +	}
  72.100  }
  72.101  
  72.102 -#define DO_ACTION(name,R,ACTION, FINAL)					\
  72.103 -									\
  72.104 -	static void name##_IO_APIC_irq (unsigned int irq)		\
  72.105 -	__DO_ACTION(R, ACTION, FINAL)
  72.106 +/* mask = 1 */
  72.107 +static void __mask_IO_APIC_irq (unsigned int irq)
  72.108 +{
  72.109 +	__modify_IO_APIC_irq(irq, 0x00010000, 0);
  72.110 +}
  72.111  
  72.112 -DO_ACTION( __mask,    0, |= 0x00010000, io_apic_sync(entry->apic) )
  72.113 -DO_ACTION( __unmask,  0, &= 0xfffeffff, )
  72.114 -DO_ACTION( __edge,    0, &= 0xffff7fff, )
  72.115 -DO_ACTION( __level,   0, |= 0x00008000, )
  72.116 +/* mask = 0 */
  72.117 +static void __unmask_IO_APIC_irq (unsigned int irq)
  72.118 +{
  72.119 +	__modify_IO_APIC_irq(irq, 0, 0x00010000);
  72.120 +}
  72.121 +
  72.122 +/* mask = 1, trigger = 0 */
  72.123 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
  72.124 +{
  72.125 +	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
  72.126 +}
  72.127 +
  72.128 +/* mask = 0, trigger = 1 */
  72.129 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
  72.130 +{
  72.131 +	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
  72.132 +}
  72.133  
  72.134  static void mask_IO_APIC_irq (unsigned int irq)
  72.135  {
  72.136 @@ -165,7 +179,7 @@ void clear_IO_APIC_pin(unsigned int apic
  72.137  {
  72.138  	struct IO_APIC_route_entry entry;
  72.139  	unsigned long flags;
  72.140 -
  72.141 +	
  72.142  	/* Check delivery_mode to be sure we're not clearing an SMI pin */
  72.143  	spin_lock_irqsave(&ioapic_lock, flags);
  72.144  	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
  72.145 @@ -194,85 +208,467 @@ static void clear_IO_APIC (void)
  72.146  			clear_IO_APIC_pin(apic, pin);
  72.147  }
  72.148  
  72.149 -static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
  72.150 +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
  72.151  {
  72.152  	unsigned long flags;
  72.153 -
  72.154 -	/*
  72.155 -	 * Only the first 8 bits are valid.
  72.156 -	 */
  72.157 -	mask = mask << 24;
  72.158 +	int pin;
  72.159 +	struct irq_pin_list *entry = irq_2_pin + irq;
  72.160 +	unsigned int apicid_value;
  72.161 +	
  72.162 +	apicid_value = cpu_mask_to_apicid(cpumask);
  72.163 +	/* Prepare to do the io_apic_write */
  72.164 +	apicid_value = apicid_value << 24;
  72.165  	spin_lock_irqsave(&ioapic_lock, flags);
  72.166 -	__DO_ACTION(1, = mask, )
  72.167 +	for (;;) {
  72.168 +		pin = entry->pin;
  72.169 +		if (pin == -1)
  72.170 +			break;
  72.171 +		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
  72.172 +		if (!entry->next)
  72.173 +			break;
  72.174 +		entry = irq_2_pin + entry->next;
  72.175 +	}
  72.176  	spin_unlock_irqrestore(&ioapic_lock, flags);
  72.177  }
  72.178  
  72.179 -#define balance_irq(_irq) ((void)0)
  72.180 -
  72.181 -/*
  72.182 - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  72.183 - * specific CPU-side IRQs.
  72.184 - */
  72.185 -
  72.186 -#define MAX_PIRQS 8
  72.187 -int pirq_entries [MAX_PIRQS];
  72.188 -int pirqs_enabled;
  72.189 +#if defined(CONFIG_IRQBALANCE)
  72.190 +# include <asm/processor.h>	/* kernel_thread() */
  72.191 +# include <xen/kernel_stat.h>	/* kstat */
  72.192 +# include <xen/slab.h>		/* kmalloc() */
  72.193 +# include <xen/timer.h>	/* time_after() */
  72.194 + 
  72.195 +# ifdef CONFIG_BALANCED_IRQ_DEBUG
  72.196 +#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
  72.197 +#  define Dprintk(x...) do { TDprintk(x); } while (0)
  72.198 +# else
  72.199 +#  define TDprintk(x...) 
  72.200 +#  define Dprintk(x...) 
  72.201 +# endif
  72.202  
  72.203 -int skip_ioapic_setup;
  72.204 -#if 0
  72.205 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
  72.206  
  72.207 -static int __init noioapic_setup(char *str)
  72.208 +#define IRQBALANCE_CHECK_ARCH -999
  72.209 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
  72.210 +static int physical_balance = 0;
  72.211 +
  72.212 +struct irq_cpu_info {
  72.213 +	unsigned long * last_irq;
  72.214 +	unsigned long * irq_delta;
  72.215 +	unsigned long irq;
  72.216 +} irq_cpu_data[NR_CPUS];
  72.217 +
  72.218 +#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
  72.219 +#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
  72.220 +#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
  72.221 +
  72.222 +#define IDLE_ENOUGH(cpu,now) \
  72.223 +		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
  72.224 +
  72.225 +#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
  72.226 +
  72.227 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
  72.228 +
  72.229 +#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
  72.230 +#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
  72.231 +#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
  72.232 +#define BALANCED_IRQ_LESS_DELTA		(HZ)
  72.233 +
  72.234 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
  72.235 +
  72.236 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
  72.237 +			unsigned long now, int direction)
  72.238  {
  72.239 -	skip_ioapic_setup = 1;
  72.240 -	return 1;
  72.241 -}
  72.242 +	int search_idle = 1;
  72.243 +	int cpu = curr_cpu;
  72.244  
  72.245 -__setup("noapic", noioapic_setup);
  72.246 +	goto inside;
  72.247  
  72.248 -static int __init ioapic_setup(char *str)
  72.249 -{
  72.250 -	skip_ioapic_setup = 0;
  72.251 -	return 1;
  72.252 +	do {
  72.253 +		if (unlikely(cpu == curr_cpu))
  72.254 +			search_idle = 0;
  72.255 +inside:
  72.256 +		if (direction == 1) {
  72.257 +			cpu++;
  72.258 +			if (cpu >= NR_CPUS)
  72.259 +				cpu = 0;
  72.260 +		} else {
  72.261 +			cpu--;
  72.262 +			if (cpu == -1)
  72.263 +				cpu = NR_CPUS-1;
  72.264 +		}
  72.265 +	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
  72.266 +			(search_idle && !IDLE_ENOUGH(cpu,now)));
  72.267 +
  72.268 +	return cpu;
  72.269  }
  72.270  
  72.271 -__setup("apic", ioapic_setup);
  72.272 -
  72.273 -
  72.274 -
  72.275 -static int __init ioapic_pirq_setup(char *str)
  72.276 +static inline void balance_irq(int cpu, int irq)
  72.277  {
  72.278 -	int i, max;
  72.279 -	int ints[MAX_PIRQS+1];
  72.280 -
  72.281 -	get_options(str, ARRAY_SIZE(ints), ints);
  72.282 -
  72.283 -	for (i = 0; i < MAX_PIRQS; i++)
  72.284 -		pirq_entries[i] = -1;
  72.285 +	unsigned long now = jiffies;
  72.286 +	cpumask_t allowed_mask;
  72.287 +	unsigned int new_cpu;
  72.288 +		
  72.289 +	if (irqbalance_disabled)
  72.290 +		return; 
  72.291  
  72.292 -	pirqs_enabled = 1;
  72.293 -	printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
  72.294 -	max = MAX_PIRQS;
  72.295 -	if (ints[0] < MAX_PIRQS)
  72.296 -		max = ints[0];
  72.297 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
  72.298 +	new_cpu = move(cpu, allowed_mask, now, 1);
  72.299 +	if (cpu != new_cpu) {
  72.300 +		irq_desc_t *desc = irq_desc + irq;
  72.301 +		unsigned long flags;
  72.302  
  72.303 -	for (i = 0; i < max; i++) {
  72.304 -		printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
  72.305 -		/*
  72.306 -		 * PIRQs are mapped upside down, usually.
  72.307 -		 */
  72.308 -		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
  72.309 +		spin_lock_irqsave(&desc->lock, flags);
  72.310 +		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
  72.311 +		spin_unlock_irqrestore(&desc->lock, flags);
  72.312  	}
  72.313 -	return 1;
  72.314  }
  72.315  
  72.316 -__setup("pirq=", ioapic_pirq_setup);
  72.317 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
  72.318 +{
  72.319 +	int i, j;
  72.320 +	Dprintk("Rotating IRQs among CPUs.\n");
  72.321 +	for (i = 0; i < NR_CPUS; i++) {
  72.322 +		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
  72.323 +			if (!irq_desc[j].action)
  72.324 +				continue;
  72.325 +			/* Is it a significant load ?  */
  72.326 +			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
  72.327 +						useful_load_threshold)
  72.328 +				continue;
  72.329 +			balance_irq(i, j);
  72.330 +		}
  72.331 +	}
  72.332 +	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  72.333 +		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  72.334 +	return;
  72.335 +}
  72.336  
  72.337 -#endif
  72.338 +static void do_irq_balance(void)
  72.339 +{
  72.340 +	int i, j;
  72.341 +	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
  72.342 +	unsigned long move_this_load = 0;
  72.343 +	int max_loaded = 0, min_loaded = 0;
  72.344 +	int load;
  72.345 +	unsigned long useful_load_threshold = balanced_irq_interval + 10;
  72.346 +	int selected_irq;
  72.347 +	int tmp_loaded, first_attempt = 1;
  72.348 +	unsigned long tmp_cpu_irq;
  72.349 +	unsigned long imbalance = 0;
  72.350 +	cpumask_t allowed_mask, target_cpu_mask, tmp;
  72.351 +
  72.352 +	for (i = 0; i < NR_CPUS; i++) {
  72.353 +		int package_index;
  72.354 +		CPU_IRQ(i) = 0;
  72.355 +		if (!cpu_online(i))
  72.356 +			continue;
  72.357 +		package_index = CPU_TO_PACKAGEINDEX(i);
  72.358 +		for (j = 0; j < NR_IRQS; j++) {
  72.359 +			unsigned long value_now, delta;
  72.360 +			/* Is this an active IRQ? */
  72.361 +			if (!irq_desc[j].action)
  72.362 +				continue;
  72.363 +			if ( package_index == i )
  72.364 +				IRQ_DELTA(package_index,j) = 0;
  72.365 +			/* Determine the total count per processor per IRQ */
  72.366 +			value_now = (unsigned long) kstat_cpu(i).irqs[j];
  72.367 +
  72.368 +			/* Determine the activity per processor per IRQ */
  72.369 +			delta = value_now - LAST_CPU_IRQ(i,j);
  72.370 +
  72.371 +			/* Update last_cpu_irq[][] for the next time */
  72.372 +			LAST_CPU_IRQ(i,j) = value_now;
  72.373 +
  72.374 +			/* Ignore IRQs whose rate is less than the clock */
  72.375 +			if (delta < useful_load_threshold)
  72.376 +				continue;
  72.377 +			/* update the load for the processor or package total */
  72.378 +			IRQ_DELTA(package_index,j) += delta;
  72.379 +
  72.380 +			/* Keep track of the higher numbered sibling as well */
  72.381 +			if (i != package_index)
  72.382 +				CPU_IRQ(i) += delta;
  72.383 +			/*
  72.384 +			 * We have sibling A and sibling B in the package
  72.385 +			 *
  72.386 +			 * cpu_irq[A] = load for cpu A + load for cpu B
  72.387 +			 * cpu_irq[B] = load for cpu B
  72.388 +			 */
  72.389 +			CPU_IRQ(package_index) += delta;
  72.390 +		}
  72.391 +	}
  72.392 +	/* Find the least loaded processor package */
  72.393 +	for (i = 0; i < NR_CPUS; i++) {
  72.394 +		if (!cpu_online(i))
  72.395 +			continue;
  72.396 +		if (i != CPU_TO_PACKAGEINDEX(i))
  72.397 +			continue;
  72.398 +		if (min_cpu_irq > CPU_IRQ(i)) {
  72.399 +			min_cpu_irq = CPU_IRQ(i);
  72.400 +			min_loaded = i;
  72.401 +		}
  72.402 +	}
  72.403 +	max_cpu_irq = ULONG_MAX;
  72.404 +
  72.405 +tryanothercpu:
  72.406 +	/* Look for heaviest loaded processor.
  72.407 +	 * We may come back to get the next heaviest loaded processor.
  72.408 +	 * Skip processors with trivial loads.
  72.409 +	 */
  72.410 +	tmp_cpu_irq = 0;
  72.411 +	tmp_loaded = -1;
  72.412 +	for (i = 0; i < NR_CPUS; i++) {
  72.413 +		if (!cpu_online(i))
  72.414 +			continue;
  72.415 +		if (i != CPU_TO_PACKAGEINDEX(i))
  72.416 +			continue;
  72.417 +		if (max_cpu_irq <= CPU_IRQ(i)) 
  72.418 +			continue;
  72.419 +		if (tmp_cpu_irq < CPU_IRQ(i)) {
  72.420 +			tmp_cpu_irq = CPU_IRQ(i);
  72.421 +			tmp_loaded = i;
  72.422 +		}
  72.423 +	}
  72.424 +
  72.425 +	if (tmp_loaded == -1) {
   72.426 +	 /* A small number of heavy interrupt sources can end up
   72.427 +	  * loading some of the cpus too much. We use Ingo's original
   72.428 +	  * approach to rotate them around.
   72.429 +	  */
  72.430 +		if (!first_attempt && imbalance >= useful_load_threshold) {
  72.431 +			rotate_irqs_among_cpus(useful_load_threshold);
  72.432 +			return;
  72.433 +		}
  72.434 +		goto not_worth_the_effort;
  72.435 +	}
  72.436 +	
  72.437 +	first_attempt = 0;		/* heaviest search */
  72.438 +	max_cpu_irq = tmp_cpu_irq;	/* load */
  72.439 +	max_loaded = tmp_loaded;	/* processor */
  72.440 +	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
  72.441 +	
  72.442 +	Dprintk("max_loaded cpu = %d\n", max_loaded);
  72.443 +	Dprintk("min_loaded cpu = %d\n", min_loaded);
  72.444 +	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
  72.445 +	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
  72.446 +	Dprintk("load imbalance = %lu\n", imbalance);
  72.447 +
  72.448 +	/* if imbalance is less than approx 10% of max load, then
  72.449 +	 * observe diminishing returns action. - quit
  72.450 +	 */
  72.451 +	if (imbalance < (max_cpu_irq >> 3)) {
  72.452 +		Dprintk("Imbalance too trivial\n");
  72.453 +		goto not_worth_the_effort;
  72.454 +	}
  72.455 +
  72.456 +tryanotherirq:
  72.457 +	/* if we select an IRQ to move that can't go where we want, then
  72.458 +	 * see if there is another one to try.
  72.459 +	 */
  72.460 +	move_this_load = 0;
  72.461 +	selected_irq = -1;
  72.462 +	for (j = 0; j < NR_IRQS; j++) {
  72.463 +		/* Is this an active IRQ? */
  72.464 +		if (!irq_desc[j].action)
  72.465 +			continue;
  72.466 +		if (imbalance <= IRQ_DELTA(max_loaded,j))
  72.467 +			continue;
  72.468 +		/* Try to find the IRQ that is closest to the imbalance
  72.469 +		 * without going over.
  72.470 +		 */
  72.471 +		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
  72.472 +			move_this_load = IRQ_DELTA(max_loaded,j);
  72.473 +			selected_irq = j;
  72.474 +		}
  72.475 +	}
  72.476 +	if (selected_irq == -1) {
  72.477 +		goto tryanothercpu;
  72.478 +	}
  72.479 +
  72.480 +	imbalance = move_this_load;
  72.481 +	
   72.482 +	/* For physical_balance case, we accumulated both load
  72.483 +	 * values in the one of the siblings cpu_irq[],
  72.484 +	 * to use the same code for physical and logical processors
  72.485 +	 * as much as possible. 
  72.486 +	 *
  72.487 +	 * NOTE: the cpu_irq[] array holds the sum of the load for
  72.488 +	 * sibling A and sibling B in the slot for the lowest numbered
  72.489 +	 * sibling (A), _AND_ the load for sibling B in the slot for
  72.490 +	 * the higher numbered sibling.
  72.491 +	 *
  72.492 +	 * We seek the least loaded sibling by making the comparison
  72.493 +	 * (A+B)/2 vs B
  72.494 +	 */
  72.495 +	load = CPU_IRQ(min_loaded) >> 1;
  72.496 +	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
  72.497 +		if (load > CPU_IRQ(j)) {
  72.498 +			/* This won't change cpu_sibling_map[min_loaded] */
  72.499 +			load = CPU_IRQ(j);
  72.500 +			min_loaded = j;
  72.501 +		}
  72.502 +	}
  72.503 +
  72.504 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
  72.505 +	target_cpu_mask = cpumask_of_cpu(min_loaded);
  72.506 +	cpus_and(tmp, target_cpu_mask, allowed_mask);
  72.507 +
  72.508 +	if (!cpus_empty(tmp)) {
  72.509 +		irq_desc_t *desc = irq_desc + selected_irq;
  72.510 +		unsigned long flags;
  72.511 +
  72.512 +		Dprintk("irq = %d moved to cpu = %d\n",
  72.513 +				selected_irq, min_loaded);
  72.514 +		/* mark for change destination */
  72.515 +		spin_lock_irqsave(&desc->lock, flags);
  72.516 +		pending_irq_balance_cpumask[selected_irq] =
  72.517 +					cpumask_of_cpu(min_loaded);
  72.518 +		spin_unlock_irqrestore(&desc->lock, flags);
  72.519 +		/* Since we made a change, come back sooner to 
  72.520 +		 * check for more variation.
  72.521 +		 */
  72.522 +		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  72.523 +			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  72.524 +		return;
  72.525 +	}
  72.526 +	goto tryanotherirq;
  72.527 +
  72.528 +not_worth_the_effort:
  72.529 +	/*
  72.530 +	 * if we did not find an IRQ to move, then adjust the time interval
  72.531 +	 * upward
  72.532 +	 */
  72.533 +	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
  72.534 +		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
  72.535 +	Dprintk("IRQ worth rotating not found\n");
  72.536 +	return;
  72.537 +}
  72.538 +
  72.539 +static int balanced_irq(void *unused)
  72.540 +{
  72.541 +	int i;
  72.542 +	unsigned long prev_balance_time = jiffies;
  72.543 +	long time_remaining = balanced_irq_interval;
  72.544 +
  72.545 +	daemonize("kirqd");
  72.546 +	
  72.547 +	/* push everything to CPU 0 to give us a starting point.  */
  72.548 +	for (i = 0 ; i < NR_IRQS ; i++) {
  72.549 +		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
  72.550 +	}
  72.551 +
  72.552 +	for ( ; ; ) {
  72.553 +		set_current_state(TASK_INTERRUPTIBLE);
  72.554 +		time_remaining = schedule_timeout(time_remaining);
  72.555 +		try_to_freeze(PF_FREEZE);
  72.556 +		if (time_after(jiffies,
  72.557 +				prev_balance_time+balanced_irq_interval)) {
  72.558 +			do_irq_balance();
  72.559 +			prev_balance_time = jiffies;
  72.560 +			time_remaining = balanced_irq_interval;
  72.561 +		}
  72.562 +	}
  72.563 +	return 0;
  72.564 +}
  72.565 +
  72.566 +static int __init balanced_irq_init(void)
  72.567 +{
  72.568 +	int i;
  72.569 +	struct cpuinfo_x86 *c;
  72.570 +	cpumask_t tmp;
  72.571 +
  72.572 +	cpus_shift_right(tmp, cpu_online_map, 2);
  72.573 +        c = &boot_cpu_data;
  72.574 +	/* When not overwritten by the command line ask subarchitecture. */
  72.575 +	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
  72.576 +		irqbalance_disabled = NO_BALANCE_IRQ;
  72.577 +	if (irqbalance_disabled)
  72.578 +		return 0;
  72.579 +	
  72.580 +	 /* disable irqbalance completely if there is only one processor online */
  72.581 +	if (num_online_cpus() < 2) {
  72.582 +		irqbalance_disabled = 1;
  72.583 +		return 0;
  72.584 +	}
  72.585 +	/*
  72.586 +	 * Enable physical balance only if more than 1 physical processor
  72.587 +	 * is present
  72.588 +	 */
  72.589 +	if (smp_num_siblings > 1 && !cpus_empty(tmp))
  72.590 +		physical_balance = 1;
  72.591 +
  72.592 +	for (i = 0; i < NR_CPUS; i++) {
  72.593 +		if (!cpu_online(i))
  72.594 +			continue;
  72.595 +		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  72.596 +		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  72.597 +		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
  72.598 +			printk(KERN_ERR "balanced_irq_init: out of memory");
  72.599 +			goto failed;
  72.600 +		}
  72.601 +		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
  72.602 +		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
  72.603 +	}
  72.604 +	
  72.605 +	printk(KERN_INFO "Starting balanced_irq\n");
  72.606 +	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
  72.607 +		return 0;
  72.608 +	else 
  72.609 +		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
  72.610 +failed:
  72.611 +	for (i = 0; i < NR_CPUS; i++) {
  72.612 +		if(irq_cpu_data[i].irq_delta)
  72.613 +			kfree(irq_cpu_data[i].irq_delta);
  72.614 +		if(irq_cpu_data[i].last_irq)
  72.615 +			kfree(irq_cpu_data[i].last_irq);
  72.616 +	}
  72.617 +	return 0;
  72.618 +}
  72.619 +
  72.620 +int __init irqbalance_disable(char *str)
  72.621 +{
  72.622 +	irqbalance_disabled = 1;
  72.623 +	return 0;
  72.624 +}
  72.625 +
  72.626 +__setup("noirqbalance", irqbalance_disable);
  72.627 +
  72.628 +static inline void move_irq(int irq)
  72.629 +{
  72.630 +	/* note - we hold the desc->lock */
  72.631 +	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
  72.632 +		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
  72.633 +		cpus_clear(pending_irq_balance_cpumask[irq]);
  72.634 +	}
  72.635 +}
  72.636 +
  72.637 +late_initcall(balanced_irq_init);
  72.638 +
  72.639 +#else /* !CONFIG_IRQBALANCE */
  72.640 +static inline void move_irq(int irq) { }
  72.641 +#endif /* CONFIG_IRQBALANCE */
  72.642 +
  72.643 +#ifndef CONFIG_SMP
  72.644 +void fastcall send_IPI_self(int vector)
  72.645 +{
  72.646 +	unsigned int cfg;
  72.647 +
  72.648 +	/*
  72.649 +	 * Wait for idle.
  72.650 +	 */
  72.651 +	apic_wait_icr_idle();
  72.652 +	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
  72.653 +	/*
  72.654 +	 * Send the IPI. The write to APIC_ICR fires this off.
  72.655 +	 */
  72.656 +	apic_write_around(APIC_ICR, cfg);
  72.657 +}
  72.658 +#endif /* !CONFIG_SMP */
  72.659  
  72.660  /*
  72.661   * Find the IRQ entry number of a certain pin.
  72.662   */
  72.663 -static int __init find_irq_entry(int apic, int pin, int type)
  72.664 +static int find_irq_entry(int apic, int pin, int type)
  72.665  {
  72.666  	int i;
  72.667  
  72.668 @@ -289,7 +685,7 @@ static int __init find_irq_entry(int api
  72.669  /*
  72.670   * Find the pin to which IRQ[irq] (ISA) is connected
  72.671   */
  72.672 -static int __init find_isa_irq_pin(int irq, int type)
  72.673 +static int find_isa_irq_pin(int irq, int type)
  72.674  {
  72.675  	int i;
  72.676  
  72.677 @@ -298,7 +694,9 @@ static int __init find_isa_irq_pin(int i
  72.678  
  72.679  		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
  72.680  		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
  72.681 -		     mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
  72.682 +		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
  72.683 +		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
  72.684 +		    ) &&
  72.685  		    (mp_irqs[i].mpc_irqtype == type) &&
  72.686  		    (mp_irqs[i].mpc_srcbusirq == irq))
  72.687  
  72.688 @@ -317,9 +715,9 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
  72.689  {
  72.690  	int apic, i, best_guess = -1;
  72.691  
  72.692 -	Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
  72.693 -		bus, slot, pin);
  72.694 -	if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) {
  72.695 +	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
  72.696 +		"slot:%d, pin:%d.\n", bus, slot, pin);
  72.697 +	if (mp_bus_id_to_pci_bus[bus] == -1) {
  72.698  		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
  72.699  		return -1;
  72.700  	}
  72.701 @@ -354,15 +752,40 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
  72.702  }
  72.703  
  72.704  /*
  72.705 + * This function currently is only a helper for the i386 smp boot process where 
  72.706 + * we need to reprogram the ioredtbls to cater for the cpus which have come online
  72.707 + * so mask in all cases should simply be TARGET_CPUS
  72.708 + */
  72.709 +void __init setup_ioapic_dest(void)
  72.710 +{
  72.711 +	int pin, ioapic, irq, irq_entry;
  72.712 +
  72.713 +	if (skip_ioapic_setup == 1)
  72.714 +		return;
  72.715 +
  72.716 +	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
  72.717 +		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
  72.718 +			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
  72.719 +			if (irq_entry == -1)
  72.720 +				continue;
  72.721 +			irq = pin_2_irq(irq_entry, ioapic, pin);
  72.722 +			set_ioapic_affinity_irq(irq, TARGET_CPUS);
  72.723 +		}
  72.724 +
  72.725 +	}
  72.726 +}
  72.727 +
  72.728 +/*
  72.729   * EISA Edge/Level control register, ELCR
  72.730   */
  72.731 -static int __init EISA_ELCR(unsigned int irq)
  72.732 +static int EISA_ELCR(unsigned int irq)
  72.733  {
  72.734  	if (irq < 16) {
  72.735  		unsigned int port = 0x4d0 + (irq >> 3);
  72.736  		return (inb(port) >> (irq & 7)) & 1;
  72.737  	}
  72.738 -	printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
  72.739 +	apic_printk(APIC_VERBOSE, KERN_INFO
  72.740 +			"Broken MPtable reports ISA irq %d\n", irq);
  72.741  	return 0;
  72.742  }
  72.743  
  72.744 @@ -392,6 +815,12 @@ static int __init EISA_ELCR(unsigned int
  72.745  #define default_MCA_trigger(idx)	(1)
  72.746  #define default_MCA_polarity(idx)	(0)
  72.747  
  72.748 +/* NEC98 interrupts are always polarity zero edge triggered,
  72.749 + * when listed as conforming in the MP table. */
  72.750 +
  72.751 +#define default_NEC98_trigger(idx)     (0)
  72.752 +#define default_NEC98_polarity(idx)    (0)
  72.753 +
  72.754  static int __init MPBIOS_polarity(int idx)
  72.755  {
  72.756  	int bus = mp_irqs[idx].mpc_srcbus;
  72.757 @@ -426,6 +855,11 @@ static int __init MPBIOS_polarity(int id
  72.758  					polarity = default_MCA_polarity(idx);
  72.759  					break;
  72.760  				}
  72.761 +				case MP_BUS_NEC98: /* NEC 98 pin */
  72.762 +				{
  72.763 +					polarity = default_NEC98_polarity(idx);
  72.764 +					break;
  72.765 +				}
  72.766  				default:
  72.767  				{
  72.768  					printk(KERN_WARNING "broken BIOS!!\n");
  72.769 @@ -461,7 +895,7 @@ static int __init MPBIOS_polarity(int id
  72.770  	return polarity;
  72.771  }
  72.772  
  72.773 -static int __init MPBIOS_trigger(int idx)
  72.774 +static int MPBIOS_trigger(int idx)
  72.775  {
  72.776  	int bus = mp_irqs[idx].mpc_srcbus;
  72.777  	int trigger;
  72.778 @@ -495,6 +929,11 @@ static int __init MPBIOS_trigger(int idx
  72.779  					trigger = default_MCA_trigger(idx);
  72.780  					break;
  72.781  				}
  72.782 +				case MP_BUS_NEC98: /* NEC 98 pin */
  72.783 +				{
  72.784 +					trigger = default_NEC98_trigger(idx);
  72.785 +					break;
  72.786 +				}
  72.787  				default:
  72.788  				{
  72.789  					printk(KERN_WARNING "broken BIOS!!\n");
  72.790 @@ -556,6 +995,7 @@ static int pin_2_irq(int idx, int apic, 
  72.791  		case MP_BUS_ISA: /* ISA pin */
  72.792  		case MP_BUS_EISA:
  72.793  		case MP_BUS_MCA:
  72.794 +		case MP_BUS_NEC98:
  72.795  		{
  72.796  			irq = mp_irqs[idx].mpc_srcbusirq;
  72.797  			break;
  72.798 @@ -569,6 +1009,13 @@ static int pin_2_irq(int idx, int apic, 
  72.799  			while (i < apic)
  72.800  				irq += nr_ioapic_registers[i++];
  72.801  			irq += pin;
  72.802 +
  72.803 +			/*
  72.804 +			 * For MPS mode, so far only needed by ES7000 platform
  72.805 +			 */
  72.806 +			if (ioapic_renumber_irq)
  72.807 +				irq = ioapic_renumber_irq(apic, irq);
  72.808 +
  72.809  			break;
  72.810  		}
  72.811  		default:
  72.812 @@ -579,20 +1026,6 @@ static int pin_2_irq(int idx, int apic, 
  72.813  		}
  72.814  	}
  72.815  
  72.816 -	/*
  72.817 -	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
  72.818 -	 */
  72.819 -	if ((pin >= 16) && (pin <= 23)) {
  72.820 -		if (pirq_entries[pin-16] != -1) {
  72.821 -			if (!pirq_entries[pin-16]) {
  72.822 -				printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
  72.823 -			} else {
  72.824 -				irq = pirq_entries[pin-16];
  72.825 -				printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
  72.826 -						pin-16, irq);
  72.827 -			}
  72.828 -		}
  72.829 -	}
  72.830  	return irq;
  72.831  }
  72.832  
  72.833 @@ -613,93 +1046,74 @@ static inline int IO_APIC_irq_trigger(in
  72.834  	return 0;
  72.835  }
  72.836  
  72.837 -int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
  72.838 -
  72.839 -#ifdef CONFIG_VMX
  72.840 -int vector_irq[256];
  72.841 -#endif
  72.842 +/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
  72.843 +u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
  72.844  
  72.845  int assign_irq_vector(int irq)
  72.846  {
  72.847  	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
  72.848 -	if (IO_APIC_VECTOR(irq) > 0)
  72.849 +
  72.850 +	BUG_ON(irq >= NR_IRQ_VECTORS);
  72.851 +	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
  72.852  		return IO_APIC_VECTOR(irq);
  72.853  next:
  72.854  	current_vector += 8;
  72.855  
  72.856 -        /* Skip the hypercall vector. */
  72.857 -	if (current_vector == HYPERCALL_VECTOR) goto next;
  72.858 -        /* Skip the Linux/BSD fast-trap vector. */
  72.859 -        if (current_vector == 0x80) goto next;
  72.860 +	/* Skip the hypercall vector. */
  72.861 +	if (current_vector == HYPERCALL_VECTOR)
  72.862 +		goto next;
  72.863  
  72.864 -	if (current_vector > FIRST_SYSTEM_VECTOR) {
  72.865 +	/* Skip the Linux/BSD fast-trap vector. */
  72.866 +	if (current_vector == 0x80)
  72.867 +		goto next;
  72.868 +
  72.869 +	if (current_vector >= FIRST_SYSTEM_VECTOR) {
  72.870  		offset++;
  72.871 +		if (!(offset%8))
  72.872 +			return -ENOSPC;
  72.873  		current_vector = FIRST_DEVICE_VECTOR + offset;
  72.874  	}
  72.875  
  72.876 -	if (current_vector == FIRST_SYSTEM_VECTOR)
  72.877 -		panic("ran out of interrupt sources!");
  72.878 +	vector_irq[current_vector] = irq;
  72.879 +	if (irq != AUTO_ASSIGN)
  72.880 +		IO_APIC_VECTOR(irq) = current_vector;
  72.881  
  72.882 -	IO_APIC_VECTOR(irq) = current_vector;
  72.883 -#ifdef CONFIG_VMX
  72.884 -        vector_irq[current_vector] = irq;
  72.885 -        printk("vector_irq[%x] = %d\n", current_vector, irq);
  72.886 -#endif
  72.887  	return current_vector;
  72.888  }
  72.889  
  72.890 -extern void (*interrupt[NR_IRQS])(void);
  72.891 -
  72.892 -/*
  72.893 - * Level and edge triggered IO-APIC interrupts need different handling,
  72.894 - * so we use two separate IRQ descriptors. Edge triggered IRQs can be
  72.895 - * handled with the level-triggered descriptor, but that one has slightly
  72.896 - * more overhead. Level-triggered interrupts cannot be handled with the
  72.897 - * edge-triggered handler, without risking IRQ storms and other ugly
  72.898 - * races.
  72.899 - */
  72.900 +static struct hw_interrupt_type ioapic_level_type;
  72.901 +static struct hw_interrupt_type ioapic_edge_type;
  72.902  
  72.903 -static unsigned int startup_edge_ioapic_irq(unsigned int irq);
  72.904 -#define shutdown_edge_ioapic_irq  disable_edge_ioapic_irq
  72.905 -#define enable_edge_ioapic_irq    unmask_IO_APIC_irq
  72.906 -static void disable_edge_ioapic_irq (unsigned int irq);
  72.907 -static void ack_edge_ioapic_irq(unsigned int irq);
  72.908 -static void end_edge_ioapic_irq (unsigned int i);
  72.909 -static struct hw_interrupt_type ioapic_edge_irq_type = {
  72.910 -	"IO-APIC-edge",
  72.911 -	startup_edge_ioapic_irq,
  72.912 -	shutdown_edge_ioapic_irq,
  72.913 -	enable_edge_ioapic_irq,
  72.914 -	disable_edge_ioapic_irq,
  72.915 -	ack_edge_ioapic_irq,
  72.916 -	end_edge_ioapic_irq,
  72.917 -	set_ioapic_affinity,
  72.918 -};
  72.919 +#define IOAPIC_AUTO	-1
  72.920 +#define IOAPIC_EDGE	0
  72.921 +#define IOAPIC_LEVEL	1
  72.922  
  72.923 -static unsigned int startup_level_ioapic_irq (unsigned int irq);
  72.924 -#define shutdown_level_ioapic_irq mask_IO_APIC_irq
  72.925 -#define enable_level_ioapic_irq   unmask_IO_APIC_irq
  72.926 -#define disable_level_ioapic_irq  mask_IO_APIC_irq
  72.927 -static void mask_and_ack_level_ioapic_irq (unsigned int irq);
  72.928 -static void end_level_ioapic_irq (unsigned int irq);
  72.929 -static struct hw_interrupt_type ioapic_level_irq_type = {
  72.930 -	"IO-APIC-level",
  72.931 -	startup_level_ioapic_irq,
  72.932 -	shutdown_level_ioapic_irq,
  72.933 -	enable_level_ioapic_irq,
  72.934 -	disable_level_ioapic_irq,
  72.935 -	mask_and_ack_level_ioapic_irq,
  72.936 -	end_level_ioapic_irq,
  72.937 -	set_ioapic_affinity,
  72.938 -};
  72.939 +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
  72.940 +{
  72.941 +	if (use_pci_vector() && !platform_legacy_irq(irq)) {
  72.942 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
  72.943 +				trigger == IOAPIC_LEVEL)
  72.944 +			irq_desc[vector].handler = &ioapic_level_type;
  72.945 +		else
  72.946 +			irq_desc[vector].handler = &ioapic_edge_type;
  72.947 +		set_intr_gate(vector, interrupt[vector]);
  72.948 +	} else	{
  72.949 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
  72.950 +				trigger == IOAPIC_LEVEL)
  72.951 +			irq_desc[irq].handler = &ioapic_level_type;
  72.952 +		else
  72.953 +			irq_desc[irq].handler = &ioapic_edge_type;
  72.954 +		set_intr_gate(vector, interrupt[irq]);
  72.955 +	}
  72.956 +}
  72.957  
  72.958  void __init setup_IO_APIC_irqs(void)
  72.959  {
  72.960  	struct IO_APIC_route_entry entry;
  72.961 -	int apic, pin, idx, irq, vector;
  72.962 +	int apic, pin, idx, irq, first_notcon = 1, vector;
  72.963  	unsigned long flags;
  72.964  
  72.965 -	printk(KERN_DEBUG "init IO_APIC IRQs\n");
  72.966 +	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
  72.967  
  72.968  	for (apic = 0; apic < nr_ioapics; apic++) {
  72.969  	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
  72.970 @@ -710,13 +1124,24 @@ void __init setup_IO_APIC_irqs(void)
  72.971  		memset(&entry,0,sizeof(entry));
  72.972  
  72.973  		entry.delivery_mode = INT_DELIVERY_MODE;
  72.974 -		entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
  72.975 +		entry.dest_mode = INT_DEST_MODE;
  72.976  		entry.mask = 0;				/* enable IRQ */
  72.977 -		entry.dest.logical.logical_dest = target_cpus();
  72.978 +		entry.dest.logical.logical_dest = 
  72.979 +					cpu_mask_to_apicid(TARGET_CPUS);
  72.980  
  72.981  		idx = find_irq_entry(apic,pin,mp_INT);
  72.982 -		if (idx == -1)
  72.983 +		if (idx == -1) {
  72.984 +			if (first_notcon) {
  72.985 +				apic_printk(APIC_VERBOSE, KERN_DEBUG
  72.986 +						" IO-APIC (apicid-pin) %d-%d",
  72.987 +						mp_ioapics[apic].mpc_apicid,
  72.988 +						pin);
  72.989 +				first_notcon = 0;
  72.990 +			} else
  72.991 +				apic_printk(APIC_VERBOSE, ", %d-%d",
  72.992 +					mp_ioapics[apic].mpc_apicid, pin);
  72.993  			continue;
  72.994 +		}
  72.995  
  72.996  		entry.trigger = irq_trigger(idx);
  72.997  		entry.polarity = irq_polarity(idx);
  72.998 @@ -731,8 +1156,7 @@ void __init setup_IO_APIC_irqs(void)
  72.999  		 * skip adding the timer int on secondary nodes, which causes
 72.1000  		 * a small but painful rift in the time-space continuum
 72.1001  		 */
 72.1002 -		if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) 
 72.1003 -			&& (apic != 0) && (irq == 0))
 72.1004 +		if (multi_timer_check(apic, irq))
 72.1005  			continue;
 72.1006  		else
 72.1007  			add_pin_to_irq(irq, apic, pin);
 72.1008 @@ -743,13 +1167,7 @@ void __init setup_IO_APIC_irqs(void)
 72.1009  		if (IO_APIC_IRQ(irq)) {
 72.1010  			vector = assign_irq_vector(irq);
 72.1011  			entry.vector = vector;
 72.1012 -
 72.1013 -			if (IO_APIC_irq_trigger(irq))
 72.1014 -				irq_desc[irq].handler = &ioapic_level_irq_type;
 72.1015 -			else
 72.1016 -				irq_desc[irq].handler = &ioapic_edge_irq_type;
 72.1017 -
 72.1018 -			set_intr_gate(vector, interrupt[irq]);
 72.1019 +			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
 72.1020  		
 72.1021  			if (!apic && (irq < 16))
 72.1022  				disable_8259A_irq(irq);
 72.1023 @@ -760,11 +1178,13 @@ void __init setup_IO_APIC_irqs(void)
 72.1024  		spin_unlock_irqrestore(&ioapic_lock, flags);
 72.1025  	}
 72.1026  	}
 72.1027 +
 72.1028 +	if (!first_notcon)
 72.1029 +		apic_printk(APIC_VERBOSE, " not connected.\n");
 72.1030  }
 72.1031  
 72.1032  /*
 72.1033 - * Set up the 8259A-master output pin as broadcast to all
 72.1034 - * CPUs.
 72.1035 + * Set up the 8259A-master output pin:
 72.1036   */
 72.1037  void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
 72.1038  {
 72.1039 @@ -782,9 +1202,9 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 72.1040  	 * We use logical delivery to get the timer IRQ
 72.1041  	 * to the first CPU.
 72.1042  	 */
 72.1043 -	entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
 72.1044 +	entry.dest_mode = INT_DEST_MODE;
 72.1045  	entry.mask = 0;					/* unmask IRQ now */
 72.1046 -	entry.dest.logical.logical_dest = target_cpus();
 72.1047 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 72.1048  	entry.delivery_mode = INT_DELIVERY_MODE;
 72.1049  	entry.polarity = 0;
 72.1050  	entry.trigger = 0;
 72.1051 @@ -794,7 +1214,7 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 72.1052  	 * The timer IRQ doesn't have to know that behind the
 72.1053  	 * scene we have a 8259A-master in AEOI mode ...
 72.1054  	 */
 72.1055 -	irq_desc[0].handler = &ioapic_edge_irq_type;
 72.1056 +	irq_desc[0].handler = &ioapic_edge_type;
 72.1057  
 72.1058  	/*
 72.1059  	 * Add it to the IO-APIC irq-routing table:
 72.1060 @@ -807,23 +1227,23 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 72.1061  	enable_8259A_irq(0);
 72.1062  }
 72.1063  
 72.1064 -void __init UNEXPECTED_IO_APIC(void)
 72.1065 +static inline void UNEXPECTED_IO_APIC(void)
 72.1066  {
 72.1067 -	printk(KERN_WARNING 
 72.1068 -		"An unexpected IO-APIC was found. If this kernel release is less than\n"
 72.1069 -		"three months old please report this to linux-smp@vger.kernel.org\n");
 72.1070  }
 72.1071  
 72.1072  void __init print_IO_APIC(void)
 72.1073  {
 72.1074 -#ifdef VERBOSE
 72.1075 +#ifndef NDEBUG
 72.1076  	int apic, i;
 72.1077 -	struct IO_APIC_reg_00 reg_00;
 72.1078 -	struct IO_APIC_reg_01 reg_01;
 72.1079 -	struct IO_APIC_reg_02 reg_02;
 72.1080 -	struct IO_APIC_reg_03 reg_03;
 72.1081 +	union IO_APIC_reg_00 reg_00;
 72.1082 +	union IO_APIC_reg_01 reg_01;
 72.1083 +	union IO_APIC_reg_02 reg_02;
 72.1084 +	union IO_APIC_reg_03 reg_03;
 72.1085  	unsigned long flags;
 72.1086  
 72.1087 +	if (apic_verbosity == APIC_QUIET)
 72.1088 +		return;
 72.1089 +
 72.1090   	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 72.1091  	for (i = 0; i < nr_ioapics; i++)
 72.1092  		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
 72.1093 @@ -838,47 +1258,46 @@ void __init print_IO_APIC(void)
 72.1094  	for (apic = 0; apic < nr_ioapics; apic++) {
 72.1095  
 72.1096  	spin_lock_irqsave(&ioapic_lock, flags);
 72.1097 -	*(int *)&reg_00 = io_apic_read(apic, 0);
 72.1098 -	*(int *)&reg_01 = io_apic_read(apic, 1);
 72.1099 -	if (reg_01.version >= 0x10)
 72.1100 -		*(int *)&reg_02 = io_apic_read(apic, 2);
 72.1101 -	if (reg_01.version >= 0x20)
 72.1102 -		*(int *)&reg_03 = io_apic_read(apic, 3);
 72.1103 +	reg_00.raw = io_apic_read(apic, 0);
 72.1104 +	reg_01.raw = io_apic_read(apic, 1);
 72.1105 +	if (reg_01.bits.version >= 0x10)
 72.1106 +		reg_02.raw = io_apic_read(apic, 2);
 72.1107 +	if (reg_01.bits.version >= 0x20)
 72.1108 +		reg_03.raw = io_apic_read(apic, 3);
 72.1109  	spin_unlock_irqrestore(&ioapic_lock, flags);
 72.1110  
 72.1111 -	printk("\n");
 72.1112  	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
 72.1113 -	printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
 72.1114 -	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.ID);
 72.1115 -	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.delivery_type);
 72.1116 -	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.LTS);
 72.1117 -	if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2)
 72.1118 +	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 72.1119 +	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 72.1120 +	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 72.1121 +	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 72.1122 +	if (reg_00.bits.ID >= get_physical_broadcast())
 72.1123 +		UNEXPECTED_IO_APIC();
 72.1124 +	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
 72.1125  		UNEXPECTED_IO_APIC();
 72.1126  
 72.1127 -	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 72.1128 -	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.entries);
 72.1129 -	if (	(reg_01.entries != 0x0f) && /* older (Neptune) boards */
 72.1130 -		(reg_01.entries != 0x17) && /* typical ISA+PCI boards */
 72.1131 -		(reg_01.entries != 0x1b) && /* Compaq Proliant boards */
 72.1132 -		(reg_01.entries != 0x1f) && /* dual Xeon boards */
 72.1133 -		(reg_01.entries != 0x22) && /* bigger Xeon boards */
 72.1134 -		(reg_01.entries != 0x2E) &&
 72.1135 -		(reg_01.entries != 0x3F)
 72.1136 +	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
 72.1137 +	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
 72.1138 +	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
 72.1139 +		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
 72.1140 +		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
 72.1141 +		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
 72.1142 +		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
 72.1143 +		(reg_01.bits.entries != 0x2E) &&
 72.1144 +		(reg_01.bits.entries != 0x3F)
 72.1145  	)
 72.1146  		UNEXPECTED_IO_APIC();
 72.1147  
 72.1148 -	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.PRQ);
 72.1149 -	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.version);
 72.1150 -	if (	(reg_01.version != 0x01) && /* 82489DX IO-APICs */
 72.1151 -		(reg_01.version != 0x02) && /* VIA */
 72.1152 -		(reg_01.version != 0x03) && /* later VIA */
 72.1153 -		(reg_01.version != 0x10) && /* oldest IO-APICs */
 72.1154 -		(reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
 72.1155 -		(reg_01.version != 0x13) && /* Xeon IO-APICs */
 72.1156 -		(reg_01.version != 0x20)    /* Intel P64H (82806 AA) */
 72.1157 +	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 72.1158 +	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 72.1159 +	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
 72.1160 +		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
 72.1161 +		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
 72.1162 +		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
 72.1163 +		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
 72.1164  	)
 72.1165  		UNEXPECTED_IO_APIC();
 72.1166 -	if (reg_01.__reserved_1 || reg_01.__reserved_2)
 72.1167 +	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
 72.1168  		UNEXPECTED_IO_APIC();
 72.1169  
 72.1170  	/*
 72.1171 @@ -886,10 +1305,10 @@ void __init print_IO_APIC(void)
 72.1172  	 * but the value of reg_02 is read as the previous read register
 72.1173  	 * value, so ignore it if reg_02 == reg_01.
 72.1174  	 */
 72.1175 -	if (reg_01.version >= 0x10 && *(int *)&reg_02 != *(int *)&reg_01) {
 72.1176 -		printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
 72.1177 -		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.arbitration);
 72.1178 -		if (reg_02.__reserved_1 || reg_02.__reserved_2)
 72.1179 +	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 72.1180 +		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 72.1181 +		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 72.1182 +		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
 72.1183  			UNEXPECTED_IO_APIC();
 72.1184  	}
 72.1185  
 72.1186 @@ -898,11 +1317,11 @@ void __init print_IO_APIC(void)
 72.1187  	 * or reg_03, but the value of reg_0[23] is read as the previous read
 72.1188  	 * register value, so ignore it if reg_03 == reg_0[12].
 72.1189  	 */
 72.1190 -	if (reg_01.version >= 0x20 && *(int *)&reg_03 != *(int *)&reg_02 &&
 72.1191 -	    *(int *)&reg_03 != *(int *)&reg_01) {
 72.1192 -		printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)&reg_03);
 72.1193 -		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.boot_DT);
 72.1194 -		if (reg_03.__reserved_1)
 72.1195 +	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
 72.1196 +	    reg_03.raw != reg_01.raw) {
 72.1197 +		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
 72.1198 +		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
 72.1199 +		if (reg_03.bits.__reserved_1)
 72.1200  			UNEXPECTED_IO_APIC();
 72.1201  	}
 72.1202  
 72.1203 @@ -911,7 +1330,7 @@ void __init print_IO_APIC(void)
 72.1204  	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
 72.1205  			  " Stat Dest Deli Vect:   \n");
 72.1206  
 72.1207 -	for (i = 0; i <= reg_01.entries; i++) {
 72.1208 +	for (i = 0; i <= reg_01.bits.entries; i++) {
 72.1209  		struct IO_APIC_route_entry entry;
 72.1210  
 72.1211  		spin_lock_irqsave(&ioapic_lock, flags);
 72.1212 @@ -937,12 +1356,17 @@ void __init print_IO_APIC(void)
 72.1213  		);
 72.1214  	}
 72.1215  	}
 72.1216 +	if (use_pci_vector())
 72.1217 +		printk(KERN_INFO "Using vector-based indexing\n");
 72.1218  	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 72.1219  	for (i = 0; i < NR_IRQS; i++) {
 72.1220  		struct irq_pin_list *entry = irq_2_pin + i;
 72.1221  		if (entry->pin < 0)
 72.1222  			continue;
 72.1223 -		printk(KERN_DEBUG "IRQ%d ", i);
 72.1224 + 		if (use_pci_vector() && !platform_legacy_irq(i))
 72.1225 +			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
 72.1226 +		else
 72.1227 +			printk(KERN_DEBUG "IRQ%d ", i);
 72.1228  		for (;;) {
 72.1229  			printk("-> %d:%d", entry->apic, entry->pin);
 72.1230  			if (!entry->next)
 72.1231 @@ -953,152 +1377,13 @@ void __init print_IO_APIC(void)
 72.1232  	}
 72.1233  
 72.1234  	printk(KERN_INFO ".................................... done.\n");
 72.1235 -#endif
 72.1236 -}
 72.1237 -
 72.1238 -
 72.1239 -#if 0 /* Maybe useful for debugging, but not currently used anywhere. */
 72.1240 -
 72.1241 -static void print_APIC_bitfield (int base)
 72.1242 -{
 72.1243 -	unsigned int v;
 72.1244 -	int i, j;
 72.1245 -
 72.1246 -	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
 72.1247 -	for (i = 0; i < 8; i++) {
 72.1248 -		v = apic_read(base + i*0x10);
 72.1249 -		for (j = 0; j < 32; j++) {
 72.1250 -			if (v & (1<<j))
 72.1251 -				printk("1");
 72.1252 -			else
 72.1253 -				printk("0");
 72.1254 -		}
 72.1255 -		printk("\n");
 72.1256 -	}
 72.1257 +#endif /* !NDEBUG */
 72.1258 +	return;
 72.1259  }
 72.1260  
 72.1261 -
 72.1262 -void /*__init*/ print_local_APIC(void * dummy)
 72.1263 -{
 72.1264 -	unsigned int v, ver, maxlvt;
 72.1265 -
 72.1266 -	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 72.1267 -		smp_processor_id(), hard_smp_processor_id());
 72.1268 -	v = apic_read(APIC_ID);
 72.1269 -	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
 72.1270 -	v = apic_read(APIC_LVR);
 72.1271 -	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 72.1272 -	ver = GET_APIC_VERSION(v);
 72.1273 -	maxlvt = get_maxlvt();
 72.1274 -
 72.1275 -	v = apic_read(APIC_TASKPRI);
 72.1276 -	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 72.1277 -
 72.1278 -	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
 72.1279 -		v = apic_read(APIC_ARBPRI);
 72.1280 -		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
 72.1281 -			v & APIC_ARBPRI_MASK);
 72.1282 -		v = apic_read(APIC_PROCPRI);
 72.1283 -		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 72.1284 -	}
 72.1285 -
 72.1286 -	v = apic_read(APIC_EOI);
 72.1287 -	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
 72.1288 -	v = apic_read(APIC_RRR);
 72.1289 -	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
 72.1290 -	v = apic_read(APIC_LDR);
 72.1291 -	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
 72.1292 -	v = apic_read(APIC_DFR);
 72.1293 -	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
 72.1294 -	v = apic_read(APIC_SPIV);
 72.1295 -	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 72.1296 -
 72.1297 -	printk(KERN_DEBUG "... APIC ISR field:\n");
 72.1298 -	print_APIC_bitfield(APIC_ISR);
 72.1299 -	printk(KERN_DEBUG "... APIC TMR field:\n");
 72.1300 -	print_APIC_bitfield(APIC_TMR);
 72.1301 -	printk(KERN_DEBUG "... APIC IRR field:\n");
 72.1302 -	print_APIC_bitfield(APIC_IRR);
 72.1303 -
 72.1304 -	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
 72.1305 -		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 72.1306 -			apic_write(APIC_ESR, 0);
 72.1307 -		v = apic_read(APIC_ESR);
 72.1308 -		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 72.1309 -	}
 72.1310 -
 72.1311 -	v = apic_read(APIC_ICR);
 72.1312 -	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 72.1313 -	v = apic_read(APIC_ICR2);
 72.1314 -	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 72.1315 -
 72.1316 -	v = apic_read(APIC_LVTT);
 72.1317 -	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
 72.1318 -
 72.1319 -	if (maxlvt > 3) {                       /* PC is LVT#4. */
 72.1320 -		v = apic_read(APIC_LVTPC);
 72.1321 -		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
 72.1322 -	}
 72.1323 -	v = apic_read(APIC_LVT0);
 72.1324 -	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
 72.1325 -	v = apic_read(APIC_LVT1);
 72.1326 -	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
 72.1327 -
 72.1328 -	if (maxlvt > 2) {			/* ERR is LVT#3. */
 72.1329 -		v = apic_read(APIC_LVTERR);
 72.1330 -		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
 72.1331 -	}
 72.1332 -
 72.1333 -	v = apic_read(APIC_TMICT);
 72.1334 -	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
 72.1335 -	v = apic_read(APIC_TMCCT);
 72.1336 -	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 72.1337 -	v = apic_read(APIC_TDCR);
 72.1338 -	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
 72.1339 -	printk("\n");
 72.1340 -}
 72.1341 -
 72.1342 -void print_all_local_APICs (void)
 72.1343 -{
 72.1344 -	smp_call_function(print_local_APIC, NULL, 1, 1);
 72.1345 -	print_local_APIC(NULL);
 72.1346 -}
 72.1347 -
 72.1348 -void /*__init*/ print_PIC(void)
 72.1349 -{
 72.1350 -	extern spinlock_t i8259A_lock;
 72.1351 -	unsigned int v, flags;
 72.1352 -
 72.1353 -	printk(KERN_DEBUG "\nprinting PIC contents\n");
 72.1354 -
 72.1355 -	spin_lock_irqsave(&i8259A_lock, flags);
 72.1356 -
 72.1357 -	v = inb(0xa1) << 8 | inb(0x21);
 72.1358 -	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
 72.1359 -
 72.1360 -	v = inb(0xa0) << 8 | inb(0x20);
 72.1361 -	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
 72.1362 -
 72.1363 -	outb(0x0b,0xa0);
 72.1364 -	outb(0x0b,0x20);
 72.1365 -	v = inb(0xa0) << 8 | inb(0x20);
 72.1366 -	outb(0x0a,0xa0);
 72.1367 -	outb(0x0a,0x20);
 72.1368 -
 72.1369 -	spin_unlock_irqrestore(&i8259A_lock, flags);
 72.1370 -
 72.1371 -	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
 72.1372 -
 72.1373 -	v = inb(0x4d1) << 8 | inb(0x4d0);
 72.1374 -	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 72.1375 -}
 72.1376 -
 72.1377 -#endif /* 0 */
 72.1378 -
 72.1379 -
 72.1380  static void __init enable_IO_APIC(void)
 72.1381  {
 72.1382 -	struct IO_APIC_reg_01 reg_01;
 72.1383 +	union IO_APIC_reg_01 reg_01;
 72.1384  	int i;
 72.1385  	unsigned long flags;
 72.1386  
 72.1387 @@ -1106,18 +1391,15 @@ static void __init enable_IO_APIC(void)
 72.1388  		irq_2_pin[i].pin = -1;
 72.1389  		irq_2_pin[i].next = 0;
 72.1390  	}
 72.1391 -	if (!pirqs_enabled)
 72.1392 -		for (i = 0; i < MAX_PIRQS; i++)
 72.1393 -			pirq_entries[i] = -1;
 72.1394  
 72.1395  	/*
 72.1396  	 * The number of IO-APIC IRQ registers (== #pins):
 72.1397  	 */
 72.1398  	for (i = 0; i < nr_ioapics; i++) {
 72.1399  		spin_lock_irqsave(&ioapic_lock, flags);
 72.1400 -		*(int *)&reg_01 = io_apic_read(i, 1);
 72.1401 +		reg_01.raw = io_apic_read(i, 1);
 72.1402  		spin_unlock_irqrestore(&ioapic_lock, flags);
 72.1403 -		nr_ioapic_registers[i] = reg_01.entries+1;
 72.1404 +		nr_ioapic_registers[i] = reg_01.bits.entries+1;
 72.1405  	}
 72.1406  
 72.1407  	/*
 72.1408 @@ -1146,18 +1428,22 @@ void disable_IO_APIC(void)
 72.1409   * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
 72.1410   */
 72.1411  
 72.1412 -static void __init setup_ioapic_ids_from_mpc (void)
 72.1413 +#ifndef CONFIG_X86_NUMAQ
 72.1414 +static void __init setup_ioapic_ids_from_mpc(void)
 72.1415  {
 72.1416 -	struct IO_APIC_reg_00 reg_00;
 72.1417 -	unsigned long phys_id_present_map = phys_cpu_present_map;
 72.1418 +	union IO_APIC_reg_00 reg_00;
 72.1419 +	physid_mask_t phys_id_present_map;
 72.1420  	int apic;
 72.1421  	int i;
 72.1422  	unsigned char old_id;
 72.1423  	unsigned long flags;
 72.1424  
 72.1425 -	if (clustered_apic_mode)
 72.1426 -		/* We don't have a good way to do this yet - hack */
 72.1427 -		phys_id_present_map = (u_long) 0xf;
 72.1428 +	/*
 72.1429 +	 * This is broken; anything with a real cpu count has to
 72.1430 +	 * circumvent this idiocy regardless.
 72.1431 +	 */
 72.1432 +	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
 72.1433 +
 72.1434  	/*
 72.1435  	 * Set the IOAPIC ID to the value stored in the MPC table.
 72.1436  	 */
 72.1437 @@ -1165,41 +1451,48 @@ static void __init setup_ioapic_ids_from
 72.1438  
 72.1439  		/* Read the register 0 value */
 72.1440  		spin_lock_irqsave(&ioapic_lock, flags);
 72.1441 -		*(int *)&reg_00 = io_apic_read(apic, 0);
 72.1442 +		reg_00.raw = io_apic_read(apic, 0);
 72.1443  		spin_unlock_irqrestore(&ioapic_lock, flags);
 72.1444  		
 72.1445  		old_id = mp_ioapics[apic].mpc_apicid;
 72.1446  
 72.1447 -		if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
 72.1448 +		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
 72.1449  			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
 72.1450  				apic, mp_ioapics[apic].mpc_apicid);
 72.1451  			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 72.1452 -				reg_00.ID);
 72.1453 -			mp_ioapics[apic].mpc_apicid = reg_00.ID;
 72.1454 +				reg_00.bits.ID);
 72.1455 +			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
 72.1456  		}
 72.1457  
 72.1458 +		/* Don't check I/O APIC IDs for some xAPIC systems.  They have
 72.1459 +		 * no meaning without the serial APIC bus. */
 72.1460 +		if (NO_IOAPIC_CHECK)
 72.1461 +			continue;
 72.1462  		/*
 72.1463  		 * Sanity check, is the ID really free? Every APIC in a
 72.1464  		 * system must have a unique ID or we get lots of nice
 72.1465  		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 72.1466 -		 * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
 72.1467  		 */
 72.1468 -		if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
 72.1469 -		    (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
 72.1470 +		if (check_apicid_used(phys_id_present_map,
 72.1471 +					mp_ioapics[apic].mpc_apicid)) {
 72.1472  			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
 72.1473  				apic, mp_ioapics[apic].mpc_apicid);
 72.1474 -			for (i = 0; i < 0xf; i++)
 72.1475 -				if (!(phys_id_present_map & (1 << i)))
 72.1476 +			for (i = 0; i < get_physical_broadcast(); i++)
 72.1477 +				if (!physid_isset(i, phys_id_present_map))
 72.1478  					break;
 72.1479 -			if (i >= apic_broadcast_id)
 72.1480 +			if (i >= get_physical_broadcast())
 72.1481  				panic("Max APIC ID exceeded!\n");
 72.1482  			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 72.1483  				i);
 72.1484 -			phys_id_present_map |= 1 << i;
 72.1485 +			physid_set(i, phys_id_present_map);
 72.1486  			mp_ioapics[apic].mpc_apicid = i;
 72.1487  		} else {
 72.1488 -			printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
 72.1489 -			phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
 72.1490 +			physid_mask_t tmp;
 72.1491 +			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
 72.1492 +			apic_printk(APIC_VERBOSE, "Setting %d in the "
 72.1493 +					"phys_id_present_map\n",
 72.1494 +					mp_ioapics[apic].mpc_apicid);
 72.1495 +			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 72.1496  		}
 72.1497  
 72.1498  
 72.1499 @@ -1217,26 +1510,30 @@ static void __init setup_ioapic_ids_from
 72.1500  		 * Read the right value from the MPC table and
 72.1501  		 * write it into the ID register.
 72.1502  	 	 */
 72.1503 -		printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
 72.1504 -					mp_ioapics[apic].mpc_apicid);
 72.1505 +		apic_printk(APIC_VERBOSE, KERN_INFO
 72.1506 +			"...changing IO-APIC physical APIC ID to %d ...",
 72.1507 +			mp_ioapics[apic].mpc_apicid);
 72.1508  
 72.1509 -		reg_00.ID = mp_ioapics[apic].mpc_apicid;
 72.1510 +		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
 72.1511  		spin_lock_irqsave(&ioapic_lock, flags);
 72.1512 -		io_apic_write(apic, 0, *(int *)&reg_00);
 72.1513 +		io_apic_write(apic, 0, reg_00.raw);
 72.1514  		spin_unlock_irqrestore(&ioapic_lock, flags);
 72.1515  
 72.1516  		/*
 72.1517  		 * Sanity check
 72.1518  		 */
 72.1519  		spin_lock_irqsave(&ioapic_lock, flags);
 72.1520 -		*(int *)&reg_00 = io_apic_read(apic, 0);
 72.1521 +		reg_00.raw = io_apic_read(apic, 0);
 72.1522  		spin_unlock_irqrestore(&ioapic_lock, flags);
 72.1523 -		if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
 72.1524 -			panic("could not set ID!\n");
 72.1525 +		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
 72.1526 +			printk("could not set ID!\n");
 72.1527  		else
 72.1528 -			printk(" ok.\n");
 72.1529 +			apic_printk(APIC_VERBOSE, " ok.\n");
 72.1530  	}
 72.1531  }
 72.1532 +#else
 72.1533 +static void __init setup_ioapic_ids_from_mpc(void) { }
 72.1534 +#endif
 72.1535  
 72.1536  /*
 72.1537   * There is a nasty bug in some older SMP boards, their mptable lies
 72.1538 @@ -1248,9 +1545,9 @@ static void __init setup_ioapic_ids_from
 72.1539   */
 72.1540  static int __init timer_irq_works(void)
 72.1541  {
 72.1542 -	unsigned int t1 = jiffies;
 72.1543 +	unsigned long t1 = jiffies;
 72.1544  
 72.1545 -	__sti();
 72.1546 +	local_irq_enable();
 72.1547  	/* Let ten ticks pass... */
 72.1548  	mdelay((10 * 1000) / HZ);
 72.1549  
 72.1550 @@ -1267,7 +1564,18 @@ static int __init timer_irq_works(void)
 72.1551  	return 0;
 72.1552  }
 72.1553  
 72.1554 -static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
 72.1555 +/*
 72.1556 + * In the SMP+IOAPIC case it might happen that there are an unspecified
 72.1557 + * number of pending IRQ events unhandled. These cases are very rare,
 72.1558 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
 72.1559 + * better to do it this way as thus we do not have to be aware of
 72.1560 + * 'pending' interrupts in the IRQ path, except at this point.
 72.1561 + */
 72.1562 +/*
 72.1563 + * Edge triggered needs to resend any interrupt
 72.1564 + * that was delayed but this is now handled in the device
 72.1565 + * independent code.
 72.1566 + */
 72.1567  
 72.1568  /*
 72.1569   * Starting up a edge-triggered IO-APIC interrupt is
 72.1570 @@ -1278,7 +1586,6 @@ static void disable_edge_ioapic_irq (uns
 72.1571   * This is not complete - we should be able to fake
 72.1572   * an edge even if it isn't on the 8259A...
 72.1573   */
 72.1574 -
 72.1575  static unsigned int startup_edge_ioapic_irq(unsigned int irq)
 72.1576  {
 72.1577  	int was_pending = 0;
 72.1578 @@ -1303,16 +1610,13 @@ static unsigned int startup_edge_ioapic_
 72.1579   */
 72.1580  static void ack_edge_ioapic_irq(unsigned int irq)
 72.1581  {
 72.1582 -	balance_irq(irq);
 72.1583 +	move_irq(irq);
 72.1584  	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
 72.1585  					== (IRQ_PENDING | IRQ_DISABLED))
 72.1586  		mask_IO_APIC_irq(irq);
 72.1587  	ack_APIC_irq();
 72.1588  }
 72.1589  
 72.1590 -static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
 72.1591 -
 72.1592 -
 72.1593  /*
 72.1594   * Level triggered interrupts can just be masked,
 72.1595   * and shutting down and starting up the interrupt
 72.1596 @@ -1334,15 +1638,12 @@ static unsigned int startup_level_ioapic
 72.1597  	return 0; /* don't check for pending */
 72.1598  }
 72.1599  
 72.1600 -static void mask_and_ack_level_ioapic_irq(unsigned int irq)
 72.1601 +static void end_level_ioapic_irq (unsigned int irq)
 72.1602  {
 72.1603  	unsigned long v;
 72.1604  	int i;
 72.1605  
 72.1606 -	balance_irq(irq);
 72.1607 -
 72.1608 -	mask_IO_APIC_irq(irq);
 72.1609 -
 72.1610 +	move_irq(irq);
 72.1611  /*
 72.1612   * It appears there is an erratum which affects at least version 0x11
 72.1613   * of I/O APIC (that's the 82093AA and cores integrated into various
 72.1614 @@ -1363,45 +1664,102 @@ static void mask_and_ack_level_ioapic_ir
 72.1615   * The idea is from Manfred Spraul.  --macro
 72.1616   */
 72.1617  	i = IO_APIC_VECTOR(irq);
 72.1618 +
 72.1619  	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 72.1620  
 72.1621  	ack_APIC_irq();
 72.1622  
 72.1623  	if (!(v & (1 << (i & 0x1f)))) {
 72.1624 -#ifdef APIC_LOCKUP_DEBUG
 72.1625 -		struct irq_pin_list *entry;
 72.1626 -#endif
 72.1627 -
 72.1628 -#ifdef APIC_MISMATCH_DEBUG
 72.1629  		atomic_inc(&irq_mis_count);
 72.1630 -#endif
 72.1631  		spin_lock(&ioapic_lock);
 72.1632 -		__edge_IO_APIC_irq(irq);
 72.1633 -#ifdef APIC_LOCKUP_DEBUG
 72.1634 -		for (entry = irq_2_pin + irq;;) {
 72.1635 -			unsigned int reg;
 72.1636 -
 72.1637 -			if (entry->pin == -1)
 72.1638 -				break;
 72.1639 -			reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
 72.1640 -			if (reg & 0x00004000)
 72.1641 -				printk(KERN_CRIT "Aieee!!!  Remote IRR"
 72.1642 -					" still set after unlock!\n");
 72.1643 -			if (!entry->next)
 72.1644 -				break;
 72.1645 -			entry = irq_2_pin + entry->next;
 72.1646 -		}
 72.1647 -#endif
 72.1648 -		__level_IO_APIC_irq(irq);
 72.1649 +		__mask_and_edge_IO_APIC_irq(irq);
 72.1650 +		__unmask_and_level_IO_APIC_irq(irq);
 72.1651  		spin_unlock(&ioapic_lock);
 72.1652  	}
 72.1653  }
 72.1654  
 72.1655 -static void end_level_ioapic_irq(unsigned int irq)
 72.1656 +#ifdef CONFIG_PCI_MSI
 72.1657 +static unsigned int startup_edge_ioapic_vector(unsigned int vector)
 72.1658  {
 72.1659 +	int irq = vector_to_irq(vector);
 72.1660 +
 72.1661 +	return startup_edge_ioapic_irq(irq);
 72.1662 +}
 72.1663 +
 72.1664 +static void ack_edge_ioapic_vector(unsigned int vector)
 72.1665 +{
 72.1666 +	int irq = vector_to_irq(vector);
 72.1667 +
 72.1668 +	ack_edge_ioapic_irq(irq);
 72.1669 +}
 72.1670 +
 72.1671 +static unsigned int startup_level_ioapic_vector (unsigned int vector)
 72.1672 +{
 72.1673 +	int irq = vector_to_irq(vector);
 72.1674 +
 72.1675 +	return startup_level_ioapic_irq (irq);
 72.1676 +}
 72.1677 +
 72.1678 +static void end_level_ioapic_vector (unsigned int vector)
 72.1679 +{
 72.1680 +	int irq = vector_to_irq(vector);
 72.1681 +
 72.1682 +	end_level_ioapic_irq(irq);
 72.1683 +}
 72.1684 +
 72.1685 +static void mask_IO_APIC_vector (unsigned int vector)
 72.1686 +{
 72.1687 +	int irq = vector_to_irq(vector);
 72.1688 +
 72.1689 +	mask_IO_APIC_irq(irq);
 72.1690 +}
 72.1691 +
 72.1692 +static void unmask_IO_APIC_vector (unsigned int vector)
 72.1693 +{
 72.1694 +	int irq = vector_to_irq(vector);
 72.1695 +
 72.1696  	unmask_IO_APIC_irq(irq);
 72.1697  }
 72.1698  
 72.1699 +static void set_ioapic_affinity_vector (unsigned int vector,
 72.1700 +					cpumask_t cpu_mask)
 72.1701 +{
 72.1702 +	int irq = vector_to_irq(vector);
 72.1703 +
 72.1704 +	set_ioapic_affinity_irq(irq, cpu_mask);
 72.1705 +}
 72.1706 +#endif
 72.1707 +
 72.1708 +/*
 72.1709 + * Level and edge triggered IO-APIC interrupts need different handling,
 72.1710 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be
 72.1711 + * handled with the level-triggered descriptor, but that one has slightly
 72.1712 + * more overhead. Level-triggered interrupts cannot be handled with the
 72.1713 + * edge-triggered handler, without risking IRQ storms and other ugly
 72.1714 + * races.
 72.1715 + */
 72.1716 +static struct hw_interrupt_type ioapic_edge_type = {
 72.1717 +	.typename 	= "IO-APIC-edge",
 72.1718 +	.startup 	= startup_edge_ioapic,
 72.1719 +	.shutdown 	= shutdown_edge_ioapic,
 72.1720 +	.enable 	= enable_edge_ioapic,
 72.1721 +	.disable 	= disable_edge_ioapic,
 72.1722 +	.ack 		= ack_edge_ioapic,
 72.1723 +	.end 		= end_edge_ioapic,
 72.1724 +	.set_affinity 	= set_ioapic_affinity,
 72.1725 +};
 72.1726 +
 72.1727 +static struct hw_interrupt_type ioapic_level_type = {
 72.1728 +	.typename 	= "IO-APIC-level",
 72.1729 +	.startup 	= startup_level_ioapic,
 72.1730 +	.shutdown 	= shutdown_level_ioapic,
 72.1731 +	.enable 	= enable_level_ioapic,
 72.1732 +	.disable 	= disable_level_ioapic,
 72.1733 +	.ack 		= mask_and_ack_level_ioapic,
 72.1734 +	.end 		= end_level_ioapic,
 72.1735 +	.set_affinity 	= set_ioapic_affinity,
 72.1736 +};
 72.1737 +
 72.1738  static inline void init_IO_APIC_traps(void)
 72.1739  {
 72.1740  	int irq;
 72.1741 @@ -1418,7 +1776,13 @@ static inline void init_IO_APIC_traps(vo
 72.1742  	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 72.1743  	 */
 72.1744  	for (irq = 0; irq < NR_IRQS ; irq++) {
 72.1745 -		if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
 72.1746 +		int tmp = irq;
 72.1747 +		if (use_pci_vector()) {
 72.1748 +			if (!platform_legacy_irq(tmp))
 72.1749 +				if ((tmp = vector_to_irq(tmp)) == -1)
 72.1750 +					continue;
 72.1751 +		}
 72.1752 +		if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
 72.1753  			/*
 72.1754  			 * Hmm.. We don't have an entry for this,
 72.1755  			 * so default to an old-fashioned 8259
 72.1756 @@ -1457,15 +1821,35 @@ static void ack_lapic_irq (unsigned int 
 72.1757  static void end_lapic_irq (unsigned int i) { /* nothing */ }
 72.1758  
 72.1759  static struct hw_interrupt_type lapic_irq_type = {
 72.1760 -	"local-APIC-edge",
 72.1761 -	NULL, /* startup_irq() not used for IRQ0 */
 72.1762 -	NULL, /* shutdown_irq() not used for IRQ0 */
 72.1763 -	enable_lapic_irq,
 72.1764 -	disable_lapic_irq,
 72.1765 -	ack_lapic_irq,
 72.1766 -	end_lapic_irq
 72.1767 +	.typename 	= "local-APIC-edge",
 72.1768 +	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
 72.1769 +	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
 72.1770 +	.enable 	= enable_lapic_irq,
 72.1771 +	.disable 	= disable_lapic_irq,
 72.1772 +	.ack 		= ack_lapic_irq,
 72.1773 +	.end 		= end_lapic_irq
 72.1774  };
 72.1775  
 72.1776 +#if 0
 72.1777 +static void setup_nmi (void)
 72.1778 +{
 72.1779 +	/*
 72.1780 + 	 * Dirty trick to enable the NMI watchdog ...
 72.1781 +	 * We put the 8259A master into AEOI mode and
 72.1782 +	 * unmask on all local APICs LVT0 as NMI.
 72.1783 +	 *
 72.1784 +	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
 72.1785 +	 * is from Maciej W. Rozycki - so we do not have to EOI from
 72.1786 +	 * the NMI handler or the timer interrupt.
 72.1787 +	 */ 
 72.1788 +	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 72.1789 +
 72.1790 +	on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
 72.1791 +
 72.1792 +	apic_printk(APIC_VERBOSE, " done.\n");
 72.1793 +}
 72.1794 +#endif
 72.1795 +
 72.1796  /*
 72.1797   * This looks a bit hackish but it's about the only one way of sending
 72.1798   * a few INTA cycles to 8259As and any associated glue logic.  ICR does
 72.1799 @@ -1536,7 +1920,6 @@ static inline void unlock_ExtINT_logic(v
 72.1800   */
 72.1801  static inline void check_timer(void)
 72.1802  {
 72.1803 -	extern int timer_ack;
 72.1804  	int pin1, pin2;
 72.1805  	int vector;
 72.1806  
 72.1807 @@ -1569,8 +1952,17 @@ static inline void check_timer(void)
 72.1808  		 * Ok, does IRQ0 through the IOAPIC work?
 72.1809  		 */
 72.1810  		unmask_IO_APIC_irq(0);
 72.1811 -		if (timer_irq_works())
 72.1812 +		if (timer_irq_works()) {
 72.1813 +#if 0
 72.1814 +			if (nmi_watchdog == NMI_IO_APIC) {
 72.1815 +				disable_8259A_irq(0);
 72.1816 +				setup_nmi();
 72.1817 +				enable_8259A_irq(0);
 72.1818 +				check_nmi_watchdog();
 72.1819 +			}
 72.1820 +#endif
 72.1821  			return;
 72.1822 +		}
 72.1823  		clear_IO_APIC_pin(0, pin1);
 72.1824  		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
 72.1825  	}
 72.1826 @@ -1588,6 +1980,12 @@ static inline void check_timer(void)
 72.1827  				replace_pin_at_irq(0, 0, pin1, 0, pin2);
 72.1828  			else
 72.1829  				add_pin_to_irq(0, 0, pin2);
 72.1830 +#if 0
 72.1831 +			if (nmi_watchdog == NMI_IO_APIC) {
 72.1832 +				setup_nmi();
 72.1833 +				check_nmi_watchdog();
 72.1834 +			}
 72.1835 +#endif
 72.1836  			return;
 72.1837  		}
 72.1838  		/*
 72.1839 @@ -1597,6 +1995,11 @@ static inline void check_timer(void)
 72.1840  	}
 72.1841  	printk(" failed.\n");
 72.1842  
 72.1843 +	if (nmi_watchdog == NMI_IO_APIC) {
 72.1844 +		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
 72.1845 +		nmi_watchdog = 0;
 72.1846 +	}
 72.1847 +
 72.1848  	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 72.1849  
 72.1850  	disable_8259A_irq(0);
 72.1851 @@ -1613,6 +2016,7 @@ static inline void check_timer(void)
 72.1852  
 72.1853  	printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
 72.1854  
 72.1855 +	timer_ack = 0;
 72.1856  	init_8259A(0);
 72.1857  	make_8259A_irq(0);
 72.1858  	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
 72.1859 @@ -1624,31 +2028,40 @@ static inline void check_timer(void)
 72.1860  		return;
 72.1861  	}
 72.1862  	printk(" failed :(.\n");
 72.1863 -	panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
 72.1864 +	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 72.1865 +		"report.  Then try booting with the 'noapic' option");
 72.1866 +}
 72.1867 +
 72.1868 +#define NR_IOAPIC_BIOSIDS 256
 72.1869 +static u8 ioapic_biosid_to_apic_enum[NR_IOAPIC_BIOSIDS];
 72.1870 +static void store_ioapic_biosid_mapping(void)
 72.1871 +{
 72.1872 +    u8 apic;
 72.1873 +    memset(ioapic_biosid_to_apic_enum, ~0, NR_IOAPIC_BIOSIDS);
 72.1874 +    for ( apic = 0; apic < nr_ioapics; apic++ )
 72.1875 +        ioapic_biosid_to_apic_enum[mp_ioapics[apic].mpc_apicid] = apic;
 72.1876  }
 72.1877  
 72.1878  /*
 72.1879   *
 72.1880 - * IRQ's that are handled by the old PIC in all cases:
 72.1881 + * IRQ's that are handled by the PIC in the MPS IOAPIC case.
 72.1882   * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
 72.1883   *   Linux doesn't really care, as it's not actually used
 72.1884   *   for any interrupt handling anyway.
 72.1885 - * - There used to be IRQ13 here as well, but all
 72.1886 - *   MPS-compliant must not use it for FPU coupling and we
 72.1887 - *   want to use exception 16 anyway.  And there are
 72.1888 - *   systems who connect it to an I/O APIC for other uses.
 72.1889 - *   Thus we don't mark it special any longer.
 72.1890 - *
 72.1891 - * Additionally, something is definitely wrong with irq9
 72.1892 - * on PIIX4 boards.
 72.1893   */
 72.1894 -#define PIC_IRQS	(1<<2)
 72.1895 +#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 72.1896  
 72.1897  void __init setup_IO_APIC(void)
 72.1898  {
 72.1899 +	store_ioapic_biosid_mapping();
 72.1900 +
 72.1901  	enable_IO_APIC();
 72.1902  
 72.1903 -	io_apic_irqs = ~PIC_IRQS;
 72.1904 +	if (acpi_ioapic)
 72.1905 +		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
 72.1906 +	else
 72.1907 +		io_apic_irqs = ~PIC_IRQS;
 72.1908 +
 72.1909  	printk("ENABLING IO-APIC IRQs\n");
 72.1910  
 72.1911  	/*
 72.1912 @@ -1660,26 +2073,20 @@ void __init setup_IO_APIC(void)
 72.1913  	setup_IO_APIC_irqs();
 72.1914  	init_IO_APIC_traps();
 72.1915  	check_timer();
 72.1916 -	if (!acpi_ioapic)
 72.1917 -		print_IO_APIC();
 72.1918 +	print_IO_APIC();
 72.1919  }
 72.1920  
 72.1921 -#endif /* CONFIG_X86_IO_APIC */
 72.1922 -
 72.1923 -
 72.1924 -
 72.1925  /* --------------------------------------------------------------------------
 72.1926                            ACPI-based IOAPIC Configuration
 72.1927     -------------------------------------------------------------------------- */
 72.1928  
 72.1929  #ifdef CONFIG_ACPI_BOOT
 72.1930  
 72.1931 -#define IO_APIC_MAX_ID		15
 72.1932 -
 72.1933  int __init io_apic_get_unique_id (int ioapic, int apic_id)
 72.1934  {
 72.1935 -	struct IO_APIC_reg_00 reg_00;
 72.1936 -	static unsigned long apic_id_map = 0;
 72.1937 +	union IO_APIC_reg_00 reg_00;
 72.1938 +	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
 72.1939 +	physid_mask_t tmp;
 72.1940  	unsigned long flags;
 72.1941  	int i = 0;
 72.1942  
 72.1943 @@ -1692,38 +2099,31 @@ int __init io_apic_get_unique_id (int io
 72.1944  	 *      advantage of new APIC bus architecture.
 72.1945  	 */
 72.1946  
 72.1947 -	if (!apic_id_map)
 72.1948 -		apic_id_map = phys_cpu_present_map;
 72.1949 +	if (physids_empty(apic_id_map))
 72.1950 +		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
 72.1951  
 72.1952  	spin_lock_irqsave(&ioapic_lock, flags);
 72.1953 -	*(int *)&reg_00 = io_apic_read(ioapic, 0);
 72.1954 +	reg_00.raw = io_apic_read(ioapic, 0);
 72.1955  	spin_unlock_irqrestore(&ioapic_lock, flags);
 72.1956  
 72.1957 -	if (apic_id >= IO_APIC_MAX_ID) {
 72.1958 +	if (apic_id >= get_physical_broadcast()) {
 72.1959  		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
 72.1960 -			"%d\n", ioapic, apic_id, reg_00.ID);
 72.1961 -		apic_id = reg_00.ID;
 72.1962 -	}
 72.1963 -
 72.1964 -	/* XAPICs do not need unique IDs */
 72.1965 -	if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){
 72.1966 -		printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", 
 72.1967 -			ioapic, apic_id);
 72.1968 -		return apic_id;
 72.1969 +			"%d\n", ioapic, apic_id, reg_00.bits.ID);
 72.1970 +		apic_id = reg_00.bits.ID;
 72.1971  	}
 72.1972  
 72.1973  	/*
 72.1974  	 * Every APIC in a system must have a unique ID or we get lots of nice 
 72.1975  	 * 'stuck on smp_invalidate_needed IPI wait' messages.
 72.1976  	 */
 72.1977 -	if (apic_id_map & (1 << apic_id)) {
 72.1978 +	if (check_apicid_used(apic_id_map, apic_id)) {
 72.1979  
 72.1980 -		for (i = 0; i < IO_APIC_MAX_ID; i++) {
 72.1981 -			if (!(apic_id_map & (1 << i)))
 72.1982 +		for (i = 0; i < get_physical_broadcast(); i++) {
 72.1983 +			if (!check_apicid_used(apic_id_map, i))
 72.1984  				break;
 72.1985  		}
 72.1986  
 72.1987 -		if (i == IO_APIC_MAX_ID)
 72.1988 +		if (i == get_physical_broadcast())
 72.1989  			panic("Max apic_id exceeded!\n");
 72.1990  
 72.1991  		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
 72.1992 @@ -1732,22 +2132,24 @@ int __init io_apic_get_unique_id (int io
 72.1993  		apic_id = i;
 72.1994  	} 
 72.1995  
 72.1996 -	apic_id_map |= (1 << apic_id);
 72.1997 +	tmp = apicid_to_cpu_present(apic_id);
 72.1998 +	physids_or(apic_id_map, apic_id_map, tmp);
 72.1999  
 72.2000 -	if (reg_00.ID != apic_id) {
 72.2001 -		reg_00.ID = apic_id;
 72.2002 +	if (reg_00.bits.ID != apic_id) {
 72.2003 +		reg_00.bits.ID = apic_id;
 72.2004  
 72.2005  		spin_lock_irqsave(&ioapic_lock, flags);
 72.2006 -		io_apic_write(ioapic, 0, *(int *)&reg_00);
 72.2007 -		*(int *)&reg_00 = io_apic_read(ioapic, 0);
 72.2008 +		io_apic_write(ioapic, 0, reg_00.raw);
 72.2009 +		reg_00.raw = io_apic_read(ioapic, 0);
 72.2010  		spin_unlock_irqrestore(&ioapic_lock, flags);
 72.2011  
 72.2012  		/* Sanity check */
 72.2013 -		if (reg_00.ID != apic_id)
 72.2014 +		if (reg_00.bits.ID != apic_id)
 72.2015  			panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
 72.2016  	}
 72.2017  
 72.2018 -	printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 72.2019 +	apic_printk(APIC_VERBOSE, KERN_INFO
 72.2020 +			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 72.2021  
 72.2022  	return apic_id;
 72.2023  }
 72.2024 @@ -1755,27 +2157,27 @@ int __init io_apic_get_unique_id (int io
 72.2025  
 72.2026  int __init io_apic_get_version (int ioapic)
 72.2027  {
 72.2028 -	struct IO_APIC_reg_01	reg_01;
 72.2029 +	union IO_APIC_reg_01	reg_01;
 72.2030  	unsigned long flags;
 72.2031  
 72.2032  	spin_lock_irqsave(&ioapic_lock, flags);
 72.2033 -	*(int *)&reg_01 = io_apic_read(ioapic, 1);
 72.2034 +	reg_01.raw = io_apic_read(ioapic, 1);
 72.2035  	spin_unlock_irqrestore(&ioapic_lock, flags);
 72.2036  
 72.2037 -	return reg_01.version;
 72.2038 +	return reg_01.bits.version;
 72.2039  }
 72.2040  
 72.2041  
 72.2042  int __init io_apic_get_redir_entries (int ioapic)
 72.2043  {
 72.2044 -	struct IO_APIC_reg_01	reg_01;
 72.2045 +	union IO_APIC_reg_01	reg_01;
 72.2046  	unsigned long flags;
 72.2047  
 72.2048  	spin_lock_irqsave(&ioapic_lock, flags);
 72.2049 -	*(int *)&reg_01 = io_apic_read(ioapic, 1);
 72.2050 +	reg_01.raw = io_apic_read(ioapic, 1);
 72.2051  	spin_unlock_irqrestore(&ioapic_lock, flags);
 72.2052  
 72.2053 -	return reg_01.entries;
 72.2054 +	return reg_01.bits.entries;
 72.2055  }
 72.2056  
 72.2057  
 72.2058 @@ -1785,7 +2187,7 @@ int io_apic_set_pci_routing (int ioapic,
 72.2059  	unsigned long flags;
 72.2060  
 72.2061  	if (!IO_APIC_IRQ(irq)) {
 72.2062 -		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n", 
 72.2063 +		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 72.2064  			ioapic);
 72.2065  		return -EINVAL;
 72.2066  	}
 72.2067 @@ -1798,12 +2200,12 @@ int io_apic_set_pci_routing (int ioapic,
 72.2068  
 72.2069  	memset(&entry,0,sizeof(entry));
 72.2070  
 72.2071 -	entry.delivery_mode = dest_LowestPrio;
 72.2072 -	entry.dest_mode = INT_DELIVERY_MODE;
 72.2073 -	entry.dest.logical.logical_dest = target_cpus();
 72.2074 -	entry.mask = 1;					 /* Disabled (masked) */
 72.2075 +	entry.delivery_mode = INT_DELIVERY_MODE;
 72.2076 +	entry.dest_mode = INT_DEST_MODE;
 72.2077 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 72.2078  	entry.trigger = edge_level;
 72.2079  	entry.polarity = active_high_low;
 72.2080 +	entry.mask  = 1;
 72.2081  
 72.2082  	/*
 72.2083  	 * IRQs < 16 are already in the irq_2_pin[] map
 72.2084 @@ -1813,17 +2215,12 @@ int io_apic_set_pci_routing (int ioapic,
 72.2085  
 72.2086  	entry.vector = assign_irq_vector(irq);
 72.2087  
 72.2088 -	printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
 72.2089 -		"IRQ %d Mode:%i Active:%i)\n", ioapic,
 72.2090 -		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
 72.2091 +	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
 72.2092 +		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
 72.2093 +		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
 72.2094 +		edge_level, active_high_low);
 72.2095  
 72.2096 -	if (edge_level) {
 72.2097 -		irq_desc[irq].handler = &ioapic_level_irq_type;
 72.2098 -	} else {
 72.2099 -		irq_desc[irq].handler = &ioapic_edge_irq_type;
 72.2100 -	}
 72.2101 -
 72.2102 -	set_intr_gate(entry.vector, interrupt[irq]);
 72.2103 +	ioapic_register_intr(irq, entry.vector, edge_level);
 72.2104  
 72.2105  	if (!ioapic && (irq < 16))
 72.2106  		disable_8259A_irq(irq);
 72.2107 @@ -1838,114 +2235,82 @@ int io_apic_set_pci_routing (int ioapic,
 72.2108  
 72.2109  #endif /*CONFIG_ACPI_BOOT*/
 72.2110  
 72.2111 -/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
 72.2112 -/*                                    level- or edge-triggered.         */
 72.2113 -/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
 72.2114 -static char opt_leveltrigger[30] = "", opt_edgetrigger[30] = "";
 72.2115 -string_param("leveltrigger", opt_leveltrigger);
 72.2116 -string_param("edgetrigger", opt_edgetrigger);
 72.2117 -
 72.2118 -static int __init ioapic_trigger_setup(void)
 72.2119 -{
 72.2120 -    char       *p;
 72.2121 -    irq_desc_t *desc;
 72.2122 -    long        irq;
 72.2123 -
 72.2124 -    p = opt_leveltrigger;
 72.2125 -    while ( *p != '\0' )
 72.2126 -    {
 72.2127 -        irq = simple_strtol(p, &p, 10);
 72.2128 -        if ( (irq <= 0) || (irq >= NR_IRQS) )
 72.2129 -        {
 72.2130 -            printk("IRQ '%ld' out of range in level-trigger list '%s'\n",
 72.2131 -                   irq, opt_leveltrigger);
 72.2132 -            break;
 72.2133 -        }
 72.2134 -
 72.2135 -        printk("Forcing IRQ %ld to level-trigger: ", irq);
 72.2136 -
 72.2137 -        desc = &irq_desc[irq];
 72.2138 -        spin_lock_irq(&desc->lock);
 72.2139  
 72.2140 -        if ( desc->handler == &ioapic_level_irq_type )
 72.2141 -        {
 72.2142 -            printk("already level-triggered (no force applied).\n");
 72.2143 -        }
 72.2144 -        else if ( desc->handler != &ioapic_edge_irq_type )
 72.2145 -        {
 72.2146 -            printk("cannot force (can only force IO-APIC-edge IRQs).\n");
 72.2147 -        }
 72.2148 -        else
 72.2149 -        {
 72.2150 -            desc->handler = &ioapic_level_irq_type;
 72.2151 -            __mask_IO_APIC_irq(irq);
 72.2152 -            __level_IO_APIC_irq(irq);        
 72.2153 -            printk("done.\n");
 72.2154 -        }
 72.2155 +int ioapic_guest_read(int apicid, int address, u32 *pval)
 72.2156 +{
 72.2157 +    u32 val;
 72.2158 +    int apicenum;
 72.2159 +    union IO_APIC_reg_00 reg_00;
 72.2160 +    unsigned long flags;
 72.2161  
 72.2162 -        spin_unlock_irq(&desc->lock);
 72.2163 -
 72.2164 -        if ( *p == '\0' )
 72.2165 -            break;
 72.2166 +    if ( (apicid >= NR_IOAPIC_BIOSIDS) ||
 72.2167 +         ((apicenum = ioapic_biosid_to_apic_enum[apicid]) >= nr_ioapics) )
 72.2168 +            return -EINVAL;
 72.2169  
 72.2170 -        if ( *p != ',' )
 72.2171 -        {
 72.2172 -            printk("Unexpected character '%c' in level-trigger list '%s'\n",
 72.2173 -                   *p, opt_leveltrigger);
 72.2174 -            break;
 72.2175 -        }
 72.2176 +    spin_lock_irqsave(&ioapic_lock, flags);
 72.2177 +    val = io_apic_read(apicenum, address);
 72.2178 +    spin_unlock_irqrestore(&ioapic_lock, flags);
 72.2179  
 72.2180 -        p++;
 72.2181 +    /* Rewrite APIC ID to what the BIOS originally specified. */
 72.2182 +    if ( address == 0 )
 72.2183 +    {
 72.2184 +        reg_00.raw = val;
 72.2185 +        reg_00.bits.ID = apicid;
 72.2186 +        val = reg_00.raw;
 72.2187      }
 72.2188  
 72.2189 -    p = opt_edgetrigger;
 72.2190 -    while ( *p != '\0' )
 72.2191 +    *pval = val;
 72.2192 +    return 0;
 72.2193 +}
 72.2194 +
 72.2195 +int ioapic_guest_write(int apicid, int address, u32 val)
 72.2196 +{
 72.2197 +    int apicenum, pin, irq;
 72.2198 +    struct IO_APIC_route_entry rte = { 0 };
 72.2199 +    struct irq_pin_list *entry;
 72.2200 +    unsigned long flags;
 72.2201 +
 72.2202 +    if ( (apicid >= NR_IOAPIC_BIOSIDS) ||
 72.2203 +         ((apicenum = ioapic_biosid_to_apic_enum[apicid]) >= nr_ioapics) )
 72.2204 +            return -EINVAL;
 72.2205 +
 72.2206 +    /* Only write to the first half of a route entry. */
 72.2207 +    if ( (address < 0x10) || (address & 1) )
 72.2208 +        return 0;
 72.2209 +    
 72.2210 +    pin = (address - 0x10) >> 1;
 72.2211 +
 72.2212 +    rte.dest.logical.logical_dest = target_cpus();
 72.2213 +    *(int *)&rte = val;
 72.2214 +
 72.2215 +    if ( rte.vector >= FIRST_DEVICE_VECTOR )
 72.2216      {
 72.2217 -        irq = simple_strtol(p, &p, 10);
 72.2218 -        if ( (irq <= 0) || (irq >= NR_IRQS) )
 72.2219 -        {
 72.2220 -            printk("IRQ '%ld' out of range in edge-trigger list '%s'\n",
 72.2221 -                   irq, opt_edgetrigger);
 72.2222 -            break;
 72.2223 -        }
 72.2224 +        /* Is there a valid irq mapped to this vector? */
 72.2225 +        irq = vector_irq[rte.vector];
 72.2226 +        if ( !IO_APIC_IRQ(irq) )
 72.2227 +            return 0;
 72.2228  
 72.2229 -        printk("Forcing IRQ %ld to edge-trigger: ", irq);
 72.2230 -
 72.2231 -        desc = &irq_desc[irq];
 72.2232 -        spin_lock_irq(&desc->lock);
 72.2233 +        /* Set the correct irq-handling type. */
 72.2234 +        irq_desc[irq].handler = rte.trigger ? 
 72.2235 +            &ioapic_level_type: &ioapic_edge_type;
 72.2236  
 72.2237 -        if ( desc->handler == &ioapic_edge_irq_type )
 72.2238 -        {
 72.2239 -            printk("already edge-triggered (no force applied).\n");
 72.2240 -        }
 72.2241 -        else if ( desc->handler != &ioapic_level_irq_type )
 72.2242 +        /* Record the pin<->irq mapping. */
 72.2243 +        for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] )
 72.2244          {
 72.2245 -            printk("cannot force (can only force IO-APIC-level IRQs).\n");
 72.2246 +            if ( (entry->apic == apicenum) && (entry->pin == pin) )
 72.2247 +                break;
 72.2248 +            if ( !entry->next )
 72.2249 +            {
 72.2250 +                add_pin_to_irq(irq, apicenum, pin);
 72.2251 +                break;
 72.2252 +            }
 72.2253          }
 72.2254 -        else
 72.2255 -        {
 72.2256 -            desc->handler = &ioapic_edge_irq_type;
 72.2257 -            __edge_IO_APIC_irq(irq);        
 72.2258 -            desc->status |= IRQ_PENDING; /* may have lost a masked edge */
 72.2259 -            printk("done.\n");
 72.2260 -        }
 72.2261 -
 72.2262 -        spin_unlock_irq(&desc->lock);
 72.2263 +    }
 72.2264  
 72.2265 -        if ( *p == '\0' )
 72.2266 -            break;
 72.2267 -
 72.2268 -        if ( *p != ',' )
 72.2269 -        {
 72.2270 -            printk("Unexpected character '%c' in edge-trigger list '%s'\n",
 72.2271 -                   *p, opt_edgetrigger);
 72.2272 -            break;
 72.2273 -        }
 72.2274 -
 72.2275 -        p++;
 72.2276 -    }
 72.2277 +    spin_lock_irqsave(&ioapic_lock, flags);
 72.2278 +    io_apic_write(apicenum, 0x10 + 2 * pin, *(((int *)&rte) + 0));
 72.2279 +    io_apic_write(apicenum, 0x11 + 2 * pin, *(((int *)&rte) + 1));
 72.2280 +    spin_unlock_irqrestore(&ioapic_lock, flags);
 72.2281  
 72.2282      return 0;
 72.2283  }
 72.2284 -
 72.2285 -__initcall(ioapic_trigger_setup);
    73.1 --- a/xen/arch/x86/irq.c	Tue May 03 14:42:00 2005 +0000
    73.2 +++ b/xen/arch/x86/irq.c	Tue May 10 08:49:59 2005 +0000
    73.3 @@ -42,7 +42,6 @@ struct hw_interrupt_type no_irq_type = {
    73.4  };
    73.5  
    73.6  atomic_t irq_err_count;
    73.7 -atomic_t irq_mis_count;
    73.8  
    73.9  inline void disable_irq_nosync(unsigned int irq)
   73.10  {
    74.1 --- a/xen/arch/x86/microcode.c	Tue May 03 14:42:00 2005 +0000
    74.2 +++ b/xen/arch/x86/microcode.c	Tue May 10 08:49:59 2005 +0000
    74.3 @@ -87,13 +87,6 @@
    74.4  #define vmalloc(_s) xmalloc_bytes(_s)
    74.5  #define vfree(_p) xfree(_p)
    74.6  #define num_online_cpus() smp_num_cpus
    74.7 -static inline int on_each_cpu(
    74.8 -    void (*func) (void *info), void *info, int retry, int wait)
    74.9 -{
   74.10 -    int ret = smp_call_function(func, info, retry, wait);
   74.11 -    func(info);
   74.12 -    return ret;
   74.13 -}
   74.14  
   74.15  #if 0
   74.16  MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
    75.1 --- a/xen/arch/x86/mm.c	Tue May 03 14:42:00 2005 +0000
    75.2 +++ b/xen/arch/x86/mm.c	Tue May 10 08:49:59 2005 +0000
    75.3 @@ -316,7 +316,7 @@ int map_ldt_shadow_page(unsigned int off
    75.4  
    75.5      res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
    75.6  
    75.7 -    if ( !res && unlikely(shadow_mode_enabled(d)) )
    75.8 +    if ( !res && unlikely(shadow_mode_refcounts(d)) )
    75.9      {
   75.10          shadow_lock(d);
   75.11          shadow_remove_all_write_access(d, gpfn, gmfn);
   75.12 @@ -392,7 +392,7 @@ get_linear_pagetable(
   75.13      struct pfn_info *page;
   75.14      unsigned long pfn;
   75.15  
   75.16 -    ASSERT( !shadow_mode_enabled(d) );
   75.17 +    ASSERT( !shadow_mode_refcounts(d) );
   75.18  
   75.19      if ( (root_get_flags(re) & _PAGE_RW) )
   75.20      {
   75.21 @@ -482,7 +482,7 @@ get_page_from_l2e(
   75.22  {
   75.23      int rc;
   75.24  
   75.25 -    ASSERT(!shadow_mode_enabled(d));
   75.26 +    ASSERT(!shadow_mode_refcounts(d));
   75.27  
   75.28      if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
   75.29          return 1;
   75.30 @@ -512,6 +512,8 @@ static int
   75.31  get_page_from_l3e(
   75.32      l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
   75.33  {
   75.34 +    ASSERT( !shadow_mode_refcounts(d) );
   75.35 +
   75.36      if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
   75.37          return 1;
   75.38  
   75.39 @@ -533,6 +535,8 @@ get_page_from_l4e(
   75.40  {
   75.41      int rc;
   75.42  
   75.43 +    ASSERT( !shadow_mode_refcounts(d) );
   75.44 +
   75.45      if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
   75.46          return 1;
   75.47  
   75.48 @@ -641,7 +645,7 @@ static int alloc_l1_table(struct pfn_inf
   75.49      l1_pgentry_t  *pl1e;
   75.50      int            i;
   75.51  
   75.52 -    ASSERT(!shadow_mode_enabled(d));
   75.53 +    ASSERT(!shadow_mode_refcounts(d));
   75.54  
   75.55      pl1e = map_domain_mem(pfn << PAGE_SHIFT);
   75.56  
   75.57 @@ -670,10 +674,12 @@ static int alloc_l2_table(struct pfn_inf
   75.58      l2_pgentry_t  *pl2e;
   75.59      int            i;
   75.60  
   75.61 +    // See the code in shadow_promote() to understand why this is here...
   75.62      if ( (PGT_base_page_table == PGT_l2_page_table) &&
   75.63 -         shadow_mode_enabled(d) )
   75.64 +         unlikely(shadow_mode_refcounts(d)) )
   75.65          return 1;
   75.66 -    ASSERT( !shadow_mode_enabled(d) );
   75.67 +
   75.68 +    ASSERT( !shadow_mode_refcounts(d) );
   75.69     
   75.70      pl2e = map_domain_mem(pfn << PAGE_SHIFT);
   75.71  
   75.72 @@ -716,7 +722,7 @@ static int alloc_l3_table(struct pfn_inf
   75.73      l3_pgentry_t  *pl3e = page_to_virt(page);
   75.74      int            i;
   75.75  
   75.76 -    ASSERT( !shadow_mode_enabled(d) );
   75.77 +    ASSERT( !shadow_mode_refcounts(d) );
   75.78  
   75.79      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
   75.80          if ( is_guest_l3_slot(i) &&
   75.81 @@ -741,10 +747,12 @@ static int alloc_l4_table(struct pfn_inf
   75.82      l4_pgentry_t  *pl4e = page_to_virt(page);
   75.83      int            i;
   75.84  
   75.85 +    // See the code in shadow_promote() to understand why this is here...
   75.86      if ( (PGT_base_page_table == PGT_l4_page_table) &&
   75.87 -         shadow_mode_enabled(d) )
   75.88 +         shadow_mode_refcounts(d) )
   75.89          return 1;
   75.90 -    ASSERT( !shadow_mode_enabled(d) );
   75.91 +
   75.92 +    ASSERT( !shadow_mode_refcounts(d) );
   75.93  
   75.94      for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
   75.95          if ( is_guest_l4_slot(i) &&
   75.96 @@ -861,11 +869,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
   75.97      l1_pgentry_t ol1e;
   75.98      struct domain *d = current->domain;
   75.99  
  75.100 -    ASSERT( !shadow_mode_enabled(d) );
  75.101 -
  75.102      if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
  75.103          return 0;
  75.104  
  75.105 +    if ( unlikely(shadow_mode_refcounts(d)) )
  75.106 +        return update_l1e(pl1e, ol1e, nl1e);
  75.107 +
  75.108      if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
  75.109      {
  75.110          if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
  75.111 @@ -893,7 +902,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
  75.112          if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
  75.113              return 0;
  75.114      }
  75.115 -    
  75.116 +
  75.117      put_page_from_l1e(ol1e, d);
  75.118      return 1;
  75.119  }
  75.120 @@ -1095,8 +1104,19 @@ int alloc_page_type(struct pfn_info *pag
  75.121  void free_page_type(struct pfn_info *page, unsigned int type)
  75.122  {
  75.123      struct domain *owner = page_get_owner(page);
  75.124 -    if ( likely(owner != NULL) && unlikely(shadow_mode_enabled(owner)) )
  75.125 -        return;
  75.126 +    unsigned long gpfn;
  75.127 +
  75.128 +    if ( owner != NULL )
  75.129 +    {
  75.130 +        if ( unlikely(shadow_mode_refcounts(owner)) )
  75.131 +            return;
  75.132 +        if ( unlikely(shadow_mode_enabled(owner)) )
  75.133 +        {
  75.134 +            gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
  75.135 +            ASSERT(VALID_M2P(gpfn));
  75.136 +            remove_shadow(owner, gpfn, type);
  75.137 +        }
  75.138 +    }
  75.139  
  75.140      switch ( type )
  75.141      {
  75.142 @@ -1142,7 +1162,7 @@ void put_page_type(struct pfn_info *page
  75.143           * See domain.c:relinquish_list().
  75.144           */
  75.145          ASSERT((x & PGT_validated) || 
  75.146 -               test_bit(DF_DYING, &page_get_owner(page)->d_flags));
  75.147 +               test_bit(DF_DYING, &page_get_owner(page)->flags));
  75.148  
  75.149          if ( unlikely((nx & PGT_count_mask) == 0) )
  75.150          {
  75.151 @@ -1287,7 +1307,7 @@ int new_guest_cr3(unsigned long mfn)
  75.152      int okay;
  75.153      unsigned long old_base_mfn;
  75.154  
  75.155 -    if ( shadow_mode_enabled(d) )
  75.156 +    if ( shadow_mode_refcounts(d) )
  75.157          okay = get_page_from_pagenr(mfn, d);
  75.158      else
  75.159          okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
  75.160 @@ -1296,24 +1316,24 @@ int new_guest_cr3(unsigned long mfn)
  75.161      {
  75.162          invalidate_shadow_ldt(ed);
  75.163  
  75.164 -        old_base_mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
  75.165 +        old_base_mfn = pagetable_get_pfn(ed->arch.guest_table);
  75.166          ed->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
  75.167          update_pagetables(ed); /* update shadow_table and monitor_table */
  75.168  
  75.169          write_ptbase(ed);
  75.170  
  75.171 -        if ( shadow_mode_enabled(d) )
  75.172 +        if ( shadow_mode_refcounts(d) )
  75.173              put_page(&frame_table[old_base_mfn]);
  75.174          else
  75.175              put_page_and_type(&frame_table[old_base_mfn]);
  75.176  
  75.177 -        /* CR3 holds its own ref to its shadow. */
  75.178 +        /* CR3 also holds a ref to its shadow... */
  75.179          if ( shadow_mode_enabled(d) )
  75.180          {
  75.181              if ( ed->arch.monitor_shadow_ref )
  75.182                  put_shadow_ref(ed->arch.monitor_shadow_ref);
  75.183              ed->arch.monitor_shadow_ref =
  75.184 -                pagetable_val(ed->arch.monitor_table) >> PAGE_SHIFT;
  75.185 +                pagetable_get_pfn(ed->arch.monitor_table);
  75.186              ASSERT(!page_get_owner(&frame_table[ed->arch.monitor_shadow_ref]));
  75.187              get_shadow_ref(ed->arch.monitor_shadow_ref);
  75.188          }
  75.189 @@ -1486,7 +1506,7 @@ int do_mmuext_op(
  75.190              type = PGT_l1_page_table | PGT_va_mutable;
  75.191  
  75.192          pin_page:
  75.193 -            if ( shadow_mode_enabled(FOREIGNDOM) )
  75.194 +            if ( shadow_mode_refcounts(FOREIGNDOM) )
  75.195                  type = PGT_writable_page;
  75.196  
  75.197              okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
  75.198