ia64/xen-unstable

changeset 4808:487de0451d2b

bitkeeper revision 1.1389.1.36 (428075b7XUww-l4YTG96au0lbtOO_g)

Merge firebug.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
into firebug.cl.cam.ac.uk:/local/scratch/cl349/xen-unstable.bk
author cl349@firebug.cl.cam.ac.uk[cl349]
date Tue May 10 08:49:59 2005 +0000 (2005-05-10)
parents 9c9be1b0e3b1 f470118a979e
children 754aa05abf61
files .rootkeys BitKeeper/etc/ignore BitKeeper/etc/logging_ok docs/misc/sedf_scheduler_mini-HOWTO.txt freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h freebsd-5.3-xen-sparse/i386-xen/include/pmap.h freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h linux-2.4.29-xen-sparse/mkbuildtree linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile linux-2.6.11-xen-sparse/drivers/acpi/tables.c linux-2.6.11-xen-sparse/drivers/xen/Makefile linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c 
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io_apic.h linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h linux-2.6.11-xen-sparse/mkbuildtree tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c tools/libxc/Makefile tools/libxc/xc.h tools/libxc/xc_domain.c tools/libxc/xc_physdev.c tools/libxc/xc_sedf.c tools/libxutil/sxpr_parser.h tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/XendClient.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xm/main.py xen/arch/ia64/domain.c xen/arch/ia64/idle0_task.c xen/arch/ia64/xenmisc.c xen/arch/ia64/xensetup.c xen/arch/x86/Makefile xen/arch/x86/Rules.mk xen/arch/x86/acpi.c xen/arch/x86/acpi/boot.c xen/arch/x86/apic.c xen/arch/x86/audit.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/i387.c xen/arch/x86/i8259.c xen/arch/x86/idle0_task.c xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/microcode.c xen/arch/x86/mm.c xen/arch/x86/mpparse.c xen/arch/x86/mtrr/generic.c xen/arch/x86/mtrr/main.c xen/arch/x86/nmi.c xen/arch/x86/pci-irq.c xen/arch/x86/pci-pc.c xen/arch/x86/pci-x86.c xen/arch/x86/pci-x86.h xen/arch/x86/physdev.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/smpboot.c xen/arch/x86/string.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/mm.c xen/common/Makefile xen/common/bitmap.c xen/common/dom0_ops.c xen/common/domain.c xen/common/event_channel.c xen/common/keyhandler.c xen/common/page_alloc.c xen/common/physdev.c xen/common/sched_bvt.c xen/common/sched_sedf.c xen/common/schedule.c xen/common/xmalloc.c xen/drivers/Makefile xen/drivers/acpi/Makefile xen/drivers/acpi/acpi_ksyms.c xen/drivers/acpi/tables.c xen/drivers/char/console.c xen/drivers/char/serial.c xen/drivers/pci/Makefile xen/drivers/pci/compat.c xen/drivers/pci/gen-devlist.c 
xen/drivers/pci/names.c xen/drivers/pci/pci.c xen/drivers/pci/pci.ids xen/drivers/pci/quirks.c xen/drivers/pci/setup-res.c xen/include/acpi/acconfig.h xen/include/acpi/acdebug.h xen/include/acpi/acdisasm.h xen/include/acpi/acdispat.h xen/include/acpi/acevents.h xen/include/acpi/acexcep.h xen/include/acpi/acglobal.h xen/include/acpi/achware.h xen/include/acpi/acinterp.h xen/include/acpi/aclocal.h xen/include/acpi/acmacros.h xen/include/acpi/acnamesp.h xen/include/acpi/acobject.h xen/include/acpi/acoutput.h xen/include/acpi/acparser.h xen/include/acpi/acpi.h xen/include/acpi/acpi_bus.h xen/include/acpi/acpi_drivers.h xen/include/acpi/acpiosxf.h xen/include/acpi/acpixf.h xen/include/acpi/acresrc.h xen/include/acpi/acstruct.h xen/include/acpi/actables.h xen/include/acpi/actbl.h xen/include/acpi/actbl1.h xen/include/acpi/actbl2.h xen/include/acpi/actbl71.h xen/include/acpi/actypes.h xen/include/acpi/acutils.h xen/include/acpi/amlcode.h xen/include/acpi/amlresrc.h xen/include/acpi/platform/acenv.h xen/include/acpi/platform/acgcc.h xen/include/acpi/platform/aclinux.h xen/include/asm-ia64/config.h xen/include/asm-ia64/serial.h xen/include/asm-x86/apic.h xen/include/asm-x86/apicdef.h xen/include/asm-x86/config.h xen/include/asm-x86/debugger.h xen/include/asm-x86/domain.h xen/include/asm-x86/fixmap.h xen/include/asm-x86/i387.h xen/include/asm-x86/io_apic.h xen/include/asm-x86/io_ports.h xen/include/asm-x86/irq.h xen/include/asm-x86/mach-default/apm.h xen/include/asm-x86/mach-default/bios_ebda.h xen/include/asm-x86/mach-default/do_timer.h xen/include/asm-x86/mach-default/entry_arch.h xen/include/asm-x86/mach-default/io_ports.h xen/include/asm-x86/mach-default/irq_vectors.h xen/include/asm-x86/mach-default/irq_vectors_limits.h xen/include/asm-x86/mach-default/mach_apic.h xen/include/asm-x86/mach-default/mach_apicdef.h xen/include/asm-x86/mach-default/mach_ipi.h xen/include/asm-x86/mach-default/mach_mpparse.h xen/include/asm-x86/mach-default/mach_mpspec.h 
xen/include/asm-x86/mach-default/mach_reboot.h xen/include/asm-x86/mach-default/mach_time.h xen/include/asm-x86/mach-default/mach_timer.h xen/include/asm-x86/mach-default/mach_traps.h xen/include/asm-x86/mach-default/mach_wakecpu.h xen/include/asm-x86/mach-default/pci-functions.h xen/include/asm-x86/mach-default/setup_arch_post.h xen/include/asm-x86/mach-default/setup_arch_pre.h xen/include/asm-x86/mach-default/smpboot_hooks.h xen/include/asm-x86/mach_apic.h xen/include/asm-x86/mm.h xen/include/asm-x86/mpspec.h xen/include/asm-x86/mpspec_def.h xen/include/asm-x86/page.h xen/include/asm-x86/physdev.h xen/include/asm-x86/processor.h xen/include/asm-x86/shadow.h xen/include/asm-x86/smp.h xen/include/asm-x86/smpboot.h xen/include/asm-x86/system.h xen/include/asm-x86/time.h xen/include/asm-x86/x86_32/domain_page.h xen/include/asm-x86/x86_64/string.h xen/include/public/dom0_ops.h xen/include/public/physdev.h xen/include/public/sched_ctl.h xen/include/xen/acpi.h xen/include/xen/adv_sched_hist.h xen/include/xen/bitmap.h xen/include/xen/bitops.h xen/include/xen/config.h xen/include/xen/cpumask.h xen/include/xen/event.h xen/include/xen/irq.h xen/include/xen/lib.h xen/include/xen/perfc_defn.h xen/include/xen/physdev.h xen/include/xen/sched-if.h xen/include/xen/sched.h xen/include/xen/serial.h xen/include/xen/slab.h xen/include/xen/smp.h xen/include/xen/spinlock.h xen/include/xen/types.h
line diff
     1.1 --- a/.rootkeys	Tue May 03 14:42:00 2005 +0000
     1.2 +++ b/.rootkeys	Tue May 10 08:49:59 2005 +0000
     1.3 @@ -21,6 +21,7 @@ 4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc
     1.4  412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt
     1.5  420b949cy9ZGzED74Fz_DaWlK7tT4g docs/misc/crashdb.txt
     1.6  4251a1f82AexscYEiF4Iku8Gc_kWfQ docs/misc/grant-tables.txt
     1.7 +424d462b5GuApQ_NyMsRFt9LbrsWow docs/misc/sedf_scheduler_mini-HOWTO.txt
     1.8  40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html
     1.9  410a4c2bAO_m_l4RsiiPHnZ4ixHWbQ docs/misc/xend.tex
    1.10  3f9e7d564bWFB-Czjv1qdmE6o0GqNg docs/src/interface.tex
    1.11 @@ -353,7 +354,6 @@ 41979925z1MsKU1SfuuheM1IFDQ_bA linux-2.6
    1.12  4118b6a418gnL6AZsTdglC92YGqYTg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h
    1.13  42539fb5A9hsS3NFQ-2VY4y1TONZZQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
    1.14  40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io.h
    1.15 -42778a69MXZVxch4pQqYsMPS0WnNSg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io_apic.h
    1.16  40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
    1.17  40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
    1.18  40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
    1.19 @@ -745,6 +745,7 @@ 3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/lib
    1.20  3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxc/xc_private.h
    1.21  42337174PxyzzPk62raDiYCIsfStDg tools/libxc/xc_ptrace.c
    1.22  4273458duzL--nsTfT6e_q6Kfij48g tools/libxc/xc_ptrace_core.c
    1.23 +41ebbfe9U0b0kI-HgjK7VEY4EvW7_w tools/libxc/xc_sedf.c
    1.24  41dde8b0pLfAKMs_L9Uri2hnzHiCRQ tools/libxc/xc_vmx_build.c
    1.25  40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/libxutil/Makefile
    1.26  40e033325Sjqs-_4TuzeUEprP_gYFg tools/libxutil/allocate.c
    1.27 @@ -1188,7 +1189,7 @@ 421098b6lY2JzrV1oFDbrt7XQhtElg xen/arch/
    1.28  427664f5eygrc3nEhI3RKf0Y37PzyA xen/arch/ia64/xentime.c
    1.29  3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/x86/Makefile
    1.30  3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk
    1.31 -3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c
    1.32 +3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi/boot.c
    1.33  3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c
    1.34  42360b3244-Q6BpEKhR_A1YtG1wPNQ xen/arch/x86/audit.c
    1.35  3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen/arch/x86/boot/mkelf32.c
    1.36 @@ -1218,10 +1219,7 @@ 41aaf567tqrKGSTDK8OVeAbpeoccPw xen/arch/
    1.37  41aaf567a36esU-rUK7twPiv-yTFyw xen/arch/x86/mtrr/mtrr.h
    1.38  41aaf567DcTL6pqVtLZJI5cSryyA1A xen/arch/x86/mtrr/state.c
    1.39  3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/x86/nmi.c
    1.40 -3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/x86/pci-irq.c
    1.41 -3ddb79bcZ_2FxINljqNSkqa17ISyJw xen/arch/x86/pci-pc.c
    1.42 -3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/x86/pci-x86.c
    1.43 -3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/x86/pci-x86.h
    1.44 +4051bcecFeq4DE70p4zGO5setf47CA xen/arch/x86/physdev.c
    1.45  3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/x86/rwlock.c
    1.46  3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/x86/setup.c
    1.47  405b8599xI_PoEr3zZoJ2on-jdn7iw xen/arch/x86/shadow.c
    1.48 @@ -1253,6 +1251,7 @@ 40e96d3akN3Hu_J5Bk-WXD8OGscrYQ xen/arch/
    1.49  422f27c8J9DQfCpegccMid59XhSmGA xen/arch/x86/x86_emulate.c
    1.50  3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile
    1.51  3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c
    1.52 +427fa2d0J0LU2s5oKbsM0nTZ2iyd2Q xen/common/bitmap.c
    1.53  3ddb79bdLX_P6iB7ILiblRLWvebapg xen/common/dom0_ops.c
    1.54  3e6377e4i0c9GtKN65e99OtRbw3AZw xen/common/dom_mem_ops.c
    1.55  3ddb79bdYO5D8Av12NHqPeSviav7cg xen/common/domain.c
    1.56 @@ -1265,9 +1264,9 @@ 3ddb79bduhSEZI8xa7IbGQCpap5y2A xen/commo
    1.57  41a61536SZbR6cj1ukWTb0DYU-vz9w xen/common/multicall.c
    1.58  3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c
    1.59  3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c
    1.60 -4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c
    1.61  3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
    1.62  40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c
    1.63 +41ebbfe9oF1BF3cH5v7yE3eOL9uPbA xen/common/sched_sedf.c
    1.64  3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
    1.65  3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c
    1.66  3e7f358awXBC3Vw-wFRwPw18qL1khg xen/common/string.c
    1.67 @@ -1276,50 +1275,29 @@ 3ddb79bd3zgV33PHdt-cgh3sxcb1hw xen/commo
    1.68  4203fb92Qcy7mGpauBdq09J-WAqfoA xen/common/xmalloc.c
    1.69  3ddb79c0ppNeJtjC4va8j41ADCnchA xen/drivers/Makefile
    1.70  40715b2bi9gU43-cYzlmPDgreYQchw xen/drivers/acpi/Makefile
    1.71 -40715b2bDxNCz5LFV8FAXihmYJZFUQ xen/drivers/acpi/acpi_ksyms.c
    1.72  40715b2cNVOegtvyft_AHFKJYRprfA xen/drivers/acpi/tables.c
    1.73  3e4a8cb7alzQCDKS7MlioPoHBKYkdQ xen/drivers/char/Makefile
    1.74  4049e6bfNSIq7s7OV-Bd69QD0RpR2Q xen/drivers/char/console.c
    1.75  3e4a8cb7nMChlro4wvOBo76n__iCFA xen/drivers/char/serial.c
    1.76 -3ddb79beUWngyIhMHgyPtuTem4o4JA xen/drivers/pci/Makefile
    1.77 -3ddb79beU9td0Mnm0VUMklerBa37qQ xen/drivers/pci/compat.c
    1.78 -3ddb79beHkGQE58z5t5gyUCYiwOxvw xen/drivers/pci/gen-devlist.c
    1.79 -3ddb79bfoQcFKLf5P6wZlDl36alWdQ xen/drivers/pci/names.c
    1.80 -3ddb79bfyX7-pD6XdxY_mdNrJR20iw xen/drivers/pci/pci.c
    1.81 -3ddb79bf2AS7YBGwooE_Kbv7XgUqNQ xen/drivers/pci/pci.ids
    1.82 -3ddb79bf7sTn85WtP_8Nc2YEmmVExQ xen/drivers/pci/quirks.c
    1.83 -3ddb79bfJaf0bkE1Y67bnll8-kjEPg xen/drivers/pci/setup-res.c
    1.84  40715b2cFpte_UNWnBZW0Du7z9AhTQ xen/include/acpi/acconfig.h
    1.85 -40715b2cEQWX-PaxEH30qI48K1krnQ xen/include/acpi/acdebug.h
    1.86 -40715b2c_7j-oy3ZNAuqE3IFNPzArg xen/include/acpi/acdisasm.h
    1.87 -40715b2cFdcPx-2FHGM1Q-M-592xYQ xen/include/acpi/acdispat.h
    1.88 -40715b2cKqD_Lihnlmvnyl4AAl3EFw xen/include/acpi/acevents.h
    1.89  40715b2ctNvVZ058w8eM8DR9hOat_A xen/include/acpi/acexcep.h
    1.90  40715b2com8I01qcHcAw47e93XsCqQ xen/include/acpi/acglobal.h
    1.91  40715b2cS1t4uI3sMsu-c0M4qqAIrw xen/include/acpi/achware.h
    1.92 -40715b2chSz545A4Tq-y3WAtanzPMQ xen/include/acpi/acinterp.h
    1.93  40715b2cGf23lRI58NphiaDQl698-w xen/include/acpi/aclocal.h
    1.94  40715b2cdG7tCF2NMk0j1RCQQPzPXg xen/include/acpi/acmacros.h
    1.95 -40715b2ckP2XZZDkSTehu6riuMogDA xen/include/acpi/acnamesp.h
    1.96  40715b2c4AvHYn2-2YIyt3mx-Mm5tw xen/include/acpi/acobject.h
    1.97  40715b2cPUXsHzmchvXx7QHAfW0nMw xen/include/acpi/acoutput.h
    1.98 -40715b2cNbYpt9CHmCHg3RG7fBdACA xen/include/acpi/acparser.h
    1.99  40715b2cWM_6zR14U9Tp0s_q8D002A xen/include/acpi/acpi.h
   1.100  40715b2dcJDTiROgyMk9swD_veWktA xen/include/acpi/acpi_bus.h
   1.101  40715b2dRFlZK6apnH7WkUCBdyFXWA xen/include/acpi/acpi_drivers.h
   1.102  40715b2dtgZhNGAzlyBMe3kqve3mqw xen/include/acpi/acpiosxf.h
   1.103  40715b2dpW5TY7n5rzCufsDhJVWuMQ xen/include/acpi/acpixf.h
   1.104 -40715b2dp9-_D9LTjcvtf-Yopih5mQ xen/include/acpi/acresrc.h
   1.105  40715b2djvd97KbIpt4wyJgxwqCqmg xen/include/acpi/acstruct.h
   1.106 -40715b2dRIU9cQgmTjtCRNleIJDAYg xen/include/acpi/actables.h
   1.107  40715b2dy8ECRkSo9x0tRRueAjPx1g xen/include/acpi/actbl.h
   1.108  40715b2ds4J-XWn9Ix-lgBiJffNgxw xen/include/acpi/actbl1.h
   1.109  40715b2d_aMKMjKKNImJR4km52KRHA xen/include/acpi/actbl2.h
   1.110 -40715b2dxC03aemijgL-iDH_-BkKRw xen/include/acpi/actbl71.h
   1.111  40715b2d0oQUijKwEw6SDJ4LhD8c4g xen/include/acpi/actypes.h
   1.112  40715b2dBByvcAtRpN5mafyEYLcBWA xen/include/acpi/acutils.h
   1.113 -40715b2d_iPcZ0uUVGblPvjsHOwE5Q xen/include/acpi/amlcode.h
   1.114 -40715b2d3CdS6dIpZDTiCJRlDG3LCA xen/include/acpi/amlresrc.h
   1.115  40715b2dKRW7A71SNaeV6zfrEzYxPw xen/include/acpi/platform/acenv.h
   1.116  40715b2d8fYydJMcODFrV1ocLklGDg xen/include/acpi/platform/acgcc.h
   1.117  40715b2d1yZkqyAt0kgx2xEwsatuuA xen/include/acpi/platform/aclinux.h
   1.118 @@ -1369,18 +1347,39 @@ 3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/inclu
   1.119  4204e7acwXDo-5iAAiO2eQbtDeYZXA xen/include/asm-x86/init.h
   1.120  3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-x86/io.h
   1.121  3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-x86/io_apic.h
   1.122 -42605109qxLSrHVE2SRhGXmgk907iw xen/include/asm-x86/io_ports.h
   1.123  3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-x86/irq.h
   1.124  404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-x86/ldt.h
   1.125 -4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach_apic.h
   1.126 +427fa2d0m8MOSSXT13zgb-q0fGA_Dw xen/include/asm-x86/mach-default/apm.h
   1.127 +427fa2d0suK9Av7vsAXhsQxZjqpc_Q xen/include/asm-x86/mach-default/bios_ebda.h
   1.128 +427fa2d0yC3KzLozoeK3Xa3uGVfIdw xen/include/asm-x86/mach-default/do_timer.h
   1.129 +427fa2d0bWQkR1mW5OBYxn07AN-bDw xen/include/asm-x86/mach-default/entry_arch.h
   1.130 +427fa2d0-SWcuwbdSypo4953bc2JdQ xen/include/asm-x86/mach-default/io_ports.h
   1.131 +427fa2d0eyAl7LAeO-SVV4IW7lZPGQ xen/include/asm-x86/mach-default/irq_vectors.h
   1.132 +427fa2d0df7VWG4KKpnKbKR2Cbd1_w xen/include/asm-x86/mach-default/irq_vectors_limits.h
   1.133 +4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach-default/mach_apic.h
   1.134 +427fa2d0I3FWjE2tWdOhlEOJn7stcg xen/include/asm-x86/mach-default/mach_apicdef.h
   1.135 +427fa2d093fDS2gOBLcl7Yndzl7HmA xen/include/asm-x86/mach-default/mach_ipi.h
   1.136 +427fa2d0Y7bD35d-FvDAeiJDIdRw2A xen/include/asm-x86/mach-default/mach_mpparse.h
   1.137 +427fa2d0aLQgE9e1GY9ZP5jrMOC8pQ xen/include/asm-x86/mach-default/mach_mpspec.h
   1.138 +427fa2d0fJ5nNn5ydJuOaZIL6F2fjQ xen/include/asm-x86/mach-default/mach_reboot.h
   1.139 +427fa2d0VlN555TE68TjKMsrOoFXNA xen/include/asm-x86/mach-default/mach_time.h
   1.140 +427fa2d0C0jWTKYjy7WJjGKeujSpSg xen/include/asm-x86/mach-default/mach_timer.h
   1.141 +427fa2d0UXLiS1scpNrK26ZT6Oes3g xen/include/asm-x86/mach-default/mach_traps.h
   1.142 +427fa2d0OfglYyfpDTD5DII4M0uZRw xen/include/asm-x86/mach-default/mach_wakecpu.h
   1.143 +427fa2d0_OBPxdi5Qo04JWgZhz7BFA xen/include/asm-x86/mach-default/pci-functions.h
   1.144 +427fa2d0mrTtXrliqDfLuJc5LLVXaA xen/include/asm-x86/mach-default/setup_arch_post.h
   1.145 +427fa2d0Uoo7gC61Kep6Yy7Os367Hg xen/include/asm-x86/mach-default/setup_arch_pre.h
   1.146 +427fa2d1EKnA8zCq2QLHiGOUqOgszg xen/include/asm-x86/mach-default/smpboot_hooks.h
   1.147  3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-x86/mc146818rtc.h
   1.148  40ec25fd7cSvbP7Biw91zaU_g0xsEQ xen/include/asm-x86/mm.h
   1.149  3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h
   1.150 +427fa2d1eJRenftJJnRyLsHKl1ghtA xen/include/asm-x86/mpspec_def.h
   1.151  3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h
   1.152  41aaf567Mi3OishhvrCtET1y-mxQBg xen/include/asm-x86/mtrr.h
   1.153  41a61536MFhNalgbVmYGXAhQsPTZNw xen/include/asm-x86/multicall.h
   1.154  3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h
   1.155  3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-x86/pci.h
   1.156 +42422fb0FVX-TJkSvAXnbfwMf19XFA xen/include/asm-x86/physdev.h
   1.157  3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-x86/processor.h
   1.158  40cf1596bim9F9DNdV75klgRSZ6Y2A xen/include/asm-x86/regs.h
   1.159  3ddb79c2plf7ciNgoNjU-RsbUzawsw xen/include/asm-x86/rwlock.h
   1.160 @@ -1436,11 +1435,15 @@ 4266bd01Ul-pC01ZVvBkhBnv5eqzvw xen/inclu
   1.161  3ddb79c25UE59iu4JJcbRalx95mvcg xen/include/public/xen.h
   1.162  3e397e66m2tO3s-J8Jnr7Ws_tGoPTg xen/include/xen/ac_timer.h
   1.163  40715b2epYl2jBbxzz9CI2rgIca7Zg xen/include/xen/acpi.h
   1.164 +422f0995xCgnbsVhTjSncnqIABs64g xen/include/xen/adv_sched_hist.h
   1.165 +427fa2d1wyoVbvCyZRLposYjA_D_4g xen/include/xen/bitmap.h
   1.166 +427fa2d1ItcC_yWuBUkhc7adedP5ow xen/include/xen/bitops.h
   1.167  3ddb79c0c0cX_DZE209-Bb-Rx1v-Aw xen/include/xen/cache.h
   1.168  41f2cea7Yna7xc0X9fyavIjoSFFeVg xen/include/xen/compile.h.in
   1.169  3f840f12CkbYSlwMrY2S11Mpyxg7Nw xen/include/xen/compiler.h
   1.170  3ddb79c259jh8hE7vre_8NuE7nwNSA xen/include/xen/config.h
   1.171  3eb165e0eawr3R-p2ZQtSdLWtLRN_A xen/include/xen/console.h
   1.172 +427fa2d1bQCWgEQqTTh5MjG4MPEH9g xen/include/xen/cpumask.h
   1.173  3ddb79c1V44RD26YqCUm-kqIupM37A xen/include/xen/ctype.h
   1.174  3ddb79c05DdHQ0UxX_jKsXdR4QlMCA xen/include/xen/delay.h
   1.175  40f2b4a2hC3HtChu-ArD8LyojxWMjg xen/include/xen/domain.h
   1.176 @@ -1463,7 +1466,6 @@ 3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen/inclu
   1.177  3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen/include/xen/pci_ids.h
   1.178  3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xen/perfc.h
   1.179  3e54c38de9SUSYSAwxDf_DwkpAnQFA xen/include/xen/perfc_defn.h
   1.180 -42422fb0FVX-TJkSvAXnbfwMf19XFA xen/include/xen/physdev.h
   1.181  3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen/include/xen/prefetch.h
   1.182  3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xen/reboot.h
   1.183  40589969nPq3DMzv24RDb5LXE9brHw xen/include/xen/sched-if.h
     3.1 --- a/BitKeeper/etc/logging_ok	Tue May 03 14:42:00 2005 +0000
     3.2 +++ b/BitKeeper/etc/logging_ok	Tue May 10 08:49:59 2005 +0000
     3.3 @@ -60,6 +60,7 @@ kmacy@shemp.lab.netapp.com
     3.4  kraxel@bytesex.org
     3.5  laudney@eclipse.(none)
     3.6  leendert@watson.ibm.com
     3.7 +lists-xen@pimb.org
     3.8  lynx@idefix.cl.cam.ac.uk
     3.9  maf46@burn.cl.cam.ac.uk
    3.10  mafetter@fleming.research
    3.11 @@ -99,3 +100,4 @@ vh249@airwolf.cl.cam.ac.uk
    3.12  vh249@arcadians.cl.cam.ac.uk
    3.13  xen-ia64.adm@bkbits.net
    3.14  xenbk@gandalf.hpl.hp.com
    3.15 +ydroneaud@mandriva.com
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/docs/misc/sedf_scheduler_mini-HOWTO.txt	Tue May 10 08:49:59 2005 +0000
     4.3 @@ -0,0 +1,44 @@
     4.4 +sEDF scheduler
     4.5 +--------------
     4.6 +Author:
     4.7 +   Stephan.Diestelhorst@{cl.cam.ac.uk, inf.tu-dresden.de}
     4.8 +   
     4.9 +Overview:
    4.10 +  This scheduler provides weighted CPU sharing in an intuitive way and
    4.11 +  uses realtime-algorithms to ensure time guarantees.
    4.12 +
    4.13 +Usage:
    4.14 +   -add "sched=sedf" on Xen's boot command-line
    4.15 +   -create domains as usual
    4.16 +   -use "xm sedf <dom-id> <period> <slice> <latency-hint> <extra> <weight>"
    4.17 +    Where:
    4.18 +      -period/slice are the normal EDF scheduling parameters in nanosecs
    4.19 +      -latency-hint is the scaled period in case the domain is doing heavy I/O
    4.20 +         (unused by the currently compiled version)
    4.21 +      -extra is a flag (0/1), which controls whether the domain can run in
    4.22 +       extra-time
    4.23 +      -weight is mutually exclusive with period/slice and specifies another
    4.24 +       way of setting a domains cpu slice
    4.25 +
    4.26 +Examples:
    4.27 + normal EDF (20ms/5ms):
    4.28 +  xm sedf <dom-id> 20000000 5000000 0 0 0
    4.29 +  
    4.30 + best-effort domains (i.e. non-realtime):
    4.31 +  xm sedf <dom-id> 20000000 0 0 1 0
    4.32     4.33 + normal EDF (20ms/5ms) + share of extra-time:
    4.34 +  xm sedf <dom-id> 20000000 5000000 0 1 0
    4.35 +  
    4.36 + 4 domains with weights 2:3:4:2
    4.37 +  xm sedf <d1> 0 0 0 0 2
    4.38 +  xm sedf <d2> 0 0 0 0 3
    4.39 +  xm sedf <d3> 0 0 0 0 4
    4.40 +  xm sedf <d4> 0 0 0 0 2
    4.41 +  
    4.42 + 1 fully-specified (10ms/3ms) domain, 3 other domains share
    4.43 + available rest in 2:7:3 ratio:
    4.44 +  xm sedf <d1> 10000000 3000000 0 0 0
    4.45 +  xm sedf <d2> 0 0 0 0 2
    4.46 +  xm sedf <d3> 0 0 0 0 7
    4.47 +  xm sedf <d4> 0 0 0 0 3
    4.48 \ No newline at end of file
     5.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c	Tue May 03 14:42:00 2005 +0000
     5.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c	Tue May 10 08:49:59 2005 +0000
     5.3 @@ -87,6 +87,12 @@
     5.4  
     5.5  /* XEN specific defines */
     5.6  #include <machine/xen_intr.h>
     5.7 +#include <vm/vm.h>   /* needed by machine/pmap.h */
     5.8 +#include <vm/pmap.h> /* needed by machine/pmap.h */
     5.9 +#include <machine/pmap.h> /* needed by xen-os.h */
    5.10 +#include <machine/hypervisor-ifs.h>
    5.11 +#include <machine/xen-os.h> /* needed by xenfunc.h */
    5.12 +#include <machine/xenfunc.h>
    5.13  
    5.14  /*
    5.15   * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
    5.16 @@ -129,7 +135,15 @@ static uint64_t shadow_system_time;
    5.17  static uint32_t shadow_time_version;
    5.18  static struct timeval shadow_tv;
    5.19  
    5.20 +#define DEFINE_PER_CPU(type, name) \
    5.21 +    __typeof__(type) per_cpu__##name
    5.22 +
    5.23 +#define per_cpu(var, cpu)           (*((void)cpu, &per_cpu__##var))
    5.24 +
    5.25 +
    5.26  static uint64_t processed_system_time;/* System time (ns) at last processing. */
    5.27 +static DEFINE_PER_CPU(uint64_t, processed_system_time);
    5.28 +
    5.29  
    5.30  #define NS_PER_TICK (1000000000ULL/hz)
    5.31  
    5.32 @@ -202,18 +216,19 @@ static struct timecounter xen_timecounte
    5.33  static void 
    5.34  clkintr(struct clockframe *frame)
    5.35  {
    5.36 -    int64_t delta;
    5.37 +    int64_t cpu_delta, delta;
    5.38 +    int cpu = smp_processor_id();
    5.39      long ticks = 0;
    5.40  
    5.41 -
    5.42      do {
    5.43      	__get_time_values_from_xen();
    5.44 -    	delta = (int64_t)(shadow_system_time + 
    5.45 -			  xen_get_offset() * 1000 - 
    5.46 -			  processed_system_time);
    5.47 +    	delta = cpu_delta = (int64_t)shadow_system_time + 
    5.48 +		(int64_t)xen_get_offset() * 1000;
    5.49 +	delta -= processed_system_time;
    5.50 +	cpu_delta -= per_cpu(processed_system_time, cpu);
    5.51      } while (!TIME_VALUES_UP_TO_DATE);
    5.52  
    5.53 -    if (unlikely(delta < 0)) {
    5.54 +    if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) {
    5.55          printk("Timer ISR: Time went backwards: %lld\n", delta);
    5.56          return;
    5.57      }
    5.58 @@ -225,15 +240,28 @@ clkintr(struct clockframe *frame)
    5.59          delta -= NS_PER_TICK;
    5.60          processed_system_time += NS_PER_TICK;
    5.61      }
    5.62 -
    5.63 -    if (ticks > 0) {
    5.64 -	if (frame)
    5.65 -		timer_func(frame);
    5.66 -#ifdef SMP
    5.67 -	if (timer_func == hardclock && frame)
    5.68 -		forward_hardclock();
    5.69 +    /* Local CPU jiffy work. */
    5.70 +    while (cpu_delta >= NS_PER_TICK) {
    5.71 +	    cpu_delta -= NS_PER_TICK;
    5.72 +	    per_cpu(processed_system_time, cpu) += NS_PER_TICK;
    5.73 +#if 0
    5.74 +	    update_process_times(user_mode(regs));
    5.75 +	    profile_tick(CPU_PROFILING, regs);
    5.76  #endif
    5.77      }
    5.78 +    if (ticks > 0) {
    5.79 +	if (frame) timer_func(frame);
    5.80 +    }
    5.81 +    
    5.82 +    if (cpu != 0)
    5.83 +	    return;
    5.84 +    /*
    5.85 +     * Take synchronised time from Xen once a minute if we're not
    5.86 +     * synchronised ourselves, and we haven't chosen to keep an independent
    5.87 +     * time base.
    5.88 +     */
    5.89 +    
    5.90 +    /* XXX TODO */
    5.91  }
    5.92  
    5.93  #include "opt_ddb.h"
    5.94 @@ -429,7 +457,7 @@ resettodr()
    5.95   * Start clocks running.
    5.96   */
    5.97  void
    5.98 -cpu_initclocks()
    5.99 +cpu_initclocks(void)
   5.100  {
   5.101  	int diag;
   5.102  	int time_irq = bind_virq_to_irq(VIRQ_TIMER);
   5.103 @@ -445,8 +473,26 @@ cpu_initclocks()
   5.104  	/* initialize xen values */
   5.105  	__get_time_values_from_xen();
   5.106  	processed_system_time = shadow_system_time;
   5.107 +	per_cpu(processed_system_time, 0) = processed_system_time;
   5.108 +
   5.109  }
   5.110  
   5.111 +#ifdef SMP 
   5.112 +void
   5.113 +ap_cpu_initclocks(void)
   5.114 +{
   5.115 +	int irq;
   5.116 +	int cpu = smp_processor_id();
   5.117 +
   5.118 +	per_cpu(processed_system_time, cpu) = shadow_system_time;
   5.119 +	
   5.120 +	irq = bind_virq_to_irq(VIRQ_TIMER);
   5.121 +	PCPU_SET(time_irq, irq);
   5.122 +	PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr, 
   5.123 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   5.124 +}
   5.125 +#endif
   5.126 +
   5.127  void
   5.128  cpu_startprofclock(void)
   5.129  {
     6.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c	Tue May 03 14:42:00 2005 +0000
     6.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c	Tue May 10 08:49:59 2005 +0000
     6.3 @@ -79,9 +79,14 @@ evtchn_do_upcall(struct intrframe *frame
     6.4                  l2 &= ~(1 << l2i);
     6.5              
     6.6                  port = (l1i << 5) + l2i;
     6.7 +		irq = evtchn_to_irq[port];
     6.8 +#ifdef SMP		
     6.9 +		if (irq == PCPU_GET(cpuast)) 
    6.10 +			continue;
    6.11 +#endif
    6.12                  if ( (owned = mtx_owned(&sched_lock)) != 0 )
    6.13                      mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
    6.14 -                if ( (irq = evtchn_to_irq[port]) != -1 ) {
    6.15 +                if ( irq != -1 ) {
    6.16  		    struct intsrc *isrc = intr_lookup_source(irq);
    6.17  		    intr_execute_handlers(isrc, frame);
    6.18  		} else {
    6.19 @@ -584,6 +589,7 @@ ap_evtchn_init(int cpu)
    6.20          PCPU_GET(virq_to_irq)[i] = -1;
    6.21  }
    6.22  
    6.23 +
    6.24  static void 
    6.25  evtchn_init(void *dummy __unused)
    6.26  {
    6.27 @@ -591,13 +597,6 @@ evtchn_init(void *dummy __unused)
    6.28      struct xenpic *xp;
    6.29      struct xenpic_intsrc *pin;
    6.30  
    6.31 -    /*
    6.32 -     * xenpic_lock: in order to allow an interrupt to occur in a critical
    6.33 -     * 	        section, to set pcpu->ipending (etc...) properly, we
    6.34 -     *	        must be able to get the icu lock, so it can't be
    6.35 -     *	        under witness.
    6.36 -     */
    6.37 -    mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF);
    6.38  
    6.39      /* XXX -- expedience hack */
    6.40      PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]);
    6.41 @@ -657,3 +656,11 @@ evtchn_init(void *dummy __unused)
    6.42  }
    6.43  
    6.44  SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
    6.45 +    /*
    6.46 +     * xenpic_lock: in order to allow an interrupt to occur in a critical
    6.47 +     * 	        section, to set pcpu->ipending (etc...) properly, we
    6.48 +     *	        must be able to get the icu lock, so it can't be
    6.49 +     *	        under witness.
    6.50 +     */
    6.51 +
    6.52 +MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_DEF|MTX_NOWITNESS);
     7.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c	Tue May 03 14:42:00 2005 +0000
     7.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c	Tue May 10 08:49:59 2005 +0000
     7.3 @@ -78,6 +78,7 @@
     7.4  #include <sys/sched.h>
     7.5  #include <sys/sysent.h>
     7.6  #include <sys/sysctl.h>
     7.7 +#include <sys/smp.h>
     7.8  #include <sys/ucontext.h>
     7.9  #include <sys/vmmeter.h>
    7.10  #include <sys/bus.h>
    7.11 @@ -883,14 +884,6 @@ SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_
    7.12  static void
    7.13  cpu_idle_default(void)
    7.14  {
    7.15 -#if 0
    7.16 -	/*
    7.17 -	 * we must absolutely guarentee that hlt is the
    7.18 -	 * absolute next instruction after sti or we
    7.19 -	 * introduce a timing window.
    7.20 -	 */
    7.21 -	__asm __volatile("sti; hlt");
    7.22 -#endif
    7.23  	idle_block();
    7.24  	enable_intr();
    7.25  }
    7.26 @@ -1376,6 +1369,7 @@ pteinfo_t *pteinfo_list;
    7.27  unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
    7.28  int preemptable;
    7.29  int gdt_set;
    7.30 +static int ncpus;
    7.31  
    7.32  /* Linux infection */
    7.33  #define PAGE_OFFSET  KERNBASE
    7.34 @@ -1387,6 +1381,10 @@ initvalues(start_info_t *startinfo)
    7.35      int i;
    7.36      vm_paddr_t pdir_shadow_ma, KPTphys;
    7.37      vm_offset_t *pdir_shadow;
    7.38 +#ifdef SMP
    7.39 +    int j;
    7.40 +#endif
    7.41 +
    7.42  #ifdef WRITABLE_PAGETABLES
    7.43      printk("using writable pagetables\n");
    7.44      HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
    7.45 @@ -1447,18 +1445,19 @@ initvalues(start_info_t *startinfo)
    7.46  
    7.47  
    7.48  #ifdef SMP
    7.49 +#if 0
    7.50      /* allocate cpu0 private page */
    7.51      cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT));
    7.52      tmpindex++; 
    7.53 -
    7.54 +#endif
    7.55      /* allocate SMP page table */
    7.56      SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT));
    7.57 -
    7.58 +#if 0
    7.59      /* Map the private page into the SMP page table */
    7.60      SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;
    7.61 -
    7.62 +#endif
    7.63      /* map SMP page table RO */
    7.64 -    PT_SET_MA(SMPpt, vtomach(SMPpt) & ~PG_RW);
    7.65 +    PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW);
    7.66  
    7.67      /* put the page table into the page directory */
    7.68      xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), 
    7.69 @@ -1496,44 +1495,61 @@ initvalues(start_info_t *startinfo)
    7.70      tmpindex++;
    7.71  
    7.72      HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine;
    7.73 +    ncpus = HYPERVISOR_shared_info->n_vcpu; 
    7.74 +#ifdef SMP
    7.75 +    for (i = 0; i < ncpus; i++) {
    7.76 +	    int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE;
    7.77 +	    for (j = 0; j < npages; j++) {
    7.78 +		    vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
    7.79 +		    tmpindex++;
    7.80 +		    PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW | PG_M, FALSE);
    7.81 +	    }
    7.82 +    }
    7.83 +    xen_flush_queue();
    7.84 +#endif
    7.85      
    7.86      init_first = tmpindex;
    7.87      
    7.88  }
    7.89  
    7.90 +
    7.91 +trap_info_t trap_table[] = {
    7.92 +	{ 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
    7.93 +	{ 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
    7.94 +	{ 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
    7.95 +	{ 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
    7.96 +	/* This is UPL on Linux and KPL on BSD */
    7.97 +	{ 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
    7.98 +	{ 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
    7.99 +	{ 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
   7.100 +	/*
   7.101 +	 * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
   7.102 +	 *   no handler for double fault
   7.103 +	 */
   7.104 +	{ 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
   7.105 +	{10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
   7.106 +	{11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
   7.107 +	{12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
   7.108 +	{13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
   7.109 +	{14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
   7.110 +	{15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
   7.111 +	{16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
   7.112 +	{17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
   7.113 +	{18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
   7.114 +	{19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
   7.115 +	{0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
   7.116 +	{  0, 0,           0, 0 }
   7.117 +};
   7.118 +
   7.119  void
   7.120  init386(void)
   7.121  {
   7.122  	int gsel_tss, metadata_missing, off, x, error;
   7.123  	struct pcpu *pc;
   7.124  	unsigned long gdtmachpfn;
   7.125 -	trap_info_t trap_table[] = {
   7.126 -	    { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
   7.127 -	    { 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
   7.128 -	    { 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
   7.129 -	    { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
   7.130 -	    /* This is UPL on Linux and KPL on BSD */
   7.131 -	    { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
   7.132 -	    { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
   7.133 -	    { 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
   7.134 -	    /*
   7.135 -	     * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
   7.136 -	     *   no handler for double fault
   7.137 -	     */
   7.138 -	    { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
   7.139 -	    {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
   7.140 -	    {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
   7.141 -	    {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
   7.142 -	    {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
   7.143 -	    {14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
   7.144 -	    {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
   7.145 -	    {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
   7.146 -	    {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
   7.147 -	    {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
   7.148 -	    {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
   7.149 -	    {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
   7.150 -	    {  0, 0,           0, 0 }
   7.151 -        };
   7.152 +#ifdef SMP
   7.153 +	int i;
   7.154 +#endif
   7.155  	proc0.p_uarea = proc0uarea;
   7.156  	thread0.td_kstack = proc0kstack;
   7.157  	thread0.td_pcb = (struct pcb *)
   7.158 @@ -1583,26 +1599,42 @@ init386(void)
   7.159  	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); 
   7.160  #endif
   7.161  #ifdef SMP
   7.162 -	/* this correspond to the cpu private page as mapped into the SMP page 
   7.163 -	 * table in initvalues
   7.164 +	/* XXX this will blow up if there are more than 512/NGDT vcpus - will never 
   7.165 +	 * be an issue in the real world but should add an assert on general principles
   7.166 +	 * we'll likely blow up when we hit LAST_RESERVED_GDT_ENTRY, at which point we
   7.167 +	 * would need to start allocating more pages for the GDT
   7.168  	 */
   7.169  	pc = &SMP_prvspace[0].pcpu;
   7.170 -	gdt_segs[GPRIV_SEL].ssd_limit =
   7.171 -		atop(sizeof(struct privatespace) - 1);
   7.172 +	for (i = 0; i < ncpus; i++) {
   7.173 +		cpu_add(i, (i == 0));
   7.174 +
   7.175 +		gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i];
   7.176 +		gdt_segs[GPRIV_SEL].ssd_limit =
   7.177 +			atop(sizeof(struct privatespace) - 1);
   7.178 +		gdt_segs[GPROC0_SEL].ssd_base =
   7.179 +			(int) &SMP_prvspace[i].pcpu.pc_common_tss;
   7.180 +		SMP_prvspace[i].pcpu.pc_prvspace =
   7.181 +			&SMP_prvspace[i].pcpu;
   7.182 +		
   7.183 +		for (x = 0; x < NGDT; x++) {
   7.184 +			ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd);
   7.185 +		}
   7.186 +	}
   7.187  #else
   7.188  	pc = &__pcpu;
   7.189  	gdt_segs[GPRIV_SEL].ssd_limit =
   7.190  		atop(sizeof(struct pcpu) - 1);
   7.191 -#endif
   7.192  	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
   7.193  	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
   7.194  	for (x = 0; x < NGDT; x++)
   7.195  	    ssdtosd(&gdt_segs[x], &gdt[x].sd);
   7.196 +#endif
   7.197 +
   7.198  
   7.199  	PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW);
   7.200  	gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
   7.201 -	if ((error = HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1))) 
   7.202 -	    panic("set_gdt failed");
   7.203 +	PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0);
   7.204 +
   7.205  	
   7.206  	lgdt_finish();
   7.207  	gdt_set = 1;
     8.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c	Tue May 03 14:42:00 2005 +0000
     8.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c	Tue May 10 08:49:59 2005 +0000
     8.3 @@ -83,7 +83,16 @@
     8.4  #include <machine/specialreg.h>
     8.5  #include <machine/privatespace.h>
     8.6  
     8.7 +
     8.8 +/* XEN includes */
     8.9  #include <machine/xenfunc.h>
    8.10 +#include <machine/xen_intr.h>
    8.11 +
    8.12 +void Xhypervisor_callback(void);
    8.13 +void failsafe_callback(void);
    8.14 +
    8.15 +/***************/
    8.16 +
    8.17  
    8.18  #define WARMBOOT_TARGET		0
    8.19  #define WARMBOOT_OFF		(KERNBASE + 0x0467)
    8.20 @@ -94,6 +103,10 @@
    8.21  #define BIOS_RESET		(0x0f)
    8.22  #define BIOS_WARM		(0x0a)
    8.23  
    8.24 +
    8.25 +#undef POSTCODE
    8.26 +#define POSTCODE(x)
    8.27 +
    8.28  /*
    8.29   * this code MUST be enabled here and in mpboot.s.
    8.30   * it follows the very early stages of AP boot by placing values in CMOS ram.
    8.31 @@ -175,6 +188,8 @@ extern pt_entry_t *KPTphys;
    8.32  /* SMP page table page */
    8.33  extern pt_entry_t *SMPpt;
    8.34  
    8.35 +extern trap_info_t trap_table[];
    8.36 +
    8.37  struct pcb stoppcbs[MAXCPU];
    8.38  
    8.39  /* Variables needed for SMP tlb shootdown. */
    8.40 @@ -208,7 +223,9 @@ static u_int boot_address;
    8.41  
    8.42  static void	set_logical_apic_ids(void);
    8.43  static int	start_all_aps(void);
    8.44 +#if 0
    8.45  static void	install_ap_tramp(void);
    8.46 +#endif
    8.47  static int	start_ap(int apic_id);
    8.48  static void	release_aps(void *dummy);
    8.49  
    8.50 @@ -314,6 +331,7 @@ int
    8.51  cpu_mp_probe(void)
    8.52  {
    8.53  
    8.54 +	mp_ncpus = HYPERVISOR_shared_info->n_vcpu;
    8.55  	/*
    8.56  	 * Always record BSP in CPU map so that the mbuf init code works
    8.57  	 * correctly.
    8.58 @@ -342,20 +360,24 @@ cpu_mp_probe(void)
    8.59  	return (1);
    8.60  }
    8.61  
    8.62 -/*
    8.63 - * Initialize the IPI handlers and start up the AP's.
    8.64 - */
    8.65 -void
    8.66 -cpu_mp_start(void)
    8.67 +static void
    8.68 +cpu_mp_ipi_init(void)
    8.69  {
    8.70 -	int i;
    8.71 -
    8.72 -	POSTCODE(MP_START_POST);
    8.73 -
    8.74 -	/* Initialize the logical ID to APIC ID table. */
    8.75 -	for (i = 0; i < MAXCPU; i++)
    8.76 -		cpu_apic_ids[i] = -1;
    8.77 -
    8.78 +	int irq;
    8.79 +	int cpu = smp_processor_id();
    8.80 +	/* 
    8.81 +	 * these are not needed by XenFreeBSD - from Keir:
    8.82 +	 * For TLB-flush related IPIs, Xen has hypercalls 
    8.83 +	 * you should use instead. You can pass a pointer 
    8.84 +	 * to a vcpu bitmap to update_va_mapping(), and to
    8.85 +	 * MMUEXT_flush_tlb_multi and MMEXT_invlpg_multi. 
    8.86 +	 * Xen will then make sure that those vcpus get 
    8.87 +	 * flushed appropriately before returning to the
    8.88 +	 * caller.
    8.89 +	 * There is also no indication that we need to forward
    8.90 +	 * clock interrupts.
    8.91 +	 */
    8.92 +#if 0 
    8.93  	/* Install an inter-CPU IPI for TLB invalidation */
    8.94  	setidt(IPI_INVLTLB, IDTVEC(invltlb),
    8.95  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
    8.96 @@ -371,22 +393,69 @@ cpu_mp_start(void)
    8.97  	/* Install an inter-CPU IPI for forwarding statclock() */
    8.98  	setidt(IPI_STATCLOCK, IDTVEC(statclock),
    8.99  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.100 -	
   8.101 +#endif
   8.102 +
   8.103 +	/* 
   8.104 +	 * These can all be consolidated. For now leaving 
   8.105 +	 * as individual IPIs.
   8.106 +	 *
   8.107 +	 */
   8.108 +#if 0
   8.109  	/* Install an inter-CPU IPI for lazy pmap release */
   8.110  	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
   8.111  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.112 +#else
   8.113 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP);
   8.114 +	PCPU_SET(lazypmap, irq);
   8.115 +	PANIC_IF(intr_add_handler("pmap_lazyfix", irq, 
   8.116 +				  (driver_intr_t *)pmap_lazyfix_action, 
   8.117 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   8.118 +#endif
   8.119  
   8.120 +#if 0
   8.121  	/* Install an inter-CPU IPI for all-CPU rendezvous */
   8.122  	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
   8.123  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.124 +#else 
   8.125 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS);
   8.126 +	PCPU_SET(rendezvous, irq);
   8.127 +	PANIC_IF(intr_add_handler("smp_rendezvous", irq, 
   8.128 +				  (driver_intr_t *)smp_rendezvous_action, 
   8.129 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   8.130 +#endif
   8.131  
   8.132 +#if 0
   8.133  	/* Install an inter-CPU IPI for forcing an additional software trap */
   8.134  	setidt(IPI_AST, IDTVEC(cpuast),
   8.135  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.136 -
   8.137 +#else
   8.138 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST);
   8.139 +	PCPU_SET(cpuast, irq);
   8.140 +#endif
   8.141 +	/* XXX ignore for now */
   8.142 +#if 0 
   8.143  	/* Install an inter-CPU IPI for CPU stop/restart */
   8.144  	setidt(IPI_STOP, IDTVEC(cpustop),
   8.145  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.146 +#endif
   8.147 +
   8.148 +}
   8.149 +
   8.150 +SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL);
   8.151 +
   8.152 +/*
   8.153 + * Initialize the IPI handlers and start up the AP's.
   8.154 + */
   8.155 +void
   8.156 +cpu_mp_start(void) /* --- Start here --- */
   8.157 +{
   8.158 +	int i;
   8.159 +
   8.160 +	POSTCODE(MP_START_POST);
   8.161 +
   8.162 +	/* Initialize the logical ID to APIC ID table. */
   8.163 +	for (i = 0; i < MAXCPU; i++)
   8.164 +		cpu_apic_ids[i] = -1;
   8.165  
   8.166  
   8.167  	/* Set boot_cpu_id if needed. */
   8.168 @@ -437,35 +506,44 @@ cpu_mp_announce(void)
   8.169  void
   8.170  init_secondary(void)
   8.171  {
   8.172 -	int	gsel_tss;
   8.173 -	int	x, myid;
   8.174 +	int	myid;
   8.175 +	unsigned long gdtmachpfn;
   8.176 +	printk("MADE IT!!");
   8.177 +
   8.178  #if 0
   8.179  	u_int	cr0;
   8.180  #endif
   8.181 +	/* Steps to booting SMP on xen as gleaned from XenLinux:
   8.182 +	 * - cpu_init() - processor specific initialization
   8.183 +	 * - smp_callin() 
   8.184 +	 *    - wait 2s for BP to finish its startup sequence
   8.185 +	 *    - map_cpu_to_logical_apicid()
   8.186 +	 *    - save cpuid info
   8.187 +	 *    - set bit in callin map to let master (BP?) continue
   8.188 +	 * - local setup timer() - per cpu timer initialization
   8.189 +	 * - ldebug_setup() - bind debug IRQ to local CPU.
   8.190 +	 * - smp_intr_init() - IPI setup that we do in cpu_mp_start
   8.191 +	 * - local_irq_enable() - enable interrupts locally
   8.192 +	 * - cpu_set(id, map) - announce that we're up
   8.193 +	 * - cpu_idle() - make us schedulable
   8.194 +	 */
   8.195 +
   8.196 +
   8.197  	/* bootAP is set in start_ap() to our ID. */
   8.198  	myid = bootAP;
   8.199 -	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
   8.200 -	gdt_segs[GPROC0_SEL].ssd_base =
   8.201 -		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
   8.202 -	SMP_prvspace[myid].pcpu.pc_prvspace =
   8.203 -		&SMP_prvspace[myid].pcpu;
   8.204 +
   8.205 +	gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
   8.206 +	PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0); 
   8.207  
   8.208 -	for (x = 0; x < NGDT; x++) {
   8.209 -		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
   8.210 -	}
   8.211 +	
   8.212 +	lgdt_finish();
   8.213  
   8.214 -#if 0
   8.215 -	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
   8.216 -	r_gdt.rd_base = (int) &gdt[myid * NGDT];
   8.217 -	lgdt(&r_gdt);			/* does magic intra-segment return */
   8.218 +	PCPU_SET(cpuid, myid);
   8.219  
   8.220 -	lidt(&r_idt);
   8.221 -	lldt(_default_ldt);
   8.222 -#endif
   8.223 +
   8.224 +	set_user_ldt((struct mdproc *)_default_ldt);
   8.225  	PCPU_SET(currentldt, _default_ldt);
   8.226  
   8.227 -	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
   8.228 -	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
   8.229  	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
   8.230  	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
   8.231  	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
   8.232 @@ -557,6 +635,13 @@ init_secondary(void)
   8.233  	while (smp_started == 0)
   8.234  		ia32_pause();
   8.235  
   8.236 +	/* need to wait until now to setup the IPIs as SI_SUB_CPU is
   8.237 +	 * much earlier than SI_SUB_INTR
   8.238 +	 */  
   8.239 +	ap_evtchn_init(myid);
   8.240 +	ap_cpu_initclocks();
   8.241 +	cpu_mp_ipi_init();
   8.242 +
   8.243  	/* ok, now grab sched_lock and enter the scheduler */
   8.244  	mtx_lock_spin(&sched_lock);
   8.245  
   8.246 @@ -610,28 +695,35 @@ set_logical_apic_ids(void)
   8.247  static int
   8.248  start_all_aps(void)
   8.249  {
   8.250 -#ifndef PC98
   8.251 -	u_char mpbiosreason;
   8.252 -#endif
   8.253 -	u_long mpbioswarmvec;
   8.254  	struct pcpu *pc;
   8.255  	char *stack;
   8.256 -	uintptr_t kptbase;
   8.257 -	int i, pg, apic_id, cpu;
   8.258 +	int i, apic_id, cpu;
   8.259 +
   8.260 +	/* 
   8.261 +	 * This function corresponds most closely to 
   8.262 +	 * smp_boot_cpus in XenLinux - the sequence there 
   8.263 +	 * is:
   8.264 +	 * - check if SMP config is found - if not:
   8.265 +	 *     - clear the I/O APIC IRQs
   8.266 +	 *     - map cpu to logical apicid
   8.267 +	 *     - exit
   8.268 +	 * - smp_intr_init - IPI initialization
   8.269 +	 * - map cpu to logical apicid
   8.270 +	 * - boot each of the vcpus
   8.271 +	 * - clear and then construct the cpu sibling [logical CPUs] map.
   8.272 +	 *
   8.273 +	 */
   8.274  
   8.275  	POSTCODE(START_ALL_APS_POST);
   8.276  
   8.277  	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
   8.278 -
   8.279 +#if 0
   8.280  	/* install the AP 1st level boot code */
   8.281  	install_ap_tramp();
   8.282  
   8.283  	/* save the current value of the warm-start vector */
   8.284  	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
   8.285 -#ifndef PC98
   8.286 -	outb(CMOS_REG, BIOS_RESET);
   8.287 -	mpbiosreason = inb(CMOS_DATA);
   8.288 -#endif
   8.289 +
   8.290  
   8.291  	/* set up temporary P==V mapping for AP boot */
   8.292  	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
   8.293 @@ -640,7 +732,7 @@ start_all_aps(void)
   8.294  		PTD[i] = (pd_entry_t)(PG_V | PG_RW |
   8.295  		    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
   8.296  	invltlb();
   8.297 -
   8.298 +#endif
   8.299  	/* start each AP */
   8.300  	for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
   8.301  		if (!cpu_info[apic_id].cpu_present ||
   8.302 @@ -650,7 +742,7 @@ start_all_aps(void)
   8.303  
   8.304  		/* save APIC ID for this logical ID */
   8.305  		cpu_apic_ids[cpu] = apic_id;
   8.306 -
   8.307 +#if 0
   8.308  		/* first page of AP's private space */
   8.309  		pg = cpu * i386_btop(sizeof(struct privatespace));
   8.310  
   8.311 @@ -665,11 +757,14 @@ start_all_aps(void)
   8.312  		for (i = 0; i < KSTACK_PAGES; i++)
   8.313  			SMPpt[pg + 1 + i] = (pt_entry_t)
   8.314  			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
   8.315 +#endif
   8.316 +		pc = &SMP_prvspace[cpu].pcpu;
   8.317  
   8.318  		/* prime data page for it to use */
   8.319  		pcpu_init(pc, cpu, sizeof(struct pcpu));
   8.320  		pc->pc_apic_id = apic_id;
   8.321  
   8.322 +#if 0
   8.323  		/* setup a vector to our boot code */
   8.324  		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
   8.325  		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
   8.326 @@ -677,7 +772,7 @@ start_all_aps(void)
   8.327  		outb(CMOS_REG, BIOS_RESET);
   8.328  		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
   8.329  #endif
   8.330 -
   8.331 +#endif
   8.332  		bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
   8.333  		    PAGE_SIZE];
   8.334  		bootAP = cpu;
   8.335 @@ -700,13 +795,10 @@ start_all_aps(void)
   8.336  	/* build our map of 'other' CPUs */
   8.337  	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
   8.338  
   8.339 +#if 0
   8.340  	/* restore the warmstart vector */
   8.341  	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
   8.342 -#ifndef PC98
   8.343 -	outb(CMOS_REG, BIOS_RESET);
   8.344 -	outb(CMOS_DATA, mpbiosreason);
   8.345  #endif
   8.346 -
   8.347  	/*
   8.348  	 * Set up the idle context for the BSP.  Similar to above except
   8.349  	 * that some was done by locore, some by pmap.c and some is implicit
   8.350 @@ -739,7 +831,7 @@ extern void bootDataSeg(void);
   8.351  extern void MPentry(void);
   8.352  extern u_int MP_GDT;
   8.353  extern u_int mp_gdtbase;
   8.354 -
   8.355 +#if 0
   8.356  static void
   8.357  install_ap_tramp(void)
   8.358  {
   8.359 @@ -791,6 +883,21 @@ install_ap_tramp(void)
   8.360  	*dst16 = (u_int) boot_address & 0xffff;
   8.361  	*dst8 = ((u_int) boot_address >> 16) & 0xff;
   8.362  }
   8.363 +#endif
   8.364 +
   8.365 +static int 
   8.366 +cpu_mp_trap_init(trap_info_t *trap_ctxt)
   8.367 +{
   8.368 +
   8.369 +        trap_info_t *t = trap_table;
   8.370 +
   8.371 +        for (t = trap_table; t->address; t++) {
   8.372 +                trap_ctxt[t->vector].flags = t->flags;
   8.373 +                trap_ctxt[t->vector].cs = t->cs;
   8.374 +                trap_ctxt[t->vector].address = t->address;
   8.375 +        }
   8.376 +        return 0x80 /*SYSCALL_VECTOR*/;
   8.377 +}
   8.378  
   8.379  /*
   8.380   * This function starts the AP (application processor) identified
   8.381 @@ -802,8 +909,25 @@ install_ap_tramp(void)
   8.382  static int
   8.383  start_ap(int apic_id)
   8.384  {
   8.385 -	int vector, ms;
   8.386 -	int cpus;
   8.387 +	int vector, ms, i;
   8.388 +	int cpus, boot_error;
   8.389 +	vcpu_guest_context_t ctxt;
   8.390 +
   8.391 +	/* 
   8.392 +	 * This is the FreeBSD equivalent to do_boot_cpu(apicid) in
   8.393 +	 * smpboot.c. 
   8.394 +	 * its initialization sequence consists of:
   8.395 +	 * - fork_idle(cpu) to create separate idle context
   8.396 +	 * - initialization of idle's context to start_secondary
   8.397 +	 * - initialization of cpu ctxt to start in startup_32_smp
   8.398 +	 * - then we call HYPERVISOR_boot_vcpu with the cpu index and
   8.399 +	 *   a pointer to the context.
   8.400 +	 * - on boot success we:
   8.401 +	 *   - set ourselves in the callout_map
   8.402 +	 *   - wait up to 5 seconds for us to be set in the callin map
   8.403 +	 * - set x86_cpu_to_apicid[cpu] = apicid;
   8.404 +	 *
   8.405 +	 */
   8.406  
   8.407  	POSTCODE(START_AP_POST);
   8.408  
   8.409 @@ -813,6 +937,55 @@ start_ap(int apic_id)
   8.410  	/* used as a watchpoint to signal AP startup */
   8.411  	cpus = mp_naps;
   8.412  
   8.413 +	memset(&ctxt, 0, sizeof(ctxt));
   8.414 +
   8.415 +	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
   8.416 +	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
   8.417 +	ctxt.user_regs.fs = 0;
   8.418 +	ctxt.user_regs.gs = 0;
   8.419 +	ctxt.user_regs.ss = __KERNEL_DS;
   8.420 +	ctxt.user_regs.cs = __KERNEL_CS;
   8.421 +	ctxt.user_regs.eip = (unsigned long)init_secondary;
   8.422 +	ctxt.user_regs.esp = (unsigned long)bootSTK;
   8.423 +#ifdef notyet
   8.424 +	ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
   8.425 +#else
   8.426 +	ctxt.user_regs.eflags = (1<<9) | (1<<2);
   8.427 +#endif
   8.428 +	/* FPU is set up to default initial state. */
   8.429 +	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
   8.430 +
   8.431 +	/* Virtual IDT is empty at start-of-day. */
   8.432 +	for ( i = 0; i < 256; i++ )
   8.433 +	{
   8.434 +		ctxt.trap_ctxt[i].vector = i;
   8.435 +		ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
   8.436 +	}
   8.437 +	ctxt.fast_trap_idx = cpu_mp_trap_init(ctxt.trap_ctxt);
   8.438 +
   8.439 +	/* No LDT. */
   8.440 +	ctxt.ldt_ents = 0;
   8.441 +
   8.442 +	/* Ring 1 stack is the initial stack. */
   8.443 +	ctxt.kernel_ss = __KERNEL_DS;
   8.444 +	ctxt.kernel_sp = (unsigned long)bootSTK;
   8.445 +
   8.446 +	/* Callback handlers. */
   8.447 +	ctxt.event_callback_cs     = __KERNEL_CS;
   8.448 +	ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
   8.449 +	ctxt.failsafe_callback_cs  = __KERNEL_CS;
   8.450 +	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
   8.451 +
   8.452 +	ctxt.pt_base = (vm_paddr_t)IdlePTD;
   8.453 +
   8.454 +	boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt);
   8.455 +
   8.456 +	
   8.457 +	if (boot_error) 
   8.458 +		printk("Houston we have a problem\n");
   8.459 +	else
   8.460 +		printk("boot_vcpu succeeded\n");
   8.461 +#if 0
   8.462  	/*
   8.463  	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
   8.464  	 * and running the target CPU. OR this INIT IPI might be latched (P5
   8.465 @@ -862,6 +1035,7 @@ start_ap(int apic_id)
   8.466  	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
   8.467  	    vector, apic_id);
   8.468  	lapic_ipi_wait(-1);
   8.469 +#endif
   8.470  	DELAY(200);		/* wait ~200uS */
   8.471  
   8.472  	/* Wait up to 5 seconds for it to start. */
     9.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c	Tue May 03 14:42:00 2005 +0000
     9.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c	Tue May 10 08:49:59 2005 +0000
     9.3 @@ -1374,7 +1374,6 @@ static u_int *lazymask;
     9.4  static u_int lazyptd;
     9.5  static volatile u_int lazywait;
     9.6  
     9.7 -void pmap_lazyfix_action(void);
     9.8  
     9.9  void
    9.10  pmap_lazyfix_action(void)
    10.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Tue May 03 14:42:00 2005 +0000
    10.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Tue May 10 08:49:59 2005 +0000
    10.3 @@ -380,8 +380,6 @@ printk(const char *fmt, ...)
    10.4          (void)HYPERVISOR_console_write(buf, ret);
    10.5  }
    10.6  
    10.7 -#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
    10.8 -
    10.9  
   10.10  #define XPQUEUE_SIZE 128
   10.11  #ifdef SMP
    11.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h	Tue May 03 14:42:00 2005 +0000
    11.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h	Tue May 10 08:49:59 2005 +0000
    11.3 @@ -441,4 +441,20 @@ HYPERVISOR_vm_assist(unsigned int cmd, u
    11.4      return ret;
    11.5  }
    11.6  
    11.7 +static inline int
    11.8 +HYPERVISOR_boot_vcpu(
    11.9 +    unsigned long vcpu, vcpu_guest_context_t *ctxt)
   11.10 +{
   11.11 +    int ret;
   11.12 +    unsigned long ign1, ign2;
   11.13 +
   11.14 +    __asm__ __volatile__ (
   11.15 +        TRAP_INSTR
   11.16 +        : "=a" (ret), "=b" (ign1), "=c" (ign2)
   11.17 +	: "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
   11.18 +	: "memory");
   11.19 +
   11.20 +    return ret;
   11.21 +}
   11.22 +
   11.23  #endif /* __HYPERVISOR_H__ */
    12.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h	Tue May 03 14:42:00 2005 +0000
    12.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h	Tue May 10 08:49:59 2005 +0000
    12.3 @@ -53,7 +53,12 @@
    12.4          int     *pc_ipi_to_evtchn;                                      \
    12.5          int     *pc_virq_to_irq;                                        \
    12.6          u_int   pc_cr2;                                                 \
    12.7 -        u_int   pc_pdir                                        
    12.8 +        u_int   pc_pdir;                                                \
    12.9 +        u_int   pc_lazypmap;                                            \
   12.10 +        u_int   pc_rendezvous;                                          \
   12.11 +        u_int   pc_cpuast;                                              \
   12.12 +        u_int   pc_time_irq;                                              \
   12.13 +        uint64_t pc_processed_system_time;  
   12.14  
   12.15  #if defined(lint)
   12.16   
    13.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h	Tue May 03 14:42:00 2005 +0000
    13.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h	Tue May 10 08:49:59 2005 +0000
    13.3 @@ -343,6 +343,7 @@ void	pmap_set_pg(void);
    13.4  void	pmap_invalidate_page(pmap_t, vm_offset_t);
    13.5  void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
    13.6  void	pmap_invalidate_all(pmap_t);
    13.7 +void    pmap_lazyfix_action(void);
    13.8  
    13.9  void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
   13.10  void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
    14.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h	Tue May 03 14:42:00 2005 +0000
    14.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h	Tue May 10 08:49:59 2005 +0000
    14.3 @@ -61,6 +61,9 @@ void load_cr3(uint32_t val);
    14.4  void xen_machphys_update(unsigned long, unsigned long);
    14.5  void xen_update_descriptor(union descriptor *, union descriptor *);
    14.6  void lldt(u_short sel);
    14.7 +void ap_cpu_initclocks(void);
    14.8 +
    14.9 +
   14.10  /*
   14.11   * Invalidate a patricular VA on all cpus
   14.12   *
   14.13 @@ -79,5 +82,6 @@ invltlb(void)
   14.14  	
   14.15  }
   14.16  
   14.17 +#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
   14.18  
   14.19  #endif /* _XEN_XENFUNC_H_ */
    15.1 --- a/linux-2.4.29-xen-sparse/mkbuildtree	Tue May 03 14:42:00 2005 +0000
    15.2 +++ b/linux-2.4.29-xen-sparse/mkbuildtree	Tue May 10 08:49:59 2005 +0000
    15.3 @@ -66,7 +66,7 @@ relative_lndir ()
    15.4      (
    15.5      cd $i
    15.6      pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
    15.7 -    for j in `find . -type f -o -type l -maxdepth 1`; do
    15.8 +    for j in `find . -maxdepth 1 -type f -o -type l`; do
    15.9        ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
   15.10      done
   15.11      )
   15.12 @@ -232,7 +232,6 @@ ln -sf ../../../${LINUX_26}/arch/xen/ker
   15.13  ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c
   15.14  ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c
   15.15  ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c
   15.16 -ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/pci-dma.c
   15.17  
   15.18  cd ${AD}/arch/xen/lib
   15.19  ln -sf ../../i386/lib/checksum.S 
    16.1 --- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers	Tue May 03 14:42:00 2005 +0000
    16.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers	Tue May 10 08:49:59 2005 +0000
    16.3 @@ -49,9 +49,46 @@ source "drivers/infiniband/Kconfig"
    16.4  endif
    16.5  
    16.6  if !XEN_PHYSDEV_ACCESS
    16.7 +
    16.8 +menu "Character devices"
    16.9 +
   16.10  config UNIX98_PTYS
   16.11  	bool
   16.12  	default y
   16.13 +
   16.14 +config LEGACY_PTYS
   16.15 +	bool "Legacy (BSD) PTY support"
   16.16 +	default y
   16.17 +	---help---
   16.18 +	  A pseudo terminal (PTY) is a software device consisting of two
   16.19 +	  halves: a master and a slave. The slave device behaves identical to
   16.20 +	  a physical terminal; the master device is used by a process to
   16.21 +	  read data from and write data to the slave, thereby emulating a
   16.22 +	  terminal. Typical programs for the master side are telnet servers
   16.23 +	  and xterms.
   16.24 +
   16.25 +	  Linux has traditionally used the BSD-like names /dev/ptyxx
   16.26 +	  for masters and /dev/ttyxx for slaves of pseudo
   16.27 +	  terminals. This scheme has a number of problems, including
   16.28 +	  security.  This option enables these legacy devices; on most
   16.29 +	  systems, it is safe to say N.
   16.30 +
   16.31 +
   16.32 +config LEGACY_PTY_COUNT
   16.33 +	int "Maximum number of legacy PTY in use"
   16.34 +	depends on LEGACY_PTYS
   16.35 +	range 1 256
   16.36 +	default "256"
   16.37 +	---help---
   16.38 +	  The maximum number of legacy PTYs that can be used at any one time.
   16.39 +	  The default is 256, and should be more than enough.  Embedded
   16.40 +	  systems may want to reduce this to save memory.
   16.41 +
   16.42 +	  When not in use, each legacy PTY occupies 12 bytes on 32-bit
   16.43 +	  architectures and 24 bytes on 64-bit architectures.
   16.44 +
   16.45 +endmenu
   16.46 +
   16.47  endif
   16.48  
   16.49  endmenu
    17.1 --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue May 03 14:42:00 2005 +0000
    17.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue May 10 08:49:59 2005 +0000
    17.3 @@ -1,7 +1,7 @@
    17.4  #
    17.5  # Automatically generated make config: don't edit
    17.6  # Linux kernel version: 2.6.11-xen0
    17.7 -# Tue May  3 13:25:48 2005
    17.8 +# Wed May  4 17:11:56 2005
    17.9  #
   17.10  CONFIG_XEN=y
   17.11  CONFIG_ARCH_XEN=y
   17.12 @@ -146,7 +146,7 @@ CONFIG_PCI=y
   17.13  # CONFIG_PCI_GOMMCONFIG is not set
   17.14  # CONFIG_PCI_GODIRECT is not set
   17.15  CONFIG_PCI_GOANY=y
   17.16 -CONFIG_PCI_BIOS=y
   17.17 +# CONFIG_PCI_BIOS is not set
   17.18  CONFIG_PCI_DIRECT=y
   17.19  # CONFIG_PCIEPORTBUS is not set
   17.20  # CONFIG_PCI_MSI is not set
   17.21 @@ -176,16 +176,22 @@ CONFIG_PCMCIA_PROBE=y
   17.22  # Kernel hacking
   17.23  #
   17.24  CONFIG_DEBUG_KERNEL=y
   17.25 +CONFIG_MAGIC_SYSRQ=y
   17.26 +# CONFIG_SCHEDSTATS is not set
   17.27 +# CONFIG_DEBUG_SLAB is not set
   17.28 +# CONFIG_DEBUG_PREEMPT is not set
   17.29 +# CONFIG_DEBUG_SPINLOCK is not set
   17.30 +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   17.31 +# CONFIG_DEBUG_KOBJECT is not set
   17.32 +CONFIG_DEBUG_BUGVERBOSE=y
   17.33 +# CONFIG_DEBUG_INFO is not set
   17.34 +# CONFIG_DEBUG_FS is not set
   17.35 +# CONFIG_FRAME_POINTER is not set
   17.36  CONFIG_EARLY_PRINTK=y
   17.37  # CONFIG_DEBUG_STACKOVERFLOW is not set
   17.38 +# CONFIG_KPROBES is not set
   17.39  # CONFIG_DEBUG_STACK_USAGE is not set
   17.40 -# CONFIG_DEBUG_SLAB is not set
   17.41 -CONFIG_MAGIC_SYSRQ=y
   17.42 -# CONFIG_DEBUG_SPINLOCK is not set
   17.43  # CONFIG_DEBUG_PAGEALLOC is not set
   17.44 -# CONFIG_DEBUG_INFO is not set
   17.45 -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   17.46 -# CONFIG_FRAME_POINTER is not set
   17.47  # CONFIG_4KSTACKS is not set
   17.48  CONFIG_X86_FIND_SMP_CONFIG=y
   17.49  CONFIG_X86_MPPARSE=y
    18.1 --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Tue May 03 14:42:00 2005 +0000
    18.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Tue May 10 08:49:59 2005 +0000
    18.3 @@ -1,7 +1,7 @@
    18.4  #
    18.5  # Automatically generated make config: don't edit
    18.6  # Linux kernel version: 2.6.11-xenU
    18.7 -# Wed Apr  6 09:20:09 2005
    18.8 +# Wed May  4 17:14:10 2005
    18.9  #
   18.10  CONFIG_XEN=y
   18.11  CONFIG_ARCH_XEN=y
   18.12 @@ -132,16 +132,22 @@ CONFIG_HAVE_DEC_LOCK=y
   18.13  # Kernel hacking
   18.14  #
   18.15  CONFIG_DEBUG_KERNEL=y
   18.16 +CONFIG_MAGIC_SYSRQ=y
   18.17 +# CONFIG_SCHEDSTATS is not set
   18.18 +# CONFIG_DEBUG_SLAB is not set
   18.19 +# CONFIG_DEBUG_PREEMPT is not set
   18.20 +# CONFIG_DEBUG_SPINLOCK is not set
   18.21 +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   18.22 +# CONFIG_DEBUG_KOBJECT is not set
   18.23 +CONFIG_DEBUG_BUGVERBOSE=y
   18.24 +# CONFIG_DEBUG_INFO is not set
   18.25 +# CONFIG_DEBUG_FS is not set
   18.26 +# CONFIG_FRAME_POINTER is not set
   18.27  CONFIG_EARLY_PRINTK=y
   18.28  # CONFIG_DEBUG_STACKOVERFLOW is not set
   18.29 +# CONFIG_KPROBES is not set
   18.30  # CONFIG_DEBUG_STACK_USAGE is not set
   18.31 -# CONFIG_DEBUG_SLAB is not set
   18.32 -CONFIG_MAGIC_SYSRQ=y
   18.33 -# CONFIG_DEBUG_SPINLOCK is not set
   18.34  # CONFIG_DEBUG_PAGEALLOC is not set
   18.35 -# CONFIG_DEBUG_INFO is not set
   18.36 -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   18.37 -# CONFIG_FRAME_POINTER is not set
   18.38  # CONFIG_4KSTACKS is not set
   18.39  CONFIG_GENERIC_HARDIRQS=y
   18.40  CONFIG_GENERIC_IRQ_PROBE=y
   18.41 @@ -334,6 +340,12 @@ CONFIG_NETDEVICES=y
   18.42  # CONFIG_SHAPER is not set
   18.43  # CONFIG_NETCONSOLE is not set
   18.44  CONFIG_UNIX98_PTYS=y
   18.45 +CONFIG_LEGACY_PTYS=y
   18.46 +CONFIG_LEGACY_PTY_COUNT=256
   18.47 +
   18.48 +#
   18.49 +# Character devices
   18.50 +#
   18.51  
   18.52  #
   18.53  # File systems
    19.1 --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Tue May 03 14:42:00 2005 +0000
    19.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Tue May 10 08:49:59 2005 +0000
    19.3 @@ -316,6 +316,12 @@ CONFIG_NETDEVICES=y
    19.4  # CONFIG_SHAPER is not set
    19.5  # CONFIG_NETCONSOLE is not set
    19.6  CONFIG_UNIX98_PTYS=y
    19.7 +CONFIG_LEGACY_PTYS=y
    19.8 +CONFIG_LEGACY_PTY_COUNT=256
    19.9 +
   19.10 +#
   19.11 +# Character devices
   19.12 +#
   19.13  
   19.14  #
   19.15  # File systems
    20.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig	Tue May 03 14:42:00 2005 +0000
    20.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig	Tue May 10 08:49:59 2005 +0000
    20.3 @@ -12,6 +12,11 @@ config XENARCH
    20.4  config X86
    20.5  	bool
    20.6  	default y
    20.7 +	help
    20.8 +	  This is Linux's home port.  Linux was originally native to the Intel
    20.9 +	  386, and runs on all the later x86 processors including the Intel
   20.10 +	  486, 586, Pentiums, and various instruction-set-compatible chips by
   20.11 +	  AMD, Cyrix, and others.
   20.12  
   20.13  config MMU
   20.14  	bool
   20.15 @@ -782,13 +787,19 @@ endchoice
   20.16  config PCI_BIOS
   20.17  	bool
   20.18  	depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
   20.19 -	default n
   20.20 +	default y
   20.21  
   20.22  config PCI_DIRECT
   20.23  	bool
   20.24   	depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
   20.25  	default y
   20.26  
   20.27 +config PCI_MMCONFIG
   20.28 +	bool
   20.29 +	depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI))
   20.30 +	select ACPI_BOOT
   20.31 +	default y
   20.32 +
   20.33  source "drivers/pci/pcie/Kconfig"
   20.34  
   20.35  source "drivers/pci/Kconfig"
   20.36 @@ -856,129 +867,7 @@ endmenu
   20.37  
   20.38  endif
   20.39  
   20.40 -menu "Kernel hacking"
   20.41 -
   20.42 -config DEBUG_KERNEL
   20.43 -	bool "Kernel debugging"
   20.44 -	help
   20.45 -	  Say Y here if you are developing drivers or trying to debug and
   20.46 -	  identify kernel problems.
   20.47 -
   20.48 -config EARLY_PRINTK
   20.49 -	bool "Early printk" if EMBEDDED
   20.50 -	default y
   20.51 -	help
   20.52 -	  Write kernel log output directly into the VGA buffer or to a serial
   20.53 -	  port.
   20.54 -
   20.55 -	  This is useful for kernel debugging when your machine crashes very
   20.56 -	  early before the console code is initialized. For normal operation
   20.57 -	  it is not recommended because it looks ugly and doesn't cooperate
   20.58 -	  with klogd/syslogd or the X server. You should normally N here,
   20.59 -	  unless you want to debug such a crash.
   20.60 -
   20.61 -config DEBUG_STACKOVERFLOW
   20.62 -	bool "Check for stack overflows"
   20.63 -	depends on DEBUG_KERNEL
   20.64 -
   20.65 -config DEBUG_STACK_USAGE
   20.66 -	bool "Stack utilization instrumentation"
   20.67 -	depends on DEBUG_KERNEL
   20.68 -	help
   20.69 -	  Enables the display of the minimum amount of free stack which each
   20.70 -	  task has ever had available in the sysrq-T and sysrq-P debug output.
   20.71 -
   20.72 -	  This option will slow down process creation somewhat.
   20.73 -
   20.74 -config DEBUG_SLAB
   20.75 -	bool "Debug memory allocations"
   20.76 -	depends on DEBUG_KERNEL
   20.77 -	help
   20.78 -	  Say Y here to have the kernel do limited verification on memory
   20.79 -	  allocation as well as poisoning memory on free to catch use of freed
   20.80 -	  memory.
   20.81 -
   20.82 -config MAGIC_SYSRQ
   20.83 -	bool "Magic SysRq key"
   20.84 -	depends on DEBUG_KERNEL
   20.85 -	help
   20.86 -	  If you say Y here, you will have some control over the system even
   20.87 -	  if the system crashes for example during kernel debugging (e.g., you
   20.88 -	  will be able to flush the buffer cache to disk, reboot the system
   20.89 -	  immediately or dump some status information). This is accomplished
   20.90 -	  by pressing various keys while holding SysRq (Alt+PrintScreen). It
   20.91 -	  also works on a serial console (on PC hardware at least), if you
   20.92 -	  send a BREAK and then within 5 seconds a command keypress. The
   20.93 -	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
   20.94 -	  unless you really know what this hack does.
   20.95 -
   20.96 -config DEBUG_SPINLOCK
   20.97 -	bool "Spinlock debugging"
   20.98 -	depends on DEBUG_KERNEL
   20.99 -	help
  20.100 -	  Say Y here and build SMP to catch missing spinlock initialization
  20.101 -	  and certain other kinds of spinlock errors commonly made.  This is
  20.102 -	  best used in conjunction with the NMI watchdog so that spinlock
  20.103 -	  deadlocks are also debuggable.
  20.104 -
  20.105 -config DEBUG_PAGEALLOC
  20.106 -	bool "Page alloc debugging"
  20.107 -	depends on DEBUG_KERNEL
  20.108 -	help
  20.109 -	  Unmap pages from the kernel linear mapping after free_pages().
  20.110 -	  This results in a large slowdown, but helps to find certain types
  20.111 -	  of memory corruptions.
  20.112 -
  20.113 -config DEBUG_HIGHMEM
  20.114 -	bool "Highmem debugging"
  20.115 -	depends on DEBUG_KERNEL && HIGHMEM
  20.116 -	help
  20.117 -	  This options enables addition error checking for high memory systems.
  20.118 -	  Disable for production systems.
  20.119 -
  20.120 -config DEBUG_INFO
  20.121 -	bool "Compile the kernel with debug info"
  20.122 -	depends on DEBUG_KERNEL
  20.123 -	help
  20.124 -          If you say Y here the resulting kernel image will include
  20.125 -	  debugging info resulting in a larger kernel image.
  20.126 -	  Say Y here only if you plan to use gdb to debug the kernel.
  20.127 -	  If you don't debug the kernel, you can say N.
  20.128 -	  
  20.129 -config DEBUG_SPINLOCK_SLEEP
  20.130 -	bool "Sleep-inside-spinlock checking"
  20.131 -	help
  20.132 -	  If you say Y here, various routines which may sleep will become very
  20.133 -	  noisy if they are called with a spinlock held.	
  20.134 -
  20.135 -config FRAME_POINTER
  20.136 -	bool "Compile the kernel with frame pointers"
  20.137 -	help
  20.138 -	  If you say Y here the resulting kernel image will be slightly larger
  20.139 -	  and slower, but it will give very useful debugging information.
  20.140 -	  If you don't debug the kernel, you can say N, but we may not be able
  20.141 -	  to solve problems without frame pointers.
  20.142 -
  20.143 -config 4KSTACKS
  20.144 -	bool "Use 4Kb for kernel stacks instead of 8Kb"
  20.145 -	help
  20.146 -	  If you say Y here the kernel will use a 4Kb stacksize for the
  20.147 -	  kernel stack attached to each process/thread. This facilitates
  20.148 -	  running more threads on a system and also reduces the pressure
  20.149 -	  on the VM subsystem for higher order allocations. This option
  20.150 -	  will also use IRQ stacks to compensate for the reduced stackspace.
  20.151 -
  20.152 -config X86_FIND_SMP_CONFIG
  20.153 -	bool
  20.154 -	depends on X86_LOCAL_APIC || X86_VOYAGER
  20.155 -	default n
  20.156 -
  20.157 -config X86_MPPARSE
  20.158 -	bool
  20.159 -	depends on X86_LOCAL_APIC && !X86_VISWS
  20.160 -	default y
  20.161 -
  20.162 -endmenu
  20.163 +source "arch/i386/Kconfig.debug"
  20.164  
  20.165  #
  20.166  # Use the generic interrupt handling code in kernel/irq/:
    21.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile	Tue May 03 14:42:00 2005 +0000
    21.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile	Tue May 10 08:49:59 2005 +0000
    21.3 @@ -20,7 +20,7 @@ s-obj-y	:=
    21.4  
    21.5  obj-y				+= cpu/
    21.6  obj-y				+= timers/
    21.7 -c-obj-$(CONFIG_ACPI_BOOT)	+= acpi/
    21.8 +obj-$(CONFIG_ACPI_BOOT)		+= acpi/
    21.9  #c-obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
   21.10  c-obj-$(CONFIG_MCA)		+= mca.o
   21.11  c-obj-$(CONFIG_X86_MSR)		+= msr.o
   21.12 @@ -29,10 +29,10 @@ obj-$(CONFIG_MICROCODE)		+= microcode.o
   21.13  c-obj-$(CONFIG_APM)		+= apm.o
   21.14  obj-$(CONFIG_X86_SMP)		+= smp.o smpboot.o
   21.15  #obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
   21.16 -c-obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
   21.17 +obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
   21.18  obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o
   21.19  c-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
   21.20 -c-obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
   21.21 +obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
   21.22  c-obj-$(CONFIG_X86_NUMAQ)	+= numaq.o
   21.23  c-obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.o
   21.24  c-obj-$(CONFIG_MODULES)		+= module.o
    22.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile	Tue May 03 14:42:00 2005 +0000
    22.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile	Tue May 10 08:49:59 2005 +0000
    22.3 @@ -7,3 +7,7 @@ c-link                                  
    22.4  $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
    22.5  	@ln -fsn $(srctree)/arch/i386/kernel/acpi/$(notdir $@) $@
    22.6  
    22.7 +obj-y	+= $(c-obj-y) $(s-obj-y)
    22.8 +
    22.9 +clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
   22.10 +clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
    23.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c	Tue May 03 14:42:00 2005 +0000
    23.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c	Tue May 10 08:49:59 2005 +0000
    23.3 @@ -109,17 +109,17 @@ enum acpi_irq_model_id		acpi_irq_model =
    23.4  
    23.5  char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
    23.6  {
    23.7 -        unsigned int i,j;
    23.8 +	unsigned int i,j;
    23.9  
   23.10 -        j = PAGE_ALIGN(size) >> PAGE_SHIFT;
   23.11 -        for (i = 0; (i < FIX_ACPI_PAGES) && j ; i++, j--) {
   23.12 -                __set_fixmap_ma(FIX_ACPI_END - i,
   23.13 -                                (phys_addr & PAGE_MASK) + (i << PAGE_SHIFT),
   23.14 -                                PAGE_KERNEL);
   23.15 -        }
   23.16 +	j = PAGE_ALIGN(size) >> PAGE_SHIFT;
   23.17 +	for (i = 0; (i < FIX_ACPI_PAGES) && j ; i++, j--) {
   23.18 +		set_fixmap(FIX_ACPI_END - i,
   23.19 +			   (phys_addr & PAGE_MASK) + (i << PAGE_SHIFT));
   23.20 +	}
   23.21  
   23.22 -        return (char *) __fix_to_virt(FIX_ACPI_END) + (phys_addr & ~PAGE_MASK);
   23.23 +	return (char *) __fix_to_virt(FIX_ACPI_END) + (phys_addr & ~PAGE_MASK);
   23.24  }
   23.25 +
   23.26  #else
   23.27  #ifdef	CONFIG_X86_64
   23.28  
   23.29 @@ -523,7 +523,7 @@ acpi_scan_rsdp (
   23.30  {
   23.31  	unsigned long		offset = 0;
   23.32  	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
   23.33 -        unsigned long           vstart = isa_bus_to_virt(start);
   23.34 +	unsigned long		vstart = (unsigned long)isa_bus_to_virt(start);
   23.35  
   23.36  	/*
   23.37  	 * Scan all 16-byte boundaries of the physical memory region for the
   23.38 @@ -649,16 +649,15 @@ acpi_find_rsdp (void)
   23.39  		else if (efi.acpi)
   23.40  			return __pa(efi.acpi);
   23.41  	}
   23.42 -
   23.43  	/*
   23.44  	 * Scan memory looking for the RSDP signature. First search EBDA (low
   23.45  	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
   23.46  	 */
   23.47  	rsdp_phys = acpi_scan_rsdp (0, 0x400);
   23.48  	if (!rsdp_phys)
   23.49 -		rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF);
   23.50 +		rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
   23.51  
   23.52 -        __set_fixmap_ma(FIX_ACPI_RSDP_PAGE, rsdp_phys, PAGE_KERNEL);
   23.53 +	set_fixmap(FIX_ACPI_RSDP_PAGE, rsdp_phys);
   23.54  
   23.55  	return rsdp_phys;
   23.56  }
   23.57 @@ -673,10 +672,6 @@ acpi_parse_madt_lapic_entries(void)
   23.58  {
   23.59  	int count;
   23.60  
   23.61 -#ifdef CONFIG_XEN
   23.62 -        return 0;
   23.63 -#endif
   23.64 -
   23.65  	/* 
   23.66  	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
   23.67  	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
   23.68 @@ -872,7 +867,7 @@ acpi_boot_table_init(void)
   23.69  	}
   23.70  
   23.71  #ifdef __i386__
   23.72 -	//check_acpi_pci();
   23.73 +	check_acpi_pci();
   23.74  #endif
   23.75  
   23.76  	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
    24.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c	Tue May 03 14:42:00 2005 +0000
    24.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c	Tue May 10 08:49:59 2005 +0000
    24.3 @@ -16,8 +16,32 @@
    24.4  
    24.5  #include <linux/config.h>
    24.6  #include <linux/init.h>
    24.7 -#include <asm/apic.h>
    24.8 +
    24.9 +#include <linux/mm.h>
   24.10 +#include <linux/irq.h>
   24.11 +#include <linux/delay.h>
   24.12 +#include <linux/bootmem.h>
   24.13 +#include <linux/smp_lock.h>
   24.14 +#include <linux/interrupt.h>
   24.15 +#include <linux/mc146818rtc.h>
   24.16 +#include <linux/kernel_stat.h>
   24.17 +#include <linux/sysdev.h>
   24.18  
   24.19 +#include <asm/atomic.h>
   24.20 +#include <asm/smp.h>
   24.21 +#include <asm/mtrr.h>
   24.22 +#include <asm/mpspec.h>
   24.23 +#include <asm/desc.h>
   24.24 +#include <asm/arch_hooks.h>
   24.25 +#include <asm/hpet.h>
   24.26 +
   24.27 +#include <mach_apic.h>
   24.28 +
   24.29 +#include "io_ports.h"
   24.30 +
   24.31 +/*
   24.32 + * Debug level
   24.33 + */
   24.34  int apic_verbosity;
   24.35  
   24.36  int get_physical_broadcast(void)
   24.37 @@ -49,5 +73,11 @@ void ack_bad_irq(unsigned int irq)
   24.38   */
   24.39  int __init APIC_init_uniprocessor (void)
   24.40  {
   24.41 +#ifdef CONFIG_X86_IO_APIC
   24.42 +	if (smp_found_config)
   24.43 +		if (!skip_ioapic_setup && nr_ioapics)
   24.44 +			setup_IO_APIC();
   24.45 +#endif
   24.46 +
   24.47  	return 0;
   24.48  }
    25.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c	Tue May 03 14:42:00 2005 +0000
    25.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c	Tue May 10 08:49:59 2005 +0000
    25.3 @@ -37,19 +37,56 @@
    25.4  #include <asm/smp.h>
    25.5  #include <asm/desc.h>
    25.6  #include <asm/timer.h>
    25.7 -#include <asm/io_apic.h>
    25.8 -#include <asm/apic.h>
    25.9  
   25.10  #include <mach_apic.h>
   25.11  
   25.12  #include "io_ports.h"
   25.13  
   25.14 +#ifdef CONFIG_XEN
   25.15 +
   25.16 +#include <asm-xen/xen-public/xen.h>
   25.17 +#include <asm-xen/xen-public/physdev.h>
   25.18 +
   25.19 +/* Fake i8259 */
   25.20 +#define make_8259A_irq(_irq)     (io_apic_irqs &= ~(1UL<<(_irq)))
   25.21 +#define disable_8259A_irq(_irq)  ((void)0)
   25.22 +#define i8259A_irq_pending(_irq) (0)
   25.23 +
   25.24 +unsigned long io_apic_irqs;
   25.25 +
   25.26 +static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
   25.27 +{
   25.28 +	physdev_op_t op;
   25.29 +	int ret;
   25.30 +
   25.31 +	op.cmd = PHYSDEVOP_APIC_READ;
   25.32 +	op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
   25.33 +	op.u.apic_op.offset = reg;
   25.34 +	ret = HYPERVISOR_physdev_op(&op);
   25.35 +	if (ret)
   25.36 +		return ret;
   25.37 +	return op.u.apic_op.value;
   25.38 +}
   25.39 +
   25.40 +static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
   25.41 +{
   25.42 +	physdev_op_t op;
   25.43 +
   25.44 +	op.cmd = PHYSDEVOP_APIC_WRITE;
   25.45 +	op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
   25.46 +	op.u.apic_op.offset = reg;
   25.47 +	op.u.apic_op.value = value;
   25.48 +	HYPERVISOR_physdev_op(&op);
   25.49 +}
   25.50 +
   25.51 +#define io_apic_read(a,r)    xen_io_apic_read(a,r)
   25.52 +#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
   25.53 +
   25.54 +#endif /* CONFIG_XEN */
   25.55 +
   25.56  int (*ioapic_renumber_irq)(int ioapic, int irq);
   25.57  atomic_t irq_mis_count;
   25.58  
   25.59 -unsigned long io_apic_irqs;
   25.60 -int skip_ioapic_setup;
   25.61 -
   25.62  static DEFINE_SPINLOCK(ioapic_lock);
   25.63  
   25.64  /*
   25.65 @@ -89,24 +126,6 @@ int vector_irq[NR_VECTORS] = { [0 ... NR
   25.66  #define vector_to_irq(vector)	(vector)
   25.67  #endif
   25.68  
   25.69 -
   25.70 -#ifndef CONFIG_SMP
   25.71 -void fastcall send_IPI_self(int vector)
   25.72 -{
   25.73 -     return; 
   25.74 -}
   25.75 -#endif
   25.76 -
   25.77 -int irqbalance_disable(char *str)
   25.78 -{
   25.79 -     return 0; 
   25.80 -}
   25.81 -
   25.82 -void print_IO_APIC(void)
   25.83 -{
   25.84 -     return; 
   25.85 -}
   25.86 -
   25.87  /*
   25.88   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
   25.89   * shared ISA-space IRQs, so we have to support them. We are super
   25.90 @@ -130,6 +149,583 @@ static void add_pin_to_irq(unsigned int 
   25.91  	entry->pin = pin;
   25.92  }
   25.93  
   25.94 +#ifndef CONFIG_XEN
   25.95 +/*
   25.96 + * Reroute an IRQ to a different pin.
   25.97 + */
   25.98 +static void __init replace_pin_at_irq(unsigned int irq,
   25.99 +				      int oldapic, int oldpin,
  25.100 +				      int newapic, int newpin)
  25.101 +{
  25.102 +	struct irq_pin_list *entry = irq_2_pin + irq;
  25.103 +
  25.104 +	while (1) {
  25.105 +		if (entry->apic == oldapic && entry->pin == oldpin) {
  25.106 +			entry->apic = newapic;
  25.107 +			entry->pin = newpin;
  25.108 +		}
  25.109 +		if (!entry->next)
  25.110 +			break;
  25.111 +		entry = irq_2_pin + entry->next;
  25.112 +	}
  25.113 +}
  25.114 +
  25.115 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
  25.116 +{
  25.117 +	struct irq_pin_list *entry = irq_2_pin + irq;
  25.118 +	unsigned int pin, reg;
  25.119 +
  25.120 +	for (;;) {
  25.121 +		pin = entry->pin;
  25.122 +		if (pin == -1)
  25.123 +			break;
  25.124 +		reg = io_apic_read(entry->apic, 0x10 + pin*2);
  25.125 +		reg &= ~disable;
  25.126 +		reg |= enable;
  25.127 +		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
  25.128 +		if (!entry->next)
  25.129 +			break;
  25.130 +		entry = irq_2_pin + entry->next;
  25.131 +	}
  25.132 +}
  25.133 +
  25.134 +/* mask = 1 */
  25.135 +static void __mask_IO_APIC_irq (unsigned int irq)
  25.136 +{
  25.137 +	__modify_IO_APIC_irq(irq, 0x00010000, 0);
  25.138 +}
  25.139 +
  25.140 +/* mask = 0 */
  25.141 +static void __unmask_IO_APIC_irq (unsigned int irq)
  25.142 +{
  25.143 +	__modify_IO_APIC_irq(irq, 0, 0x00010000);
  25.144 +}
  25.145 +
  25.146 +/* mask = 1, trigger = 0 */
  25.147 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
  25.148 +{
  25.149 +	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
  25.150 +}
  25.151 +
  25.152 +/* mask = 0, trigger = 1 */
  25.153 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
  25.154 +{
  25.155 +	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
  25.156 +}
  25.157 +
  25.158 +static void mask_IO_APIC_irq (unsigned int irq)
  25.159 +{
  25.160 +	unsigned long flags;
  25.161 +
  25.162 +	spin_lock_irqsave(&ioapic_lock, flags);
  25.163 +	__mask_IO_APIC_irq(irq);
  25.164 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  25.165 +}
  25.166 +
  25.167 +static void unmask_IO_APIC_irq (unsigned int irq)
  25.168 +{
  25.169 +	unsigned long flags;
  25.170 +
  25.171 +	spin_lock_irqsave(&ioapic_lock, flags);
  25.172 +	__unmask_IO_APIC_irq(irq);
  25.173 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  25.174 +}
  25.175 +
  25.176 +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
  25.177 +{
  25.178 +	struct IO_APIC_route_entry entry;
  25.179 +	unsigned long flags;
  25.180 +	
  25.181 +	/* Check delivery_mode to be sure we're not clearing an SMI pin */
  25.182 +	spin_lock_irqsave(&ioapic_lock, flags);
  25.183 +	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
  25.184 +	*(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
  25.185 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  25.186 +	if (entry.delivery_mode == dest_SMI)
  25.187 +		return;
  25.188 +
  25.189 +	/*
  25.190 +	 * Disable it in the IO-APIC irq-routing table:
  25.191 +	 */
  25.192 +	memset(&entry, 0, sizeof(entry));
  25.193 +	entry.mask = 1;
  25.194 +	spin_lock_irqsave(&ioapic_lock, flags);
  25.195 +	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
  25.196 +	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
  25.197 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  25.198 +}
  25.199 +
  25.200 +static void clear_IO_APIC (void)
  25.201 +{
  25.202 +	int apic, pin;
  25.203 +
  25.204 +	for (apic = 0; apic < nr_ioapics; apic++)
  25.205 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
  25.206 +			clear_IO_APIC_pin(apic, pin);
  25.207 +}
  25.208 +
  25.209 +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
  25.210 +{
  25.211 +	unsigned long flags;
  25.212 +	int pin;
  25.213 +	struct irq_pin_list *entry = irq_2_pin + irq;
  25.214 +	unsigned int apicid_value;
  25.215 +	
  25.216 +	apicid_value = cpu_mask_to_apicid(cpumask);
  25.217 +	/* Prepare to do the io_apic_write */
  25.218 +	apicid_value = apicid_value << 24;
  25.219 +	spin_lock_irqsave(&ioapic_lock, flags);
  25.220 +	for (;;) {
  25.221 +		pin = entry->pin;
  25.222 +		if (pin == -1)
  25.223 +			break;
  25.224 +		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
  25.225 +		if (!entry->next)
  25.226 +			break;
  25.227 +		entry = irq_2_pin + entry->next;
  25.228 +	}
  25.229 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  25.230 +}
  25.231 +#else
  25.232 +#define clear_IO_APIC() ((void)0)
  25.233 +#endif
  25.234 +
  25.235 +#if defined(CONFIG_IRQBALANCE)
  25.236 +# include <asm/processor.h>	/* kernel_thread() */
  25.237 +# include <linux/kernel_stat.h>	/* kstat */
  25.238 +# include <linux/slab.h>		/* kmalloc() */
  25.239 +# include <linux/timer.h>	/* time_after() */
  25.240 + 
  25.241 +# ifdef CONFIG_BALANCED_IRQ_DEBUG
  25.242 +#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
  25.243 +#  define Dprintk(x...) do { TDprintk(x); } while (0)
  25.244 +# else
  25.245 +#  define TDprintk(x...) 
  25.246 +#  define Dprintk(x...) 
  25.247 +# endif
  25.248 +
  25.249 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
  25.250 +
  25.251 +#define IRQBALANCE_CHECK_ARCH -999
  25.252 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
  25.253 +static int physical_balance = 0;
  25.254 +
  25.255 +struct irq_cpu_info {
  25.256 +	unsigned long * last_irq;
  25.257 +	unsigned long * irq_delta;
  25.258 +	unsigned long irq;
  25.259 +} irq_cpu_data[NR_CPUS];
  25.260 +
  25.261 +#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
  25.262 +#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
  25.263 +#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
  25.264 +
  25.265 +#define IDLE_ENOUGH(cpu,now) \
  25.266 +		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
  25.267 +
  25.268 +#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
  25.269 +
  25.270 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
  25.271 +
  25.272 +#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
  25.273 +#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
  25.274 +#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
  25.275 +#define BALANCED_IRQ_LESS_DELTA		(HZ)
  25.276 +
  25.277 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
  25.278 +
  25.279 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
  25.280 +			unsigned long now, int direction)
  25.281 +{
  25.282 +	int search_idle = 1;
  25.283 +	int cpu = curr_cpu;
  25.284 +
  25.285 +	goto inside;
  25.286 +
  25.287 +	do {
  25.288 +		if (unlikely(cpu == curr_cpu))
  25.289 +			search_idle = 0;
  25.290 +inside:
  25.291 +		if (direction == 1) {
  25.292 +			cpu++;
  25.293 +			if (cpu >= NR_CPUS)
  25.294 +				cpu = 0;
  25.295 +		} else {
  25.296 +			cpu--;
  25.297 +			if (cpu == -1)
  25.298 +				cpu = NR_CPUS-1;
  25.299 +		}
  25.300 +	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
  25.301 +			(search_idle && !IDLE_ENOUGH(cpu,now)));
  25.302 +
  25.303 +	return cpu;
  25.304 +}
  25.305 +
  25.306 +static inline void balance_irq(int cpu, int irq)
  25.307 +{
  25.308 +	unsigned long now = jiffies;
  25.309 +	cpumask_t allowed_mask;
  25.310 +	unsigned int new_cpu;
  25.311 +		
  25.312 +	if (irqbalance_disabled)
  25.313 +		return; 
  25.314 +
  25.315 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
  25.316 +	new_cpu = move(cpu, allowed_mask, now, 1);
  25.317 +	if (cpu != new_cpu) {
  25.318 +		irq_desc_t *desc = irq_desc + irq;
  25.319 +		unsigned long flags;
  25.320 +
  25.321 +		spin_lock_irqsave(&desc->lock, flags);
  25.322 +		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
  25.323 +		spin_unlock_irqrestore(&desc->lock, flags);
  25.324 +	}
  25.325 +}
  25.326 +
  25.327 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
  25.328 +{
  25.329 +	int i, j;
  25.330 +	Dprintk("Rotating IRQs among CPUs.\n");
  25.331 +	for (i = 0; i < NR_CPUS; i++) {
  25.332 +		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
  25.333 +			if (!irq_desc[j].action)
  25.334 +				continue;
  25.335 +			/* Is it a significant load ?  */
  25.336 +			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
  25.337 +						useful_load_threshold)
  25.338 +				continue;
  25.339 +			balance_irq(i, j);
  25.340 +		}
  25.341 +	}
  25.342 +	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  25.343 +		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  25.344 +	return;
  25.345 +}
  25.346 +
/*
 * One balancing pass: accumulate per-CPU / per-IRQ activity deltas since
 * the previous pass, pick the most- and least-loaded processor packages,
 * and queue the single IRQ whose load best fills the gap for migration to
 * the idle side.  The migration itself is only *requested* here (via
 * pending_irq_balance_cpumask[]); move_irq() applies it later under the
 * descriptor lock.
 */
static void do_irq_balance(void)
{
	int i, j;
	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
	unsigned long move_this_load = 0;
	int max_loaded = 0, min_loaded = 0;
	int load;
	unsigned long useful_load_threshold = balanced_irq_interval + 10;
	int selected_irq;
	int tmp_loaded, first_attempt = 1;
	unsigned long tmp_cpu_irq;
	unsigned long imbalance = 0;
	cpumask_t allowed_mask, target_cpu_mask, tmp;

	/* Pass 1: rebuild CPU_IRQ()/IRQ_DELTA() from the kstat counters. */
	for (i = 0; i < NR_CPUS; i++) {
		int package_index;
		CPU_IRQ(i) = 0;
		if (!cpu_online(i))
			continue;
		package_index = CPU_TO_PACKAGEINDEX(i);
		for (j = 0; j < NR_IRQS; j++) {
			unsigned long value_now, delta;
			/* Is this an active IRQ? */
			if (!irq_desc[j].action)
				continue;
			/* Reset the package total when visiting its first sibling. */
			if ( package_index == i )
				IRQ_DELTA(package_index,j) = 0;
			/* Determine the total count per processor per IRQ */
			value_now = (unsigned long) kstat_cpu(i).irqs[j];

			/* Determine the activity per processor per IRQ */
			delta = value_now - LAST_CPU_IRQ(i,j);

			/* Update last_cpu_irq[][] for the next time */
			LAST_CPU_IRQ(i,j) = value_now;

			/* Ignore IRQs whose rate is less than the clock */
			if (delta < useful_load_threshold)
				continue;
			/* update the load for the processor or package total */
			IRQ_DELTA(package_index,j) += delta;

			/* Keep track of the higher numbered sibling as well */
			if (i != package_index)
				CPU_IRQ(i) += delta;
			/*
			 * We have sibling A and sibling B in the package
			 *
			 * cpu_irq[A] = load for cpu A + load for cpu B
			 * cpu_irq[B] = load for cpu B
			 */
			CPU_IRQ(package_index) += delta;
		}
	}
	/* Find the least loaded processor package */
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_online(i))
			continue;
		if (i != CPU_TO_PACKAGEINDEX(i))
			continue;
		if (min_cpu_irq > CPU_IRQ(i)) {
			min_cpu_irq = CPU_IRQ(i);
			min_loaded = i;
		}
	}
	/* Upper bound for the heaviest-package search below. */
	max_cpu_irq = ULONG_MAX;

tryanothercpu:
	/* Look for heaviest loaded processor.
	 * We may come back to get the next heaviest loaded processor.
	 * Skip processors with trivial loads.
	 */
	tmp_cpu_irq = 0;
	tmp_loaded = -1;
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_online(i))
			continue;
		if (i != CPU_TO_PACKAGEINDEX(i))
			continue;
		/* Already examined (or rejected) on a previous iteration. */
		if (max_cpu_irq <= CPU_IRQ(i)) 
			continue;
		if (tmp_cpu_irq < CPU_IRQ(i)) {
			tmp_cpu_irq = CPU_IRQ(i);
			tmp_loaded = i;
		}
	}

	if (tmp_loaded == -1) {
		/* No candidate package left.  With a small number of heavy
		 * interrupt sources some CPUs end up loaded too much; fall
		 * back to Ingo's original approach and rotate the IRQs
		 * around instead.
		 */
		if (!first_attempt && imbalance >= useful_load_threshold) {
			rotate_irqs_among_cpus(useful_load_threshold);
			return;
		}
		goto not_worth_the_effort;
	}
	
	first_attempt = 0;		/* heaviest search */
	max_cpu_irq = tmp_cpu_irq;	/* load */
	max_loaded = tmp_loaded;	/* processor */
	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
	
	Dprintk("max_loaded cpu = %d\n", max_loaded);
	Dprintk("min_loaded cpu = %d\n", min_loaded);
	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
	Dprintk("load imbalance = %lu\n", imbalance);

	/* if imbalance is less than approx 10% of max load, then
	 * observe diminishing returns action. - quit
	 */
	if (imbalance < (max_cpu_irq >> 3)) {
		Dprintk("Imbalance too trivial\n");
		goto not_worth_the_effort;
	}

tryanotherirq:
	/* if we select an IRQ to move that can't go where we want, then
	 * see if there is another one to try.
	 */
	move_this_load = 0;
	selected_irq = -1;
	for (j = 0; j < NR_IRQS; j++) {
		/* Is this an active IRQ? */
		if (!irq_desc[j].action)
			continue;
		if (imbalance <= IRQ_DELTA(max_loaded,j))
			continue;
		/* Try to find the IRQ that is closest to the imbalance
		 * without going over.
		 */
		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
			move_this_load = IRQ_DELTA(max_loaded,j);
			selected_irq = j;
		}
	}
	if (selected_irq == -1) {
		goto tryanothercpu;
	}

	imbalance = move_this_load;
	
	/* For physical_balance case, we accumlated both load
	 * values in the one of the siblings cpu_irq[],
	 * to use the same code for physical and logical processors
	 * as much as possible. 
	 *
	 * NOTE: the cpu_irq[] array holds the sum of the load for
	 * sibling A and sibling B in the slot for the lowest numbered
	 * sibling (A), _AND_ the load for sibling B in the slot for
	 * the higher numbered sibling.
	 *
	 * We seek the least loaded sibling by making the comparison
	 * (A+B)/2 vs B
	 */
	load = CPU_IRQ(min_loaded) >> 1;
	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
		if (load > CPU_IRQ(j)) {
			/* This won't change cpu_sibling_map[min_loaded] */
			load = CPU_IRQ(j);
			min_loaded = j;
		}
	}

	/* Only move the IRQ if its affinity mask permits the target CPU. */
	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
	target_cpu_mask = cpumask_of_cpu(min_loaded);
	cpus_and(tmp, target_cpu_mask, allowed_mask);

	if (!cpus_empty(tmp)) {
		irq_desc_t *desc = irq_desc + selected_irq;
		unsigned long flags;

		Dprintk("irq = %d moved to cpu = %d\n",
				selected_irq, min_loaded);
		/* mark for change destination */
		spin_lock_irqsave(&desc->lock, flags);
		pending_irq_balance_cpumask[selected_irq] =
					cpumask_of_cpu(min_loaded);
		spin_unlock_irqrestore(&desc->lock, flags);
		/* Since we made a change, come back sooner to 
		 * check for more variation.
		 */
		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
		return;
	}
	goto tryanotherirq;

not_worth_the_effort:
	/*
	 * if we did not find an IRQ to move, then adjust the time interval
	 * upward
	 */
	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
	Dprintk("IRQ worth rotating not found\n");
	return;
}
  25.547 +
  25.548 +static int balanced_irq(void *unused)
  25.549 +{
  25.550 +	int i;
  25.551 +	unsigned long prev_balance_time = jiffies;
  25.552 +	long time_remaining = balanced_irq_interval;
  25.553 +
  25.554 +	daemonize("kirqd");
  25.555 +	
  25.556 +	/* push everything to CPU 0 to give us a starting point.  */
  25.557 +	for (i = 0 ; i < NR_IRQS ; i++) {
  25.558 +		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
  25.559 +	}
  25.560 +
  25.561 +	for ( ; ; ) {
  25.562 +		set_current_state(TASK_INTERRUPTIBLE);
  25.563 +		time_remaining = schedule_timeout(time_remaining);
  25.564 +		try_to_freeze(PF_FREEZE);
  25.565 +		if (time_after(jiffies,
  25.566 +				prev_balance_time+balanced_irq_interval)) {
  25.567 +			do_irq_balance();
  25.568 +			prev_balance_time = jiffies;
  25.569 +			time_remaining = balanced_irq_interval;
  25.570 +		}
  25.571 +	}
  25.572 +	return 0;
  25.573 +}
  25.574 +
  25.575 +static int __init balanced_irq_init(void)
  25.576 +{
  25.577 +	int i;
  25.578 +	struct cpuinfo_x86 *c;
  25.579 +	cpumask_t tmp;
  25.580 +
  25.581 +	cpus_shift_right(tmp, cpu_online_map, 2);
  25.582 +        c = &boot_cpu_data;
  25.583 +	/* When not overwritten by the command line ask subarchitecture. */
  25.584 +	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
  25.585 +		irqbalance_disabled = NO_BALANCE_IRQ;
  25.586 +	if (irqbalance_disabled)
  25.587 +		return 0;
  25.588 +	
  25.589 +	 /* disable irqbalance completely if there is only one processor online */
  25.590 +	if (num_online_cpus() < 2) {
  25.591 +		irqbalance_disabled = 1;
  25.592 +		return 0;
  25.593 +	}
  25.594 +	/*
  25.595 +	 * Enable physical balance only if more than 1 physical processor
  25.596 +	 * is present
  25.597 +	 */
  25.598 +	if (smp_num_siblings > 1 && !cpus_empty(tmp))
  25.599 +		physical_balance = 1;
  25.600 +
  25.601 +	for (i = 0; i < NR_CPUS; i++) {
  25.602 +		if (!cpu_online(i))
  25.603 +			continue;
  25.604 +		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  25.605 +		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  25.606 +		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
  25.607 +			printk(KERN_ERR "balanced_irq_init: out of memory");
  25.608 +			goto failed;
  25.609 +		}
  25.610 +		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
  25.611 +		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
  25.612 +	}
  25.613 +	
  25.614 +	printk(KERN_INFO "Starting balanced_irq\n");
  25.615 +	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
  25.616 +		return 0;
  25.617 +	else 
  25.618 +		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
  25.619 +failed:
  25.620 +	for (i = 0; i < NR_CPUS; i++) {
  25.621 +		if(irq_cpu_data[i].irq_delta)
  25.622 +			kfree(irq_cpu_data[i].irq_delta);
  25.623 +		if(irq_cpu_data[i].last_irq)
  25.624 +			kfree(irq_cpu_data[i].last_irq);
  25.625 +	}
  25.626 +	return 0;
  25.627 +}
  25.628 +
  25.629 +int __init irqbalance_disable(char *str)
  25.630 +{
  25.631 +	irqbalance_disabled = 1;
  25.632 +	return 0;
  25.633 +}
  25.634 +
  25.635 +__setup("noirqbalance", irqbalance_disable);
  25.636 +
  25.637 +static inline void move_irq(int irq)
  25.638 +{
  25.639 +	/* note - we hold the desc->lock */
  25.640 +	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
  25.641 +		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
  25.642 +		cpus_clear(pending_irq_balance_cpumask[irq]);
  25.643 +	}
  25.644 +}
  25.645 +
  25.646 +late_initcall(balanced_irq_init);
  25.647 +
  25.648 +#else /* !CONFIG_IRQBALANCE */
  25.649 +static inline void move_irq(int irq) { }
  25.650 +#endif /* CONFIG_IRQBALANCE */
  25.651 +
#ifndef CONFIG_SMP
/*
 * Send an IPI with the given vector to this CPU via the local APIC's
 * self-destination shorthand.  Under CONFIG_XEN the body is compiled
 * out, making this a no-op (the guest does not program the APIC ICR
 * directly).
 */
void fastcall send_IPI_self(int vector)
{
#ifndef CONFIG_XEN
	unsigned int cfg;

	/*
	 * Wait for idle: the ICR must not be written while a previous
	 * IPI is still being delivered.
	 */
	apic_wait_icr_idle();
	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	apic_write_around(APIC_ICR, cfg);
#endif
}
#endif /* !CONFIG_SMP */
  25.670 +
/*
 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
 * specific CPU-side IRQs.
 */
#define MAX_PIRQS 8
int pirq_entries [MAX_PIRQS];	/* PIRQ -> IRQ map, filled by "pirq=" parsing */
int pirqs_enabled;		/* non-zero once a "pirq=" option was seen */
int skip_ioapic_setup;		/* set by the "noapic" boot option */
  25.679 +
/*
 * "noapic" boot option: skip all IO-APIC setup.  The argument string is
 * unused; returning 1 marks the option as consumed.
 */
static int __init ioapic_setup(char *str)
{
	skip_ioapic_setup = 1;
	return 1;
}

__setup("noapic", ioapic_setup);
  25.687 +
  25.688 +static int __init ioapic_pirq_setup(char *str)
  25.689 +{
  25.690 +	int i, max;
  25.691 +	int ints[MAX_PIRQS+1];
  25.692 +
  25.693 +	get_options(str, ARRAY_SIZE(ints), ints);
  25.694 +
  25.695 +	for (i = 0; i < MAX_PIRQS; i++)
  25.696 +		pirq_entries[i] = -1;
  25.697 +
  25.698 +	pirqs_enabled = 1;
  25.699 +	apic_printk(APIC_VERBOSE, KERN_INFO
  25.700 +			"PIRQ redirection, working around broken MP-BIOS.\n");
  25.701 +	max = MAX_PIRQS;
  25.702 +	if (ints[0] < MAX_PIRQS)
  25.703 +		max = ints[0];
  25.704 +
  25.705 +	for (i = 0; i < max; i++) {
  25.706 +		apic_printk(APIC_VERBOSE, KERN_DEBUG
  25.707 +				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
  25.708 +		/*
  25.709 +		 * PIRQs are mapped upside down, usually.
  25.710 +		 */
  25.711 +		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
  25.712 +	}
  25.713 +	return 1;
  25.714 +}
  25.715 +
  25.716 +__setup("pirq=", ioapic_pirq_setup);
  25.717 +
  25.718 +/*
  25.719 + * Find the IRQ entry number of a certain pin.
  25.720 + */
  25.721 +static int find_irq_entry(int apic, int pin, int type)
  25.722 +{
  25.723 +	int i;
  25.724 +
  25.725 +	for (i = 0; i < mp_irq_entries; i++)
  25.726 +		if (mp_irqs[i].mpc_irqtype == type &&
  25.727 +		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
  25.728 +		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
  25.729 +		    mp_irqs[i].mpc_dstirq == pin)
  25.730 +			return i;
  25.731 +
  25.732 +	return -1;
  25.733 +}
  25.734 +
  25.735 +#ifndef CONFIG_XEN
  25.736 +/*
  25.737 + * Find the pin to which IRQ[irq] (ISA) is connected
  25.738 + */
  25.739 +static int find_isa_irq_pin(int irq, int type)
  25.740 +{
  25.741 +	int i;
  25.742 +
  25.743 +	for (i = 0; i < mp_irq_entries; i++) {
  25.744 +		int lbus = mp_irqs[i].mpc_srcbus;
  25.745 +
  25.746 +		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
  25.747 +		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
  25.748 +		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
  25.749 +		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
  25.750 +		    ) &&
  25.751 +		    (mp_irqs[i].mpc_irqtype == type) &&
  25.752 +		    (mp_irqs[i].mpc_srcbusirq == irq))
  25.753 +
  25.754 +			return mp_irqs[i].mpc_dstirq;
  25.755 +	}
  25.756 +	return -1;
  25.757 +}
  25.758 +#endif
  25.759 +
  25.760  /*
  25.761   * Find a specific PCI IRQ entry.
  25.762   * Not an __init, possibly needed by modules
  25.763 @@ -184,6 +862,236 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
  25.764  	return best_guess;
  25.765  }
  25.766  
  25.767 +#ifndef CONFIG_XEN
  25.768 +/*
  25.769 + * This function currently is only a helper for the i386 smp boot process where 
  25.770 + * we need to reprogram the ioredtbls to cater for the cpus which have come online
  25.771 + * so mask in all cases should simply be TARGET_CPUS
  25.772 + */
  25.773 +void __init setup_ioapic_dest(void)
  25.774 +{
  25.775 +	int pin, ioapic, irq, irq_entry;
  25.776 +
  25.777 +	if (skip_ioapic_setup == 1)
  25.778 +		return;
  25.779 +
  25.780 +	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
  25.781 +		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
  25.782 +			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
  25.783 +			if (irq_entry == -1)
  25.784 +				continue;
  25.785 +			irq = pin_2_irq(irq_entry, ioapic, pin);
  25.786 +			set_ioapic_affinity_irq(irq, TARGET_CPUS);
  25.787 +		}
  25.788 +
  25.789 +	}
  25.790 +}
  25.791 +#endif /* !CONFIG_XEN */
  25.792 +
  25.793 +/*
  25.794 + * EISA Edge/Level control register, ELCR
  25.795 + */
  25.796 +static int EISA_ELCR(unsigned int irq)
  25.797 +{
  25.798 +	if (irq < 16) {
  25.799 +		unsigned int port = 0x4d0 + (irq >> 3);
  25.800 +		return (inb(port) >> (irq & 7)) & 1;
  25.801 +	}
  25.802 +	apic_printk(APIC_VERBOSE, KERN_INFO
  25.803 +			"Broken MPtable reports ISA irq %d\n", irq);
  25.804 +	return 0;
  25.805 +}
  25.806 +
/*
 * Per-bus defaults used for "conforming" MP-table entries.
 * Trigger: 0 = edge, 1 = level.  Polarity: 0 = active high, 1 = active low
 * (matching the case labels in MPBIOS_trigger()/MPBIOS_polarity() below).
 */

/* EISA interrupts are always polarity zero and can be edge or level
 * trigger depending on the ELCR value.  If an interrupt is listed as
 * EISA conforming in the MP table, that means its trigger type must
 * be read in from the ELCR */

#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
#define default_EISA_polarity(idx)	(0)

/* ISA interrupts are always polarity zero edge triggered,
 * when listed as conforming in the MP table. */

#define default_ISA_trigger(idx)	(0)
#define default_ISA_polarity(idx)	(0)

/* PCI interrupts are always polarity one level triggered,
 * when listed as conforming in the MP table. */

#define default_PCI_trigger(idx)	(1)
#define default_PCI_polarity(idx)	(1)

/* MCA interrupts are always polarity zero level triggered,
 * when listed as conforming in the MP table. */

#define default_MCA_trigger(idx)	(1)
#define default_MCA_polarity(idx)	(0)

/* NEC98 interrupts are always polarity zero edge triggered,
 * when listed as conforming in the MP table. */

#define default_NEC98_trigger(idx)     (0)
#define default_NEC98_polarity(idx)    (0)
  25.838 +
  25.839 +static int __init MPBIOS_polarity(int idx)
  25.840 +{
  25.841 +	int bus = mp_irqs[idx].mpc_srcbus;
  25.842 +	int polarity;
  25.843 +
  25.844 +	/*
  25.845 +	 * Determine IRQ line polarity (high active or low active):
  25.846 +	 */
  25.847 +	switch (mp_irqs[idx].mpc_irqflag & 3)
  25.848 +	{
  25.849 +		case 0: /* conforms, ie. bus-type dependent polarity */
  25.850 +		{
  25.851 +			switch (mp_bus_id_to_type[bus])
  25.852 +			{
  25.853 +				case MP_BUS_ISA: /* ISA pin */
  25.854 +				{
  25.855 +					polarity = default_ISA_polarity(idx);
  25.856 +					break;
  25.857 +				}
  25.858 +				case MP_BUS_EISA: /* EISA pin */
  25.859 +				{
  25.860 +					polarity = default_EISA_polarity(idx);
  25.861 +					break;
  25.862 +				}
  25.863 +				case MP_BUS_PCI: /* PCI pin */
  25.864 +				{
  25.865 +					polarity = default_PCI_polarity(idx);
  25.866 +					break;
  25.867 +				}
  25.868 +				case MP_BUS_MCA: /* MCA pin */
  25.869 +				{
  25.870 +					polarity = default_MCA_polarity(idx);
  25.871 +					break;
  25.872 +				}
  25.873 +				case MP_BUS_NEC98: /* NEC 98 pin */
  25.874 +				{
  25.875 +					polarity = default_NEC98_polarity(idx);
  25.876 +					break;
  25.877 +				}
  25.878 +				default:
  25.879 +				{
  25.880 +					printk(KERN_WARNING "broken BIOS!!\n");
  25.881 +					polarity = 1;
  25.882 +					break;
  25.883 +				}
  25.884 +			}
  25.885 +			break;
  25.886 +		}
  25.887 +		case 1: /* high active */
  25.888 +		{
  25.889 +			polarity = 0;
  25.890 +			break;
  25.891 +		}
  25.892 +		case 2: /* reserved */
  25.893 +		{
  25.894 +			printk(KERN_WARNING "broken BIOS!!\n");
  25.895 +			polarity = 1;
  25.896 +			break;
  25.897 +		}
  25.898 +		case 3: /* low active */
  25.899 +		{
  25.900 +			polarity = 1;
  25.901 +			break;
  25.902 +		}
  25.903 +		default: /* invalid */
  25.904 +		{
  25.905 +			printk(KERN_WARNING "broken BIOS!!\n");
  25.906 +			polarity = 1;
  25.907 +			break;
  25.908 +		}
  25.909 +	}
  25.910 +	return polarity;
  25.911 +}
  25.912 +
  25.913 +static int MPBIOS_trigger(int idx)
  25.914 +{
  25.915 +	int bus = mp_irqs[idx].mpc_srcbus;
  25.916 +	int trigger;
  25.917 +
  25.918 +	/*
  25.919 +	 * Determine IRQ trigger mode (edge or level sensitive):
  25.920 +	 */
  25.921 +	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
  25.922 +	{
  25.923 +		case 0: /* conforms, ie. bus-type dependent */
  25.924 +		{
  25.925 +			switch (mp_bus_id_to_type[bus])
  25.926 +			{
  25.927 +				case MP_BUS_ISA: /* ISA pin */
  25.928 +				{
  25.929 +					trigger = default_ISA_trigger(idx);
  25.930 +					break;
  25.931 +				}
  25.932 +				case MP_BUS_EISA: /* EISA pin */
  25.933 +				{
  25.934 +					trigger = default_EISA_trigger(idx);
  25.935 +					break;
  25.936 +				}
  25.937 +				case MP_BUS_PCI: /* PCI pin */
  25.938 +				{
  25.939 +					trigger = default_PCI_trigger(idx);
  25.940 +					break;
  25.941 +				}
  25.942 +				case MP_BUS_MCA: /* MCA pin */
  25.943 +				{
  25.944 +					trigger = default_MCA_trigger(idx);
  25.945 +					break;
  25.946 +				}
  25.947 +				case MP_BUS_NEC98: /* NEC 98 pin */
  25.948 +				{
  25.949 +					trigger = default_NEC98_trigger(idx);
  25.950 +					break;
  25.951 +				}
  25.952 +				default:
  25.953 +				{
  25.954 +					printk(KERN_WARNING "broken BIOS!!\n");
  25.955 +					trigger = 1;
  25.956 +					break;
  25.957 +				}
  25.958 +			}
  25.959 +			break;
  25.960 +		}
  25.961 +		case 1: /* edge */
  25.962 +		{
  25.963 +			trigger = 0;
  25.964 +			break;
  25.965 +		}
  25.966 +		case 2: /* reserved */
  25.967 +		{
  25.968 +			printk(KERN_WARNING "broken BIOS!!\n");
  25.969 +			trigger = 1;
  25.970 +			break;
  25.971 +		}
  25.972 +		case 3: /* level */
  25.973 +		{
  25.974 +			trigger = 1;
  25.975 +			break;
  25.976 +		}
  25.977 +		default: /* invalid */
  25.978 +		{
  25.979 +			printk(KERN_WARNING "broken BIOS!!\n");
  25.980 +			trigger = 0;
  25.981 +			break;
  25.982 +		}
  25.983 +	}
  25.984 +	return trigger;
  25.985 +}
  25.986 +
/* Effective polarity for MP-table entry idx: 0 = active high, 1 = active low. */
static inline int irq_polarity(int idx)
{
	return MPBIOS_polarity(idx);
}
  25.991 +
/* Effective trigger mode for MP-table entry idx: 0 = edge, 1 = level. */
static inline int irq_trigger(int idx)
{
	return MPBIOS_trigger(idx);
}
  25.996 +
  25.997  static int pin_2_irq(int idx, int apic, int pin)
  25.998  {
  25.999  	int irq, i;
 25.1000 @@ -250,26 +1158,41 @@ static int pin_2_irq(int idx, int apic, 
 25.1001  	return irq;
 25.1002  }
 25.1003  
 25.1004 +static inline int IO_APIC_irq_trigger(int irq)
 25.1005 +{
 25.1006 +	int apic, idx, pin;
 25.1007 +
 25.1008 +	for (apic = 0; apic < nr_ioapics; apic++) {
 25.1009 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 25.1010 +			idx = find_irq_entry(apic,pin,mp_INT);
 25.1011 +			if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
 25.1012 +				return irq_trigger(idx);
 25.1013 +		}
 25.1014 +	}
 25.1015 +	/*
 25.1016 +	 * nonexistent IRQs are edge default
 25.1017 +	 */
 25.1018 +	return 0;
 25.1019 +}
 25.1020 +
 25.1021  /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 25.1022 -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
 25.1023 +u8 irq_vector[NR_IRQ_VECTORS]; /* = { FIRST_DEVICE_VECTOR , 0 }; */
 25.1024  
 25.1025  int assign_irq_vector(int irq)
 25.1026  {
 25.1027  	static int current_vector = FIRST_DEVICE_VECTOR;
 25.1028 -        physdev_op_t op;
 25.1029 -        int ret;
 25.1030 +	physdev_op_t op;
 25.1031  
 25.1032  	BUG_ON(irq >= NR_IRQ_VECTORS);
 25.1033  	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
 25.1034  		return IO_APIC_VECTOR(irq);
 25.1035  
 25.1036 -        op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
 25.1037 -        op.u.irq_op.irq = irq;
 25.1038 -        ret = HYPERVISOR_physdev_op(&op);
 25.1039 -        if (ret)
 25.1040 -            return -ENOSPC;
 25.1041 +	op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
 25.1042 +	op.u.irq_op.irq = irq;
 25.1043 +	if (HYPERVISOR_physdev_op(&op))
 25.1044 +		return -ENOSPC;
 25.1045 +	current_vector = op.u.irq_op.vector;
 25.1046  
 25.1047 -        current_vector = op.u.irq_op.vector;
 25.1048  	vector_irq[current_vector] = irq;
 25.1049  	if (irq != AUTO_ASSIGN)
 25.1050  		IO_APIC_VECTOR(irq) = current_vector;
 25.1051 @@ -277,12 +1200,1268 @@ int assign_irq_vector(int irq)
 25.1052  	return current_vector;
 25.1053  }
 25.1054  
 25.1055 +#ifndef CONFIG_XEN
 25.1056 +static struct hw_interrupt_type ioapic_level_type;
 25.1057 +static struct hw_interrupt_type ioapic_edge_type;
 25.1058 +
 25.1059 +#define IOAPIC_AUTO	-1
 25.1060 +#define IOAPIC_EDGE	0
 25.1061 +#define IOAPIC_LEVEL	1
 25.1062 +
 25.1063 +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 25.1064 +{
 25.1065 +	if (use_pci_vector() && !platform_legacy_irq(irq)) {
 25.1066 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 25.1067 +				trigger == IOAPIC_LEVEL)
 25.1068 +			irq_desc[vector].handler = &ioapic_level_type;
 25.1069 +		else
 25.1070 +			irq_desc[vector].handler = &ioapic_edge_type;
 25.1071 +		set_intr_gate(vector, interrupt[vector]);
 25.1072 +	} else	{
 25.1073 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 25.1074 +				trigger == IOAPIC_LEVEL)
 25.1075 +			irq_desc[irq].handler = &ioapic_level_type;
 25.1076 +		else
 25.1077 +			irq_desc[irq].handler = &ioapic_edge_type;
 25.1078 +		set_intr_gate(vector, interrupt[irq]);
 25.1079 +	}
 25.1080 +}
 25.1081 +#else
 25.1082 +#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
 25.1083 +#endif
 25.1084 +
/*
 * Program every IO-APIC redirection-table entry from the MP table:
 * assign vectors, register edge/level handlers, and write the routing
 * entries.  Pins with no MP-table entry are only reported (verbose).
 */
void __init setup_IO_APIC_irqs(void)
{
	struct IO_APIC_route_entry entry;
	int apic, pin, idx, irq, first_notcon = 1, vector;
	unsigned long flags;

	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");

	for (apic = 0; apic < nr_ioapics; apic++) {
	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {

		/*
		 * add it to the IO-APIC irq-routing table:
		 */
		memset(&entry,0,sizeof(entry));

		entry.delivery_mode = INT_DELIVERY_MODE;
		entry.dest_mode = INT_DEST_MODE;
		entry.mask = 0;				/* enable IRQ */
		entry.dest.logical.logical_dest = 
					cpu_mask_to_apicid(TARGET_CPUS);

		idx = find_irq_entry(apic,pin,mp_INT);
		if (idx == -1) {
			/* Pin not described by the MP table: just log it
			 * (first hit prints the header, later hits append). */
			if (first_notcon) {
				apic_printk(APIC_VERBOSE, KERN_DEBUG
						" IO-APIC (apicid-pin) %d-%d",
						mp_ioapics[apic].mpc_apicid,
						pin);
				first_notcon = 0;
			} else
				apic_printk(APIC_VERBOSE, ", %d-%d",
					mp_ioapics[apic].mpc_apicid, pin);
			continue;
		}

		entry.trigger = irq_trigger(idx);
		entry.polarity = irq_polarity(idx);

		/* Level-triggered entries start out masked. */
		if (irq_trigger(idx)) {
			entry.trigger = 1;
			entry.mask = 1;
		}

		irq = pin_2_irq(idx, apic, pin);
		/*
		 * skip adding the timer int on secondary nodes, which causes
		 * a small but painful rift in the time-space continuum
		 */
		if (multi_timer_check(apic, irq))
			continue;
		else
			add_pin_to_irq(irq, apic, pin);

		if (/*!apic &&*/ !IO_APIC_IRQ(irq))
			continue;

		/* NOTE(review): IO_APIC_IRQ(irq) is necessarily true here
		 * given the `continue` above; the re-test is redundant. */
		if (IO_APIC_IRQ(irq)) {
			vector = assign_irq_vector(irq);
			entry.vector = vector;
			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
		
			/* Legacy IRQs move from the 8259A to the IO-APIC. */
			if (!apic && (irq < 16))
				disable_8259A_irq(irq);
		}
		/* Write the route entry, high dword (0x11) first. */
		spin_lock_irqsave(&ioapic_lock, flags);
		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
		spin_unlock_irqrestore(&ioapic_lock, flags);
	}
	}

	if (!first_notcon)
		apic_printk(APIC_VERBOSE, " not connected.\n");
}
 25.1160 +
/*
 * Set up the 8259A-master output pin:
 */
#ifndef CONFIG_XEN
/*
 * Route the timer interrupt (IRQ0, delivered via the 8259A master's
 * ExtINT output) through IO-APIC pin @pin with the given @vector.
 * The 8259A line is kept disabled and LVT0 masked while the entry is
 * written, then IRQ0 is re-enabled.
 */
void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
{
	struct IO_APIC_route_entry entry;
	unsigned long flags;

	memset(&entry,0,sizeof(entry));

	disable_8259A_irq(0);

	/* mask LVT0 */
	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);

	/*
	 * We use logical delivery to get the timer IRQ
	 * to the first CPU.
	 */
	entry.dest_mode = INT_DEST_MODE;
	entry.mask = 0;					/* unmask IRQ now */
	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
	entry.delivery_mode = INT_DELIVERY_MODE;
	entry.polarity = 0;
	entry.trigger = 0;
	entry.vector = vector;

	/*
	 * The timer IRQ doesn't have to know that behind the
	 * scene we have a 8259A-master in AEOI mode ...
	 */
	irq_desc[0].handler = &ioapic_edge_type;

	/*
	 * Add it to the IO-APIC irq-routing table:
	 * (high dword 0x11 written before low dword 0x10)
	 */
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
	io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
	spin_unlock_irqrestore(&ioapic_lock, flags);

	enable_8259A_irq(0);
}
 25.1205 +
/*
 * Hook called by print_IO_APIC() whenever a register value falls
 * outside the known/expected ranges; intentionally an empty
 * placeholder (the dump itself is the diagnostic).
 */
static inline void UNEXPECTED_IO_APIC(void)
{
}
 25.1209 +
 25.1210 +void __init print_IO_APIC(void)
 25.1211 +{
 25.1212 +	int apic, i;
 25.1213 +	union IO_APIC_reg_00 reg_00;
 25.1214 +	union IO_APIC_reg_01 reg_01;
 25.1215 +	union IO_APIC_reg_02 reg_02;
 25.1216 +	union IO_APIC_reg_03 reg_03;
 25.1217 +	unsigned long flags;
 25.1218 +
 25.1219 +	if (apic_verbosity == APIC_QUIET)
 25.1220 +		return;
 25.1221 +
 25.1222 + 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 25.1223 +	for (i = 0; i < nr_ioapics; i++)
 25.1224 +		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
 25.1225 +		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
 25.1226 +
 25.1227 +	/*
 25.1228 +	 * We are a bit conservative about what we expect.  We have to
 25.1229 +	 * know about every hardware change ASAP.
 25.1230 +	 */
 25.1231 +	printk(KERN_INFO "testing the IO APIC.......................\n");
 25.1232 +
 25.1233 +	for (apic = 0; apic < nr_ioapics; apic++) {
 25.1234 +
 25.1235 +	spin_lock_irqsave(&ioapic_lock, flags);
 25.1236 +	reg_00.raw = io_apic_read(apic, 0);
 25.1237 +	reg_01.raw = io_apic_read(apic, 1);
 25.1238 +	if (reg_01.bits.version >= 0x10)
 25.1239 +		reg_02.raw = io_apic_read(apic, 2);
 25.1240 +	if (reg_01.bits.version >= 0x20)
 25.1241 +		reg_03.raw = io_apic_read(apic, 3);
 25.1242 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 25.1243 +
 25.1244 +	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
 25.1245 +	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 25.1246 +	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 25.1247 +	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 25.1248 +	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 25.1249 +	if (reg_00.bits.ID >= get_physical_broadcast())
 25.1250 +		UNEXPECTED_IO_APIC();
 25.1251 +	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
 25.1252 +		UNEXPECTED_IO_APIC();
 25.1253 +
 25.1254 +	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
 25.1255 +	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
 25.1256 +	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
 25.1257 +		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
 25.1258 +		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
 25.1259 +		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
 25.1260 +		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
 25.1261 +		(reg_01.bits.entries != 0x2E) &&
 25.1262 +		(reg_01.bits.entries != 0x3F)
 25.1263 +	)
 25.1264 +		UNEXPECTED_IO_APIC();
 25.1265 +
 25.1266 +	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 25.1267 +	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 25.1268 +	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
 25.1269 +		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
 25.1270 +		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
 25.1271 +		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
 25.1272 +		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
 25.1273 +	)
 25.1274 +		UNEXPECTED_IO_APIC();
 25.1275 +	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
 25.1276 +		UNEXPECTED_IO_APIC();
 25.1277 +
 25.1278 +	/*
 25.1279 +	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
 25.1280 +	 * but the value of reg_02 is read as the previous read register
 25.1281 +	 * value, so ignore it if reg_02 == reg_01.
 25.1282 +	 */
 25.1283 +	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 25.1284 +		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 25.1285 +		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 25.1286 +		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
 25.1287 +			UNEXPECTED_IO_APIC();
 25.1288 +	}
 25.1289 +
 25.1290 +	/*
 25.1291 +	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
 25.1292 +	 * or reg_03, but the value of reg_0[23] is read as the previous read
 25.1293 +	 * register value, so ignore it if reg_03 == reg_0[12].
 25.1294 +	 */
 25.1295 +	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
 25.1296 +	    reg_03.raw != reg_01.raw) {
 25.1297 +		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
 25.1298 +		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
 25.1299 +		if (reg_03.bits.__reserved_1)
 25.1300 +			UNEXPECTED_IO_APIC();
 25.1301 +	}
 25.1302 +
 25.1303 +	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 25.1304 +
 25.1305 +	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
 25.1306 +			  " Stat Dest Deli Vect:   \n");
 25.1307 +
 25.1308 +	for (i = 0; i <= reg_01.bits.entries; i++) {
 25.1309 +		struct IO_APIC_route_entry entry;
 25.1310 +
 25.1311 +		spin_lock_irqsave(&ioapic_lock, flags);
 25.1312 +		*(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
 25.1313 +		*(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
 25.1314 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 25.1315 +
 25.1316 +		printk(KERN_DEBUG " %02x %03X %02X  ",
 25.1317 +			i,
 25.1318 +			entry.dest.logical.logical_dest,
 25.1319 +			entry.dest.physical.physical_dest
 25.1320 +		);
 25.1321 +
 25.1322 +		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
 25.1323 +			entry.mask,
 25.1324 +			entry.trigger,
 25.1325 +			entry.irr,
 25.1326 +			entry.polarity,
 25.1327 +			entry.delivery_status,
 25.1328 +			entry.dest_mode,
 25.1329 +			entry.delivery_mode,
 25.1330 +			entry.vector
 25.1331 +		);
 25.1332 +	}
 25.1333 +	}
 25.1334 +	if (use_pci_vector())
 25.1335 +		printk(KERN_INFO "Using vector-based indexing\n");
 25.1336 +	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 25.1337 +	for (i = 0; i < NR_IRQS; i++) {
 25.1338 +		struct irq_pin_list *entry = irq_2_pin + i;
 25.1339 +		if (entry->pin < 0)
 25.1340 +			continue;
 25.1341 + 		if (use_pci_vector() && !platform_legacy_irq(i))
 25.1342 +			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
 25.1343 +		else
 25.1344 +			printk(KERN_DEBUG "IRQ%d ", i);
 25.1345 +		for (;;) {
 25.1346 +			printk("-> %d:%d", entry->apic, entry->pin);
 25.1347 +			if (!entry->next)
 25.1348 +				break;
 25.1349 +			entry = irq_2_pin + entry->next;
 25.1350 +		}
 25.1351 +		printk("\n");
 25.1352 +	}
 25.1353 +
 25.1354 +	printk(KERN_INFO ".................................... done.\n");
 25.1355 +
 25.1356 +	return;
 25.1357 +}
 25.1358 +
 25.1359 +static void print_APIC_bitfield (int base)
 25.1360 +{
 25.1361 +	unsigned int v;
 25.1362 +	int i, j;
 25.1363 +
 25.1364 +	if (apic_verbosity == APIC_QUIET)
 25.1365 +		return;
 25.1366 +
 25.1367 +	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
 25.1368 +	for (i = 0; i < 8; i++) {
 25.1369 +		v = apic_read(base + i*0x10);
 25.1370 +		for (j = 0; j < 32; j++) {
 25.1371 +			if (v & (1<<j))
 25.1372 +				printk("1");
 25.1373 +			else
 25.1374 +				printk("0");
 25.1375 +		}
 25.1376 +		printk("\n");
 25.1377 +	}
 25.1378 +}
 25.1379 +
 25.1380 +void /*__init*/ print_local_APIC(void * dummy)
 25.1381 +{
 25.1382 +	unsigned int v, ver, maxlvt;
 25.1383 +
 25.1384 +	if (apic_verbosity == APIC_QUIET)
 25.1385 +		return;
 25.1386 +
 25.1387 +	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 25.1388 +		smp_processor_id(), hard_smp_processor_id());
 25.1389 +	v = apic_read(APIC_ID);
 25.1390 +	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
 25.1391 +	v = apic_read(APIC_LVR);
 25.1392 +	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 25.1393 +	ver = GET_APIC_VERSION(v);
 25.1394 +	maxlvt = get_maxlvt();
 25.1395 +
 25.1396 +	v = apic_read(APIC_TASKPRI);
 25.1397 +	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 25.1398 +
 25.1399 +	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
 25.1400 +		v = apic_read(APIC_ARBPRI);
 25.1401 +		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
 25.1402 +			v & APIC_ARBPRI_MASK);
 25.1403 +		v = apic_read(APIC_PROCPRI);
 25.1404 +		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 25.1405 +	}
 25.1406 +
 25.1407 +	v = apic_read(APIC_EOI);
 25.1408 +	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
 25.1409 +	v = apic_read(APIC_RRR);
 25.1410 +	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
 25.1411 +	v = apic_read(APIC_LDR);
 25.1412 +	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
 25.1413 +	v = apic_read(APIC_DFR);
 25.1414 +	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
 25.1415 +	v = apic_read(APIC_SPIV);
 25.1416 +	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 25.1417 +
 25.1418 +	printk(KERN_DEBUG "... APIC ISR field:\n");
 25.1419 +	print_APIC_bitfield(APIC_ISR);
 25.1420 +	printk(KERN_DEBUG "... APIC TMR field:\n");
 25.1421 +	print_APIC_bitfield(APIC_TMR);
 25.1422 +	printk(KERN_DEBUG "... APIC IRR field:\n");
 25.1423 +	print_APIC_bitfield(APIC_IRR);
 25.1424 +
 25.1425 +	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
 25.1426 +		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 25.1427 +			apic_write(APIC_ESR, 0);
 25.1428 +		v = apic_read(APIC_ESR);
 25.1429 +		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 25.1430 +	}
 25.1431 +
 25.1432 +	v = apic_read(APIC_ICR);
 25.1433 +	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 25.1434 +	v = apic_read(APIC_ICR2);
 25.1435 +	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 25.1436 +
 25.1437 +	v = apic_read(APIC_LVTT);
 25.1438 +	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
 25.1439 +
 25.1440 +	if (maxlvt > 3) {                       /* PC is LVT#4. */
 25.1441 +		v = apic_read(APIC_LVTPC);
 25.1442 +		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
 25.1443 +	}
 25.1444 +	v = apic_read(APIC_LVT0);
 25.1445 +	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
 25.1446 +	v = apic_read(APIC_LVT1);
 25.1447 +	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
 25.1448 +
 25.1449 +	if (maxlvt > 2) {			/* ERR is LVT#3. */
 25.1450 +		v = apic_read(APIC_LVTERR);
 25.1451 +		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
 25.1452 +	}
 25.1453 +
 25.1454 +	v = apic_read(APIC_TMICT);
 25.1455 +	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
 25.1456 +	v = apic_read(APIC_TMCCT);
 25.1457 +	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 25.1458 +	v = apic_read(APIC_TDCR);
 25.1459 +	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
 25.1460 +	printk("\n");
 25.1461 +}
 25.1462 +
 25.1463 +void print_all_local_APICs (void)
 25.1464 +{
 25.1465 +	on_each_cpu(print_local_APIC, NULL, 1, 1);
 25.1466 +}
 25.1467 +
 25.1468 +void /*__init*/ print_PIC(void)
 25.1469 +{
 25.1470 +	extern spinlock_t i8259A_lock;
 25.1471 +	unsigned int v;
 25.1472 +	unsigned long flags;
 25.1473 +
 25.1474 +	if (apic_verbosity == APIC_QUIET)
 25.1475 +		return;
 25.1476 +
 25.1477 +	printk(KERN_DEBUG "\nprinting PIC contents\n");
 25.1478 +
 25.1479 +	spin_lock_irqsave(&i8259A_lock, flags);
 25.1480 +
 25.1481 +	v = inb(0xa1) << 8 | inb(0x21);
 25.1482 +	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
 25.1483 +
 25.1484 +	v = inb(0xa0) << 8 | inb(0x20);
 25.1485 +	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
 25.1486 +
 25.1487 +	outb(0x0b,0xa0);
 25.1488 +	outb(0x0b,0x20);
 25.1489 +	v = inb(0xa0) << 8 | inb(0x20);
 25.1490 +	outb(0x0a,0xa0);
 25.1491 +	outb(0x0a,0x20);
 25.1492 +
 25.1493 +	spin_unlock_irqrestore(&i8259A_lock, flags);
 25.1494 +
 25.1495 +	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
 25.1496 +
 25.1497 +	v = inb(0x4d1) << 8 | inb(0x4d0);
 25.1498 +	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 25.1499 +}
 25.1500 +#else
 25.1501 +void __init print_IO_APIC(void) { }
 25.1502 +#endif /* !CONFIG_XEN */
 25.1503 +
 25.1504 +static void __init enable_IO_APIC(void)
 25.1505 +{
 25.1506 +	union IO_APIC_reg_01 reg_01;
 25.1507 +	int i;
 25.1508 +	unsigned long flags;
 25.1509 +
 25.1510 +	for (i = 0; i < PIN_MAP_SIZE; i++) {
 25.1511 +		irq_2_pin[i].pin = -1;
 25.1512 +		irq_2_pin[i].next = 0;
 25.1513 +	}
 25.1514 +	if (!pirqs_enabled)
 25.1515 +		for (i = 0; i < MAX_PIRQS; i++)
 25.1516 +			pirq_entries[i] = -1;
 25.1517 +
 25.1518 +	/*
 25.1519 +	 * The number of IO-APIC IRQ registers (== #pins):
 25.1520 +	 */
 25.1521 +	for (i = 0; i < nr_ioapics; i++) {
 25.1522 +		spin_lock_irqsave(&ioapic_lock, flags);
 25.1523 +		reg_01.raw = io_apic_read(i, 1);
 25.1524 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 25.1525 +		nr_ioapic_registers[i] = reg_01.bits.entries+1;
 25.1526 +	}
 25.1527 +
 25.1528 +	/*
 25.1529 +	 * Do not trust the IO-APIC being empty at bootup
 25.1530 +	 */
 25.1531 +	clear_IO_APIC();
 25.1532 +}
 25.1533 +
 25.1534 +/*
 25.1535 + * Not an __init, needed by the reboot code
 25.1536 + */
 25.1537 +void disable_IO_APIC(void)
 25.1538 +{
 25.1539 +	/*
 25.1540 +	 * Clear the IO-APIC before rebooting:
 25.1541 +	 */
 25.1542 +	clear_IO_APIC();
 25.1543 +
 25.1544 +#ifndef CONFIG_XEN
 25.1545 +	disconnect_bsp_APIC();
 25.1546 +#endif
 25.1547 +}
 25.1548 +
 25.1549 +/*
 25.1550 + * function to set the IO-APIC physical IDs based on the
 25.1551 + * values stored in the MPC table.
 25.1552 + *
 25.1553 + * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
 25.1554 + */
 25.1555 +
 25.1556 +#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
 25.1557 +static void __init setup_ioapic_ids_from_mpc(void)
 25.1558 +{
 25.1559 +	union IO_APIC_reg_00 reg_00;
 25.1560 +	physid_mask_t phys_id_present_map;
 25.1561 +	int apic;
 25.1562 +	int i;
 25.1563 +	unsigned char old_id;
 25.1564 +	unsigned long flags;
 25.1565 +
 25.1566 +	/*
 25.1567 +	 * This is broken; anything with a real cpu count has to
 25.1568 +	 * circumvent this idiocy regardless.
 25.1569 +	 */
 25.1570 +	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
 25.1571 +
 25.1572 +	/*
 25.1573 +	 * Set the IOAPIC ID to the value stored in the MPC table.
 25.1574 +	 */
 25.1575 +	for (apic = 0; apic < nr_ioapics; apic++) {
 25.1576 +
 25.1577 +		/* Read the register 0 value */
 25.1578 +		spin_lock_irqsave(&ioapic_lock, flags);
 25.1579 +		reg_00.raw = io_apic_read(apic, 0);
 25.1580 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 25.1581 +		
 25.1582 +		old_id = mp_ioapics[apic].mpc_apicid;
 25.1583 +
 25.1584 +		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
 25.1585 +			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
 25.1586 +				apic, mp_ioapics[apic].mpc_apicid);
 25.1587 +			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 25.1588 +				reg_00.bits.ID);
 25.1589 +			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
 25.1590 +		}
 25.1591 +
 25.1592 +		/* Don't check I/O APIC IDs for some xAPIC systems.  They have
 25.1593 +		 * no meaning without the serial APIC bus. */
 25.1594 +		if (NO_IOAPIC_CHECK)
 25.1595 +			continue;
 25.1596 +		/*
 25.1597 +		 * Sanity check, is the ID really free? Every APIC in a
 25.1598 +		 * system must have a unique ID or we get lots of nice
 25.1599 +		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 25.1600 +		 */
 25.1601 +		if (check_apicid_used(phys_id_present_map,
 25.1602 +					mp_ioapics[apic].mpc_apicid)) {
 25.1603 +			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
 25.1604 +				apic, mp_ioapics[apic].mpc_apicid);
 25.1605 +			for (i = 0; i < get_physical_broadcast(); i++)
 25.1606 +				if (!physid_isset(i, phys_id_present_map))
 25.1607 +					break;
 25.1608 +			if (i >= get_physical_broadcast())
 25.1609 +				panic("Max APIC ID exceeded!\n");
 25.1610 +			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 25.1611 +				i);
 25.1612 +			physid_set(i, phys_id_present_map);
 25.1613 +			mp_ioapics[apic].mpc_apicid = i;
 25.1614 +		} else {
 25.1615 +			physid_mask_t tmp;
 25.1616 +			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
 25.1617 +			apic_printk(APIC_VERBOSE, "Setting %d in the "
 25.1618 +					"phys_id_present_map\n",
 25.1619 +					mp_ioapics[apic].mpc_apicid);
 25.1620 +			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 25.1621 +		}
 25.1622 +
 25.1623 +
 25.1624 +		/*
 25.1625 +		 * We need to adjust the IRQ routing table
 25.1626 +		 * if the ID changed.
 25.1627 +		 */
 25.1628 +		if (old_id != mp_ioapics[apic].mpc_apicid)
 25.1629 +			for (i = 0; i < mp_irq_entries; i++)
 25.1630 +				if (mp_irqs[i].mpc_dstapic == old_id)
 25.1631 +					mp_irqs[i].mpc_dstapic
 25.1632 +						= mp_ioapics[apic].mpc_apicid;
 25.1633 +
 25.1634 +		/*
 25.1635 +		 * Read the right value from the MPC table and
 25.1636 +		 * write it into the ID register.
 25.1637 +	 	 */
 25.1638 +		apic_printk(APIC_VERBOSE, KERN_INFO
 25.1639 +			"...changing IO-APIC physical APIC ID to %d ...",
 25.1640 +			mp_ioapics[apic].mpc_apicid);
 25.1641 +
 25.1642 +		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
 25.1643 +		spin_lock_irqsave(&ioapic_lock, flags);
 25.1644 +		io_apic_write(apic, 0, reg_00.raw);
 25.1645 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 25.1646 +
 25.1647 +		/*
 25.1648 +		 * Sanity check
 25.1649 +		 */
 25.1650 +		spin_lock_irqsave(&ioapic_lock, flags);
 25.1651 +		reg_00.raw = io_apic_read(apic, 0);
 25.1652 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 25.1653 +		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
 25.1654 +			printk("could not set ID!\n");
 25.1655 +		else
 25.1656 +			apic_printk(APIC_VERBOSE, " ok.\n");
 25.1657 +	}
 25.1658 +}
 25.1659 +#else
 25.1660 +static void __init setup_ioapic_ids_from_mpc(void) { }
 25.1661 +#endif
 25.1662 +
 25.1663 +#ifndef CONFIG_XEN
 25.1664 +/*
 25.1665 + * There is a nasty bug in some older SMP boards, their mptable lies
 25.1666 + * about the timer IRQ. We do the following to work around the situation:
 25.1667 + *
 25.1668 + *	- timer IRQ defaults to IO-APIC IRQ
 25.1669 + *	- if this function detects that timer IRQs are defunct, then we fall
 25.1670 + *	  back to ISA timer IRQs
 25.1671 + */
 25.1672 +static int __init timer_irq_works(void)
 25.1673 +{
 25.1674 +	unsigned long t1 = jiffies;
 25.1675 +
 25.1676 +	local_irq_enable();
 25.1677 +	/* Let ten ticks pass... */
 25.1678 +	mdelay((10 * 1000) / HZ);
 25.1679 +
 25.1680 +	/*
 25.1681 +	 * Expect a few ticks at least, to be sure some possible
 25.1682 +	 * glue logic does not lock up after one or two first
 25.1683 +	 * ticks in a non-ExtINT mode.  Also the local APIC
 25.1684 +	 * might have cached one ExtINT interrupt.  Finally, at
 25.1685 +	 * least one tick may be lost due to delays.
 25.1686 +	 */
 25.1687 +	if (jiffies - t1 > 4)
 25.1688 +		return 1;
 25.1689 +
 25.1690 +	return 0;
 25.1691 +}
 25.1692 +
 25.1693 +/*
 25.1694 + * In the SMP+IOAPIC case it might happen that there are an unspecified
 25.1695 + * number of pending IRQ events unhandled. These cases are very rare,
 25.1696 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
 25.1697 + * better to do it this way as thus we do not have to be aware of
 25.1698 + * 'pending' interrupts in the IRQ path, except at this point.
 25.1699 + */
 25.1700 +/*
 25.1701 + * Edge triggered needs to resend any interrupt
 25.1702 + * that was delayed but this is now handled in the device
 25.1703 + * independent code.
 25.1704 + */
 25.1705 +
 25.1706 +/*
 25.1707 + * Starting up a edge-triggered IO-APIC interrupt is
 25.1708 + * nasty - we need to make sure that we get the edge.
 25.1709 + * If it is already asserted for some reason, we need
 25.1710 + * return 1 to indicate that is was pending.
 25.1711 + *
 25.1712 + * This is not complete - we should be able to fake
 25.1713 + * an edge even if it isn't on the 8259A...
 25.1714 + */
 25.1715 +static unsigned int startup_edge_ioapic_irq(unsigned int irq)
 25.1716 +{
 25.1717 +	int was_pending = 0;
 25.1718 +	unsigned long flags;
 25.1719 +
 25.1720 +	spin_lock_irqsave(&ioapic_lock, flags);
 25.1721 +	if (irq < 16) {
 25.1722 +		disable_8259A_irq(irq);
 25.1723 +		if (i8259A_irq_pending(irq))
 25.1724 +			was_pending = 1;
 25.1725 +	}
 25.1726 +	__unmask_IO_APIC_irq(irq);
 25.1727 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 25.1728 +
 25.1729 +	return was_pending;
 25.1730 +}
 25.1731 +
 25.1732 +/*
 25.1733 + * Once we have recorded IRQ_PENDING already, we can mask the
 25.1734 + * interrupt for real. This prevents IRQ storms from unhandled
 25.1735 + * devices.
 25.1736 + */
 25.1737 +static void ack_edge_ioapic_irq(unsigned int irq)
 25.1738 +{
 25.1739 +	move_irq(irq);
 25.1740 +	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
 25.1741 +					== (IRQ_PENDING | IRQ_DISABLED))
 25.1742 +		mask_IO_APIC_irq(irq);
 25.1743 +	ack_APIC_irq();
 25.1744 +}
 25.1745 +
 25.1746 +/*
 25.1747 + * Level triggered interrupts can just be masked,
 25.1748 + * and shutting down and starting up the interrupt
 25.1749 + * is the same as enabling and disabling them -- except
 25.1750 + * with a startup need to return a "was pending" value.
 25.1751 + *
 25.1752 + * Level triggered interrupts are special because we
 25.1753 + * do not touch any IO-APIC register while handling
 25.1754 + * them. We ack the APIC in the end-IRQ handler, not
 25.1755 + * in the start-IRQ-handler. Protection against reentrance
 25.1756 + * from the same interrupt is still provided, both by the
 25.1757 + * generic IRQ layer and by the fact that an unacked local
 25.1758 + * APIC does not accept IRQs.
 25.1759 + */
 25.1760 +static unsigned int startup_level_ioapic_irq (unsigned int irq)
 25.1761 +{
 25.1762 +	unmask_IO_APIC_irq(irq);
 25.1763 +
 25.1764 +	return 0; /* don't check for pending */
 25.1765 +}
 25.1766 +
 25.1767 +static void end_level_ioapic_irq (unsigned int irq)
 25.1768 +{
 25.1769 +	unsigned long v;
 25.1770 +	int i;
 25.1771 +
 25.1772 +	move_irq(irq);
 25.1773 +/*
 25.1774 + * It appears there is an erratum which affects at least version 0x11
 25.1775 + * of I/O APIC (that's the 82093AA and cores integrated into various
 25.1776 + * chipsets).  Under certain conditions a level-triggered interrupt is
 25.1777 + * erroneously delivered as edge-triggered one but the respective IRR
 25.1778 + * bit gets set nevertheless.  As a result the I/O unit expects an EOI
 25.1779 + * message but it will never arrive and further interrupts are blocked
 25.1780 + * from the source.  The exact reason is so far unknown, but the
 25.1781 + * phenomenon was observed when two consecutive interrupt requests
 25.1782 + * from a given source get delivered to the same CPU and the source is
 25.1783 + * temporarily disabled in between.
 25.1784 + *
 25.1785 + * A workaround is to simulate an EOI message manually.  We achieve it
 25.1786 + * by setting the trigger mode to edge and then to level when the edge
 25.1787 + * trigger mode gets detected in the TMR of a local APIC for a
 25.1788 + * level-triggered interrupt.  We mask the source for the time of the
 25.1789 + * operation to prevent an edge-triggered interrupt escaping meanwhile.
 25.1790 + * The idea is from Manfred Spraul.  --macro
 25.1791 + */
 25.1792 +	i = IO_APIC_VECTOR(irq);
 25.1793 +
 25.1794 +	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 25.1795 +
 25.1796 +	ack_APIC_irq();
 25.1797 +
 25.1798 +	if (!(v & (1 << (i & 0x1f)))) {
 25.1799 +		atomic_inc(&irq_mis_count);
 25.1800 +		spin_lock(&ioapic_lock);
 25.1801 +		__mask_and_edge_IO_APIC_irq(irq);
 25.1802 +		__unmask_and_level_IO_APIC_irq(irq);
 25.1803 +		spin_unlock(&ioapic_lock);
 25.1804 +	}
 25.1805 +}
 25.1806 +
 25.1807 +#ifdef CONFIG_PCI_MSI
 25.1808 +static unsigned int startup_edge_ioapic_vector(unsigned int vector)
 25.1809 +{
 25.1810 +	int irq = vector_to_irq(vector);
 25.1811 +
 25.1812 +	return startup_edge_ioapic_irq(irq);
 25.1813 +}
 25.1814 +
 25.1815 +static void ack_edge_ioapic_vector(unsigned int vector)
 25.1816 +{
 25.1817 +	int irq = vector_to_irq(vector);
 25.1818 +
 25.1819 +	ack_edge_ioapic_irq(irq);
 25.1820 +}
 25.1821 +
 25.1822 +static unsigned int startup_level_ioapic_vector (unsigned int vector)
 25.1823 +{
 25.1824 +	int irq = vector_to_irq(vector);
 25.1825 +
 25.1826 +	return startup_level_ioapic_irq (irq);
 25.1827 +}
 25.1828 +
 25.1829 +static void end_level_ioapic_vector (unsigned int vector)
 25.1830 +{
 25.1831 +	int irq = vector_to_irq(vector);
 25.1832 +
 25.1833 +	end_level_ioapic_irq(irq);
 25.1834 +}
 25.1835 +
 25.1836 +static void mask_IO_APIC_vector (unsigned int vector)
 25.1837 +{
 25.1838 +	int irq = vector_to_irq(vector);
 25.1839 +
 25.1840 +	mask_IO_APIC_irq(irq);
 25.1841 +}
 25.1842 +
 25.1843 +static void unmask_IO_APIC_vector (unsigned int vector)
 25.1844 +{
 25.1845 +	int irq = vector_to_irq(vector);
 25.1846 +
 25.1847 +	unmask_IO_APIC_irq(irq);
 25.1848 +}
 25.1849 +
 25.1850 +static void set_ioapic_affinity_vector (unsigned int vector,
 25.1851 +					cpumask_t cpu_mask)
 25.1852 +{
 25.1853 +	int irq = vector_to_irq(vector);
 25.1854 +
 25.1855 +	set_ioapic_affinity_irq(irq, cpu_mask);
 25.1856 +}
 25.1857 +#endif
 25.1858 +
 25.1859 +/*
 25.1860 + * Level and edge triggered IO-APIC interrupts need different handling,
 25.1861 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be
 25.1862 + * handled with the level-triggered descriptor, but that one has slightly
 25.1863 + * more overhead. Level-triggered interrupts cannot be handled with the
 25.1864 + * edge-triggered handler, without risking IRQ storms and other ugly
 25.1865 + * races.
 25.1866 + */
 25.1867 +static struct hw_interrupt_type ioapic_edge_type = {
 25.1868 +	.typename 	= "IO-APIC-edge",
 25.1869 +	.startup 	= startup_edge_ioapic,
 25.1870 +	.shutdown 	= shutdown_edge_ioapic,
 25.1871 +	.enable 	= enable_edge_ioapic,
 25.1872 +	.disable 	= disable_edge_ioapic,
 25.1873 +	.ack 		= ack_edge_ioapic,
 25.1874 +	.end 		= end_edge_ioapic,
 25.1875 +	.set_affinity 	= set_ioapic_affinity,
 25.1876 +};
 25.1877 +
 25.1878 +static struct hw_interrupt_type ioapic_level_type = {
 25.1879 +	.typename 	= "IO-APIC-level",
 25.1880 +	.startup 	= startup_level_ioapic,
 25.1881 +	.shutdown 	= shutdown_level_ioapic,
 25.1882 +	.enable 	= enable_level_ioapic,
 25.1883 +	.disable 	= disable_level_ioapic,
 25.1884 +	.ack 		= mask_and_ack_level_ioapic,
 25.1885 +	.end 		= end_level_ioapic,
 25.1886 +	.set_affinity 	= set_ioapic_affinity,
 25.1887 +};
 25.1888 +#endif /* !CONFIG_XEN */
 25.1889 +
 25.1890 +static inline void init_IO_APIC_traps(void)
 25.1891 +{
 25.1892 +	int irq;
 25.1893 +
 25.1894 +	/*
 25.1895 +	 * NOTE! The local APIC isn't very good at handling
 25.1896 +	 * multiple interrupts at the same interrupt level.
 25.1897 +	 * As the interrupt level is determined by taking the
 25.1898 +	 * vector number and shifting that right by 4, we
 25.1899 +	 * want to spread these out a bit so that they don't
 25.1900 +	 * all fall in the same interrupt level.
 25.1901 +	 *
 25.1902 +	 * Also, we've got to be careful not to trash gate
 25.1903 +	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 25.1904 +	 */
 25.1905 +	for (irq = 0; irq < NR_IRQS ; irq++) {
 25.1906 +		int tmp = irq;
 25.1907 +		if (use_pci_vector()) {
 25.1908 +			if (!platform_legacy_irq(tmp))
 25.1909 +				if ((tmp = vector_to_irq(tmp)) == -1)
 25.1910 +					continue;
 25.1911 +		}
 25.1912 +		if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
 25.1913 +			/*
 25.1914 +			 * Hmm.. We don't have an entry for this,
 25.1915 +			 * so default to an old-fashioned 8259
 25.1916 +			 * interrupt if we can..
 25.1917 +			 */
 25.1918 +			if (irq < 16)
 25.1919 +				make_8259A_irq(irq);
 25.1920 +#ifndef CONFIG_XEN
 25.1921 +			else
 25.1922 +				/* Strange. Oh, well.. */
 25.1923 +				irq_desc[irq].handler = &no_irq_type;
 25.1924 +#endif
 25.1925 +		}
 25.1926 +	}
 25.1927 +}
 25.1928 +
 25.1929 +#ifndef CONFIG_XEN
 25.1930 +static void enable_lapic_irq (unsigned int irq)
 25.1931 +{
 25.1932 +	unsigned long v;
 25.1933 +
 25.1934 +	v = apic_read(APIC_LVT0);
 25.1935 +	apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
 25.1936 +}
 25.1937 +
 25.1938 +static void disable_lapic_irq (unsigned int irq)
 25.1939 +{
 25.1940 +	unsigned long v;
 25.1941 +
 25.1942 +	v = apic_read(APIC_LVT0);
 25.1943 +	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
 25.1944 +}
 25.1945 +
 25.1946 +static void ack_lapic_irq (unsigned int irq)
 25.1947 +{
 25.1948 +	ack_APIC_irq();
 25.1949 +}
 25.1950 +
 25.1951 +static void end_lapic_irq (unsigned int i) { /* nothing */ }
 25.1952 +
 25.1953 +static struct hw_interrupt_type lapic_irq_type = {
 25.1954 +	.typename 	= "local-APIC-edge",
 25.1955 +	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
 25.1956 +	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
 25.1957 +	.enable 	= enable_lapic_irq,
 25.1958 +	.disable 	= disable_lapic_irq,
 25.1959 +	.ack 		= ack_lapic_irq,
 25.1960 +	.end 		= end_lapic_irq
 25.1961 +};
 25.1962 +
 25.1963 +static void setup_nmi (void)
 25.1964 +{
 25.1965 +	/*
 25.1966 + 	 * Dirty trick to enable the NMI watchdog ...
 25.1967 +	 * We put the 8259A master into AEOI mode and
 25.1968 +	 * unmask on all local APICs LVT0 as NMI.
 25.1969 +	 *
 25.1970 +	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
 25.1971 +	 * is from Maciej W. Rozycki - so we do not have to EOI from
 25.1972 +	 * the NMI handler or the timer interrupt.
 25.1973 +	 */ 
 25.1974 +	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 25.1975 +
 25.1976 +	on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
 25.1977 +
 25.1978 +	apic_printk(APIC_VERBOSE, " done.\n");
 25.1979 +}
 25.1980 +
 25.1981 +/*
 25.1982 + * This looks a bit hackish but it's about the only one way of sending
 25.1983 + * a few INTA cycles to 8259As and any associated glue logic.  ICR does
 25.1984 + * not support the ExtINT mode, unfortunately.  We need to send these
 25.1985 + * cycles as some i82489DX-based boards have glue logic that keeps the
 25.1986 + * 8259A interrupt line asserted until INTA.  --macro
 25.1987 + */
 25.1988 +static inline void unlock_ExtINT_logic(void)
 25.1989 +{
 25.1990 +	int pin, i;
 25.1991 +	struct IO_APIC_route_entry entry0, entry1;
 25.1992 +	unsigned char save_control, save_freq_select;
 25.1993 +	unsigned long flags;
 25.1994 +
 25.1995 +	pin = find_isa_irq_pin(8, mp_INT);
 25.1996 +	if (pin == -1)
 25.1997 +		return;
 25.1998 +
 25.1999 +	spin_lock_irqsave(&ioapic_lock, flags);
 25.2000 +	*(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
 25.2001 +	*(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
 25.2002 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 25.2003 +	clear_IO_APIC_pin(0, pin);
 25.2004 +
 25.2005 +	memset(&entry1, 0, sizeof(entry1));
 25.2006 +
 25.2007 +	entry1.dest_mode = 0;			/* physical delivery */
 25.2008 +	entry1.mask = 0;			/* unmask IRQ now */
 25.2009 +	entry1.dest.physical.physical_dest = hard_smp_processor_id();
 25.2010 +	entry1.delivery_mode = dest_ExtINT;
 25.2011 +	entry1.polarity = entry0.polarity;
 25.2012 +	entry1.trigger = 0;
 25.2013 +	entry1.vector = 0;
 25.2014 +
 25.2015 +	spin_lock_irqsave(&ioapic_lock, flags);
 25.2016 +	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
 25.2017 +	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
 25.2018 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 25.2019 +
 25.2020 +	save_control = CMOS_READ(RTC_CONTROL);
 25.2021 +	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
 25.2022 +	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
 25.2023 +		   RTC_FREQ_SELECT);
 25.2024 +	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
 25.2025 +
 25.2026 +	i = 100;
 25.2027 +	while (i-- > 0) {
 25.2028 +		mdelay(10);
 25.2029 +		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
 25.2030 +			i -= 10;
 25.2031 +	}
 25.2032 +
 25.2033 +	CMOS_WRITE(save_control, RTC_CONTROL);
 25.2034 +	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
 25.2035 +	clear_IO_APIC_pin(0, pin);
 25.2036 +
 25.2037 +	spin_lock_irqsave(&ioapic_lock, flags);
 25.2038 +	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
 25.2039 +	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
 25.2040 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 25.2041 +}
 25.2042 +
 25.2043 +/*
 25.2044 + * This code may look a bit paranoid, but it's supposed to cooperate with
 25.2045 + * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
 25.2046 + * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
 25.2047 + * fanatically on his truly buggy board.
 25.2048 + */
 25.2049 +static inline void check_timer(void)
 25.2050 +{
 25.2051 +	int pin1, pin2;
 25.2052 +	int vector;
 25.2053 +
 25.2054 +	/*
 25.2055 +	 * get/set the timer IRQ vector:
 25.2056 +	 */
 25.2057 +	disable_8259A_irq(0);
 25.2058 +	vector = assign_irq_vector(0);
 25.2059 +	set_intr_gate(vector, interrupt[0]);
 25.2060 +
 25.2061 +	/*
 25.2062 +	 * Subtle, code in do_timer_interrupt() expects an AEOI
 25.2063 +	 * mode for the 8259A whenever interrupts are routed
 25.2064 +	 * through I/O APICs.  Also IRQ0 has to be enabled in
 25.2065 +	 * the 8259A which implies the virtual wire has to be
 25.2066 +	 * disabled in the local APIC.
 25.2067 +	 */
 25.2068 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 25.2069 +	init_8259A(1);
 25.2070 +	timer_ack = 1;
 25.2071 +	enable_8259A_irq(0);
 25.2072 +
 25.2073 +	pin1 = find_isa_irq_pin(0, mp_INT);
 25.2074 +	pin2 = find_isa_irq_pin(0, mp_ExtINT);
 25.2075 +
 25.2076 +	printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
 25.2077 +
 25.2078 +	if (pin1 != -1) {
 25.2079 +		/*
 25.2080 +		 * Ok, does IRQ0 through the IOAPIC work?
 25.2081 +		 */
 25.2082 +		unmask_IO_APIC_irq(0);
 25.2083 +		if (timer_irq_works()) {
 25.2084 +			if (nmi_watchdog == NMI_IO_APIC) {
 25.2085 +				disable_8259A_irq(0);
 25.2086 +				setup_nmi();
 25.2087 +				enable_8259A_irq(0);
 25.2088 +				check_nmi_watchdog();
 25.2089 +			}
 25.2090 +			return;
 25.2091 +		}
 25.2092 +		clear_IO_APIC_pin(0, pin1);
 25.2093 +		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
 25.2094 +	}
 25.2095 +
 25.2096 +	printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
 25.2097 +	if (pin2 != -1) {
 25.2098 +		printk("\n..... (found pin %d) ...", pin2);
 25.2099 +		/*
 25.2100 +		 * legacy devices should be connected to IO APIC #0
 25.2101 +		 */
 25.2102 +		setup_ExtINT_IRQ0_pin(pin2, vector);
 25.2103 +		if (timer_irq_works()) {
 25.2104 +			printk("works.\n");
 25.2105 +			if (pin1 != -1)
 25.2106 +				replace_pin_at_irq(0, 0, pin1, 0, pin2);
 25.2107 +			else
 25.2108 +				add_pin_to_irq(0, 0, pin2);
 25.2109 +			if (nmi_watchdog == NMI_IO_APIC) {
 25.2110 +				setup_nmi();
 25.2111 +				check_nmi_watchdog();
 25.2112 +			}
 25.2113 +			return;
 25.2114 +		}
 25.2115 +		/*
 25.2116 +		 * Cleanup, just in case ...
 25.2117 +		 */
 25.2118 +		clear_IO_APIC_pin(0, pin2);
 25.2119 +	}
 25.2120 +	printk(" failed.\n");
 25.2121 +
 25.2122 +	if (nmi_watchdog == NMI_IO_APIC) {
 25.2123 +		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
 25.2124 +		nmi_watchdog = 0;
 25.2125 +	}
 25.2126 +
 25.2127 +	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 25.2128 +
 25.2129 +	disable_8259A_irq(0);
 25.2130 +	irq_desc[0].handler = &lapic_irq_type;
 25.2131 +	apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
 25.2132 +	enable_8259A_irq(0);
 25.2133 +
 25.2134 +	if (timer_irq_works()) {
 25.2135 +		printk(" works.\n");
 25.2136 +		return;
 25.2137 +	}
 25.2138 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
 25.2139 +	printk(" failed.\n");
 25.2140 +
 25.2141 +	printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
 25.2142 +
 25.2143 +	timer_ack = 0;
 25.2144 +	init_8259A(0);
 25.2145 +	make_8259A_irq(0);
 25.2146 +	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
 25.2147 +
 25.2148 +	unlock_ExtINT_logic();
 25.2149 +
 25.2150 +	if (timer_irq_works()) {
 25.2151 +		printk(" works.\n");
 25.2152 +		return;
 25.2153 +	}
 25.2154 +	printk(" failed :(.\n");
 25.2155 +	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 25.2156 +		"report.  Then try booting with the 'noapic' option");
 25.2157 +}
 25.2158 +#else
 25.2159 +#define check_timer() ((void)0)
 25.2160 +#endif
 25.2161 +
 25.2162 +/*
 25.2163 + *
 25.2164 + * IRQ's that are handled by the PIC in the MPS IOAPIC case.
 25.2165 + * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
 25.2166 + *   Linux doesn't really care, as it's not actually used
 25.2167 + *   for any interrupt handling anyway.
 25.2168 + */
 25.2169 +#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 25.2170 +
 25.2171 +void __init setup_IO_APIC(void)
 25.2172 +{
 25.2173 +	enable_IO_APIC();
 25.2174 +
 25.2175 +	if (acpi_ioapic)
 25.2176 +		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
 25.2177 +	else
 25.2178 +		io_apic_irqs = ~PIC_IRQS;
 25.2179 +
 25.2180 +	printk("ENABLING IO-APIC IRQs\n");
 25.2181 +
 25.2182 +	/*
 25.2183 +	 * Set up IO-APIC IRQ routing.
 25.2184 +	 */
 25.2185 +	if (!acpi_ioapic)
 25.2186 +		setup_ioapic_ids_from_mpc();
 25.2187 +#ifndef CONFIG_XEN
 25.2188 +	sync_Arb_IDs();
 25.2189 +#endif
 25.2190 +	setup_IO_APIC_irqs();
 25.2191 +	init_IO_APIC_traps();
 25.2192 +	check_timer();
 25.2193 +	if (!acpi_ioapic)
 25.2194 +		print_IO_APIC();
 25.2195 +}
 25.2196 +
 25.2197 +/*
 25.2198 + *	Called after all the initialization is done. If we didnt find any
 25.2199 + *	APIC bugs then we can allow the modify fast path
 25.2200 + */
 25.2201 + 
 25.2202 +static int __init io_apic_bug_finalize(void)
 25.2203 +{
 25.2204 +	if(sis_apic_bug == -1)
 25.2205 +		sis_apic_bug = 0;
 25.2206 +	return 0;
 25.2207 +}
 25.2208 +
 25.2209 +late_initcall(io_apic_bug_finalize);
 25.2210 +
 25.2211 +struct sysfs_ioapic_data {
 25.2212 +	struct sys_device dev;
 25.2213 +	struct IO_APIC_route_entry entry[0];
 25.2214 +};
 25.2215 +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
 25.2216 +
 25.2217 +static int ioapic_suspend(struct sys_device *dev, u32 state)
 25.2218 +{
 25.2219 +	struct IO_APIC_route_entry *entry;
 25.2220 +	struct sysfs_ioapic_data *data;
 25.2221 +	unsigned long flags;
 25.2222 +	int i;
 25.2223 +	
 25.2224 +	data = container_of(dev, struct sysfs_ioapic_data, dev);
 25.2225 +	entry = data->entry;
 25.2226 +	spin_lock_irqsave(&ioapic_lock, flags);
 25.2227 +	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
 25.2228 +		*(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
 25.2229 +		*(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
 25.2230 +	}
 25.2231 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 25.2232 +
 25.2233 +	return 0;
 25.2234 +}
 25.2235 +
 25.2236 +static int ioapic_resume(struct sys_device *dev)
 25.2237 +{
 25.2238 +	struct IO_APIC_route_entry *entry;
 25.2239 +	struct sysfs_ioapic_data *data;
 25.2240 +	unsigned long flags;
 25.2241 +	union IO_APIC_reg_00 reg_00;
 25.2242 +	int i;
 25.2243 +	
 25.2244 +	data = container_of(dev, struct sysfs_ioapic_data, dev);
 25.2245 +	entry = data->entry;
 25.2246 +
 25.2247 +	spin_lock_irqsave(&ioapic_lock, flags);
 25.2248 +	reg_00.raw = io_apic_read(dev->id, 0);
 25.2249 +	if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
 25.2250 +		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
 25.2251 +		io_apic_write(dev->id, 0, reg_00.raw);
 25.2252 +	}
 25.2253 +	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
 25.2254 +		io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
 25.2255 +		io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
 25.2256 +	}
 25.2257 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 25.2258 +
 25.2259 +	return 0;
 25.2260 +}
 25.2261 +
 25.2262 +static struct sysdev_class ioapic_sysdev_class = {
 25.2263 +	set_kset_name("ioapic"),
 25.2264 +	.suspend = ioapic_suspend,
 25.2265 +	.resume = ioapic_resume,
 25.2266 +};
 25.2267 +
 25.2268 +static int __init ioapic_init_sysfs(void)
 25.2269 +{
 25.2270 +	struct sys_device * dev;
 25.2271 +	int i, size, error = 0;
 25.2272 +
 25.2273 +	error = sysdev_class_register(&ioapic_sysdev_class);
 25.2274 +	if (error)
 25.2275 +		return error;
 25.2276 +
 25.2277 +	for (i = 0; i < nr_ioapics; i++ ) {
 25.2278 +		size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
 25.2279 +			* sizeof(struct IO_APIC_route_entry);
 25.2280 +		mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
 25.2281 +		if (!mp_ioapic_data[i]) {
 25.2282 +			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
 25.2283 +			continue;
 25.2284 +		}
 25.2285 +		memset(mp_ioapic_data[i], 0, size);
 25.2286 +		dev = &mp_ioapic_data[i]->dev;
 25.2287 +		dev->id = i; 
 25.2288 +		dev->cls = &ioapic_sysdev_class;
 25.2289 +		error = sysdev_register(dev);
 25.2290 +		if (error) {
 25.2291 +			kfree(mp_ioapic_data[i]);
 25.2292 +			mp_ioapic_data[i] = NULL;
 25.2293 +			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
 25.2294 +			continue;
 25.2295 +		}
 25.2296 +	}
 25.2297 +
 25.2298 +	return 0;
 25.2299 +}
 25.2300 +
 25.2301 +device_initcall(ioapic_init_sysfs);
 25.2302 +
 25.2303 +/* --------------------------------------------------------------------------
 25.2304 +                          ACPI-based IOAPIC Configuration
 25.2305 +   -------------------------------------------------------------------------- */
 25.2306 +
 25.2307  #ifdef CONFIG_ACPI_BOOT
 25.2308 +
 25.2309  int __init io_apic_get_unique_id (int ioapic, int apic_id)
 25.2310  {
 25.2311 +#ifndef CONFIG_XEN
 25.2312  	union IO_APIC_reg_00 reg_00;
 25.2313  	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
 25.2314 +	physid_mask_t tmp;
 25.2315  	unsigned long flags;
 25.2316 +	int i = 0;
 25.2317  
 25.2318  	/*
 25.2319  	 * The P4 platform supports up to 256 APIC IDs on two separate APIC 
 25.2320 @@ -306,8 +2485,45 @@ int __init io_apic_get_unique_id (int io
 25.2321  		apic_id = reg_00.bits.ID;
 25.2322  	}
 25.2323  
 25.2324 +	/*
 25.2325 +	 * Every APIC in a system must have a unique ID or we get lots of nice 
 25.2326 +	 * 'stuck on smp_invalidate_needed IPI wait' messages.
 25.2327 +	 */
 25.2328 +	if (check_apicid_used(apic_id_map, apic_id)) {
 25.2329 +
 25.2330 +		for (i = 0; i < get_physical_broadcast(); i++) {
 25.2331 +			if (!check_apicid_used(apic_id_map, i))
 25.2332 +				break;
 25.2333 +		}
 25.2334 +
 25.2335 +		if (i == get_physical_broadcast())
 25.2336 +			panic("Max apic_id exceeded!\n");
 25.2337 +
 25.2338 +		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
 25.2339 +			"trying %d\n", ioapic, apic_id, i);
 25.2340 +
 25.2341 +		apic_id = i;
 25.2342 +	} 
 25.2343 +
 25.2344 +	tmp = apicid_to_cpu_present(apic_id);
 25.2345 +	physids_or(apic_id_map, apic_id_map, tmp);
 25.2346 +
 25.2347 +	if (reg_00.bits.ID != apic_id) {
 25.2348 +		reg_00.bits.ID = apic_id;
 25.2349 +
 25.2350 +		spin_lock_irqsave(&ioapic_lock, flags);
 25.2351 +		io_apic_write(ioapic, 0, reg_00.raw);
 25.2352 +		reg_00.raw = io_apic_read(ioapic, 0);
 25.2353 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 25.2354 +
 25.2355 +		/* Sanity check */
 25.2356 +		if (reg_00.bits.ID != apic_id)
 25.2357 +			panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
 25.2358 +	}
 25.2359 +
 25.2360  	apic_printk(APIC_VERBOSE, KERN_INFO
 25.2361  			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 25.2362 +#endif /* !CONFIG_XEN */
 25.2363  
 25.2364  	return apic_id;
 25.2365  }
 25.2366 @@ -338,6 +2554,7 @@ int __init io_apic_get_redir_entries (in
 25.2367  	return reg_01.bits.entries;
 25.2368  }
 25.2369  
 25.2370 +
 25.2371  int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
 25.2372  {
 25.2373  	struct IO_APIC_route_entry entry;
 25.2374 @@ -377,12 +2594,10 @@ int io_apic_set_pci_routing (int ioapic,
 25.2375  		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
 25.2376  		edge_level, active_high_low);
 25.2377  
 25.2378 -#ifndef CONFIG_XEN
 25.2379  	ioapic_register_intr(irq, entry.vector, edge_level);
 25.2380  
 25.2381  	if (!ioapic && (irq < 16))
 25.2382  		disable_8259A_irq(irq);
 25.2383 -#endif
 25.2384  
 25.2385  	spin_lock_irqsave(&ioapic_lock, flags);
 25.2386  	io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
 25.2387 @@ -391,4 +2606,5 @@ int io_apic_set_pci_routing (int ioapic,
 25.2388  
 25.2389  	return 0;
 25.2390  }
 25.2391 +
 25.2392  #endif /*CONFIG_ACPI_BOOT*/
    26.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c	Tue May 03 14:42:00 2005 +0000
    26.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c	Tue May 10 08:49:59 2005 +0000
    26.3 @@ -109,7 +109,7 @@ static int MP_valid_apicid(int apicid, i
    26.4  {
    26.5  	return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
    26.6  }
    26.7 -#else
    26.8 +#elif !defined(CONFIG_XEN)
    26.9  static int MP_valid_apicid(int apicid, int version)
   26.10  {
   26.11  	if (version >= 0x14)
   26.12 @@ -119,6 +119,7 @@ static int MP_valid_apicid(int apicid, i
   26.13  }
   26.14  #endif
   26.15  
   26.16 +#ifndef CONFIG_XEN
   26.17  void __init MP_processor_info (struct mpc_config_processor *m)
   26.18  {
   26.19   	int ver, apicid;
   26.20 @@ -217,6 +218,12 @@ void __init MP_processor_info (struct mp
   26.21  	apic_version[m->mpc_apicid] = ver;
   26.22  	bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
   26.23  }
   26.24 +#else
   26.25 +void __init MP_processor_info (struct mpc_config_processor *m)
   26.26 +{
   26.27 +	num_processors++;
   26.28 +}
   26.29 +#endif /* CONFIG_XEN */
   26.30  
   26.31  static void __init MP_bus_info (struct mpc_config_bus *m)
   26.32  {
   26.33 @@ -690,7 +697,7 @@ void __init get_smp_config (void)
   26.34  		 * Read the physical hardware table.  Anything here will
   26.35  		 * override the defaults.
   26.36  		 */
   26.37 -		if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
   26.38 +		if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
   26.39  			smp_found_config = 0;
   26.40  			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
   26.41  			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
   26.42 @@ -743,7 +750,6 @@ static int __init smp_scan_config (unsig
   26.43  			smp_found_config = 1;
   26.44  			printk(KERN_INFO "found SMP MP-table at %08lx\n",
   26.45  						virt_to_phys(mpf));
   26.46 -			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
   26.47  			if (mpf->mpf_physptr) {
   26.48  				/*
   26.49  				 * We cannot access to MPC table to compute
   26.50 @@ -817,12 +823,14 @@ void __init find_smp_config (void)
   26.51  void __init mp_register_lapic_address (
   26.52  	u64			address)
   26.53  {
   26.54 +#ifndef CONFIG_XEN
   26.55  	mp_lapic_addr = (unsigned long) address;
   26.56  
   26.57  	if (boot_cpu_physical_apicid == -1U)
   26.58  		boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
   26.59  
   26.60  	Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
   26.61 +#endif
   26.62  }
   26.63  
   26.64  
   26.65 @@ -842,6 +850,7 @@ void __init mp_register_lapic (
   26.66  	if (id == boot_cpu_physical_apicid)
   26.67  		boot_cpu = 1;
   26.68  
   26.69 +#ifndef CONFIG_XEN
   26.70  	processor.mpc_type = MP_PROCESSOR;
   26.71  	processor.mpc_apicid = id;
   26.72  	processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
   26.73 @@ -852,6 +861,7 @@ void __init mp_register_lapic (
   26.74  	processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
   26.75  	processor.mpc_reserved[0] = 0;
   26.76  	processor.mpc_reserved[1] = 0;
   26.77 +#endif
   26.78  
   26.79  	MP_processor_info(&processor);
   26.80  }
    27.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c	Tue May 03 14:42:00 2005 +0000
    27.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c	Tue May 10 08:49:59 2005 +0000
    27.3 @@ -779,7 +779,7 @@ static void __init parse_cmdline_early (
    27.4  			noexec_setup(from + 7);
    27.5  
    27.6  
    27.7 -#ifdef  CONFIG_X86_SMP
    27.8 +#ifdef  CONFIG_X86_MPPARSE
    27.9  		/*
   27.10  		 * If the BIOS enumerates physical processors before logical,
   27.11  		 * maxcpus=N at enumeration-time can be used to disable HT.
   27.12 @@ -1133,12 +1133,6 @@ static unsigned long __init setup_memory
   27.13  	 */
   27.14  	acpi_reserve_bootmem();
   27.15  #endif
   27.16 -#ifdef CONFIG_X86_FIND_SMP_CONFIG
   27.17 -	/*
   27.18 -	 * Find and reserve possible boot-time SMP configuration:
   27.19 -	 */
   27.20 -	find_smp_config();
   27.21 -#endif
   27.22  
   27.23  #ifdef CONFIG_BLK_DEV_INITRD
   27.24  	if (xen_start_info.mod_start) {
   27.25 @@ -1503,6 +1497,13 @@ void __init setup_arch(char **cmdline_p)
   27.26  #endif
   27.27  	paging_init();
   27.28  
   27.29 +#ifdef CONFIG_X86_FIND_SMP_CONFIG
   27.30 +	/*
   27.31 +	 * Find and reserve possible boot-time SMP configuration:
   27.32 +	 */
   27.33 +	find_smp_config();
   27.34 +#endif
   27.35 +
   27.36  	/* Make sure we have a correctly sized P->M table. */
   27.37  	if (max_pfn != xen_start_info.nr_pages) {
   27.38  		phys_to_machine_mapping = alloc_bootmem_low_pages(
   27.39 @@ -1566,6 +1567,10 @@ void __init setup_arch(char **cmdline_p)
   27.40  	if (efi_enabled)
   27.41  		efi_map_memmap();
   27.42  
   27.43 +	op.cmd             = PHYSDEVOP_SET_IOPL;
   27.44 +	op.u.set_iopl.iopl = current->thread.io_pl = 1;
   27.45 +	HYPERVISOR_physdev_op(&op);
   27.46 +
   27.47  	/*
   27.48  	 * Parse the ACPI tables for possible boot-time SMP configuration.
   27.49  	 */
   27.50 @@ -1583,10 +1588,6 @@ void __init setup_arch(char **cmdline_p)
   27.51  
   27.52  	register_memory();
   27.53  
   27.54 -	op.cmd             = PHYSDEVOP_SET_IOPL;
   27.55 -	op.u.set_iopl.iopl = current->thread.io_pl = 1;
   27.56 -	HYPERVISOR_physdev_op(&op);
   27.57 -
   27.58  	if (xen_start_info.flags & SIF_INITDOMAIN) {
   27.59  		if (!(xen_start_info.flags & SIF_PRIVILEGED))
   27.60  			panic("Xen granted us console access "
    28.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c	Tue May 03 14:42:00 2005 +0000
    28.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c	Tue May 10 08:49:59 2005 +0000
    28.3 @@ -361,6 +361,7 @@ static void __init pagetable_init (void)
    28.4  	make_page_writable(old_pgd);
    28.5  	__flush_tlb_all();
    28.6  	free_bootmem(__pa(old_pgd), PAGE_SIZE);
    28.7 +	init_mm.context.pinned = 1;
    28.8  
    28.9  	kernel_physical_mapping_init(pgd_base);
   28.10  	remap_numa_kva();
   28.11 @@ -562,7 +563,7 @@ void __init paging_init(void)
   28.12  	zone_sizes_init();
   28.13  
   28.14  	/* Switch to the real shared_info page, and clear the dummy page. */
   28.15 -	set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info);
   28.16 +	set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
   28.17  	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   28.18  	memset(empty_zero_page, 0, sizeof(empty_zero_page));
   28.19  
   28.20 @@ -570,10 +571,11 @@ void __init paging_init(void)
   28.21  	/* Setup mapping of lower 1st MB */
   28.22  	for (i = 0; i < NR_FIX_ISAMAPS; i++)
   28.23  		if (xen_start_info.flags & SIF_PRIVILEGED)
   28.24 -			set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
   28.25 +			set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
   28.26  		else
   28.27 -			set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i,
   28.28 -					 virt_to_machine(empty_zero_page));
   28.29 +			__set_fixmap(FIX_ISAMAP_BEGIN - i,
   28.30 +				     virt_to_machine(empty_zero_page),
   28.31 +				     PAGE_KERNEL_RO);
   28.32  #endif
   28.33  }
   28.34  
    29.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c	Tue May 03 14:42:00 2005 +0000
    29.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c	Tue May 10 08:49:59 2005 +0000
    29.3 @@ -256,7 +256,7 @@ void __init *bt_ioremap(unsigned long ph
    29.4  	 */
    29.5  	idx = FIX_BTMAP_BEGIN;
    29.6  	while (nrpages > 0) {
    29.7 -		set_fixmap_ma(idx, phys_addr);
    29.8 +		set_fixmap(idx, phys_addr);
    29.9  		phys_addr += PAGE_SIZE;
   29.10  		--idx;
   29.11  		--nrpages;
    30.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue May 03 14:42:00 2005 +0000
    30.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue May 10 08:49:59 2005 +0000
    30.3 @@ -22,6 +22,7 @@
    30.4  #include <asm/tlb.h>
    30.5  #include <asm/tlbflush.h>
    30.6  #include <asm/io.h>
    30.7 +#include <asm/mmu_context.h>
    30.8  
    30.9  #include <asm-xen/foreign_page.h>
   30.10  
   30.11 @@ -176,18 +177,18 @@ void __set_fixmap (enum fixed_addresses 
   30.12  		BUG();
   30.13  		return;
   30.14  	}
   30.15 -	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
   30.16 -}
   30.17 -
   30.18 -void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
   30.19 -{
   30.20 -	unsigned long address = __fix_to_virt(idx);
   30.21 -
   30.22 -	if (idx >= __end_of_fixed_addresses) {
   30.23 -		BUG();
   30.24 -		return;
   30.25 +	switch (idx) {
   30.26 +	case FIX_WP_TEST:
   30.27 +	case FIX_VSYSCALL:
   30.28 +#ifdef CONFIG_X86_F00F_BUG
   30.29 +	case FIX_F00F_IDT:
   30.30 +#endif
   30.31 +		set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
   30.32 +		break;
   30.33 +	default:
   30.34 +		set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
   30.35 +		break;
   30.36  	}
   30.37 -	set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
   30.38  }
   30.39  
   30.40  pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
   30.41 @@ -480,7 +481,6 @@ void mm_unpin(struct mm_struct *mm)
   30.42  
   30.43  void _arch_exit_mmap(struct mm_struct *mm)
   30.44  {
   30.45 -    unsigned int cpu = smp_processor_id();
   30.46      struct task_struct *tsk = current;
   30.47  
   30.48      task_lock(tsk);
   30.49 @@ -494,9 +494,7 @@ void _arch_exit_mmap(struct mm_struct *m
   30.50          tsk->active_mm = &init_mm;
   30.51          atomic_inc(&init_mm.mm_count);
   30.52  
   30.53 -        cpu_set(cpu, init_mm.cpu_vm_mask);
   30.54 -        load_cr3(swapper_pg_dir);
   30.55 -        cpu_clear(cpu, mm->cpu_vm_mask);
   30.56 +        switch_mm(mm, &init_mm, tsk);
   30.57  
   30.58          atomic_dec(&mm->mm_count);
   30.59          BUG_ON(atomic_read(&mm->mm_count) == 0);
    31.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile	Tue May 03 14:42:00 2005 +0000
    31.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile	Tue May 10 08:49:59 2005 +0000
    31.3 @@ -11,7 +11,8 @@ c-obj-$(CONFIG_PCI_DIRECT)	+= direct.o
    31.4  c-pci-y				:= fixup.o
    31.5  c-pci-$(CONFIG_ACPI_PCI)	+= acpi.o
    31.6  c-pci-y				+= legacy.o
    31.7 -c-pci-y				+= irq.o
    31.8 +# Make sure irq.o gets linked in after legacy.o
    31.9 +l-pci-y				+= irq.o
   31.10  
   31.11  c-pci-$(CONFIG_X86_VISWS)	:= visws.o fixup.o
   31.12  pci-$(CONFIG_X86_VISWS)		:=
   31.13 @@ -26,6 +27,6 @@ c-link	:=
   31.14  $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
   31.15  	@ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
   31.16  
   31.17 -obj-y	+= $(c-obj-y)
   31.18 +obj-y	+= $(c-obj-y) $(l-pci-y)
   31.19  
   31.20  clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
    32.1 --- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	Tue May 03 14:42:00 2005 +0000
    32.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	Tue May 10 08:49:59 2005 +0000
    32.3 @@ -20,10 +20,6 @@
    32.4  #include <asm-xen/linux-public/privcmd.h>
    32.5  #include <asm-xen/gnttab.h>
    32.6  
    32.7 -#ifndef set_fixmap_ma
    32.8 -#define set_fixmap_ma set_fixmap
    32.9 -#endif
   32.10 -
   32.11  #if 1
   32.12  #define ASSERT(_p) \
   32.13      if ( !(_p) ) { printk(KERN_ALERT"Assertion '%s': line %d, file %s\n", \
   32.14 @@ -339,7 +335,7 @@ int gnttab_resume(void)
   32.15      BUG_ON(setup.status != 0);
   32.16  
   32.17      for ( i = 0; i < NR_GRANT_FRAMES; i++ )
   32.18 -        set_fixmap_ma(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
   32.19 +        set_fixmap(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
   32.20  
   32.21      return 0;
   32.22  }
    33.1 --- a/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c	Tue May 03 14:42:00 2005 +0000
    33.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c	Tue May 10 08:49:59 2005 +0000
    33.3 @@ -129,11 +129,7 @@ static void __do_suspend(void)
    33.4      memcpy(&xen_start_info, &suspend_record->resume_info,
    33.5             sizeof(xen_start_info));
    33.6  
    33.7 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    33.8 -    set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info);
    33.9 -#else
   33.10      set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
   33.11 -#endif
   33.12  
   33.13      HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   33.14  
    34.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c	Tue May 03 14:42:00 2005 +0000
    34.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c	Tue May 10 08:49:59 2005 +0000
    34.3 @@ -361,7 +361,14 @@ void __set_fixmap (enum fixed_addresses 
    34.4  		printk("Invalid __set_fixmap\n");
    34.5  		return;
    34.6  	}
    34.7 -	set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL);
    34.8 +	switch (idx) {
    34.9 +	case VSYSCALL_FIRST_PAGE:
   34.10 +		set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL);
   34.11 +		break;
   34.12 +	default:
   34.13 +		set_pte_phys_ma(address, phys, prot);
   34.14 +		break;
   34.15 +	}
   34.16  }
   34.17  
   34.18  
   34.19 @@ -414,18 +421,6 @@ void __set_fixmap_user (enum fixed_addre
   34.20  
   34.21  }
   34.22  
   34.23 -
   34.24 -void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
   34.25 -{ 
   34.26 -	unsigned long address = __fix_to_virt(idx);
   34.27 -
   34.28 -	if (idx >= __end_of_fixed_addresses) {
   34.29 -		printk("Invalid __set_fixmap\n");
   34.30 -		return;
   34.31 -	}
   34.32 -	set_pte_phys_ma(address, phys, prot);
   34.33 -}
   34.34 -
   34.35  unsigned long __initdata table_start, table_end, tables_reserved; 
   34.36  
   34.37  #if 0
   34.38 @@ -632,8 +627,7 @@ void __init paging_init(void)
   34.39  		free_area_init(zones_size);
   34.40  	}
   34.41  
   34.42 -        __set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info, 
   34.43 -                        __pgprot(_KERNPG_TABLE));
   34.44 +        set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
   34.45          HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   34.46  
   34.47          memset(empty_zero_page, 0, sizeof(empty_zero_page));
   34.48 @@ -642,11 +636,11 @@ void __init paging_init(void)
   34.49          /* Setup mapping of lower 1st MB */
   34.50          for (i = 0; i < NR_FIX_ISAMAPS; i++)
   34.51                  if (xen_start_info.flags & SIF_PRIVILEGED)
   34.52 -                        __set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE,
   34.53 -                                __pgprot(_KERNPG_TABLE));
   34.54 +                        set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
   34.55                  else
   34.56 -                        set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i,
   34.57 -                                         virt_to_machine(empty_zero_page));
   34.58 +                        __set_fixmap(FIX_ISAMAP_BEGIN - i,
   34.59 +				     virt_to_machine(empty_zero_page),
   34.60 +				     PAGE_KERNEL_RO);
   34.61  #endif
   34.62  
   34.63  }
    35.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c	Tue May 03 14:42:00 2005 +0000
    35.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c	Tue May 10 08:49:59 2005 +0000
    35.3 @@ -273,7 +273,7 @@ void __init *bt_ioremap(unsigned long ph
    35.4  	 */
    35.5  	idx = FIX_BTMAP_BEGIN;
    35.6  	while (nrpages > 0) {
    35.7 -		set_fixmap_ma(idx, phys_addr);
    35.8 +		set_fixmap(idx, phys_addr);
    35.9  		phys_addr += PAGE_SIZE;
   35.10  		--idx;
   35.11  		--nrpages;
    36.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile	Tue May 03 14:42:00 2005 +0000
    36.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile	Tue May 10 08:49:59 2005 +0000
    36.3 @@ -12,7 +12,7 @@ c-obj-y		:= i386.o
    36.4  c-obj-y		+= fixup.o
    36.5  c-obj-$(CONFIG_ACPI_PCI)	+= acpi.o
    36.6  c-obj-y			+= legacy.o common.o
    36.7 -c-xen-obj-$(CONFIG_PCI_DIRECT)+= direct.o
    36.8 +c-obj-$(CONFIG_PCI_DIRECT)+= direct.o
    36.9  c-xen-obj-y		+= irq.o
   36.10  # mmconfig has a 64bit special
   36.11  c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
    37.1 --- a/linux-2.6.11-xen-sparse/drivers/acpi/tables.c	Tue May 03 14:42:00 2005 +0000
    37.2 +++ b/linux-2.6.11-xen-sparse/drivers/acpi/tables.c	Tue May 10 08:49:59 2005 +0000
    37.3 @@ -581,8 +581,8 @@ acpi_table_init (void)
    37.4  		return -ENODEV;
    37.5  	}
    37.6  
    37.7 -        rsdp = (struct acpi_table_rsdp *) (__fix_to_virt(FIX_ACPI_RSDP_PAGE)
    37.8 -                                           + (rsdp_phys & ~PAGE_MASK));
    37.9 +	rsdp = (struct acpi_table_rsdp *) (__fix_to_virt(FIX_ACPI_RSDP_PAGE) +
   37.10 +					   (rsdp_phys & ~PAGE_MASK));
   37.11  	if (!rsdp) {
   37.12  		printk(KERN_WARNING PREFIX "Unable to map RSDP\n");
   37.13  		return -ENODEV;
    38.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/Makefile	Tue May 03 14:42:00 2005 +0000
    38.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/Makefile	Tue May 10 08:49:59 2005 +0000
    38.3 @@ -3,8 +3,8 @@
    38.4  obj-y	+= console/
    38.5  obj-y	+= evtchn/
    38.6  obj-y	+= balloon/
    38.7 +obj-y	+= privcmd/
    38.8  
    38.9 -obj-$(CONFIG_XEN_PRIVILEGED_GUEST)	+= privcmd/
   38.10  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
   38.11  obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
   38.12  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= blkfront/
    39.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Tue May 03 14:42:00 2005 +0000
    39.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Tue May 10 08:49:59 2005 +0000
    39.3 @@ -50,6 +50,7 @@
    39.4  #include <asm-xen/xen-public/io/netif.h>
    39.5  #include <asm-xen/balloon.h>
    39.6  #include <asm/page.h>
    39.7 +#include <asm/uaccess.h>
    39.8  
    39.9  #ifndef __GFP_NOWARN
   39.10  #define __GFP_NOWARN 0
    40.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c	Tue May 03 14:42:00 2005 +0000
    40.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c	Tue May 10 08:49:59 2005 +0000
    40.3 @@ -219,9 +219,6 @@ static struct file_operations privcmd_fi
    40.4  
    40.5  static int __init privcmd_init(void)
    40.6  {
    40.7 -    if ( !(xen_start_info.flags & SIF_PRIVILEGED) )
    40.8 -        return 0;
    40.9 -
   40.10      privcmd_intf = create_xen_proc_entry("privcmd", 0400);
   40.11      if ( privcmd_intf != NULL )
   40.12          privcmd_intf->proc_fops = &privcmd_file_ops;
    41.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Tue May 03 14:42:00 2005 +0000
    41.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Tue May 10 08:49:59 2005 +0000
    41.3 @@ -80,7 +80,7 @@ enum fixed_addresses {
    41.4  #ifdef CONFIG_ACPI_BOOT
    41.5  	FIX_ACPI_BEGIN,
    41.6  	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
    41.7 -        FIX_ACPI_RSDP_PAGE,
    41.8 +	FIX_ACPI_RSDP_PAGE,
    41.9  #endif
   41.10  #ifdef CONFIG_PCI_MMCONFIG
   41.11  	FIX_PCIE_MCFG,
   41.12 @@ -104,15 +104,9 @@ enum fixed_addresses {
   41.13  
   41.14  extern void __set_fixmap (enum fixed_addresses idx,
   41.15  					unsigned long phys, pgprot_t flags);
   41.16 -extern void __set_fixmap_ma (enum fixed_addresses idx,
   41.17 -					unsigned long mach, pgprot_t flags);
   41.18  
   41.19  #define set_fixmap(idx, phys) \
   41.20  		__set_fixmap(idx, phys, PAGE_KERNEL)
   41.21 -#define set_fixmap_ma(idx, phys) \
   41.22 -		__set_fixmap_ma(idx, phys, PAGE_KERNEL)
   41.23 -#define set_fixmap_ma_ro(idx, phys) \
   41.24 -		__set_fixmap_ma(idx, phys, PAGE_KERNEL_RO)
   41.25  /*
   41.26   * Some hardware wants to get fixmapped without caching.
   41.27   */
    42.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io_apic.h	Tue May 03 14:42:00 2005 +0000
    42.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.3 @@ -1,230 +0,0 @@
    42.4 -#ifndef __ASM_IO_APIC_H
    42.5 -#define __ASM_IO_APIC_H
    42.6 -
    42.7 -#include <linux/config.h>
    42.8 -#include <asm/types.h>
    42.9 -#include <asm/mpspec.h>
   42.10 -
   42.11 -#include <asm-xen/xen-public/xen.h>
   42.12 -#include <asm-xen/xen-public/physdev.h>
   42.13 -
   42.14 -/*
   42.15 - * Intel IO-APIC support for SMP and UP systems.
   42.16 - *
   42.17 - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
   42.18 - */
   42.19 -
   42.20 -#ifdef CONFIG_X86_IO_APIC
   42.21 -
   42.22 -#ifdef CONFIG_PCI_MSI
   42.23 -static inline int use_pci_vector(void)	{return 1;}
   42.24 -static inline void disable_edge_ioapic_vector(unsigned int vector) { }
   42.25 -static inline void mask_and_ack_level_ioapic_vector(unsigned int vector) { }
   42.26 -static inline void end_edge_ioapic_vector (unsigned int vector) { }
   42.27 -#define startup_level_ioapic	startup_level_ioapic_vector
   42.28 -#define shutdown_level_ioapic	mask_IO_APIC_vector
   42.29 -#define enable_level_ioapic	unmask_IO_APIC_vector
   42.30 -#define disable_level_ioapic	mask_IO_APIC_vector
   42.31 -#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_vector
   42.32 -#define end_level_ioapic	end_level_ioapic_vector
   42.33 -#define set_ioapic_affinity	set_ioapic_affinity_vector
   42.34 -
   42.35 -#define startup_edge_ioapic 	startup_edge_ioapic_vector
   42.36 -#define shutdown_edge_ioapic 	disable_edge_ioapic_vector
   42.37 -#define enable_edge_ioapic 	unmask_IO_APIC_vector
   42.38 -#define disable_edge_ioapic 	disable_edge_ioapic_vector
   42.39 -#define ack_edge_ioapic 	ack_edge_ioapic_vector
   42.40 -#define end_edge_ioapic 	end_edge_ioapic_vector
   42.41 -#else
   42.42 -static inline int use_pci_vector(void)	{return 0;}
   42.43 -static inline void disable_edge_ioapic_irq(unsigned int irq) { }
   42.44 -static inline void mask_and_ack_level_ioapic_irq(unsigned int irq) { }
   42.45 -static inline void end_edge_ioapic_irq (unsigned int irq) { }
   42.46 -#define startup_level_ioapic	startup_level_ioapic_irq
   42.47 -#define shutdown_level_ioapic	mask_IO_APIC_irq
   42.48 -#define enable_level_ioapic	unmask_IO_APIC_irq
   42.49 -#define disable_level_ioapic	mask_IO_APIC_irq
   42.50 -#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_irq
   42.51 -#define end_level_ioapic	end_level_ioapic_irq
   42.52 -#define set_ioapic_affinity	set_ioapic_affinity_irq
   42.53 -
   42.54 -#define startup_edge_ioapic 	startup_edge_ioapic_irq
   42.55 -#define shutdown_edge_ioapic 	disable_edge_ioapic_irq
   42.56 -#define enable_edge_ioapic 	unmask_IO_APIC_irq
   42.57 -#define disable_edge_ioapic 	disable_edge_ioapic_irq
   42.58 -#define ack_edge_ioapic 	ack_edge_ioapic_irq
   42.59 -#define end_edge_ioapic 	end_edge_ioapic_irq
   42.60 -#endif
   42.61 -
   42.62 -#define IO_APIC_BASE(idx) \
   42.63 -		((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
   42.64 -		+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
   42.65 -
   42.66 -/*
   42.67 - * The structure of the IO-APIC:
   42.68 - */
   42.69 -union IO_APIC_reg_00 {
   42.70 -	u32	raw;
   42.71 -	struct {
   42.72 -		u32	__reserved_2	: 14,
   42.73 -			LTS		:  1,
   42.74 -			delivery_type	:  1,
   42.75 -			__reserved_1	:  8,
   42.76 -			ID		:  8;
   42.77 -	} __attribute__ ((packed)) bits;
   42.78 -};
   42.79 -
   42.80 -union IO_APIC_reg_01 {
   42.81 -	u32	raw;
   42.82 -	struct {
   42.83 -		u32	version		:  8,
   42.84 -			__reserved_2	:  7,
   42.85 -			PRQ		:  1,
   42.86 -			entries		:  8,
   42.87 -			__reserved_1	:  8;
   42.88 -	} __attribute__ ((packed)) bits;
   42.89 -};
   42.90 -
   42.91 -union IO_APIC_reg_02 {
   42.92 -	u32	raw;
   42.93 -	struct {
   42.94 -		u32	__reserved_2	: 24,
   42.95 -			arbitration	:  4,
   42.96 -			__reserved_1	:  4;
   42.97 -	} __attribute__ ((packed)) bits;
   42.98 -};
   42.99 -
  42.100 -union IO_APIC_reg_03 {
  42.101 -	u32	raw;
  42.102 -	struct {
  42.103 -		u32	boot_DT		:  1,
  42.104 -			__reserved_1	: 31;
  42.105 -	} __attribute__ ((packed)) bits;
  42.106 -};
  42.107 -
  42.108 -/*
  42.109 - * # of IO-APICs and # of IRQ routing registers
  42.110 - */
  42.111 -extern int nr_ioapics;
  42.112 -extern int nr_ioapic_registers[MAX_IO_APICS];
  42.113 -
  42.114 -enum ioapic_irq_destination_types {
  42.115 -	dest_Fixed = 0,
  42.116 -	dest_LowestPrio = 1,
  42.117 -	dest_SMI = 2,
  42.118 -	dest__reserved_1 = 3,
  42.119 -	dest_NMI = 4,
  42.120 -	dest_INIT = 5,
  42.121 -	dest__reserved_2 = 6,
  42.122 -	dest_ExtINT = 7
  42.123 -};
  42.124 -
  42.125 -struct IO_APIC_route_entry {
  42.126 -	__u32	vector		:  8,
  42.127 -		delivery_mode	:  3,	/* 000: FIXED
  42.128 -					 * 001: lowest prio
  42.129 -					 * 111: ExtINT
  42.130 -					 */
  42.131 -		dest_mode	:  1,	/* 0: physical, 1: logical */
  42.132 -		delivery_status	:  1,
  42.133 -		polarity	:  1,
  42.134 -		irr		:  1,
  42.135 -		trigger		:  1,	/* 0: edge, 1: level */
  42.136 -		mask		:  1,	/* 0: enabled, 1: disabled */
  42.137 -		__reserved_2	: 15;
  42.138 -
  42.139 -	union {		struct { __u32
  42.140 -					__reserved_1	: 24,
  42.141 -					physical_dest	:  4,
  42.142 -					__reserved_2	:  4;
  42.143 -			} physical;
  42.144 -
  42.145 -			struct { __u32
  42.146 -					__reserved_1	: 24,
  42.147 -					logical_dest	:  8;
  42.148 -			} logical;
  42.149 -	} dest;
  42.150 -
  42.151 -} __attribute__ ((packed));
  42.152 -
  42.153 -/*
  42.154 - * MP-BIOS irq configuration table structures:
  42.155 - */
  42.156 -
  42.157 -/* I/O APIC entries */
  42.158 -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
  42.159 -
  42.160 -/* # of MP IRQ source entries */
  42.161 -extern int mp_irq_entries;
  42.162 -
  42.163 -/* MP IRQ source entries */
  42.164 -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
  42.165 -
  42.166 -/* non-0 if default (table-less) MP configuration */
  42.167 -extern int mpc_default_type;
  42.168 -
  42.169 -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
  42.170 -{
  42.171 -        physdev_op_t op;
  42.172 -        int ret;
  42.173 -
  42.174 -        op.cmd = PHYSDEVOP_APIC_READ;
  42.175 -        op.u.apic_op.apic = apic;
  42.176 -        op.u.apic_op.offset = reg;
  42.177 -        ret = HYPERVISOR_physdev_op(&op);
  42.178 -        if (ret)
  42.179 -                return ret;
  42.180 -        return op.u.apic_op.value;
  42.181 -}
  42.182 -
  42.183 -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
  42.184 -{
  42.185 -        physdev_op_t op;
  42.186 -        int ret;
  42.187 -
  42.188 -        op.cmd = PHYSDEVOP_APIC_WRITE;
  42.189 -        op.u.apic_op.apic = apic;
  42.190 -        op.u.apic_op.offset = reg;
  42.191 -        op.u.apic_op.value = value;
  42.192 -        ret = HYPERVISOR_physdev_op(&op);
  42.193 -}
  42.194 -
  42.195 -/*
  42.196 - * Re-write a value: to be used for read-modify-write
  42.197 - * cycles where the read already set up the index register.
  42.198 - *
  42.199 - * Older SiS APIC requires we rewrite the index regiser
  42.200 - */
  42.201 -extern int sis_apic_bug;
  42.202 -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
  42.203 -{
  42.204 -	if (sis_apic_bug)
  42.205 -		*IO_APIC_BASE(apic) = reg;
  42.206 -	*(IO_APIC_BASE(apic)+4) = value;
  42.207 -}
  42.208 -
  42.209 -/* 1 if "noapic" boot option passed */
  42.210 -extern int skip_ioapic_setup;
  42.211 -
  42.212 -/*
  42.213 - * If we use the IO-APIC for IRQ routing, disable automatic
  42.214 - * assignment of PCI IRQ's.
  42.215 - */
  42.216 -#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
  42.217 -
  42.218 -#ifdef CONFIG_ACPI_BOOT
  42.219 -extern int io_apic_get_unique_id (int ioapic, int apic_id);
  42.220 -extern int io_apic_get_version (int ioapic);
  42.221 -extern int io_apic_get_redir_entries (int ioapic);
  42.222 -extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low);
  42.223 -#endif /*CONFIG_ACPI_BOOT*/
  42.224 -
  42.225 -extern int (*ioapic_renumber_irq)(int ioapic, int irq);
  42.226 -
  42.227 -#else  /* !CONFIG_X86_IO_APIC */
  42.228 -#define io_apic_assign_pci_irqs 0
  42.229 -#endif
  42.230 -
  42.231 -extern int assign_irq_vector(int irq);
  42.232 -
  42.233 -#endif
    43.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h	Tue May 03 14:42:00 2005 +0000
    43.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h	Tue May 10 08:49:59 2005 +0000
    43.3 @@ -60,17 +60,8 @@ enum fixed_addresses {
    43.4  extern void __set_fixmap (enum fixed_addresses idx,
    43.5  					unsigned long phys, pgprot_t flags);
    43.6  
    43.7 -extern void __set_fixmap_ma (enum fixed_addresses idx,
    43.8 -					unsigned long mach, pgprot_t flags);
    43.9 -
   43.10  #define set_fixmap(idx, phys) \
   43.11  		__set_fixmap(idx, phys, PAGE_KERNEL)
   43.12 -
   43.13 -#define set_fixmap_ma(idx, phys) \
   43.14 -		__set_fixmap_ma(idx, phys, PAGE_KERNEL)
   43.15 -#define set_fixmap_ma_ro(idx, phys) \
   43.16 -		__set_fixmap_ma(idx, phys, PAGE_KERNEL_RO)
   43.17 -
   43.18  /*
   43.19   * Some hardware wants to get fixmapped without caching.
   43.20   */
    44.1 --- a/linux-2.6.11-xen-sparse/mkbuildtree	Tue May 03 14:42:00 2005 +0000
    44.2 +++ b/linux-2.6.11-xen-sparse/mkbuildtree	Tue May 10 08:49:59 2005 +0000
    44.3 @@ -66,7 +66,7 @@ relative_lndir ()
    44.4      (
    44.5      cd $i
    44.6      pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
    44.7 -    for j in `find . -type f -o -type l -maxdepth 1`; do
    44.8 +    for j in `find . -maxdepth 1 -type f -o -type l`; do
    44.9        ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
   44.10      done
   44.11      )
    45.1 --- a/tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c	Tue May 03 14:42:00 2005 +0000
    45.2 +++ b/tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c	Tue May 10 08:49:59 2005 +0000
    45.3 @@ -338,7 +338,9 @@ regsets_store_inferior_registers ()
    45.4  	    }
    45.5  	  else
    45.6  	    {
    45.7 +#ifdef DEBUG
    45.8  	      perror ("Warning: ptrace(regsets_store_inferior_registers)");
    45.9 +#endif
   45.10  	    }
   45.11  	}
   45.12        regset ++;
   45.13 @@ -364,7 +366,6 @@ linux_fetch_registers (int regno)
   45.14  void
   45.15  linux_store_registers (int regno)
   45.16  {
   45.17 -    printf("store %d\n", regno);
   45.18    if (use_regsets_p)
   45.19      {
   45.20        if (regsets_store_inferior_registers () == 0)
    46.1 --- a/tools/libxc/Makefile	Tue May 03 14:42:00 2005 +0000
    46.2 +++ b/tools/libxc/Makefile	Tue May 10 08:49:59 2005 +0000
    46.3 @@ -16,6 +16,7 @@ vpath %c       $(XEN_LIBXUTIL)
    46.4  INCLUDES += -I $(XEN_LIBXUTIL)
    46.5  
    46.6  SRCS     :=
    46.7 +SRCS     += xc_sedf.c
    46.8  SRCS     += xc_bvtsched.c
    46.9  SRCS     += xc_core.c
   46.10  SRCS     += xc_domain.c
    47.1 --- a/tools/libxc/xc.h	Tue May 03 14:42:00 2005 +0000
    47.2 +++ b/tools/libxc/xc.h	Tue May 10 08:49:59 2005 +0000
    47.3 @@ -296,6 +296,14 @@ int xc_bvtsched_domain_get(int xc_handle
    47.4                             long long *warpl,
    47.5                             long long *warpu);
    47.6  
    47.7 +int xc_sedf_domain_set(int xc_handle,
    47.8 +                          u32 domid,
    47.9 +                          u64 period, u64 slice, u64 latency, u16 extratime, u16 weight);
   47.10 +
   47.11 +int xc_sedf_domain_get(int xc_handle,
   47.12 +                          u32 domid,
   47.13 +                          u64* period, u64 *slice, u64 *latency, u16 *extratime, u16* weight);
   47.14 +
   47.15  typedef evtchn_status_t xc_evtchn_status_t;
   47.16  
   47.17  /*
    48.1 --- a/tools/libxc/xc_domain.c	Tue May 03 14:42:00 2005 +0000
    48.2 +++ b/tools/libxc/xc_domain.c	Tue May 10 08:49:59 2005 +0000
    48.3 @@ -43,7 +43,7 @@ int xc_domain_create(int xc_handle,
    48.4          goto fail;
    48.5      }
    48.6  
    48.7 -    return err;
    48.8 +    return 0;
    48.9  
   48.10   fail:
   48.11      errno_saved = errno;
   48.12 @@ -103,6 +103,7 @@ int xc_domain_getinfo(int xc_handle,
   48.13      unsigned int nr_doms;
   48.14      u32 next_domid = first_domid;
   48.15      dom0_op_t op;
   48.16 +    int rc = 0; 
   48.17  
   48.18      for ( nr_doms = 0; nr_doms < max_doms; nr_doms++ )
   48.19      {
   48.20 @@ -110,7 +111,7 @@ int xc_domain_getinfo(int xc_handle,
   48.21          op.u.getdomaininfo.domain = (domid_t)next_domid;
   48.22          op.u.getdomaininfo.exec_domain = 0; // FIX ME?!?
   48.23          op.u.getdomaininfo.ctxt = NULL; /* no exec context info, thanks. */
   48.24 -        if ( do_dom0_op(xc_handle, &op) < 0 )
   48.25 +        if ( (rc = do_dom0_op(xc_handle, &op)) < 0 )
   48.26              break;
   48.27          info->domid   = (u16)op.u.getdomaininfo.domain;
   48.28  
   48.29 @@ -137,6 +138,8 @@ int xc_domain_getinfo(int xc_handle,
   48.30          info++;
   48.31      }
   48.32  
   48.33 +    if(!nr_doms) return rc; 
   48.34 +
   48.35      return nr_doms;
   48.36  }
   48.37  
    49.1 --- a/tools/libxc/xc_physdev.c	Tue May 03 14:42:00 2005 +0000
    49.2 +++ b/tools/libxc/xc_physdev.c	Tue May 10 08:49:59 2005 +0000
    49.3 @@ -16,14 +16,6 @@ int xc_physdev_pci_access_modify(int xc_
    49.4                                   int func,
    49.5                                   int enable)
    49.6  {
    49.7 -    dom0_op_t op;
    49.8 -
    49.9 -    op.cmd = DOM0_PCIDEV_ACCESS;
   49.10 -    op.u.pcidev_access.domain = (domid_t)domid;
   49.11 -    op.u.pcidev_access.bus    = bus;
   49.12 -    op.u.pcidev_access.dev    = dev;
   49.13 -    op.u.pcidev_access.func   = func;
   49.14 -    op.u.pcidev_access.enable = enable;
   49.15 -
   49.16 -    return do_dom0_op(xc_handle, &op);
   49.17 +    errno = ENOSYS;
   49.18 +    return -1;
   49.19  }
    50.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.2 +++ b/tools/libxc/xc_sedf.c	Tue May 10 08:49:59 2005 +0000
    50.3 @@ -0,0 +1,51 @@
    50.4 +/******************************************************************************
    50.5 + * xc_sedf.c
    50.6 + * 
    50.7 + * API for manipulating parameters of the Simple EDF scheduler.
    50.8 + * 
    50.9 + * changes by Stephan Diestelhorst
   50.10 + * based on code
   50.11 + * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge.
   50.12 + */
   50.13 +
   50.14 +#include "xc_private.h"
   50.15 +
   50.16 +int xc_sedf_domain_set(int xc_handle,
   50.17 +                          u32 domid, u64 period, u64 slice,u64 latency, u16 extratime,u16 weight)
   50.18 +{
   50.19 +    dom0_op_t op;
   50.20 +    struct sedf_adjdom *p = &op.u.adjustdom.u.sedf;
   50.21 +
   50.22 +    op.cmd = DOM0_ADJUSTDOM;
   50.23 +    op.u.adjustdom.domain  = (domid_t)domid;
   50.24 +    op.u.adjustdom.sched_id = SCHED_SEDF;
   50.25 +    op.u.adjustdom.direction = SCHED_INFO_PUT;
   50.26 +
   50.27 +    p->period    = period;
   50.28 +    p->slice     = slice;
   50.29 +    p->latency   = latency;
   50.30 +    p->extratime = extratime;
   50.31 +    p->weight    = weight;
   50.32 +    return do_dom0_op(xc_handle, &op);
   50.33 +}
   50.34 +
   50.35 +int xc_sedf_domain_get(int xc_handle, u32 domid, u64 *period, u64 *slice, u64* latency, u16* extratime, u16* weight)
   50.36 +{
   50.37 +    dom0_op_t op;
   50.38 +    int ret;
   50.39 +    struct sedf_adjdom *p = &op.u.adjustdom.u.sedf;
   50.40 +
   50.41 +    op.cmd = DOM0_ADJUSTDOM;    
   50.42 +    op.u.adjustdom.domain = (domid_t)domid;
   50.43 +    op.u.adjustdom.sched_id = SCHED_SEDF;
   50.44 +    op.u.adjustdom.direction = SCHED_INFO_GET;
   50.45 +
   50.46 +    ret = do_dom0_op(xc_handle, &op);
   50.47 +
   50.48 +    *period    = p->period;
   50.49 +    *slice     = p->slice;
   50.50 +    *latency   = p->latency;
   50.51 +    *extratime = p->extratime;
   50.52 +    *weight    = p->weight;
   50.53 +    return ret;
   50.54 +}
    51.1 --- a/tools/libxutil/sxpr_parser.h	Tue May 03 14:42:00 2005 +0000
    51.2 +++ b/tools/libxutil/sxpr_parser.h	Tue May 10 08:49:59 2005 +0000
    51.3 @@ -28,7 +28,7 @@
    51.4  /** Size of a parser input buffer.
    51.5   * Tokens read must fit into this size (including trailing null).
    51.6   */
    51.7 -#define PARSER_BUF_SIZE 1024
    51.8 +#define PARSER_BUF_SIZE 4096
    51.9  
   51.10  struct Parser;
   51.11  typedef int ParserStateFn(struct Parser *, char c);
    52.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Tue May 03 14:42:00 2005 +0000
    52.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Tue May 10 08:49:59 2005 +0000
    52.3 @@ -827,6 +827,52 @@ static PyObject *pyxc_physinfo(PyObject 
    52.4                           "cpu_khz",     info.cpu_khz);
    52.5  }
    52.6  
    52.7 +static PyObject *pyxc_sedf_domain_set(PyObject *self,
    52.8 +                                         PyObject *args,
    52.9 +                                         PyObject *kwds)
   52.10 +{
   52.11 +    XcObject *xc = (XcObject *)self;
   52.12 +    u32 domid;
   52.13 +    u64 period, slice, latency;
   52.14 +    u16 extratime, weight;
   52.15 +    static char *kwd_list[] = { "dom", "period", "slice", "latency", "extratime", "weight",NULL };
   52.16 +    
   52.17 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLLhh", kwd_list, &domid,
   52.18 +                                     &period, &slice, &latency, &extratime, &weight) )
   52.19 +        return NULL;
   52.20 +   if ( xc_sedf_domain_set(xc->xc_handle, domid, period, slice, latency, extratime,weight) != 0 )
   52.21 +        return PyErr_SetFromErrno(xc_error);
   52.22 +
   52.23 +    Py_INCREF(zero);
   52.24 +    return zero;
   52.25 +}
   52.26 +
   52.27 +static PyObject *pyxc_sedf_domain_get(PyObject *self,
   52.28 +                                         PyObject *args,
   52.29 +                                         PyObject *kwds)
   52.30 +{
   52.31 +    XcObject *xc = (XcObject *)self;
   52.32 +    u32 domid;
   52.33 +    u64 period, slice,latency;
   52.34 +    u16 weight, extratime;
   52.35 +    
   52.36 +    static char *kwd_list[] = { "dom", NULL };
   52.37 +
   52.38 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &domid) )
   52.39 +        return NULL;
   52.40 +    
   52.41 +    if ( xc_sedf_domain_get( xc->xc_handle, domid, &period,
   52.42 +                                &slice,&latency,&extratime,&weight) )
   52.43 +        return PyErr_SetFromErrno(xc_error);
   52.44 +
   52.45 +    return Py_BuildValue("{s:i,s:L,s:L,s:L,s:i}",
   52.46 +                         "domain",    domid,
   52.47 +                         "period",    period,
   52.48 +                         "slice",     slice,
   52.49 +			 "latency",   latency,
   52.50 +			 "extratime", extratime);
   52.51 +}
   52.52 +
   52.53  static PyObject *pyxc_shadow_control(PyObject *self,
   52.54                                       PyObject *args,
   52.55                                       PyObject *kwds)
   52.56 @@ -1027,6 +1073,30 @@ static PyMethodDef pyxc_methods[] = {
   52.57        " warpu  [long]: Unwarp requirement.\n"
   52.58        " warpl  [long]: Warp limit,\n"
   52.59      },
   52.60 +    
   52.61 +    { "sedf_domain_set",
   52.62 +      (PyCFunction)pyxc_sedf_domain_set,
   52.63 +      METH_KEYWORDS, "\n"
   52.64 +      "Set the scheduling parameters for a domain when running with Atropos.\n"
   52.65 +      " dom       [int]:  domain to set\n"
   52.66 +      " period    [long]: domain's scheduling period\n"
   52.67 +      " slice     [long]: domain's slice per period\n"
   52.68 +      " latency   [long]: domain's wakeup latency hint\n"
   52.69 +      " extratime [int]:  domain aware of extratime?\n"
   52.70 +      "Returns: [int] 0 on success; -1 on error.\n" },
   52.71 +
   52.72 +    { "sedf_domain_get",
   52.73 +      (PyCFunction)pyxc_sedf_domain_get,
   52.74 +      METH_KEYWORDS, "\n"
   52.75 +      "Get the current scheduling parameters for a domain when running with\n"
   52.76 +      "the Atropos scheduler."
   52.77 +      " dom       [int]: domain to query\n"
   52.78 +      "Returns:   [dict]\n"
   52.79 +      " domain    [int]: domain ID\n"
   52.80 +      " period    [long]: scheduler period\n"
   52.81 +      " slice     [long]: CPU reservation per period\n"
   52.82 +      " latency   [long]: domain's wakeup latency hint\n"
   52.83 +      " extratime [int]:  domain aware of extratime?\n"},
   52.84  
   52.85      { "evtchn_alloc_unbound", 
   52.86        (PyCFunction)pyxc_evtchn_alloc_unbound,
    53.1 --- a/tools/python/xen/xend/XendClient.py	Tue May 03 14:42:00 2005 +0000
    53.2 +++ b/tools/python/xen/xend/XendClient.py	Tue May 10 08:49:59 2005 +0000
    53.3 @@ -260,6 +260,15 @@ class Xend:
    53.4                                'warpl'    : warpl,
    53.5                                'warpu'    : warpu })
    53.6  
    53.7 +    def xend_domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
    53.8 +        return self.xendPost(self.domainurl(id),
    53.9 +                             {'op'        : 'cpu_sedf_set',
   53.10 +                              'period'    : period,
   53.11 +                              'slice'     : slice,
   53.12 +			      'latency'   : latency,
   53.13 +			      'extratime' : extratime,
   53.14 +			      'weight'    : weight })
   53.15 +
   53.16      def xend_domain_maxmem_set(self, id, memory):
   53.17          return self.xendPost(self.domainurl(id),
   53.18                               { 'op'     : 'maxmem_set',
    54.1 --- a/tools/python/xen/xend/XendDomain.py	Tue May 03 14:42:00 2005 +0000
    54.2 +++ b/tools/python/xen/xend/XendDomain.py	Tue May 10 08:49:59 2005 +0000
    54.3 @@ -644,6 +644,24 @@ class XendDomain:
    54.4          except Exception, ex:
    54.5              raise XendError(str(ex))
    54.6      
    54.7 +    
    54.8 +    def domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
    54.9 +        """Set Simple EDF scheduler parameters for a domain.
   54.10 +        """
   54.11 +	dominfo = self.domain_lookup(id)
   54.12 +        try:
   54.13 +            return xc.sedf_domain_set(dominfo.dom, period, slice, latency, extratime, weight)
   54.14 +        except Exception, ex:
   54.15 +            raise XendError(str(ex))
   54.16 +
   54.17 +    def domain_cpu_sedf_get(self, id):
   54.18 +        """Get Atropos scheduler parameters for a domain.
   54.19 +        """
   54.20 +        dominfo = self.domain_lookup(id)
   54.21 +        try:
   54.22 +            return xc.sedf_domain_get(dominfo.dom)
   54.23 +        except Exception, ex:
   54.24 +            raise XendError(str(ex))
   54.25      def domain_device_create(self, id, devconfig):
   54.26          """Create a new device for a domain.
   54.27  
    55.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Tue May 03 14:42:00 2005 +0000
    55.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Tue May 10 08:49:59 2005 +0000
    55.3 @@ -107,6 +107,18 @@ class SrvDomain(SrvDir):
    55.4          val = fn(req.args, {'dom': self.dom.id})
    55.5          return val
    55.6      
    55.7 +    
    55.8 +    def op_cpu_sedf_set(self, op, req):
    55.9 +        fn = FormFn(self.xd.domain_cpu_sedf_set,
   55.10 +                    [['dom', 'str'],
   55.11 +                     ['period', 'int'],
   55.12 +                     ['slice', 'int'],
   55.13 +		     ['latency', 'int'],
   55.14 +		     ['extratime', 'int'],
   55.15 +		     ['weight', 'int']])
   55.16 +        val = fn(req.args, {'dom': self.dom.id})
   55.17 +        return val
   55.18 +
   55.19      def op_maxmem_set(self, op, req):
   55.20          fn = FormFn(self.xd.domain_maxmem_set,
   55.21                      [['dom', 'str'],
    56.1 --- a/tools/python/xen/xm/main.py	Tue May 03 14:42:00 2005 +0000
    56.2 +++ b/tools/python/xen/xm/main.py	Tue May 10 08:49:59 2005 +0000
    56.3 @@ -591,6 +591,23 @@ class ProgBvtslice(Prog):
    56.4  
    56.5  xm.prog(ProgBvtslice)
    56.6  
    56.7 +class ProgSedf(Prog):
    56.8 +    group = 'scheduler'
    56.9 +    name= "sedf"
   56.10 +    info = """Set simple EDF parameters."""
   56.11 +
   56.12 +    def help(self, args):
   56.13 +        print args[0], "DOM PERIOD SLICE LATENCY EXTRATIME WEIGHT"
   56.14 +        print "\nSet simple EDF parameters."
   56.15 +
   56.16 +    def main(self, args):
   56.17 +	if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0])
   56.18 +	dom = args[1]
   56.19 +	v = map(int, args[2:7])
   56.20 +	server.xend_domain_cpu_sedf_set(dom, *v)
   56.21 +
   56.22 +xm.prog(ProgSedf)
   56.23 +
   56.24  class ProgInfo(Prog):
   56.25      group = 'host'
   56.26      name = "info"
    57.1 --- a/xen/arch/ia64/domain.c	Tue May 03 14:42:00 2005 +0000
    57.2 +++ b/xen/arch/ia64/domain.c	Tue May 10 08:49:59 2005 +0000
    57.3 @@ -621,14 +621,12 @@ int construct_dom0(struct domain *d,
    57.4  	unsigned long pkern_entry;
    57.5  	unsigned long pkern_end;
    57.6  
    57.7 -	extern void physdev_init_dom0(struct domain *);
    57.8 -
    57.9  //printf("construct_dom0: starting\n");
   57.10  	/* Sanity! */
   57.11  #ifndef CLONE_DOMAIN0
   57.12  	if ( d != dom0 ) 
   57.13  	    BUG();
   57.14 -	if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
   57.15 +	if ( test_bit(DF_CONSTRUCTED, &d->flags) ) 
   57.16  	    BUG();
   57.17  #endif
   57.18  
   57.19 @@ -755,13 +753,7 @@ int construct_dom0(struct domain *d,
   57.20  #endif
   57.21  	console_endboot(strstr(cmdline, "tty0") != NULL);
   57.22  
   57.23 -	/* DOM0 gets access to everything. */
   57.24 -#ifdef CLONE_DOMAIN0
   57.25 -if (d == dom0)
   57.26 -#endif
   57.27 -	physdev_init_dom0(d);
   57.28 -
   57.29 -	set_bit(DF_CONSTRUCTED, &d->d_flags);
   57.30 +	set_bit(DF_CONSTRUCTED, &d->flags);
   57.31  
   57.32  	new_thread(ed, pkern_entry, 0, 0);
   57.33  	// FIXME: Hack for keyboard input
   57.34 @@ -791,7 +783,7 @@ int construct_domU(struct domain *d,
   57.35  	unsigned long pkern_entry;
   57.36  
   57.37  #ifndef DOMU_AUTO_RESTART
   57.38 -	if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) BUG();
   57.39 +	if ( test_bit(DF_CONSTRUCTED, &d->flags) ) BUG();
   57.40  #endif
   57.41  
   57.42  	printk("*** LOADING DOMAIN %d ***\n",d->id);
   57.43 @@ -824,7 +816,7 @@ int construct_domU(struct domain *d,
   57.44  	loaddomainelfimage(d,image_start);
   57.45  	printk("loaddomainelfimage returns\n");
   57.46  
   57.47 -	set_bit(DF_CONSTRUCTED, &d->d_flags);
   57.48 +	set_bit(DF_CONSTRUCTED, &d->flags);
   57.49  
   57.50  	printk("calling new_thread, entry=%p\n",pkern_entry);
   57.51  #ifdef DOMU_AUTO_RESTART
    58.1 --- a/xen/arch/ia64/idle0_task.c	Tue May 03 14:42:00 2005 +0000
    58.2 +++ b/xen/arch/ia64/idle0_task.c	Tue May 10 08:49:59 2005 +0000
    58.3 @@ -22,7 +22,7 @@
    58.4  #define IDLE0_DOMAIN(_t)             \
    58.5  {                                    \
    58.6      id:          IDLE_DOMAIN_ID,     \
    58.7 -    d_flags:     1<<DF_IDLETASK,     \
    58.8 +    flags:     1<<DF_IDLETASK,       \
    58.9      refcnt:      ATOMIC_INIT(1)      \
   58.10  }
   58.11  
    59.1 --- a/xen/arch/ia64/xenmisc.c	Tue May 03 14:42:00 2005 +0000
    59.2 +++ b/xen/arch/ia64/xenmisc.c	Tue May 10 08:49:59 2005 +0000
    59.3 @@ -133,73 +133,6 @@ void free_page_type(struct pfn_info *pag
    59.4  }
    59.5  
    59.6  ///////////////////////////////
    59.7 -// from arch/x86/pci.c
    59.8 -///////////////////////////////
    59.9 -
   59.10 -int
   59.11 -pcibios_prep_mwi (struct pci_dev *dev)
   59.12 -{
   59.13 -	dummy();
   59.14 -}
   59.15 -
   59.16 -///////////////////////////////
   59.17 -// from arch/x86/pci-irq.c
   59.18 -///////////////////////////////
   59.19 -
   59.20 -void pcibios_enable_irq(struct pci_dev *dev)
   59.21 -{
   59.22 -	dummy();
   59.23 -}
   59.24 -
   59.25 -///////////////////////////////
   59.26 -// from arch/ia64/pci-pc.c
   59.27 -///////////////////////////////
   59.28 -
   59.29 -#include <xen/pci.h>
   59.30 -
   59.31 -int pcibios_enable_device(struct pci_dev *dev, int mask)
   59.32 -{
   59.33 -	dummy();
   59.34 -	return 0;
   59.35 -}
   59.36 -
   59.37 -int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
   59.38 -int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
   59.39 -
   59.40 -//struct pci_fixup pcibios_fixups[] = { { 0 } };
   59.41 -struct pci_fixup pcibios_fixups[] = { { 0 } };
   59.42 -
   59.43 -void
   59.44 -pcibios_align_resource(void *data, struct resource *res,
   59.45 -		       unsigned long size, unsigned long align)
   59.46 -{
   59.47 -	dummy();
   59.48 -}
   59.49 -
   59.50 -void
   59.51 -pcibios_update_resource(struct pci_dev *dev, struct resource *root,
   59.52 -			struct resource *res, int resource)
   59.53 -{
   59.54 -	dummy();
   59.55 -}
   59.56 -
   59.57 -void __devinit  pcibios_fixup_bus(struct pci_bus *b)
   59.58 -{
   59.59 -	dummy();
   59.60 -}
   59.61 -
   59.62 -void __init pcibios_init(void)
   59.63 -{
   59.64 -	dummy();
   59.65 -}
   59.66 -
   59.67 -char * __devinit  pcibios_setup(char *str)
   59.68 -{
   59.69 -	dummy();
   59.70 -	return 0;
   59.71 -}
   59.72 -
   59.73 -///////////////////////////////
   59.74  // from arch/ia64/traps.c
   59.75  ///////////////////////////////
   59.76  
   59.77 @@ -217,33 +150,6 @@ void dump_pageframe_info(struct domain *
   59.78  }
   59.79  
   59.80  ///////////////////////////////
   59.81 -// from common/physdev.c
   59.82 -///////////////////////////////
   59.83 -void
   59.84 -physdev_init_dom0(struct domain *d)
   59.85 -{
   59.86 -}
   59.87 -
   59.88 -int
   59.89 -physdev_pci_access_modify(domid_t id, int bus, int dev, int func, int enable)
   59.90 -{
   59.91 -	return -EINVAL;
   59.92 -}
   59.93 -
   59.94 -void physdev_modify_ioport_access_range(struct domain *d, int enable,
   59.95 -	int port, int num)
   59.96 -{
   59.97 -	printk("physdev_modify_ioport_access_range not implemented\n");
   59.98 -	dummy();
   59.99 -}
  59.100 -
  59.101 -void physdev_destroy_state(struct domain *d)
  59.102 -{
  59.103 -	printk("physdev_destroy_state not implemented\n");
  59.104 -	dummy();
  59.105 -}
  59.106 -
  59.107 -///////////////////////////////
  59.108  // called from arch/ia64/head.S
  59.109  ///////////////////////////////
  59.110  
  59.111 @@ -334,7 +240,7 @@ int id = ((struct exec_domain *)current)
  59.112  if (!cnt[id]--) { printk("%x",id); cnt[id] = 50; }
  59.113  if (!i--) { printk("+",id); cnt[id] = 100; }
  59.114  }
  59.115 -	clear_bit(EDF_RUNNING, &prev->ed_flags);
  59.116 +	clear_bit(EDF_RUNNING, &prev->flags);
  59.117  	//if (!is_idle_task(next->domain) )
  59.118  		//send_guest_virq(next, VIRQ_TIMER);
  59.119  	load_region_regs(current);
  59.120 @@ -364,7 +270,7 @@ loop:
  59.121  	printf(buf);
  59.122  	if (regs) show_registers(regs);
  59.123  	domain_pause_by_systemcontroller(current->domain);
  59.124 -	set_bit(DF_CRASHED, ed->domain->d_flags);
  59.125 +	set_bit(DF_CRASHED, ed->domain->flags);
  59.126  	if (ed->domain->id == 0) {
  59.127  		int i = 1000000000L;
  59.128  		// if domain0 crashes, just periodically print out panic
    60.1 --- a/xen/arch/ia64/xensetup.c	Tue May 03 14:42:00 2005 +0000
    60.2 +++ b/xen/arch/ia64/xensetup.c	Tue May 10 08:49:59 2005 +0000
    60.3 @@ -69,9 +69,6 @@ unsigned char opt_pdb[10] = "none";
    60.4  unsigned int opt_tbuf_size = 10;
    60.5  /* opt_sched: scheduler - default to Borrowed Virtual Time */
    60.6  char opt_sched[10] = "bvt";
    60.7 -/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */
    60.8 -/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */
    60.9 -char opt_physdev_dom0_hide[200] = "";
   60.10  /* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
   60.11  /*                                    level- or edge-triggered.         */
   60.12  /* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
   60.13 @@ -270,7 +267,7 @@ printk("About to call init_idle_task()\n
   60.14      if ( dom0 == NULL )
   60.15          panic("Error creating domain 0\n");
   60.16  
   60.17 -    set_bit(DF_PRIVILEGED, &dom0->d_flags);
   60.18 +    set_bit(DF_PRIVILEGED, &dom0->flags);
   60.19  
   60.20      /*
   60.21       * We're going to setup domain0 using the module(s) that we stashed safely
    61.1 --- a/xen/arch/x86/Makefile	Tue May 03 14:42:00 2005 +0000
    61.2 +++ b/xen/arch/x86/Makefile	Tue May 10 08:49:59 2005 +0000
    61.3 @@ -3,6 +3,7 @@ include $(BASEDIR)/Rules.mk
    61.4  
    61.5  OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S))
    61.6  OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c))
    61.7 +OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c))
    61.8  OBJS += $(patsubst %.c,%.o,$(wildcard mtrr/*.c))
    61.9  
   61.10  OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
   61.11 @@ -11,8 +12,6 @@ ifneq ($(crash_debug),y)
   61.12  OBJS := $(patsubst cdb%.o,,$(OBJS))
   61.13  endif
   61.14  
   61.15 -OBJS := $(filter-out pci%,$(OBJS))
   61.16 -
   61.17  default: $(TARGET)
   61.18  
   61.19  $(TARGET): $(TARGET)-syms boot/mkelf32
    62.1 --- a/xen/arch/x86/Rules.mk	Tue May 03 14:42:00 2005 +0000
    62.2 +++ b/xen/arch/x86/Rules.mk	Tue May 10 08:49:59 2005 +0000
    62.3 @@ -2,8 +2,8 @@
    62.4  # x86-specific definitions
    62.5  
    62.6  CFLAGS  += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
    62.7 -CFLAGS  += -iwithprefix include -Wall -Werror -pipe
    62.8 -CFLAGS  += -I$(BASEDIR)/include -Wno-pointer-arith -Wredundant-decls
    62.9 +CFLAGS  += -iwithprefix include -Wall -Werror -Wno-pointer-arith -pipe
   62.10 +CFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-x86/mach-default
   62.11  
   62.12  ifeq ($(optimize),y)
   62.13  CFLAGS  += -O3 -fomit-frame-pointer
    63.1 --- a/xen/arch/x86/acpi.c	Tue May 03 14:42:00 2005 +0000
    63.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    63.3 @@ -1,721 +0,0 @@
    63.4 -/*
    63.5 - *  acpi.c - Architecture-Specific Low-Level ACPI Support
    63.6 - *
    63.7 - *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
    63.8 - *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
    63.9 - *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
   63.10 - *
   63.11 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   63.12 - *
   63.13 - *  This program is free software; you can redistribute it and/or modify
   63.14 - *  it under the terms of the GNU General Public License as published by
   63.15 - *  the Free Software Foundation; either version 2 of the License, or
   63.16 - *  (at your option) any later version.
   63.17 - *
   63.18 - *  This program is distributed in the hope that it will be useful,
   63.19 - *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   63.20 - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   63.21 - *  GNU General Public License for more details.
   63.22 - *
   63.23 - *  You should have received a copy of the GNU General Public License
   63.24 - *  along with this program; if not, write to the Free Software
   63.25 - *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   63.26 - *
   63.27 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   63.28 - */
   63.29 -
   63.30 -#include <xen/config.h>
   63.31 -#include <xen/kernel.h>
   63.32 -#include <xen/init.h>
   63.33 -#include <xen/types.h>
   63.34 -#include <xen/slab.h>
   63.35 -#include <xen/pci.h>
   63.36 -#include <xen/irq.h>
   63.37 -#include <xen/acpi.h>
   63.38 -#include <asm/mpspec.h>
   63.39 -#include <asm/io.h>
   63.40 -#include <asm/apic.h>
   63.41 -#include <asm/apicdef.h>
   63.42 -#include <asm/page.h>
   63.43 -#include <asm/io_apic.h>
   63.44 -#include <asm/acpi.h>
   63.45 -#include <asm/smpboot.h>
   63.46 -
   63.47 -
   63.48 -#define PREFIX			"ACPI: "
   63.49 -
   63.50 -int acpi_lapic;
   63.51 -int acpi_ioapic;
   63.52 -int acpi_strict;
   63.53 -
   63.54 -acpi_interrupt_flags acpi_sci_flags __initdata;
   63.55 -int acpi_sci_override_gsi __initdata;
   63.56 -/* --------------------------------------------------------------------------
   63.57 -                              Boot-time Configuration
   63.58 -   -------------------------------------------------------------------------- */
   63.59 -
   63.60 -int acpi_noirq __initdata = 0;  /* skip ACPI IRQ initialization */
   63.61 -int acpi_ht __initdata = 1;     /* enable HT */
   63.62 -
   63.63 -enum acpi_irq_model_id		acpi_irq_model;
   63.64 -
   63.65 -
   63.66 -/*
   63.67 - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
   63.68 - * to map the target physical address. The problem is that set_fixmap()
   63.69 - * provides a single page, and it is possible that the page is not
   63.70 - * sufficient.
   63.71 - * By using this area, we can map up to MAX_IO_APICS pages temporarily,
   63.72 - * i.e. until the next __va_range() call.
   63.73 - *
   63.74 - * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
   63.75 - * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
   63.76 - * count idx down while incrementing the phys address.
   63.77 - */
   63.78 -char *__acpi_map_table(unsigned long phys, unsigned long size)
   63.79 -{
   63.80 -	unsigned long base, offset, mapped_size;
   63.81 -	int idx;
   63.82 -
   63.83 -	if (phys + size < 8*1024*1024) 
   63.84 -		return __va(phys); 
   63.85 -
   63.86 -	offset = phys & (PAGE_SIZE - 1);
   63.87 -	mapped_size = PAGE_SIZE - offset;
   63.88 -	set_fixmap(FIX_ACPI_END, phys);
   63.89 -	base = fix_to_virt(FIX_ACPI_END);
   63.90 -
   63.91 -	/*
   63.92 -	 * Most cases can be covered by the below.
   63.93 -	 */
   63.94 -	idx = FIX_ACPI_END;
   63.95 -	while (mapped_size < size) {
   63.96 -		if (--idx < FIX_ACPI_BEGIN)
   63.97 -			return 0;	/* cannot handle this */
   63.98 -		phys += PAGE_SIZE;
   63.99 -		set_fixmap(idx, phys);
  63.100 -		mapped_size += PAGE_SIZE;
  63.101 -	}
  63.102 -
  63.103 -	return ((char *) base + offset);
  63.104 -}
  63.105 -
  63.106 -
  63.107 -#ifdef CONFIG_X86_LOCAL_APIC
  63.108 -
  63.109 -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
  63.110 -
  63.111 -
  63.112 -static int __init
  63.113 -acpi_parse_madt (
  63.114 -	unsigned long		phys_addr,
  63.115 -	unsigned long		size)
  63.116 -{
  63.117 -	struct acpi_table_madt	*madt = NULL;
  63.118 -
  63.119 -	if (!phys_addr || !size)
  63.120 -		return -EINVAL;
  63.121 -
  63.122 -	madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
  63.123 -	if (!madt) {
  63.124 -		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
  63.125 -		return -ENODEV;
  63.126 -	}
  63.127 -
  63.128 -	if (madt->lapic_address)
  63.129 -		acpi_lapic_addr = (u64) madt->lapic_address;
  63.130 -
  63.131 -	printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n",
  63.132 -		madt->lapic_address);
  63.133 -
  63.134 -	detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id);
  63.135 -
  63.136 -	return 0;
  63.137 -}
  63.138 -
  63.139 -
  63.140 -static int __init
  63.141 -acpi_parse_lapic (
  63.142 -	acpi_table_entry_header *header)
  63.143 -{
  63.144 -	struct acpi_table_lapic	*processor = NULL;
  63.145 -
  63.146 -	processor = (struct acpi_table_lapic*) header;
  63.147 -	if (!processor)
  63.148 -		return -EINVAL;
  63.149 -
  63.150 -	acpi_table_print_madt_entry(header);
  63.151 -
  63.152 -	mp_register_lapic (
  63.153 -		processor->id,					   /* APIC ID */
  63.154 -		processor->flags.enabled);			  /* Enabled? */
  63.155 -
  63.156 -	return 0;
  63.157 -}
  63.158 -
  63.159 -
  63.160 -static int __init
  63.161 -acpi_parse_lapic_addr_ovr (
  63.162 -	acpi_table_entry_header *header)
  63.163 -{
  63.164 -	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
  63.165 -
  63.166 -	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
  63.167 -	if (!lapic_addr_ovr)
  63.168 -		return -EINVAL;
  63.169 -
  63.170 -	acpi_lapic_addr = lapic_addr_ovr->address;
  63.171 -
  63.172 -	return 0;
  63.173 -}
  63.174 -
  63.175 -static int __init
  63.176 -acpi_parse_lapic_nmi (
  63.177 -	acpi_table_entry_header *header)
  63.178 -{
  63.179 -	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
  63.180 -
  63.181 -	lapic_nmi = (struct acpi_table_lapic_nmi*) header;
  63.182 -	if (!lapic_nmi)
  63.183 -		return -EINVAL;
  63.184 -
  63.185 -	acpi_table_print_madt_entry(header);
  63.186 -
  63.187 -	if (lapic_nmi->lint != 1)
  63.188 -		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
  63.189 -
  63.190 -	return 0;
  63.191 -}
  63.192 -
  63.193 -#endif /*CONFIG_X86_LOCAL_APIC*/
  63.194 -
  63.195 -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
  63.196 -
  63.197 -static int __init
  63.198 -acpi_parse_ioapic (
  63.199 -	acpi_table_entry_header *header)
  63.200 -{
  63.201 -	struct acpi_table_ioapic *ioapic = NULL;
  63.202 -
  63.203 -	ioapic = (struct acpi_table_ioapic*) header;
  63.204 -	if (!ioapic)
  63.205 -		return -EINVAL;
  63.206 - 
  63.207 -	acpi_table_print_madt_entry(header);
  63.208 -
  63.209 -	mp_register_ioapic (
  63.210 -		ioapic->id,
  63.211 -		ioapic->address,
  63.212 -		ioapic->global_irq_base);
  63.213 - 
  63.214 -	return 0;
  63.215 -}
  63.216 -
  63.217 -/*
  63.218 - * Parse Interrupt Source Override for the ACPI SCI
  63.219 - */
  63.220 -static void
  63.221 -acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
  63.222 -{
  63.223 -	if (trigger == 0)	/* compatible SCI trigger is level */
  63.224 -		trigger = 3;
  63.225 -
  63.226 -	if (polarity == 0)	/* compatible SCI polarity is low */
  63.227 -		polarity = 3;
  63.228 -
  63.229 -	/* Command-line over-ride via acpi_sci= */
  63.230 -	if (acpi_sci_flags.trigger)
  63.231 -		trigger = acpi_sci_flags.trigger;
  63.232 -
  63.233 -	if (acpi_sci_flags.polarity)
  63.234 -		polarity = acpi_sci_flags.polarity;
  63.235 -
  63.236 -	/*
  63.237 - 	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
  63.238 -	 * If GSI is < 16, this will update its flags,
  63.239 -	 * else it will create a new mp_irqs[] entry.
  63.240 -	 */
  63.241 -	mp_override_legacy_irq(gsi, polarity, trigger, gsi);
  63.242 -
  63.243 -	/*
  63.244 -	 * stash over-ride to indicate we've been here
  63.245 -	 * and for later update of acpi_fadt
  63.246 -	 */
  63.247 -	acpi_sci_override_gsi = gsi;
  63.248 -	return;
  63.249 -}
  63.250 -
  63.251 -static int __init
  63.252 -acpi_parse_fadt(unsigned long phys, unsigned long size)
  63.253 -{
  63.254 -        struct fadt_descriptor_rev2 *fadt =0;
  63.255 -
  63.256 -        fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
  63.257 -        if (!fadt) {
  63.258 -                printk(KERN_WARNING PREFIX "Unable to map FADT\n");
  63.259 -                return 0;
  63.260 -        }
  63.261 -
  63.262 -#ifdef  CONFIG_ACPI_INTERPRETER
  63.263 -        /* initialize sci_int early for INT_SRC_OVR MADT parsing */
  63.264 -        acpi_fadt.sci_int = fadt->sci_int;
  63.265 -#endif
  63.266 -
  63.267 -        return 0;
  63.268 -}
  63.269 -
  63.270 -
  63.271 -static int __init
  63.272 -acpi_parse_int_src_ovr (
  63.273 -	acpi_table_entry_header *header)
  63.274 -{
  63.275 -	struct acpi_table_int_src_ovr *intsrc = NULL;
  63.276 -
  63.277 -	intsrc = (struct acpi_table_int_src_ovr*) header;
  63.278 -	if (!intsrc)
  63.279 -		return -EINVAL;
  63.280 -
  63.281 -	acpi_table_print_madt_entry(header);
  63.282 -
  63.283 -	if (intsrc->bus_irq == acpi_fadt.sci_int) {
  63.284 -		acpi_sci_ioapic_setup(intsrc->global_irq,
  63.285 -			intsrc->flags.polarity, intsrc->flags.trigger);
  63.286 -		return 0;
  63.287 -	}
  63.288 -
  63.289 -	mp_override_legacy_irq (
  63.290 -		intsrc->bus_irq,
  63.291 -		intsrc->flags.polarity,
  63.292 -		intsrc->flags.trigger,
  63.293 -		intsrc->global_irq);
  63.294 -
  63.295 -	return 0;
  63.296 -}
  63.297 -
  63.298 -
  63.299 -static int __init
  63.300 -acpi_parse_nmi_src (
  63.301 -	acpi_table_entry_header *header)
  63.302 -{
  63.303 -	struct acpi_table_nmi_src *nmi_src = NULL;
  63.304 -
  63.305 -	nmi_src = (struct acpi_table_nmi_src*) header;
  63.306 -	if (!nmi_src)
  63.307 -		return -EINVAL;
  63.308 -
  63.309 -	acpi_table_print_madt_entry(header);
  63.310 -
  63.311 -	/* TBD: Support nimsrc entries? */
  63.312 -
  63.313 -	return 0;
  63.314 -}
  63.315 -
  63.316 -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
  63.317 -
  63.318 -
  63.319 -static unsigned long __init
  63.320 -acpi_scan_rsdp (
  63.321 -	unsigned long		start,
  63.322 -	unsigned long		length)
  63.323 -{
  63.324 -	unsigned long		offset = 0;
  63.325 -	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
  63.326 -
  63.327 -	/*
  63.328 -	 * Scan all 16-byte boundaries of the physical memory region for the
  63.329 -	 * RSDP signature.
  63.330 -	 */
  63.331 -	for (offset = 0; offset < length; offset += 16) {
  63.332 -		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
  63.333 -			continue;
  63.334 -		return (start + offset);
  63.335 -	}
  63.336 -
  63.337 -	return 0;
  63.338 -}
  63.339 -
  63.340 -
  63.341 -unsigned long __init
  63.342 -acpi_find_rsdp (void)
  63.343 -{
  63.344 -	unsigned long		rsdp_phys = 0;
  63.345 -
  63.346 -	/*
  63.347 -	 * Scan memory looking for the RSDP signature. First search EBDA (low
  63.348 -	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
  63.349 -	 */
  63.350 -	rsdp_phys = acpi_scan_rsdp (0, 0x400);
  63.351 -	if (!rsdp_phys)
  63.352 -		rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
  63.353 -
  63.354 -	return rsdp_phys;
  63.355 -}
  63.356 -
  63.357 -
  63.358 -/*
  63.359 - * acpi_boot_init()
  63.360 - *  called from setup_arch(), always.
  63.361 - *	1. maps ACPI tables for later use
  63.362 - *	2. enumerates lapics
  63.363 - *	3. enumerates io-apics
  63.364 - *
  63.365 - * side effects:
  63.366 - * 	acpi_lapic = 1 if LAPIC found
  63.367 - *	acpi_ioapic = 1 if IOAPIC found
  63.368 - *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
  63.369 - *	if acpi_blacklisted() disable_acpi()
  63.370 - *	acpi_irq_model=...
  63.371 - *	...
  63.372 - *
  63.373 - * return value: (currently ignored)
  63.374 - *	0: success
  63.375 - *	!0: failure
  63.376 - */
  63.377 -int __init
  63.378 -acpi_boot_init (void)
  63.379 -{
  63.380 -	int			result = 0;
  63.381 -
  63.382 -	if (acpi_disabled && !acpi_ht)
  63.383 -		return(1);
  63.384 -
  63.385 -	/*
  63.386 -	 * The default interrupt routing model is PIC (8259).  This gets
  63.387 -	 * overriden if IOAPICs are enumerated (below).
  63.388 -	 */
  63.389 -	acpi_irq_model = ACPI_IRQ_MODEL_PIC;
  63.390 -
  63.391 -	/* 
  63.392 -	 * Initialize the ACPI boot-time table parser.
  63.393 -	 */
  63.394 -	result = acpi_table_init();
  63.395 -	if (result) {
  63.396 -		disable_acpi();
  63.397 -		return result;
  63.398 -	}
  63.399 -
  63.400 -	result = acpi_blacklisted();
  63.401 -	if (result) {
  63.402 -		printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
  63.403 -		disable_acpi();
  63.404 -		return result;
  63.405 -	}
  63.406 -
  63.407 -#ifdef CONFIG_X86_LOCAL_APIC
  63.408 -
  63.409 -	/* 
  63.410 -	 * MADT
  63.411 -	 * ----
  63.412 -	 * Parse the Multiple APIC Description Table (MADT), if exists.
  63.413 -	 * Note that this table provides platform SMP configuration 
  63.414 -	 * information -- the successor to MPS tables.
  63.415 -	 */
  63.416 -
  63.417 -	result = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
  63.418 -	if (!result) {
  63.419 -		return 0;
  63.420 -	}
  63.421 -	else if (result < 0) {
  63.422 -		printk(KERN_ERR PREFIX "Error parsing MADT\n");
  63.423 -		return result;
  63.424 -	}
  63.425 -	else if (result > 1) 
  63.426 -		printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n");
  63.427 -
  63.428 -	/* 
  63.429 -	 * Local APIC
  63.430 -	 * ----------
  63.431 -	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
  63.432 -	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
  63.433 -	 */
  63.434 -
  63.435 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr);
  63.436 -	if (result < 0) {
  63.437 -		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
  63.438 -		return result;
  63.439 -	}
  63.440 -
  63.441 -	mp_register_lapic_address(acpi_lapic_addr);
  63.442 -
  63.443 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic);
  63.444 -	if (!result) { 
  63.445 -		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
  63.446 -		/* TBD: Cleanup to allow fallback to MPS */
  63.447 -		return -ENODEV;
  63.448 -	}
  63.449 -	else if (result < 0) {
  63.450 -		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
  63.451 -		/* TBD: Cleanup to allow fallback to MPS */
  63.452 -		return result;
  63.453 -	}
  63.454 -
  63.455 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi);
  63.456 -	if (result < 0) {
  63.457 -		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
  63.458 -		/* TBD: Cleanup to allow fallback to MPS */
  63.459 -		return result;
  63.460 -	}
  63.461 -
  63.462 -	acpi_lapic = 1;
  63.463 -
  63.464 -#endif /*CONFIG_X86_LOCAL_APIC*/
  63.465 -
  63.466 -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
  63.467 -
  63.468 -	/* 
  63.469 -	 * I/O APIC 
  63.470 -	 * --------
  63.471 -	 */
  63.472 -
  63.473 -	/*
  63.474 -	 * ACPI interpreter is required to complete interrupt setup,
  63.475 -	 * so if it is off, don't enumerate the io-apics with ACPI.
  63.476 -	 * If MPS is present, it will handle them,
  63.477 -	 * otherwise the system will stay in PIC mode
  63.478 -	 */
  63.479 -	if (acpi_disabled || acpi_noirq) {
  63.480 -		return 1;
  63.481 -	}
  63.482 -
  63.483 -	/*
  63.484 -	 * if "noapic" boot option, don't look for IO-APICs
  63.485 -	 */
  63.486 -	if (ioapic_setup_disabled()) {
  63.487 -		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
  63.488 -			"due to 'noapic' option.\n");
  63.489 -		return 1;
  63.490 -        }
  63.491 -
  63.492 -
  63.493 -	result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic);
  63.494 -	if (!result) { 
  63.495 -		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
  63.496 -		return -ENODEV;
  63.497 -	}
  63.498 -	else if (result < 0) {
  63.499 -		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
  63.500 -		return result;
  63.501 -	}
  63.502 -
  63.503 -	/* Build a default routing table for legacy (ISA) interrupts. */
  63.504 -	mp_config_acpi_legacy_irqs();
  63.505 -
  63.506 -	/* Record sci_int for use when looking for MADT sci_int override */
  63.507 -	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
  63.508 -
  63.509 -	result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr);
  63.510 -	if (result < 0) {
  63.511 -		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
  63.512 -		/* TBD: Cleanup to allow fallback to MPS */
  63.513 -		return result;
  63.514 -	}
  63.515 -
  63.516 -	/*
  63.517 -	 * If BIOS did not supply an INT_SRC_OVR for the SCI
  63.518 -	 * pretend we got one so we can set the SCI flags.
  63.519 -	 */
  63.520 -	if (!acpi_sci_override_gsi)
  63.521 -		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
  63.522 -
  63.523 -	result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src);
  63.524 -	if (result < 0) {
  63.525 -		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
  63.526 -		/* TBD: Cleanup to allow fallback to MPS */
  63.527 -		return result;
  63.528 -	}
  63.529 -
  63.530 -	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
  63.531 -
  63.532 -	acpi_irq_balance_set(NULL);
  63.533 -
  63.534 -	acpi_ioapic = 1;
  63.535 -
  63.536 -	if (acpi_lapic && acpi_ioapic)
  63.537 -		smp_found_config = 1;
  63.538 -
  63.539 -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
  63.540 -
  63.541 -	return 0;
  63.542 -}
  63.543 -
  63.544 -
  63.545 -#ifdef	CONFIG_ACPI_BUS
  63.546 -/*
  63.547 - * acpi_pic_sci_set_trigger()
  63.548 - *
  63.549 - * use ELCR to set PIC-mode trigger type for SCI
  63.550 - *
  63.551 - * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
  63.552 - * it may require Edge Trigger -- use "acpi_sci=edge"
  63.553 - *
  63.554 - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
  63.555 - * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
  63.556 - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
  63.557 - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
  63.558 - */
  63.559 -
  63.560 -void __init
  63.561 -acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
  63.562 -{
  63.563 -	unsigned char mask = 1 << (irq & 7);
  63.564 -	unsigned int port = 0x4d0 + (irq >> 3);
  63.565 -	unsigned char val = inb(port);
  63.566 -
  63.567 -
  63.568 -	printk(PREFIX "IRQ%d SCI:", irq);
  63.569 -	if (!(val & mask)) {
  63.570 -		printk(" Edge");
  63.571 -
  63.572 -		if (trigger == 3) {
  63.573 -			printk(" set to Level");
  63.574 -			outb(val | mask, port);
  63.575 -		}
  63.576 -	} else {
  63.577 -		printk(" Level");
  63.578 -
  63.579 -		if (trigger == 1) {
  63.580 -			printk(" set to Edge");
  63.581 -			outb(val & ~mask, port);
  63.582 -		}
  63.583 -	}
  63.584 -	printk(" Trigger.\n");
  63.585 -}
  63.586 -
  63.587 -#endif /* CONFIG_ACPI_BUS */
  63.588 -
  63.589 -
  63.590 -/* --------------------------------------------------------------------------
  63.591 -                              Low-Level Sleep Support
  63.592 -   -------------------------------------------------------------------------- */
  63.593 -
  63.594 -#ifdef CONFIG_ACPI_SLEEP
  63.595 -
  63.596 -#define DEBUG
  63.597 -
  63.598 -#ifdef DEBUG
  63.599 -#include <xen/serial.h>
  63.600 -#endif
  63.601 -
  63.602 -/* address in low memory of the wakeup routine. */
  63.603 -unsigned long acpi_wakeup_address = 0;
  63.604 -
  63.605 -/* new page directory that we will be using */
  63.606 -static pmd_t *pmd;
  63.607 -
  63.608 -/* saved page directory */
  63.609 -static pmd_t saved_pmd;
  63.610 -
  63.611 -/* page which we'll use for the new page directory */
  63.612 -static pte_t *ptep;
  63.613 -
  63.614 -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
  63.615 -
  63.616 -/*
  63.617 - * acpi_create_identity_pmd
  63.618 - *
  63.619 - * Create a new, identity mapped pmd.
  63.620 - *
  63.621 - * Do this by creating new page directory, and marking all the pages as R/W
  63.622 - * Then set it as the new Page Middle Directory.
  63.623 - * And, of course, flush the TLB so it takes effect.
  63.624 - *
  63.625 - * We save the address of the old one, for later restoration.
  63.626 - */
  63.627 -static void acpi_create_identity_pmd (void)
  63.628 -{
  63.629 -	pgd_t *pgd;
  63.630 -	int i;
  63.631 -
  63.632 -	ptep = (pte_t*)__get_free_page(GFP_KERNEL);
  63.633 -
  63.634 -	/* fill page with low mapping */
  63.635 -	for (i = 0; i < PTRS_PER_PTE; i++)
  63.636 -		set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED));
  63.637 -
  63.638 -	pgd = pgd_offset(current->active_mm, 0);
  63.639 -	pmd = pmd_alloc(current->mm,pgd, 0);
  63.640 -
  63.641 -	/* save the old pmd */
  63.642 -	saved_pmd = *pmd;
  63.643 -
  63.644 -	/* set the new one */
  63.645 -	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep)));
  63.646 -
  63.647 -	/* flush the TLB */
  63.648 -	local_flush_tlb();
  63.649 -}
  63.650 -
  63.651 -/*
  63.652 - * acpi_restore_pmd
  63.653 - *
  63.654 - * Restore the old pmd saved by acpi_create_identity_pmd and
  63.655 - * free the page that said function alloc'd
  63.656 - */
  63.657 -static void acpi_restore_pmd (void)
  63.658 -{
  63.659 -	set_pmd(pmd, saved_pmd);
  63.660 -	local_flush_tlb();
  63.661 -	free_page((unsigned long)ptep);
  63.662 -}
  63.663 -
  63.664 -/**
  63.665 - * acpi_save_state_mem - save kernel state
  63.666 - *
  63.667 - * Create an identity mapped page table and copy the wakeup routine to
  63.668 - * low memory.
  63.669 - */
  63.670 -int acpi_save_state_mem (void)
  63.671 -{
  63.672 -	acpi_create_identity_pmd();
  63.673 -	acpi_copy_wakeup_routine(acpi_wakeup_address);
  63.674 -
  63.675 -	return 0;
  63.676 -}
  63.677 -
  63.678 -/**
  63.679 - * acpi_save_state_disk - save kernel state to disk
  63.680 - *
  63.681 - */
  63.682 -int acpi_save_state_disk (void)
  63.683 -{
  63.684 -	return 1;
  63.685 -}
  63.686 -
  63.687 -/*
  63.688 - * acpi_restore_state
  63.689 - */
  63.690 -void acpi_restore_state_mem (void)
  63.691 -{
  63.692 -	acpi_restore_pmd();
  63.693 -}
  63.694 -
  63.695 -/**
  63.696 - * acpi_reserve_bootmem - do _very_ early ACPI initialisation
  63.697 - *
  63.698 - * We allocate a page in low memory for the wakeup
  63.699 - * routine for when we come back from a sleep state. The
  63.700 - * runtime allocator allows specification of <16M pages, but not
  63.701 - * <1M pages.
  63.702 - */
  63.703 -void __init acpi_reserve_bootmem(void)
  63.704 -{
  63.705 -	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
  63.706 -	if (!acpi_wakeup_address)
  63.707 -		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
  63.708 -}
  63.709 -
  63.710 -void do_suspend_lowlevel_s4bios(int resume)
  63.711 -{
  63.712 -	if (!resume) {
  63.713 -		save_processor_context();
  63.714 -		acpi_save_register_state((unsigned long)&&acpi_sleep_done);
  63.715 -		acpi_enter_sleep_state_s4bios();
  63.716 -		return;
  63.717 -	}
  63.718 -acpi_sleep_done:
  63.719 -	restore_processor_context();
  63.720 -}
  63.721 -
  63.722 -
  63.723 -#endif /*CONFIG_ACPI_SLEEP*/
  63.724 -
    64.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    64.2 +++ b/xen/arch/x86/acpi/boot.c	Tue May 10 08:49:59 2005 +0000
    64.3 @@ -0,0 +1,912 @@
    64.4 +/*
    64.5 + *  boot.c - Architecture-Specific Low-Level ACPI Boot Support
    64.6 + *
    64.7 + *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
    64.8 + *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
    64.9 + *
   64.10 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   64.11 + *
   64.12 + *  This program is free software; you can redistribute it and/or modify
   64.13 + *  it under the terms of the GNU General Public License as published by
   64.14 + *  the Free Software Foundation; either version 2 of the License, or
   64.15 + *  (at your option) any later version.
   64.16 + *
   64.17 + *  This program is distributed in the hope that it will be useful,
   64.18 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   64.19 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   64.20 + *  GNU General Public License for more details.
   64.21 + *
   64.22 + *  You should have received a copy of the GNU General Public License
   64.23 + *  along with this program; if not, write to the Free Software
   64.24 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   64.25 + *
   64.26 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   64.27 + */
   64.28 +
   64.29 +#include <xen/config.h>
   64.30 +#include <xen/errno.h>
   64.31 +#include <xen/init.h>
   64.32 +#include <xen/acpi.h>
   64.33 +#include <xen/irq.h>
   64.34 +#include <xen/sched.h>
   64.35 +#include <asm/page.h>
   64.36 +#include <asm/apic.h>
   64.37 +#include <asm/io_apic.h>
   64.38 +#include <asm/apic.h>
   64.39 +#include <asm/io.h>
   64.40 +#include <asm/irq.h>
   64.41 +#include <asm/mpspec.h>
   64.42 +
   64.43 +int sbf_port; /* XXX XEN */
   64.44 +
   64.45 +#ifdef	CONFIG_X86_64
   64.46 +
   64.47 +static inline void  acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
   64.48 +extern void __init clustered_apic_check(void);
   64.49 +static inline int ioapic_setup_disabled(void) { return 0; }
   64.50 +#include <asm/proto.h>
   64.51 +
   64.52 +#else	/* X86 */
   64.53 +
   64.54 +#ifdef	CONFIG_X86_LOCAL_APIC
   64.55 +#include <mach_apic.h>
   64.56 +#include <mach_mpparse.h>
   64.57 +#endif	/* CONFIG_X86_LOCAL_APIC */
   64.58 +
   64.59 +#endif	/* X86 */
   64.60 +
   64.61 +#define BAD_MADT_ENTRY(entry, end) (					    \
   64.62 +		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
   64.63 +		((acpi_table_entry_header *)entry)->length != sizeof(*entry))
   64.64 +
   64.65 +#define PREFIX			"ACPI: "
   64.66 +
   64.67 +#ifdef CONFIG_ACPI_PCI
   64.68 +int acpi_noirq __initdata;	/* skip ACPI IRQ initialization */
   64.69 +int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
   64.70 +#else
   64.71 +int acpi_noirq __initdata = 1;
   64.72 +int acpi_pci_disabled __initdata = 1;
   64.73 +#endif
   64.74 +int acpi_ht __initdata = 1;	/* enable HT */
   64.75 +
   64.76 +int acpi_lapic;
   64.77 +int acpi_ioapic;
   64.78 +int acpi_strict;
   64.79 +EXPORT_SYMBOL(acpi_strict);
   64.80 +
   64.81 +acpi_interrupt_flags acpi_sci_flags __initdata;
   64.82 +int acpi_sci_override_gsi __initdata;
   64.83 +int acpi_skip_timer_override __initdata;
   64.84 +
   64.85 +#ifdef CONFIG_X86_LOCAL_APIC
   64.86 +static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
   64.87 +#endif
   64.88 +
   64.89 +#ifndef __HAVE_ARCH_CMPXCHG
   64.90 +#warning ACPI uses CMPXCHG, i486 and later hardware
   64.91 +#endif
   64.92 +
   64.93 +#define MAX_MADT_ENTRIES	256
   64.94 +u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
   64.95 +			{ [0 ... MAX_MADT_ENTRIES-1] = 0xff };
   64.96 +EXPORT_SYMBOL(x86_acpiid_to_apicid);
   64.97 +
   64.98 +/* --------------------------------------------------------------------------
   64.99 +                              Boot-time Configuration
  64.100 +   -------------------------------------------------------------------------- */
  64.101 +
  64.102 +/*
  64.103 + * The default interrupt routing model is PIC (8259).  This gets
  64.104 + * overriden if IOAPICs are enumerated (below).
  64.105 + */
  64.106 +enum acpi_irq_model_id		acpi_irq_model = ACPI_IRQ_MODEL_PIC;
  64.107 +
  64.108 +#ifdef	CONFIG_X86_64
  64.109 +
  64.110 +/* rely on all ACPI tables being in the direct mapping */
  64.111 +char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
  64.112 +{
  64.113 +	if (!phys_addr || !size)
  64.114 +	return NULL;
  64.115 +
  64.116 +	if (phys_addr < (end_pfn_map << PAGE_SHIFT))
  64.117 +		return __va(phys_addr);
  64.118 +
  64.119 +	return NULL;
  64.120 +}
  64.121 +
  64.122 +#else
  64.123 +
  64.124 +/*
  64.125 + * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
  64.126 + * to map the target physical address. The problem is that set_fixmap()
  64.127 + * provides a single page, and it is possible that the page is not
  64.128 + * sufficient.
  64.129 + * By using this area, we can map up to MAX_IO_APICS pages temporarily,
  64.130 + * i.e. until the next __va_range() call.
  64.131 + *
  64.132 + * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
  64.133 + * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
  64.134 + * count idx down while incrementing the phys address.
  64.135 + */
  64.136 +char *__acpi_map_table(unsigned long phys, unsigned long size)
  64.137 +{
  64.138 +	unsigned long base, offset, mapped_size;
  64.139 +	int idx;
  64.140 +
  64.141 +	if (phys + size < 8*1024*1024) 
  64.142 +		return __va(phys); 
  64.143 +
  64.144 +	offset = phys & (PAGE_SIZE - 1);
  64.145 +	mapped_size = PAGE_SIZE - offset;
  64.146 +	set_fixmap(FIX_ACPI_END, phys);
  64.147 +	base = fix_to_virt(FIX_ACPI_END);
  64.148 +
  64.149 +	/*
  64.150 +	 * Most cases can be covered by the below.
  64.151 +	 */
  64.152 +	idx = FIX_ACPI_END;
  64.153 +	while (mapped_size < size) {
  64.154 +		if (--idx < FIX_ACPI_BEGIN)
  64.155 +			return NULL;	/* cannot handle this */
  64.156 +		phys += PAGE_SIZE;
  64.157 +		set_fixmap(idx, phys);
  64.158 +		mapped_size += PAGE_SIZE;
  64.159 +	}
  64.160 +
  64.161 +	return ((unsigned char *) base + offset);
  64.162 +}
  64.163 +#endif
  64.164 +
  64.165 +#ifdef CONFIG_PCI_MMCONFIG
  64.166 +static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
  64.167 +{
  64.168 +	struct acpi_table_mcfg *mcfg;
  64.169 +
  64.170 +	if (!phys_addr || !size)
  64.171 +		return -EINVAL;
  64.172 +
  64.173 +	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
  64.174 +	if (!mcfg) {
  64.175 +		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
  64.176 +		return -ENODEV;
  64.177 +	}
  64.178 +
  64.179 +	if (mcfg->base_reserved) {
  64.180 +		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
  64.181 +		return -ENODEV;
  64.182 +	}
  64.183 +
  64.184 +	pci_mmcfg_base_addr = mcfg->base_address;
  64.185 +
  64.186 +	return 0;
  64.187 +}
  64.188 +#else
  64.189 +#define	acpi_parse_mcfg NULL
  64.190 +#endif /* !CONFIG_PCI_MMCONFIG */
  64.191 +
  64.192 +#ifdef CONFIG_X86_LOCAL_APIC
  64.193 +static int __init
  64.194 +acpi_parse_madt (
  64.195 +	unsigned long		phys_addr,
  64.196 +	unsigned long		size)
  64.197 +{
  64.198 +	struct acpi_table_madt	*madt = NULL;
  64.199 +
  64.200 +	if (!phys_addr || !size)
  64.201 +		return -EINVAL;
  64.202 +
  64.203 +	madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
  64.204 +	if (!madt) {
  64.205 +		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
  64.206 +		return -ENODEV;
  64.207 +	}
  64.208 +
  64.209 +	if (madt->lapic_address) {
  64.210 +		acpi_lapic_addr = (u64) madt->lapic_address;
  64.211 +
  64.212 +		printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
  64.213 +			madt->lapic_address);
  64.214 +	}
  64.215 +
  64.216 +	acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
  64.217 +	
  64.218 +	return 0;
  64.219 +}
  64.220 +
  64.221 +
  64.222 +static int __init
  64.223 +acpi_parse_lapic (
  64.224 +	acpi_table_entry_header *header, const unsigned long end)
  64.225 +{
  64.226 +	struct acpi_table_lapic	*processor = NULL;
  64.227 +
  64.228 +	processor = (struct acpi_table_lapic*) header;
  64.229 +
  64.230 +	if (BAD_MADT_ENTRY(processor, end))
  64.231 +		return -EINVAL;
  64.232 +
  64.233 +	acpi_table_print_madt_entry(header);
  64.234 +
  64.235 +	/* no utility in registering a disabled processor */
  64.236 +	if (processor->flags.enabled == 0)
  64.237 +		return 0;
  64.238 +
  64.239 +	x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
  64.240 +
  64.241 +	mp_register_lapic (
  64.242 +		processor->id,					   /* APIC ID */
  64.243 +		processor->flags.enabled);			  /* Enabled? */
  64.244 +
  64.245 +	return 0;
  64.246 +}
  64.247 +
  64.248 +static int __init
  64.249 +acpi_parse_lapic_addr_ovr (
  64.250 +	acpi_table_entry_header *header, const unsigned long end)
  64.251 +{
  64.252 +	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
  64.253 +
  64.254 +	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
  64.255 +
  64.256 +	if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
  64.257 +		return -EINVAL;
  64.258 +
  64.259 +	acpi_lapic_addr = lapic_addr_ovr->address;
  64.260 +
  64.261 +	return 0;
  64.262 +}
  64.263 +
  64.264 +static int __init
  64.265 +acpi_parse_lapic_nmi (
  64.266 +	acpi_table_entry_header *header, const unsigned long end)
  64.267 +{
  64.268 +	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
  64.269 +
  64.270 +	lapic_nmi = (struct acpi_table_lapic_nmi*) header;
  64.271 +
  64.272 +	if (BAD_MADT_ENTRY(lapic_nmi, end))
  64.273 +		return -EINVAL;
  64.274 +
  64.275 +	acpi_table_print_madt_entry(header);
  64.276 +
  64.277 +	if (lapic_nmi->lint != 1)
  64.278 +		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
  64.279 +
  64.280 +	return 0;
  64.281 +}
  64.282 +
  64.283 +
  64.284 +#endif /*CONFIG_X86_LOCAL_APIC*/
  64.285 +
  64.286 +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
  64.287 +
  64.288 +static int __init
  64.289 +acpi_parse_ioapic (
  64.290 +	acpi_table_entry_header *header, const unsigned long end)
  64.291 +{
  64.292 +	struct acpi_table_ioapic *ioapic = NULL;
  64.293 +
  64.294 +	ioapic = (struct acpi_table_ioapic*) header;
  64.295 +
  64.296 +	if (BAD_MADT_ENTRY(ioapic, end))
  64.297 +		return -EINVAL;
  64.298 + 
  64.299 +	acpi_table_print_madt_entry(header);
  64.300 +
  64.301 +	mp_register_ioapic (
  64.302 +		ioapic->id,
  64.303 +		ioapic->address,
  64.304 +		ioapic->global_irq_base);
  64.305 + 
  64.306 +	return 0;
  64.307 +}
  64.308 +
  64.309 +/*
  64.310 + * Parse Interrupt Source Override for the ACPI SCI
  64.311 + */
  64.312 +static void
  64.313 +acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
  64.314 +{
  64.315 +	if (trigger == 0)	/* compatible SCI trigger is level */
  64.316 +		trigger = 3;
  64.317 +
  64.318 +	if (polarity == 0)	/* compatible SCI polarity is low */
  64.319 +		polarity = 3;
  64.320 +
  64.321 +	/* Command-line over-ride via acpi_sci= */
  64.322 +	if (acpi_sci_flags.trigger)
  64.323 +		trigger = acpi_sci_flags.trigger;
  64.324 +
  64.325 +	if (acpi_sci_flags.polarity)
  64.326 +		polarity = acpi_sci_flags.polarity;
  64.327 +
  64.328 +	/*
  64.329 + 	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
  64.330 +	 * If GSI is < 16, this will update its flags,
  64.331 +	 * else it will create a new mp_irqs[] entry.
  64.332 +	 */
  64.333 +	mp_override_legacy_irq(gsi, polarity, trigger, gsi);
  64.334 +
  64.335 +	/*
  64.336 +	 * stash over-ride to indicate we've been here
  64.337 +	 * and for later update of acpi_fadt
  64.338 +	 */
  64.339 +	acpi_sci_override_gsi = gsi;
  64.340 +	return;
  64.341 +}
  64.342 +
  64.343 +static int __init
  64.344 +acpi_parse_int_src_ovr (
  64.345 +	acpi_table_entry_header *header, const unsigned long end)
  64.346 +{
  64.347 +	struct acpi_table_int_src_ovr *intsrc = NULL;
  64.348 +
  64.349 +	intsrc = (struct acpi_table_int_src_ovr*) header;
  64.350 +
  64.351 +	if (BAD_MADT_ENTRY(intsrc, end))
  64.352 +		return -EINVAL;
  64.353 +
  64.354 +	acpi_table_print_madt_entry(header);
  64.355 +
  64.356 +	if (intsrc->bus_irq == acpi_fadt.sci_int) {
  64.357 +		acpi_sci_ioapic_setup(intsrc->global_irq,
  64.358 +			intsrc->flags.polarity, intsrc->flags.trigger);
  64.359 +		return 0;
  64.360 +	}
  64.361 +
  64.362 +	if (acpi_skip_timer_override &&
  64.363 +		intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
  64.364 +			printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
  64.365 +			return 0;
  64.366 +	}
  64.367 +
  64.368 +	mp_override_legacy_irq (
  64.369 +		intsrc->bus_irq,
  64.370 +		intsrc->flags.polarity,
  64.371 +		intsrc->flags.trigger,
  64.372 +		intsrc->global_irq);
  64.373 +
  64.374 +	return 0;
  64.375 +}
  64.376 +
  64.377 +
  64.378 +static int __init
  64.379 +acpi_parse_nmi_src (
  64.380 +	acpi_table_entry_header *header, const unsigned long end)
  64.381 +{
  64.382 +	struct acpi_table_nmi_src *nmi_src = NULL;
  64.383 +
  64.384 +	nmi_src = (struct acpi_table_nmi_src*) header;
  64.385 +
  64.386 +	if (BAD_MADT_ENTRY(nmi_src, end))
  64.387 +		return -EINVAL;
  64.388 +
  64.389 +	acpi_table_print_madt_entry(header);
  64.390 +
  64.391 +	/* TBD: Support NMI src entries? */
  64.392 +
  64.393 +	return 0;
  64.394 +}
  64.395 +
  64.396 +#endif /* CONFIG_X86_IO_APIC */
  64.397 +
  64.398 +#ifdef	CONFIG_ACPI_BUS
  64.399 +
  64.400 +/*
  64.401 + * acpi_pic_sci_set_trigger()
  64.402 + * 
  64.403 + * use ELCR to set PIC-mode trigger type for SCI
  64.404 + *
  64.405 + * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
  64.406 + * it may require Edge Trigger -- use "acpi_sci=edge"
  64.407 + *
  64.408 + * Port 0x4d0-4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers
  64.409 + * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
  64.410 + * ELCR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
  64.411 + * ELCR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
  64.412 + */
  64.413 +
  64.414 +void __init
  64.415 +acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
  64.416 +{
  64.417 +	unsigned int mask = 1 << irq;
  64.418 +	unsigned int old, new;
  64.419 +
  64.420 +	/* Read old ELCR mask */
  64.421 +	old = inb(0x4d0) | (inb(0x4d1) << 8);
  64.422 +
  64.423 +	/*
  64.424 +	 * If we use ACPI to set PCI irq's, then we should clear ELCR
  64.425 +	 * since we will set it correctly as we enable the PCI irq
  64.426 +	 * routing.
  64.427 +	 */
  64.428 +	new = acpi_noirq ? old : 0;
  64.429 +
  64.430 +	/*
  64.431 +	 * Update SCI information in the ELCR, it isn't in the PCI
  64.432 +	 * routing tables..
  64.433 +	 */
  64.434 +	switch (trigger) {
  64.435 +	case 1:	/* Edge - clear */
  64.436 +		new &= ~mask;
  64.437 +		break;
  64.438 +	case 3: /* Level - set */
  64.439 +		new |= mask;
  64.440 +		break;
  64.441 +	}
  64.442 +
  64.443 +	if (old == new)
  64.444 +		return;
  64.445 +
  64.446 +	printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
  64.447 +	outb(new, 0x4d0);
  64.448 +	outb(new >> 8, 0x4d1);
  64.449 +}
  64.450 +
  64.451 +
  64.452 +#endif /* CONFIG_ACPI_BUS */
  64.453 +
  64.454 +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  64.455 +{
  64.456 +#ifdef CONFIG_X86_IO_APIC
  64.457 +	if (use_pci_vector() && !platform_legacy_irq(gsi))
  64.458 + 		*irq = IO_APIC_VECTOR(gsi);
  64.459 +	else
  64.460 +#endif
  64.461 +		*irq = gsi;
  64.462 +	return 0;
  64.463 +}
  64.464 +
  64.465 +unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
  64.466 +{
  64.467 +	unsigned int irq;
  64.468 +	unsigned int plat_gsi = gsi;
  64.469 +
  64.470 +#ifdef CONFIG_PCI
  64.471 +	/*
  64.472 +	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
  64.473 +	 */
  64.474 +	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
  64.475 +		extern void eisa_set_level_irq(unsigned int irq);
  64.476 +
  64.477 +		if (edge_level == ACPI_LEVEL_SENSITIVE)
  64.478 +				eisa_set_level_irq(gsi);
  64.479 +	}
  64.480 +#endif
  64.481 +
  64.482 +#ifdef CONFIG_X86_IO_APIC
  64.483 +	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
  64.484 +		plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
  64.485 +	}
  64.486 +#endif
  64.487 +	acpi_gsi_to_irq(plat_gsi, &irq);
  64.488 +	return irq;
  64.489 +}
  64.490 +EXPORT_SYMBOL(acpi_register_gsi);
  64.491 +
  64.492 +/*
  64.493 + *  ACPI based hotplug support for CPU
  64.494 + */
  64.495 +#ifdef CONFIG_ACPI_HOTPLUG_CPU
  64.496 +int
  64.497 +acpi_map_lsapic(acpi_handle handle, int *pcpu)
  64.498 +{
  64.499 +	/* TBD */
  64.500 +	return -EINVAL;
  64.501 +}
  64.502 +EXPORT_SYMBOL(acpi_map_lsapic);
  64.503 +
  64.504 +
  64.505 +int
  64.506 +acpi_unmap_lsapic(int cpu)
  64.507 +{
  64.508 +	/* TBD */
  64.509 +	return -EINVAL;
  64.510 +}
  64.511 +EXPORT_SYMBOL(acpi_unmap_lsapic);
  64.512 +#endif /* CONFIG_ACPI_HOTPLUG_CPU */
  64.513 +
  64.514 +static unsigned long __init
  64.515 +acpi_scan_rsdp (
  64.516 +	unsigned long		start,
  64.517 +	unsigned long		length)
  64.518 +{
  64.519 +	unsigned long		offset = 0;
  64.520 +	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
  64.521 +
  64.522 +	/*
  64.523 +	 * Scan all 16-byte boundaries of the physical memory region for the
  64.524 +	 * RSDP signature.
  64.525 +	 */
  64.526 +	for (offset = 0; offset < length; offset += 16) {
  64.527 +		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
  64.528 +			continue;
  64.529 +		return (start + offset);
  64.530 +	}
  64.531 +
  64.532 +	return 0;
  64.533 +}
  64.534 +
  64.535 +static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
  64.536 +{
  64.537 +	struct acpi_table_sbf *sb;
  64.538 +
  64.539 +	if (!phys_addr || !size)
  64.540 +	return -EINVAL;
  64.541 +
  64.542 +	sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
  64.543 +	if (!sb) {
  64.544 +		printk(KERN_WARNING PREFIX "Unable to map SBF\n");
  64.545 +		return -ENODEV;
  64.546 +	}
  64.547 +
  64.548 +	sbf_port = sb->sbf_cmos; /* Save CMOS port */
  64.549 +
  64.550 +	return 0;
  64.551 +}
  64.552 +
  64.553 +
  64.554 +#ifdef CONFIG_HPET_TIMER
  64.555 +
  64.556 +static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
  64.557 +{
  64.558 +	struct acpi_table_hpet *hpet_tbl;
  64.559 +
  64.560 +	if (!phys || !size)
  64.561 +		return -EINVAL;
  64.562 +
  64.563 +	hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
  64.564 +	if (!hpet_tbl) {
  64.565 +		printk(KERN_WARNING PREFIX "Unable to map HPET\n");
  64.566 +		return -ENODEV;
  64.567 +	}
  64.568 +
  64.569 +	if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
  64.570 +		printk(KERN_WARNING PREFIX "HPET timers must be located in "
  64.571 +		       "memory.\n");
  64.572 +		return -1;
  64.573 +	}
  64.574 +
  64.575 +#ifdef	CONFIG_X86_64
  64.576 +        vxtime.hpet_address = hpet_tbl->addr.addrl |
  64.577 +                ((long) hpet_tbl->addr.addrh << 32);
  64.578 +
  64.579 +        printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
  64.580 +               hpet_tbl->id, vxtime.hpet_address);
  64.581 +#else	/* X86 */
  64.582 +	{
  64.583 +		extern unsigned long hpet_address;
  64.584 +
  64.585 +		hpet_address = hpet_tbl->addr.addrl;
  64.586 +		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
  64.587 +			hpet_tbl->id, hpet_address);
  64.588 +	}
  64.589 +#endif	/* X86 */
  64.590 +
  64.591 +	return 0;
  64.592 +}
  64.593 +#else
  64.594 +#define	acpi_parse_hpet	NULL
  64.595 +#endif
  64.596 +
  64.597 +#ifdef CONFIG_X86_PM_TIMER
  64.598 +extern u32 pmtmr_ioport;
  64.599 +#endif
  64.600 +
  64.601 +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
  64.602 +{
  64.603 +	struct fadt_descriptor_rev2 *fadt = NULL;
  64.604 +
  64.605 +	fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
  64.606 +	if(!fadt) {
  64.607 +		printk(KERN_WARNING PREFIX "Unable to map FADT\n");
  64.608 +		return 0;
  64.609 +	}
  64.610 +
  64.611 +#ifdef	CONFIG_ACPI_INTERPRETER
  64.612 +	/* initialize sci_int early for INT_SRC_OVR MADT parsing */
  64.613 +	acpi_fadt.sci_int = fadt->sci_int;
  64.614 +#endif
  64.615 +
  64.616 +#ifdef CONFIG_X86_PM_TIMER
  64.617 +	/* detect the location of the ACPI PM Timer */
  64.618 +	if (fadt->revision >= FADT2_REVISION_ID) {
  64.619 +		/* FADT rev. 2 */
  64.620 +		if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
  64.621 +			return 0;
  64.622 +
  64.623 +		pmtmr_ioport = fadt->xpm_tmr_blk.address;
  64.624 +	} else {
  64.625 +		/* FADT rev. 1 */
  64.626 +		pmtmr_ioport = fadt->V1_pm_tmr_blk;
  64.627 +	}
  64.628 +	if (pmtmr_ioport)
  64.629 +		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
  64.630 +#endif
  64.631 +	return 0;
  64.632 +}
  64.633 +
  64.634 +
  64.635 +unsigned long __init
  64.636 +acpi_find_rsdp (void)
  64.637 +{
  64.638 +	unsigned long		rsdp_phys = 0;
  64.639 +
  64.640 +#if 0
  64.641 +	if (efi_enabled) {
  64.642 +		if (efi.acpi20)
  64.643 +			return __pa(efi.acpi20);
  64.644 +		else if (efi.acpi)
  64.645 +			return __pa(efi.acpi);
  64.646 +	}
  64.647 +#endif
  64.648 +	/*
  64.649 +	 * Scan memory looking for the RSDP signature. First search EBDA (low
  64.650 +	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
  64.651 +	 */
  64.652 +	rsdp_phys = acpi_scan_rsdp (0, 0x400);
  64.653 +	if (!rsdp_phys)
  64.654 +		rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF);
  64.655 +
  64.656 +	return rsdp_phys;
  64.657 +}
  64.658 +
  64.659 +#ifdef	CONFIG_X86_LOCAL_APIC
  64.660 +/*
  64.661 + * Parse LAPIC entries in MADT
  64.662 + * returns 0 on success, < 0 on error
  64.663 + */
  64.664 +static int __init
  64.665 +acpi_parse_madt_lapic_entries(void)
  64.666 +{
  64.667 +	int count;
  64.668 +
  64.669 +	/* 
  64.670 +	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
  64.671 +	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
  64.672 +	 */
  64.673 +
  64.674 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
  64.675 +	if (count < 0) {
  64.676 +		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
  64.677 +		return count;
  64.678 +	}
  64.679 +
  64.680 +	mp_register_lapic_address(acpi_lapic_addr);
  64.681 +
  64.682 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
  64.683 +				       MAX_APICS);
  64.684 +	if (!count) { 
  64.685 +		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
  64.686 +		/* TBD: Cleanup to allow fallback to MPS */
  64.687 +		return -ENODEV;
  64.688 +	}
  64.689 +	else if (count < 0) {
  64.690 +		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
  64.691 +		/* TBD: Cleanup to allow fallback to MPS */
  64.692 +		return count;
  64.693 +	}
  64.694 +
  64.695 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
  64.696 +	if (count < 0) {
  64.697 +		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
  64.698 +		/* TBD: Cleanup to allow fallback to MPS */
  64.699 +		return count;
  64.700 +	}
  64.701 +	return 0;
  64.702 +}
  64.703 +#endif /* CONFIG_X86_LOCAL_APIC */
  64.704 +
  64.705 +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
  64.706 +/*
  64.707 + * Parse IOAPIC related entries in MADT
  64.708 + * returns 0 on success, < 0 on error
  64.709 + */
  64.710 +static int __init
  64.711 +acpi_parse_madt_ioapic_entries(void)
  64.712 +{
  64.713 +	int count;
  64.714 +
  64.715 +	/*
  64.716 +	 * ACPI interpreter is required to complete interrupt setup,
  64.717 +	 * so if it is off, don't enumerate the io-apics with ACPI.
  64.718 +	 * If MPS is present, it will handle them,
  64.719 +	 * otherwise the system will stay in PIC mode
  64.720 +	 */
  64.721 +	if (acpi_disabled || acpi_noirq) {
  64.722 +		return -ENODEV;
  64.723 +        }
  64.724 +
  64.725 +	/*
  64.726 + 	 * if "noapic" boot option, don't look for IO-APICs
  64.727 +	 */
  64.728 +	if (skip_ioapic_setup) {
  64.729 +		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
  64.730 +			"due to 'noapic' option.\n");
  64.731 +		return -ENODEV;
  64.732 +	}
  64.733 +
  64.734 +	count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
  64.735 +	if (!count) {
  64.736 +		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
  64.737 +		return -ENODEV;
  64.738 +	}
  64.739 +	else if (count < 0) {
  64.740 +		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
  64.741 +		return count;
  64.742 +	}
  64.743 +
  64.744 +	count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
  64.745 +	if (count < 0) {
  64.746 +		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
  64.747 +		/* TBD: Cleanup to allow fallback to MPS */
  64.748 +		return count;
  64.749 +	}
  64.750 +
  64.751 +	/*
  64.752 +	 * If BIOS did not supply an INT_SRC_OVR for the SCI
  64.753 +	 * pretend we got one so we can set the SCI flags.
  64.754 +	 */
  64.755 +	if (!acpi_sci_override_gsi)
  64.756 +		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
  64.757 +
  64.758 +	/* Fill in identity legacy mappings where no override */
  64.759 +	mp_config_acpi_legacy_irqs();
  64.760 +
  64.761 +	count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
  64.762 +	if (count < 0) {
  64.763 +		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
  64.764 +		/* TBD: Cleanup to allow fallback to MPS */
  64.765 +		return count;
  64.766 +	}
  64.767 +
  64.768 +	return 0;
  64.769 +}
  64.770 +#else
  64.771 +static inline int acpi_parse_madt_ioapic_entries(void)
  64.772 +{
  64.773 +	return -1;
  64.774 +}
  64.775 +#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */
  64.776 +
  64.777 +
  64.778 +static void __init
  64.779 +acpi_process_madt(void)
  64.780 +{
  64.781 +#ifdef CONFIG_X86_LOCAL_APIC
  64.782 +	int count, error;
  64.783 +
  64.784 +	count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
  64.785 +	if (count >= 1) {
  64.786 +
  64.787 +		/*
  64.788 +		 * Parse MADT LAPIC entries
  64.789 +		 */
  64.790 +		error = acpi_parse_madt_lapic_entries();
  64.791 +		if (!error) {
  64.792 +			acpi_lapic = 1;
  64.793 +
  64.794 +			/*
  64.795 +			 * Parse MADT IO-APIC entries
  64.796 +			 */
  64.797 +			error = acpi_parse_madt_ioapic_entries();
  64.798 +			if (!error) {
  64.799 +				acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
  64.800 +				acpi_irq_balance_set(NULL);
  64.801 +				acpi_ioapic = 1;
  64.802 +
  64.803 +				smp_found_config = 1;
  64.804 +				clustered_apic_check();
  64.805 +			}
  64.806 +		}
  64.807 +		if (error == -EINVAL) {
  64.808 +			/*
  64.809 +			 * Dell Precision Workstation 410, 610 come here.
  64.810 +			 */
  64.811 +			printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n");
  64.812 +			disable_acpi();
  64.813 +		}
  64.814 +	}
  64.815 +#endif
  64.816 +	return;
  64.817 +}
  64.818 +
  64.819 +/*
  64.820 + * acpi_boot_table_init() and acpi_boot_init()
  64.821 + *  called from setup_arch(), always.
  64.822 + *	1. checksums all tables
  64.823 + *	2. enumerates lapics
  64.824 + *	3. enumerates io-apics
  64.825 + *
  64.826 + * acpi_table_init() is separate to allow reading SRAT without
  64.827 + * other side effects.
  64.828 + *
  64.829 + * side effects of acpi_boot_init:
  64.830 + *	acpi_lapic = 1 if LAPIC found
  64.831 + *	acpi_ioapic = 1 if IOAPIC found
  64.832 + *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
  64.833 + *	if acpi_blacklisted() acpi_disabled = 1;
  64.834 + *	acpi_irq_model=...
  64.835 + *	...
  64.836 + *
  64.837 + * return value: (currently ignored)
  64.838 + *	0: success
  64.839 + *	!0: failure
  64.840 + */
  64.841 +
  64.842 +int __init
  64.843 +acpi_boot_table_init(void)
  64.844 +{
  64.845 +	int error;
  64.846 +
  64.847 +	/*
  64.848 +	 * If acpi_disabled, bail out
  64.849 +	 * One exception: acpi=ht continues far enough to enumerate LAPICs
  64.850 +	 */
  64.851 +	if (acpi_disabled && !acpi_ht)
  64.852 +		 return 1;
  64.853 +
  64.854 +	/* 
  64.855 +	 * Initialize the ACPI boot-time table parser.
  64.856 +	 */
  64.857 +	error = acpi_table_init();
  64.858 +	if (error) {
  64.859 +		disable_acpi();
  64.860 +		return error;
  64.861 +	}
  64.862 +
  64.863 +#if 0 /*def __i386__*/
  64.864 +	check_acpi_pci();
  64.865 +#endif
  64.866 +
  64.867 +	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  64.868 +
  64.869 +	/*
  64.870 +	 * blacklist may disable ACPI entirely
  64.871 +	 */
  64.872 +	error = acpi_blacklisted();
  64.873 +	if (error) {
  64.874 +		extern int acpi_force;
  64.875 +
  64.876 +		if (acpi_force) {
  64.877 +			printk(KERN_WARNING PREFIX "acpi=force override\n");
  64.878 +		} else {
  64.879 +			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
  64.880 +			disable_acpi();
  64.881 +			return error;
  64.882 +		}
  64.883 +	}
  64.884 +
  64.885 +	return 0;
  64.886 +}
  64.887 +
  64.888 +
  64.889 +int __init acpi_boot_init(void)
  64.890 +{
  64.891 +	/*
  64.892 +	 * If acpi_disabled, bail out
  64.893 +	 * One exception: acpi=ht continues far enough to enumerate LAPICs
  64.894 +	 */
  64.895 +	if (acpi_disabled && !acpi_ht)
  64.896 +		 return 1;
  64.897 +
  64.898 +	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  64.899 +
  64.900 +	/*
  64.901 +	 * set sci_int and PM timer address
  64.902 +	 */
  64.903 +	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
  64.904 +
  64.905 +	/*
  64.906 +	 * Process the Multiple APIC Description Table (MADT), if present
  64.907 +	 */
  64.908 +	acpi_process_madt();
  64.909 +
  64.910 +	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
  64.911 +	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
  64.912 +
  64.913 +	return 0;
  64.914 +}
  64.915 +
    65.1 --- a/xen/arch/x86/apic.c	Tue May 03 14:42:00 2005 +0000
    65.2 +++ b/xen/arch/x86/apic.c	Tue May 10 08:49:59 2005 +0000
    65.3 @@ -34,14 +34,27 @@
    65.4  #include <asm/hardirq.h>
    65.5  #include <asm/apic.h>
    65.6  #include <asm/io_apic.h>
    65.7 -#include <asm/mach_apic.h>
    65.8 -#include <asm/io_ports.h>
    65.9 +#include <mach_apic.h>
   65.10 +#include <io_ports.h>
   65.11  
   65.12  /* Using APIC to generate smp_local_timer_interrupt? */
   65.13  int using_apic_timer = 0;
   65.14  
   65.15 +int apic_verbosity;
   65.16 +
   65.17  static int enabled_via_apicbase;
   65.18  
   65.19 +int get_physical_broadcast(void)
   65.20 +{
   65.21 +    unsigned int lvr, version;
   65.22 +    lvr = apic_read(APIC_LVR);
   65.23 +    version = GET_APIC_VERSION(lvr);
   65.24 +    if (!APIC_INTEGRATED(version) || version >= 0x14)
   65.25 +        return 0xff;
   65.26 +    else
   65.27 +        return 0xf;
   65.28 +}
   65.29 +
   65.30  int get_maxlvt(void)
   65.31  {
   65.32      unsigned int v, ver, maxlvt;
   65.33 @@ -907,7 +920,7 @@ int __init APIC_init_uniprocessor (void)
   65.34  #ifdef CONFIG_SMP
   65.35      cpu_online_map = 1;
   65.36  #endif
   65.37 -    phys_cpu_present_map = 1;
   65.38 +    phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
   65.39      apic_write_around(APIC_ID, boot_cpu_physical_apicid);
   65.40  
   65.41      setup_local_APIC();
    66.1 --- a/xen/arch/x86/audit.c	Tue May 03 14:42:00 2005 +0000
    66.2 +++ b/xen/arch/x86/audit.c	Tue May 10 08:49:59 2005 +0000
    66.3 @@ -49,7 +49,8 @@ static int l1, l2, oos_count, page_count
    66.4  int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
    66.5  {
    66.6      int errors = 0;
    66.7 -    int shadow_enabled = shadow_mode_enabled(d) ? 1 : 0;
    66.8 +    int shadow_refcounts = !!shadow_mode_refcounts(d);
    66.9 +    int shadow_enabled = !!shadow_mode_enabled(d);
   66.10      int l2limit;
   66.11  
   66.12      void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
   66.13 @@ -119,7 +120,7 @@ int audit_adjust_pgtables(struct domain 
   66.14              page->count_info += dir;
   66.15      }
   66.16  
   66.17 -    void adjust_l2_page(unsigned long mfn)
   66.18 +    void adjust_l2_page(unsigned long mfn, int shadow)
   66.19      {
   66.20          unsigned long *pt = map_domain_mem(mfn << PAGE_SHIFT);
   66.21          int i;
   66.22 @@ -133,7 +134,7 @@ int audit_adjust_pgtables(struct domain 
   66.23  
   66.24                  if ( noisy )
   66.25                  {
   66.26 -                    if ( shadow_enabled )
   66.27 +                    if ( shadow )
   66.28                      {
   66.29                          if ( page_get_owner(l1page) != NULL )
   66.30                          {
   66.31 @@ -145,6 +146,17 @@ int audit_adjust_pgtables(struct domain 
   66.32                              errors++;
   66.33                              continue;
   66.34                          }
   66.35 +
   66.36 +                        u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
   66.37 +
   66.38 +                        if ( page_type != PGT_l1_shadow )
   66.39 +                        {
   66.40 +                            printk("Audit %d: [Shadow L2 mfn=%lx i=%x] "
   66.41 +                                   "Expected Shadow L1 t=%x mfn=%lx\n",
   66.42 +                                   d->id, mfn, i,
   66.43 +                                   l1page->u.inuse.type_info, l1mfn);
   66.44 +                            errors++;
   66.45 +                        }
   66.46                      }
   66.47                      else
   66.48                      {
   66.49 @@ -154,7 +166,9 @@ int audit_adjust_pgtables(struct domain 
   66.50                                     "belonging to other dom %p (id=%d)\n",
   66.51                                     l1mfn,
   66.52                                     page_get_owner(l1page),
   66.53 -                                   page_get_owner(l1page)->id);
   66.54 +                                   (page_get_owner(l1page)
   66.55 +                                    ? page_get_owner(l1page)->id
   66.56 +                                    : -1));
   66.57                              errors++;
   66.58                              continue;
   66.59                          }
   66.60 @@ -179,7 +193,7 @@ int audit_adjust_pgtables(struct domain 
   66.61                      }
   66.62                  }
   66.63  
   66.64 -                adjust(l1page, !shadow_enabled);
   66.65 +                adjust(l1page, !shadow);
   66.66              }
   66.67          }
   66.68  
   66.69 @@ -280,7 +294,7 @@ int audit_adjust_pgtables(struct domain 
   66.70                              errors++;
   66.71                          }
   66.72  
   66.73 -                        if ( shadow_enabled &&
   66.74 +                        if ( shadow_refcounts &&
   66.75                               page_is_page_table(gpage) &&
   66.76                               ! page_out_of_sync(gpage) )
   66.77                          {
   66.78 @@ -336,19 +350,21 @@ int audit_adjust_pgtables(struct domain 
   66.79                      break;
   66.80                  case PGT_l1_shadow:
   66.81                      adjust(pfn_to_page(gmfn), 0);
   66.82 -                    adjust_l1_page(smfn);
   66.83 +                    if ( shadow_refcounts )
   66.84 +                        adjust_l1_page(smfn);
   66.85                      if ( page->u.inuse.type_info & PGT_pinned )
   66.86                          adjust(page, 0);
   66.87                      break;
   66.88                  case PGT_hl2_shadow:
   66.89                      adjust(pfn_to_page(gmfn), 0);
   66.90 -                    adjust_hl2_page(smfn);
   66.91 +                    if ( shadow_refcounts )
   66.92 +                        adjust_hl2_page(smfn);
   66.93                      if ( page->u.inuse.type_info & PGT_pinned )
   66.94                          adjust(page, 0);
   66.95                      break;
   66.96                  case PGT_l2_shadow:
   66.97                      adjust(pfn_to_page(gmfn), 0);
   66.98 -                    adjust_l2_page(smfn);
   66.99 +                    adjust_l2_page(smfn, 1);
  66.100                      if ( page->u.inuse.type_info & PGT_pinned )
  66.101                          adjust(page, 0);
  66.102                      break;
  66.103 @@ -391,45 +407,43 @@ int audit_adjust_pgtables(struct domain 
  66.104          struct exec_domain *ed;
  66.105  
  66.106          for_each_exec_domain(d, ed)
  66.107 -            {
  66.108 -                if ( !shadow_enabled )
  66.109 -                {
  66.110 -                    if ( pagetable_val(ed->arch.guest_table) )
  66.111 -                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
  66.112 -                                            >> PAGE_SHIFT], 1);
  66.113 -                }
  66.114 -                else
  66.115 -                {
  66.116 -                    if ( pagetable_val(ed->arch.guest_table) )
  66.117 -                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
  66.118 -                                            >> PAGE_SHIFT], 0);
  66.119 -                    if ( pagetable_val(ed->arch.shadow_table) )
  66.120 -                        adjust(&frame_table[pagetable_val(ed->arch.shadow_table)
  66.121 -                                            >> PAGE_SHIFT], 0);
  66.122 -                    if ( ed->arch.monitor_shadow_ref )
  66.123 -                        adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
  66.124 -                }
  66.125 -            }
  66.126 +        {
  66.127 +            if ( pagetable_val(ed->arch.guest_table) )
  66.128 +                adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1);
  66.129 +            if ( pagetable_val(ed->arch.shadow_table) )
  66.130 +                adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0);
  66.131 +            if ( ed->arch.monitor_shadow_ref )
  66.132 +                adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
  66.133 +        }
  66.134      }
  66.135  
  66.136      void adjust_guest_pages()
  66.137      {
  66.138          struct list_head *list_ent = d->page_list.next;
  66.139          struct pfn_info *page;
  66.140 -        unsigned long mfn;
  66.141 +        unsigned long mfn, snapshot_mfn;
  66.142  
  66.143          while ( list_ent != &d->page_list )
  66.144          {
  66.145              u32 page_type;
  66.146  
  66.147              page = list_entry(list_ent, struct pfn_info, list);
  66.148 -            mfn = page_to_pfn(page);
  66.149 +            snapshot_mfn = mfn = page_to_pfn(page);
  66.150              page_type = page->u.inuse.type_info & PGT_type_mask;
  66.151  
  66.152              BUG_ON(page_get_owner(page) != d);
  66.153  
  66.154              page_count++;
  66.155  
  66.156 +            if ( shadow_enabled && !shadow_refcounts &&
  66.157 +                 page_out_of_sync(page) )
  66.158 +            {
  66.159 +                unsigned long gpfn = __mfn_to_gpfn(d, mfn);
  66.160 +                ASSERT( VALID_M2P(gpfn) );
  66.161 +                snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
  66.162 +                ASSERT( snapshot_mfn );
  66.163 +            }
  66.164 +
  66.165              switch ( page_type )
  66.166              {
  66.167              case PGT_l2_page_table:
  66.168 @@ -437,7 +451,7 @@ int audit_adjust_pgtables(struct domain 
  66.169  
  66.170                  if ( noisy )
  66.171                  {
  66.172 -                    if ( shadow_enabled )
  66.173 +                    if ( shadow_refcounts )
  66.174                      {
  66.175                          printk("Audit %d: found an L2 guest page "
  66.176                                 "mfn=%lx t=%08x c=%08x while in shadow mode\n",
  66.177 @@ -446,19 +460,22 @@ int audit_adjust_pgtables(struct domain 
  66.178                          errors++;
  66.179                      }
  66.180  
  66.181 -                    if ( (page->u.inuse.type_info & PGT_validated) !=
  66.182 -                         PGT_validated )
  66.183 +                    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
  66.184                      {
  66.185 -                        printk("Audit %d: L2 mfn=%lx not validated %08x\n",
  66.186 -                               d->id, mfn, page->u.inuse.type_info);
  66.187 -                        errors++;
  66.188 -                    }
  66.189 +                        if ( (page->u.inuse.type_info & PGT_validated) !=
  66.190 +                             PGT_validated )
  66.191 +                        {
  66.192 +                            printk("Audit %d: L2 mfn=%lx not validated %08x\n",
  66.193 +                                   d->id, mfn, page->u.inuse.type_info);
  66.194 +                            errors++;
  66.195 +                        }
  66.196  
  66.197 -                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  66.198 -                    {
  66.199 -                        printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
  66.200 -                               d->id, mfn, page->u.inuse.type_info);
  66.201 -                        errors++;
  66.202 +                        if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  66.203 +                        {
  66.204 +                            printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
  66.205 +                                   d->id, mfn, page->u.inuse.type_info);
  66.206 +                            errors++;
  66.207 +                        }
  66.208                      }
  66.209                  }
  66.210  
  66.211 @@ -466,7 +483,7 @@ int audit_adjust_pgtables(struct domain 
  66.212                      adjust(page, 1);
  66.213  
  66.214                  if ( page->u.inuse.type_info & PGT_validated )
  66.215 -                    adjust_l2_page(mfn);
  66.216 +                    adjust_l2_page(snapshot_mfn, 0);
  66.217  
  66.218                  break;
  66.219  
  66.220 @@ -475,7 +492,7 @@ int audit_adjust_pgtables(struct domain 
  66.221  
  66.222                  if ( noisy )
  66.223                  {
  66.224 -                    if ( shadow_enabled )
  66.225 +                    if ( shadow_refcounts )
  66.226                      {
  66.227                          printk("found an L1 guest page mfn=%lx t=%08x c=%08x "
  66.228                                 "while in shadow mode\n",
  66.229 @@ -483,21 +500,24 @@ int audit_adjust_pgtables(struct domain 
  66.230                          errors++;
  66.231                      }
  66.232  
  66.233 -                    if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
  66.234 +                    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
  66.235                      {
  66.236 -                        printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
  66.237 -                               d->id, mfn, page->u.inuse.type_info);
  66.238 -                        errors++;
  66.239 -                    }
  66.240 -
  66.241 -                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  66.242 -                    {
  66.243 -                        if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
  66.244 +                        if ( (page->u.inuse.type_info & PGT_validated) !=
  66.245 +                             PGT_validated )
  66.246                          {
  66.247 -                            printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
  66.248 +                            printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
  66.249                                     d->id, mfn, page->u.inuse.type_info);
  66.250                              errors++;
  66.251                          }
  66.252 +
  66.253 +                        if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  66.254 +                        {
  66.255 +                            if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
  66.256 +                            {
  66.257 +                                printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
  66.258 +                                       d->id, mfn, page->u.inuse.type_info);
  66.259 +                            }
  66.260 +                        }
  66.261                      }
  66.262                  }
  66.263                  
  66.264 @@ -505,7 +525,7 @@ int audit_adjust_pgtables(struct domain 
  66.265                      adjust(page, 1);
  66.266  
  66.267                  if ( page->u.inuse.type_info & PGT_validated )
  66.268 -                    adjust_l1_page(mfn);
  66.269 +                    adjust_l1_page(snapshot_mfn);
  66.270  
  66.271                  break;
  66.272  
  66.273 @@ -520,7 +540,7 @@ int audit_adjust_pgtables(struct domain 
  66.274                  break;
  66.275  
  66.276              case PGT_writable_page:
  66.277 -                if ( shadow_enabled )
  66.278 +                if ( shadow_refcounts )
  66.279                  {
  66.280                      // In shadow mode, writable pages can get pinned by
  66.281                      // paravirtualized guests that think they are pinning
  66.282 @@ -589,6 +609,8 @@ void audit_pagelist(struct domain *d)
  66.283  
  66.284  void _audit_domain(struct domain *d, int flags)
  66.285  {
  66.286 +    int shadow_refcounts = !!shadow_mode_refcounts(d);
  66.287 +
  66.288      void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn,
  66.289                               unsigned long mfn)
  66.290      {
  66.291 @@ -608,8 +630,29 @@ void _audit_domain(struct domain *d, int
  66.292          unmap_domain_mem(pt);           
  66.293      }
  66.294  
  66.295 +    void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn)
  66.296 +    {
  66.297 +        int i;
  66.298 +        active_grant_entry_t *act = d->grant_table->active;
  66.299 +
  66.300 +        spin_lock(&d->grant_table->lock);
  66.301 +
  66.302 +        for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
  66.303 +        {
  66.304 +            if ( act[i].pin && (act[i].frame == xmfn) )
  66.305 +            {
  66.306 +                printk("     found active grant table entry i=%d dom=%d pin=%d\n",
  66.307 +                       i, act[i].domid, act[i].pin);
  66.308 +            }
  66.309 +        }
  66.310 +
  66.311 +        spin_unlock(&d->grant_table->lock);
  66.312 +    }
  66.313 +
  66.314      void scan_for_pfn(struct domain *d, unsigned long xmfn)
  66.315      {
  66.316 +        scan_for_pfn_in_grant_table(d, xmfn);
  66.317 +
  66.318          if ( !shadow_mode_enabled(d) )
  66.319          {
  66.320              struct list_head *list_ent = d->page_list.next;
  66.321 @@ -688,7 +731,7 @@ void _audit_domain(struct domain *d, int
  66.322  
  66.323      // Maybe we should just be using BIGLOCK?
  66.324      //
  66.325 -    if ( !(flags & AUDIT_ALREADY_LOCKED) )
  66.326 +    if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
  66.327          shadow_lock(d);
  66.328  
  66.329      spin_lock(&d->page_alloc_lock);
  66.330 @@ -716,7 +759,7 @@ void _audit_domain(struct domain *d, int
  66.331              errors++;
  66.332          }
  66.333  
  66.334 -        if ( shadow_mode_enabled(d) &&
  66.335 +        if ( shadow_mode_refcounts(d) &&
  66.336               (page_type == PGT_writable_page) &&
  66.337               !(page->u.inuse.type_info & PGT_validated) )
  66.338          {
  66.339 @@ -764,7 +807,9 @@ void _audit_domain(struct domain *d, int
  66.340                         mfn);
  66.341                  errors++;
  66.342              }
  66.343 -            if ( page_type != PGT_writable_page )
  66.344 +            if ( shadow_refcounts
  66.345 +                 ? (page_type != PGT_writable_page)
  66.346 +                 : !(page_type && (page_type <= PGT_l4_page_table)) )
  66.347              {
  66.348                  printk("out of sync page mfn=%lx has strange type "
  66.349                         "t=%08x c=%08x\n",
  66.350 @@ -821,7 +866,7 @@ void _audit_domain(struct domain *d, int
  66.351                         d->id, page->u.inuse.type_info, 
  66.352                         page->tlbflush_timestamp,
  66.353                         page->count_info, mfn);
  66.354 -                errors++;
  66.355 +                //errors++;
  66.356              }
  66.357              break;
  66.358          default:
  66.359 @@ -835,7 +880,7 @@ void _audit_domain(struct domain *d, int
  66.360                     page->count_info,
  66.361                     page->u.inuse.type_info, 
  66.362                     page->tlbflush_timestamp, mfn );
  66.363 -            errors++;
  66.364 +            //errors++;
  66.365              scan_for_pfn_remote(mfn);
  66.366          }
  66.367  
  66.368 @@ -870,6 +915,8 @@ void _audit_domain(struct domain *d, int
  66.369                                 d->id, page_to_pfn(page),
  66.370                                 page->u.inuse.type_info,
  66.371                                 page->count_info);
  66.372 +                        printk("a->gpfn_and_flags=%p\n",
  66.373 +                               (void *)a->gpfn_and_flags);
  66.374                          errors++;
  66.375                      }
  66.376                      break;
  66.377 @@ -905,7 +952,7 @@ void _audit_domain(struct domain *d, int
  66.378                 "pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n",
  66.379                 d->id, page_count, oos_count, l1, l2, ctot, ttot);
  66.380  
  66.381 -    if ( !(flags & AUDIT_ALREADY_LOCKED) )
  66.382 +    if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
  66.383          shadow_unlock(d);
  66.384  
  66.385      if ( d != current->domain )
    67.1 --- a/xen/arch/x86/dom0_ops.c	Tue May 03 14:42:00 2005 +0000
    67.2 +++ b/xen/arch/x86/dom0_ops.c	Tue May 10 08:49:59 2005 +0000
    67.3 @@ -397,7 +397,7 @@ void arch_getdomaininfo_ctxt(
    67.4  #endif
    67.5  
    67.6      c->flags = 0;
    67.7 -    if ( test_bit(EDF_DONEFPUINIT, &ed->ed_flags) )
    67.8 +    if ( test_bit(EDF_DONEFPUINIT, &ed->flags) )
    67.9          c->flags |= VGCF_I387_VALID;
   67.10      if ( KERNEL_MODE(ed, &ed->arch.guest_context.user_regs) )
   67.11          c->flags |= VGCF_IN_KERNEL;
    68.1 --- a/xen/arch/x86/domain.c	Tue May 03 14:42:00 2005 +0000
    68.2 +++ b/xen/arch/x86/domain.c	Tue May 10 08:49:59 2005 +0000
    68.3 @@ -37,6 +37,7 @@
    68.4  #include <asm/vmx.h>
    68.5  #include <asm/vmx_vmcs.h>
    68.6  #include <asm/msr.h>
    68.7 +#include <asm/physdev.h>
    68.8  #include <xen/kernel.h>
    68.9  #include <public/io/ioreq.h>
   68.10  #include <xen/multicall.h>
   68.11 @@ -251,7 +252,7 @@ void arch_do_createdomain(struct exec_do
   68.12  
   68.13          d->shared_info = (void *)alloc_xenheap_page();
   68.14          memset(d->shared_info, 0, PAGE_SIZE);
   68.15 -        ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
   68.16 +        ed->vcpu_info = &d->shared_info->vcpu_data[ed->id];
   68.17          SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
   68.18          machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
   68.19                                 PAGE_SHIFT] = INVALID_M2P_ENTRY;
   68.20 @@ -293,7 +294,7 @@ void arch_do_boot_vcpu(struct exec_domai
   68.21      struct domain *d = ed->domain;
   68.22      ed->arch.schedule_tail = d->exec_domain[0]->arch.schedule_tail;
   68.23      ed->arch.perdomain_ptes = 
   68.24 -        d->arch.mm_perdomain_pt + (ed->eid << PDPT_VCPU_SHIFT);
   68.25 +        d->arch.mm_perdomain_pt + (ed->id << PDPT_VCPU_SHIFT);
   68.26      ed->arch.flags = TF_kernel_mode;
   68.27  }
   68.28  
   68.29 @@ -363,7 +364,8 @@ static int vmx_final_setup_guest(
   68.30  
   68.31          /* Put the domain in shadow mode even though we're going to be using
   68.32           * the shared 1:1 page table initially. It shouldn't hurt */
   68.33 -        shadow_mode_enable(ed->domain, SHM_enable|SHM_translate|SHM_external);
   68.34 +        shadow_mode_enable(ed->domain,
   68.35 +                           SHM_enable|SHM_refcounts|SHM_translate|SHM_external);
   68.36      }
   68.37  
   68.38      return 0;
   68.39 @@ -396,9 +398,9 @@ int arch_set_info_guest(
   68.40                  return -EINVAL;
   68.41      }
   68.42  
   68.43 -    clear_bit(EDF_DONEFPUINIT, &ed->ed_flags);
   68.44 +    clear_bit(EDF_DONEFPUINIT, &ed->flags);
   68.45      if ( c->flags & VGCF_I387_VALID )
   68.46 -        set_bit(EDF_DONEFPUINIT, &ed->ed_flags);
   68.47 +        set_bit(EDF_DONEFPUINIT, &ed->flags);
   68.48  
   68.49      ed->arch.flags &= ~TF_kernel_mode;
   68.50      if ( c->flags & VGCF_IN_KERNEL )
   68.51 @@ -414,7 +416,7 @@ int arch_set_info_guest(
   68.52      if ( !IS_PRIV(d) )
   68.53          ed->arch.guest_context.user_regs.eflags &= 0xffffcfff;
   68.54  
   68.55 -    if ( test_bit(EDF_DONEINIT, &ed->ed_flags) )
   68.56 +    if ( test_bit(EDF_DONEINIT, &ed->flags) )
   68.57          return 0;
   68.58  
   68.59      if ( (rc = (int)set_fast_trap(ed, c->fast_trap_idx)) != 0 )
   68.60 @@ -425,13 +427,13 @@ int arch_set_info_guest(
   68.61      for ( i = 0; i < 8; i++ )
   68.62          (void)set_debugreg(ed, i, c->debugreg[i]);
   68.63  
   68.64 -    if ( ed->eid == 0 )
   68.65 +    if ( ed->id == 0 )
   68.66          d->vm_assist = c->vm_assist;
   68.67  
   68.68      phys_basetab = c->pt_base;
   68.69      ed->arch.guest_table = mk_pagetable(phys_basetab);
   68.70  
   68.71 -    if ( shadow_mode_enabled(d) )
   68.72 +    if ( shadow_mode_refcounts(d) )
   68.73      {
   68.74          if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
   68.75              return -EINVAL;
   68.76 @@ -477,7 +479,7 @@ int arch_set_info_guest(
   68.77      update_pagetables(ed);
   68.78      
   68.79      /* Don't redo final setup */
   68.80 -    set_bit(EDF_DONEINIT, &ed->ed_flags);
   68.81 +    set_bit(EDF_DONEINIT, &ed->flags);
   68.82  
   68.83      return 0;
   68.84  }
   68.85 @@ -795,7 +797,7 @@ void context_switch(struct exec_domain *
   68.86       * 'prev' (after this point, a dying domain's info structure may be freed
   68.87       * without warning). 
   68.88       */
   68.89 -    clear_bit(EDF_RUNNING, &prev->ed_flags);
   68.90 +    clear_bit(EDF_RUNNING, &prev->flags);
   68.91  
   68.92      schedule_tail(next);
   68.93      BUG();
   68.94 @@ -968,6 +970,8 @@ void domain_relinquish_resources(struct 
   68.95  
   68.96      BUG_ON(d->cpuset != 0);
   68.97  
   68.98 +    physdev_destroy_state(d);
   68.99 +
  68.100      ptwr_destroy(d);
  68.101  
  68.102      /* Release device mappings of other domains */
  68.103 @@ -978,17 +982,21 @@ void domain_relinquish_resources(struct 
  68.104      {
  68.105          if ( pagetable_val(ed->arch.guest_table) != 0 )
  68.106          {
  68.107 -            (shadow_mode_enabled(d) ? put_page : put_page_and_type)
  68.108 -                (&frame_table[pagetable_val(
  68.109 -                    ed->arch.guest_table) >> PAGE_SHIFT]);
  68.110 +            if ( shadow_mode_refcounts(d) )
  68.111 +                put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
  68.112 +            else
  68.113 +                put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
  68.114 +
  68.115              ed->arch.guest_table = mk_pagetable(0);
  68.116          }
  68.117  
  68.118          if ( pagetable_val(ed->arch.guest_table_user) != 0 )
  68.119          {
  68.120 -            (shadow_mode_enabled(d) ? put_page : put_page_and_type)
  68.121 -                (&frame_table[pagetable_val(
  68.122 -                    ed->arch.guest_table_user) >> PAGE_SHIFT]);
  68.123 +            if ( shadow_mode_refcounts(d) )
  68.124 +                put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
  68.125 +            else
  68.126 +                put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
  68.127 +
  68.128              ed->arch.guest_table_user = mk_pagetable(0);
  68.129          }
  68.130  
    69.1 --- a/xen/arch/x86/domain_build.c	Tue May 03 14:42:00 2005 +0000
    69.2 +++ b/xen/arch/x86/domain_build.c	Tue May 10 08:49:59 2005 +0000
    69.3 @@ -114,7 +114,7 @@ int construct_dom0(struct domain *d,
    69.4      /* Sanity! */
    69.5      if ( d->id != 0 ) 
    69.6          BUG();
    69.7 -    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
    69.8 +    if ( test_bit(DF_CONSTRUCTED, &d->flags) ) 
    69.9          BUG();
   69.10  
   69.11      memset(&dsi, 0, sizeof(struct domain_setup_info));
   69.12 @@ -540,14 +540,14 @@ int construct_dom0(struct domain *d,
   69.13      /* DOM0 gets access to everything. */
   69.14      physdev_init_dom0(d);
   69.15  
   69.16 -    set_bit(DF_CONSTRUCTED, &d->d_flags);
   69.17 +    set_bit(DF_CONSTRUCTED, &d->flags);
   69.18  
   69.19      new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
   69.20  
   69.21      if ( opt_dom0_shadow || opt_dom0_translate )
   69.22      {
   69.23          shadow_mode_enable(d, (opt_dom0_translate
   69.24 -                               ? SHM_enable | SHM_translate
   69.25 +                               ? SHM_enable | SHM_refcounts | SHM_translate
   69.26                                 : SHM_enable));
   69.27          if ( opt_dom0_translate )
   69.28          {
   69.29 @@ -570,7 +570,7 @@ int construct_dom0(struct domain *d,
   69.30              idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table),
   69.31                                                  __PAGE_HYPERVISOR);
   69.32              translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
   69.33 -                                pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT);
   69.34 +                                pagetable_get_pfn(ed->arch.guest_table));
   69.35              idle_pg_table[1] = root_empty();
   69.36              local_flush_tlb();
   69.37          }
    70.1 --- a/xen/arch/x86/i387.c	Tue May 03 14:42:00 2005 +0000
    70.2 +++ b/xen/arch/x86/i387.c	Tue May 10 08:49:59 2005 +0000
    70.3 @@ -18,7 +18,7 @@ void init_fpu(void)
    70.4      __asm__ __volatile__ ( "fninit" );
    70.5      if ( cpu_has_xmm )
    70.6          load_mxcsr(0x1f80);
    70.7 -    set_bit(EDF_DONEFPUINIT, &current->ed_flags);
    70.8 +    set_bit(EDF_DONEFPUINIT, &current->flags);
    70.9  }
   70.10  
   70.11  void save_init_fpu(struct exec_domain *tsk)
   70.12 @@ -28,7 +28,7 @@ void save_init_fpu(struct exec_domain *t
   70.13       * This causes us to set the real flag, so we'll need
   70.14       * to temporarily clear it while saving f-p state.
   70.15       */
   70.16 -    if ( test_bit(EDF_GUEST_STTS, &tsk->ed_flags) )
   70.17 +    if ( test_bit(EDF_GUEST_STTS, &tsk->flags) )
   70.18          clts();
   70.19  
   70.20      if ( cpu_has_fxsr )
   70.21 @@ -40,7 +40,7 @@ void save_init_fpu(struct exec_domain *t
   70.22              "fnsave %0 ; fwait"
   70.23              : "=m" (tsk->arch.guest_context.fpu_ctxt) );
   70.24  
   70.25 -    clear_bit(EDF_USEDFPU, &tsk->ed_flags);
   70.26 +    clear_bit(EDF_USEDFPU, &tsk->flags);
   70.27      stts();
   70.28  }
   70.29  
    71.1 --- a/xen/arch/x86/i8259.c	Tue May 03 14:42:00 2005 +0000
    71.2 +++ b/xen/arch/x86/i8259.c	Tue May 10 08:49:59 2005 +0000
    71.3 @@ -45,7 +45,7 @@ BUILD_COMMON_IRQ()
    71.4  
    71.5  /*
    71.6   * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
    71.7 - * (these are usually mapped to vectors 0x30-0x3f)
    71.8 + * (these are usually mapped to vectors 0x20-0x2f)
    71.9   */
   71.10  BUILD_16_IRQS(0x0)
   71.11  
   71.12 @@ -58,7 +58,7 @@ BUILD_16_IRQS(0x0)
   71.13   * of these. Plus, more powerful systems might have more than 64
   71.14   * IO-APIC registers.
   71.15   *
   71.16 - * (these are usually mapped into the 0x30-0xff vector range)
   71.17 + * (these are usually mapped into the 0x20-0xff vector range)
   71.18   */
   71.19  BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
   71.20  BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
   71.21 @@ -101,7 +101,7 @@ BUILD_SMP_INTERRUPT(spurious_interrupt,S
   71.22  	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
   71.23  	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
   71.24  
   71.25 -    void *interrupt[NR_IRQS] = {
   71.26 +    void (*interrupt[NR_IRQS])(void) = {
   71.27  	IRQLIST_16(0x0),
   71.28  
   71.29  #ifdef CONFIG_X86_IO_APIC
   71.30 @@ -341,7 +341,7 @@ void __init init_8259A(int auto_eoi)
   71.31       * outb_p - this has to work on a wide range of PC hardware.
   71.32       */
   71.33      outb_p(0x11, 0x20);	/* ICW1: select 8259A-1 init */
   71.34 -    outb_p(0x30 + 0, 0x21);	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
   71.35 +    outb_p(0x20 + 0, 0x21);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
   71.36      outb_p(0x04, 0x21);	/* 8259A-1 (the master) has a slave on IR2 */
   71.37      if (auto_eoi)
   71.38          outb_p(0x03, 0x21);	/* master does Auto EOI */
   71.39 @@ -349,7 +349,7 @@ void __init init_8259A(int auto_eoi)
   71.40          outb_p(0x01, 0x21);	/* master expects normal EOI */
   71.41  
   71.42      outb_p(0x11, 0xA0);	/* ICW1: select 8259A-2 init */
   71.43 -    outb_p(0x30 + 8, 0xA1);	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
   71.44 +    outb_p(0x20 + 8, 0xA1);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
   71.45      outb_p(0x02, 0xA1);	/* 8259A-2 is a slave on master's IR2 */
   71.46      outb_p(0x01, 0xA1);	/* (slave's support for AEOI in flat mode
   71.47                             is to be investigated) */
    72.1 --- a/xen/arch/x86/idle0_task.c	Tue May 03 14:42:00 2005 +0000
    72.2 +++ b/xen/arch/x86/idle0_task.c	Tue May 10 08:49:59 2005 +0000
    72.3 @@ -5,7 +5,7 @@
    72.4  
    72.5  struct domain idle0_domain = {
    72.6      id:          IDLE_DOMAIN_ID,
    72.7 -    d_flags:     1<<DF_IDLETASK,
    72.8 +    flags:       1<<DF_IDLETASK,
    72.9      refcnt:      ATOMIC_INIT(1)
   72.10  };
   72.11  
    73.1 --- a/xen/arch/x86/io_apic.c	Tue May 03 14:42:00 2005 +0000
    73.2 +++ b/xen/arch/x86/io_apic.c	Tue May 10 08:49:59 2005 +0000
    73.3 @@ -25,26 +25,20 @@
    73.4  #include <xen/irq.h>
    73.5  #include <xen/delay.h>
    73.6  #include <xen/sched.h>
    73.7 -#include <xen/config.h>
    73.8 -#include <asm/mc146818rtc.h>
    73.9 +#include <xen/acpi.h>
   73.10  #include <asm/io.h>
   73.11 -#include <asm/mpspec.h>
   73.12 -#include <asm/io_apic.h>
   73.13 +#include <asm/mc146818rtc.h>
   73.14  #include <asm/smp.h>
   73.15  #include <asm/desc.h>
   73.16 -#include <asm/smpboot.h>
   73.17 -
   73.18 -#ifdef CONFIG_X86_IO_APIC
   73.19 -
   73.20 -#undef APIC_LOCKUP_DEBUG
   73.21 +#include <mach_apic.h>
   73.22 +#include <io_ports.h>
   73.23  
   73.24 -#define APIC_LOCKUP_DEBUG
   73.25 -
   73.26 -static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
   73.27 +int (*ioapic_renumber_irq)(int ioapic, int irq);
   73.28 +atomic_t irq_mis_count;
   73.29  
   73.30 -unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
   73.31 -unsigned char int_delivery_mode = dest_LowestPrio;
   73.32 +static DEFINE_SPINLOCK(ioapic_lock);
   73.33  
   73.34 +int skip_ioapic_setup;
   73.35  
   73.36  /*
   73.37   * # of IRQ routing registers
   73.38 @@ -69,12 +63,20 @@ static struct irq_pin_list {
   73.39  	int apic, pin, next;
   73.40  } irq_2_pin[PIN_MAP_SIZE];
   73.41  
   73.42 +int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
   73.43 +#ifdef CONFIG_PCI_MSI
   73.44 +#define vector_to_irq(vector) 	\
   73.45 +	(platform_legacy_irq(vector) ? vector : vector_irq[vector])
   73.46 +#else
   73.47 +#define vector_to_irq(vector)	(vector)
   73.48 +#endif
   73.49 +
   73.50  /*
   73.51   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
   73.52   * shared ISA-space IRQs, so we have to support them. We are super
   73.53   * fast in the common case, and fast for shared ISA-space IRQs.
   73.54   */
   73.55 -static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
   73.56 +static void add_pin_to_irq(unsigned int irq, int apic, int pin)
   73.57  {
   73.58  	static int first_free_entry = NR_IRQS;
   73.59  	struct irq_pin_list *entry = irq_2_pin + irq;
   73.60 @@ -112,36 +114,48 @@ static void __init replace_pin_at_irq(un
   73.61  	}
   73.62  }
   73.63  
   73.64 -#define __DO_ACTION(R, ACTION, FINAL)					\
   73.65 -									\
   73.66 -{									\
   73.67 -	int pin;							\
   73.68 -	struct irq_pin_list *entry = irq_2_pin + irq;			\
   73.69 -									\
   73.70 -	for (;;) {							\
   73.71 -		unsigned int reg;					\
   73.72 -		pin = entry->pin;					\
   73.73 -		if (pin == -1)						\
   73.74 -			break;						\
   73.75 -		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
   73.76 -		reg ACTION;						\
   73.77 -		io_apic_write(entry->apic, 0x10 + R + pin*2, reg);	\
   73.78 -		if (!entry->next)					\
   73.79 -			break;						\
   73.80 -		entry = irq_2_pin + entry->next;			\
   73.81 -	}								\
   73.82 -	FINAL;								\
   73.83 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
   73.84 +{
   73.85 +	struct irq_pin_list *entry = irq_2_pin + irq;
   73.86 +	unsigned int pin, reg;
   73.87 +
   73.88 +	for (;;) {
   73.89 +		pin = entry->pin;
   73.90 +		if (pin == -1)
   73.91 +			break;
   73.92 +		reg = io_apic_read(entry->apic, 0x10 + pin*2);
   73.93 +		reg &= ~disable;
   73.94 +		reg |= enable;
   73.95 +		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
   73.96 +		if (!entry->next)
   73.97 +			break;
   73.98 +		entry = irq_2_pin + entry->next;
   73.99 +	}
  73.100  }
  73.101  
  73.102 -#define DO_ACTION(name,R,ACTION, FINAL)					\
  73.103 -									\
  73.104 -	static void name##_IO_APIC_irq (unsigned int irq)		\
  73.105 -	__DO_ACTION(R, ACTION, FINAL)
  73.106 +/* mask = 1 */
  73.107 +static void __mask_IO_APIC_irq (unsigned int irq)
  73.108 +{
  73.109 +	__modify_IO_APIC_irq(irq, 0x00010000, 0);
  73.110 +}
  73.111 +
  73.112 +/* mask = 0 */
  73.113 +static void __unmask_IO_APIC_irq (unsigned int irq)
  73.114 +{
  73.115 +	__modify_IO_APIC_irq(irq, 0, 0x00010000);
  73.116 +}
  73.117  
  73.118 -DO_ACTION( __mask,    0, |= 0x00010000, io_apic_sync(entry->apic) )
  73.119 -DO_ACTION( __unmask,  0, &= 0xfffeffff, )
  73.120 -DO_ACTION( __edge,    0, &= 0xffff7fff, )
  73.121 -DO_ACTION( __level,   0, |= 0x00008000, )
  73.122 +/* mask = 1, trigger = 0 */
  73.123 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
  73.124 +{
  73.125 +	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
  73.126 +}
  73.127 +
  73.128 +/* mask = 0, trigger = 1 */
  73.129 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
  73.130 +{
  73.131 +	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
  73.132 +}
  73.133  
  73.134  static void mask_IO_APIC_irq (unsigned int irq)
  73.135  {
  73.136 @@ -165,7 +179,7 @@ void clear_IO_APIC_pin(unsigned int apic
  73.137  {
  73.138  	struct IO_APIC_route_entry entry;
  73.139  	unsigned long flags;
  73.140 -
  73.141 +	
  73.142  	/* Check delivery_mode to be sure we're not clearing an SMI pin */
  73.143  	spin_lock_irqsave(&ioapic_lock, flags);
  73.144  	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
  73.145 @@ -194,85 +208,467 @@ static void clear_IO_APIC (void)
  73.146  			clear_IO_APIC_pin(apic, pin);
  73.147  }
  73.148  
  73.149 -static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
  73.150 +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
  73.151  {
  73.152  	unsigned long flags;
  73.153 -
  73.154 -	/*
  73.155 -	 * Only the first 8 bits are valid.
  73.156 -	 */
  73.157 -	mask = mask << 24;
  73.158 +	int pin;
  73.159 +	struct irq_pin_list *entry = irq_2_pin + irq;
  73.160 +	unsigned int apicid_value;
  73.161 +	
  73.162 +	apicid_value = cpu_mask_to_apicid(cpumask);
  73.163 +	/* Prepare to do the io_apic_write */
  73.164 +	apicid_value = apicid_value << 24;
  73.165  	spin_lock_irqsave(&ioapic_lock, flags);
  73.166 -	__DO_ACTION(1, = mask, )
  73.167 +	for (;;) {
  73.168 +		pin = entry->pin;
  73.169 +		if (pin == -1)
  73.170 +			break;
  73.171 +		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
  73.172 +		if (!entry->next)
  73.173 +			break;
  73.174 +		entry = irq_2_pin + entry->next;
  73.175 +	}
  73.176  	spin_unlock_irqrestore(&ioapic_lock, flags);
  73.177  }
  73.178  
  73.179 -#define balance_irq(_irq) ((void)0)
  73.180 +#if defined(CONFIG_IRQBALANCE)
  73.181 +# include <asm/processor.h>	/* kernel_thread() */
  73.182 +# include <xen/kernel_stat.h>	/* kstat */
  73.183 +# include <xen/slab.h>		/* kmalloc() */
  73.184 +# include <xen/timer.h>	/* time_after() */
  73.185 + 
  73.186 +# ifdef CONFIG_BALANCED_IRQ_DEBUG
  73.187 +#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
  73.188 +#  define Dprintk(x...) do { TDprintk(x); } while (0)
  73.189 +# else
  73.190 +#  define TDprintk(x...) 
  73.191 +#  define Dprintk(x...) 
  73.192 +# endif
  73.193 +
  73.194 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
  73.195  
  73.196 -/*
  73.197 - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  73.198 - * specific CPU-side IRQs.
  73.199 - */
  73.200 +#define IRQBALANCE_CHECK_ARCH -999
  73.201 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
  73.202 +static int physical_balance = 0;
  73.203 +
  73.204 +struct irq_cpu_info {
  73.205 +	unsigned long * last_irq;
  73.206 +	unsigned long * irq_delta;
  73.207 +	unsigned long irq;
  73.208 +} irq_cpu_data[NR_CPUS];
  73.209 +
  73.210 +#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
  73.211 +#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
  73.212 +#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
  73.213 +
  73.214 +#define IDLE_ENOUGH(cpu,now) \
  73.215 +		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
  73.216 +
  73.217 +#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
  73.218  
  73.219 -#define MAX_PIRQS 8
  73.220 -int pirq_entries [MAX_PIRQS];
  73.221 -int pirqs_enabled;
  73.222 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
  73.223 +
  73.224 +#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
  73.225 +#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
  73.226 +#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
  73.227 +#define BALANCED_IRQ_LESS_DELTA		(HZ)
  73.228 +
  73.229 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
  73.230 +
  73.231 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
  73.232 +			unsigned long now, int direction)
  73.233 +{
  73.234 +	int search_idle = 1;
  73.235 +	int cpu = curr_cpu;
  73.236 +
  73.237 +	goto inside;
  73.238  
  73.239 -int skip_ioapic_setup;
  73.240 -#if 0
  73.241 +	do {
  73.242 +		if (unlikely(cpu == curr_cpu))
  73.243 +			search_idle = 0;
  73.244 +inside:
  73.245 +		if (direction == 1) {
  73.246 +			cpu++;
  73.247 +			if (cpu >= NR_CPUS)
  73.248 +				cpu = 0;
  73.249 +		} else {
  73.250 +			cpu--;
  73.251 +			if (cpu == -1)
  73.252 +				cpu = NR_CPUS-1;
  73.253 +		}
  73.254 +	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
  73.255 +			(search_idle && !IDLE_ENOUGH(cpu,now)));
  73.256  
  73.257 -static int __init noioapic_setup(char *str)
  73.258 -{
  73.259 -	skip_ioapic_setup = 1;
  73.260 -	return 1;
  73.261 +	return cpu;
  73.262  }
  73.263  
  73.264 -__setup("noapic", noioapic_setup);
  73.265 +static inline void balance_irq(int cpu, int irq)
  73.266 +{
  73.267 +	unsigned long now = jiffies;
  73.268 +	cpumask_t allowed_mask;
  73.269 +	unsigned int new_cpu;
  73.270 +		
  73.271 +	if (irqbalance_disabled)
  73.272 +		return; 
  73.273 +
  73.274 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
  73.275 +	new_cpu = move(cpu, allowed_mask, now, 1);
  73.276 +	if (cpu != new_cpu) {
  73.277 +		irq_desc_t *desc = irq_desc + irq;
  73.278 +		unsigned long flags;
  73.279  
  73.280 -static int __init ioapic_setup(char *str)
  73.281 +		spin_lock_irqsave(&desc->lock, flags);
  73.282 +		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
  73.283 +		spin_unlock_irqrestore(&desc->lock, flags);
  73.284 +	}
  73.285 +}
  73.286 +
  73.287 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
  73.288  {
  73.289 -	skip_ioapic_setup = 0;
  73.290 -	return 1;
  73.291 +	int i, j;
  73.292 +	Dprintk("Rotating IRQs among CPUs.\n");
  73.293 +	for (i = 0; i < NR_CPUS; i++) {
  73.294 +		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
  73.295 +			if (!irq_desc[j].action)
  73.296 +				continue;
  73.297 +			/* Is it a significant load ?  */
  73.298 +			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
  73.299 +						useful_load_threshold)
  73.300 +				continue;
  73.301 +			balance_irq(i, j);
  73.302 +		}
  73.303 +	}
  73.304 +	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  73.305 +		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  73.306 +	return;
  73.307  }
  73.308  
  73.309 -__setup("apic", ioapic_setup);
  73.310 -
  73.311 -
  73.312 -
  73.313 -static int __init ioapic_pirq_setup(char *str)
  73.314 +static void do_irq_balance(void)
  73.315  {
  73.316 -	int i, max;
  73.317 -	int ints[MAX_PIRQS+1];
  73.318 +	int i, j;
  73.319 +	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
  73.320 +	unsigned long move_this_load = 0;
  73.321 +	int max_loaded = 0, min_loaded = 0;
  73.322 +	int load;
  73.323 +	unsigned long useful_load_threshold = balanced_irq_interval + 10;
  73.324 +	int selected_irq;
  73.325 +	int tmp_loaded, first_attempt = 1;
  73.326 +	unsigned long tmp_cpu_irq;
  73.327 +	unsigned long imbalance = 0;
  73.328 +	cpumask_t allowed_mask, target_cpu_mask, tmp;
  73.329 +
  73.330 +	for (i = 0; i < NR_CPUS; i++) {
  73.331 +		int package_index;
  73.332 +		CPU_IRQ(i) = 0;
  73.333 +		if (!cpu_online(i))
  73.334 +			continue;
  73.335 +		package_index = CPU_TO_PACKAGEINDEX(i);
  73.336 +		for (j = 0; j < NR_IRQS; j++) {
  73.337 +			unsigned long value_now, delta;
  73.338 +			/* Is this an active IRQ? */
  73.339 +			if (!irq_desc[j].action)
  73.340 +				continue;
  73.341 +			if ( package_index == i )
  73.342 +				IRQ_DELTA(package_index,j) = 0;
  73.343 +			/* Determine the total count per processor per IRQ */
  73.344 +			value_now = (unsigned long) kstat_cpu(i).irqs[j];
  73.345 +
  73.346 +			/* Determine the activity per processor per IRQ */
  73.347 +			delta = value_now - LAST_CPU_IRQ(i,j);
  73.348 +
  73.349 +			/* Update last_cpu_irq[][] for the next time */
  73.350 +			LAST_CPU_IRQ(i,j) = value_now;
  73.351 +
  73.352 +			/* Ignore IRQs whose rate is less than the clock */
  73.353 +			if (delta < useful_load_threshold)
  73.354 +				continue;
  73.355 +			/* update the load for the processor or package total */
  73.356 +			IRQ_DELTA(package_index,j) += delta;
  73.357  
  73.358 -	get_options(str, ARRAY_SIZE(ints), ints);
  73.359 +			/* Keep track of the higher numbered sibling as well */
  73.360 +			if (i != package_index)
  73.361 +				CPU_IRQ(i) += delta;
  73.362 +			/*
  73.363 +			 * We have sibling A and sibling B in the package
  73.364 +			 *
  73.365 +			 * cpu_irq[A] = load for cpu A + load for cpu B
  73.366 +			 * cpu_irq[B] = load for cpu B
  73.367 +			 */
  73.368 +			CPU_IRQ(package_index) += delta;
  73.369 +		}
  73.370 +	}
  73.371 +	/* Find the least loaded processor package */
  73.372 +	for (i = 0; i < NR_CPUS; i++) {
  73.373 +		if (!cpu_online(i))
  73.374 +			continue;
  73.375 +		if (i != CPU_TO_PACKAGEINDEX(i))
  73.376 +			continue;
  73.377 +		if (min_cpu_irq > CPU_IRQ(i)) {
  73.378 +			min_cpu_irq = CPU_IRQ(i);
  73.379 +			min_loaded = i;
  73.380 +		}
  73.381 +	}
  73.382 +	max_cpu_irq = ULONG_MAX;
  73.383  
  73.384 -	for (i = 0; i < MAX_PIRQS; i++)
  73.385 -		pirq_entries[i] = -1;
  73.386 +tryanothercpu:
  73.387 +	/* Look for heaviest loaded processor.
  73.388 +	 * We may come back to get the next heaviest loaded processor.
  73.389 +	 * Skip processors with trivial loads.
  73.390 +	 */
  73.391 +	tmp_cpu_irq = 0;
  73.392 +	tmp_loaded = -1;
  73.393 +	for (i = 0; i < NR_CPUS; i++) {
  73.394 +		if (!cpu_online(i))
  73.395 +			continue;
  73.396 +		if (i != CPU_TO_PACKAGEINDEX(i))
  73.397 +			continue;
  73.398 +		if (max_cpu_irq <= CPU_IRQ(i)) 
  73.399 +			continue;
  73.400 +		if (tmp_cpu_irq < CPU_IRQ(i)) {
  73.401 +			tmp_cpu_irq = CPU_IRQ(i);
  73.402 +			tmp_loaded = i;
  73.403 +		}
  73.404 +	}
  73.405  
  73.406 -	pirqs_enabled = 1;
  73.407 -	printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
  73.408 -	max = MAX_PIRQS;
  73.409 -	if (ints[0] < MAX_PIRQS)
  73.410 -		max = ints[0];
  73.411 +	if (tmp_loaded == -1) {
  73.412 + 	 /* In the case of a small number of heavy interrupt sources, 
  73.413 +	  * some of the cpus get loaded too much. We use Ingo's original 
  73.414 +	  * approach to rotate them around.
  73.415 +	  */
  73.416 +		if (!first_attempt && imbalance >= useful_load_threshold) {
  73.417 +			rotate_irqs_among_cpus(useful_load_threshold);
  73.418 +			return;
  73.419 +		}
  73.420 +		goto not_worth_the_effort;
  73.421 +	}
  73.422 +	
  73.423 +	first_attempt = 0;		/* heaviest search */
  73.424 +	max_cpu_irq = tmp_cpu_irq;	/* load */
  73.425 +	max_loaded = tmp_loaded;	/* processor */
  73.426 +	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
  73.427 +	
  73.428 +	Dprintk("max_loaded cpu = %d\n", max_loaded);
  73.429 +	Dprintk("min_loaded cpu = %d\n", min_loaded);
  73.430 +	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
  73.431 +	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
  73.432 +	Dprintk("load imbalance = %lu\n", imbalance);
  73.433 +
  73.434 +	/* if imbalance is less than approx 10% of max load, then
  73.435 +	 * observe diminishing returns action. - quit
  73.436 +	 */
  73.437 +	if (imbalance < (max_cpu_irq >> 3)) {
  73.438 +		Dprintk("Imbalance too trivial\n");
  73.439 +		goto not_worth_the_effort;
  73.440 +	}
  73.441 +
  73.442 +tryanotherirq:
  73.443 +	/* if we select an IRQ to move that can't go where we want, then
  73.444 +	 * see if there is another one to try.
  73.445 +	 */
  73.446 +	move_this_load = 0;
  73.447 +	selected_irq = -1;
  73.448 +	for (j = 0; j < NR_IRQS; j++) {
  73.449 +		/* Is this an active IRQ? */
  73.450 +		if (!irq_desc[j].action)
  73.451 +			continue;
  73.452 +		if (imbalance <= IRQ_DELTA(max_loaded,j))
  73.453 +			continue;
  73.454 +		/* Try to find the IRQ that is closest to the imbalance
  73.455 +		 * without going over.
  73.456 +		 */
  73.457 +		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
  73.458 +			move_this_load = IRQ_DELTA(max_loaded,j);
  73.459 +			selected_irq = j;
  73.460 +		}
  73.461 +	}
  73.462 +	if (selected_irq == -1) {
  73.463 +		goto tryanothercpu;
  73.464 +	}
  73.465  
  73.466 -	for (i = 0; i < max; i++) {
  73.467 -		printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
  73.468 -		/*
  73.469 -		 * PIRQs are mapped upside down, usually.
  73.470 +	imbalance = move_this_load;
  73.471 +	
  73.472 +	/* For physical_balance case, we accumulated both load
  73.473 +	 * values in the one of the siblings cpu_irq[],
  73.474 +	 * to use the same code for physical and logical processors
  73.475 +	 * as much as possible. 
  73.476 +	 *
  73.477 +	 * NOTE: the cpu_irq[] array holds the sum of the load for
  73.478 +	 * sibling A and sibling B in the slot for the lowest numbered
  73.479 +	 * sibling (A), _AND_ the load for sibling B in the slot for
  73.480 +	 * the higher numbered sibling.
  73.481 +	 *
  73.482 +	 * We seek the least loaded sibling by making the comparison
  73.483 +	 * (A+B)/2 vs B
  73.484 +	 */
  73.485 +	load = CPU_IRQ(min_loaded) >> 1;
  73.486 +	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
  73.487 +		if (load > CPU_IRQ(j)) {
  73.488 +			/* This won't change cpu_sibling_map[min_loaded] */
  73.489 +			load = CPU_IRQ(j);
  73.490 +			min_loaded = j;
  73.491 +		}
  73.492 +	}
  73.493 +
  73.494 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
  73.495 +	target_cpu_mask = cpumask_of_cpu(min_loaded);
  73.496 +	cpus_and(tmp, target_cpu_mask, allowed_mask);
  73.497 +
  73.498 +	if (!cpus_empty(tmp)) {
  73.499 +		irq_desc_t *desc = irq_desc + selected_irq;
  73.500 +		unsigned long flags;
  73.501 +
  73.502 +		Dprintk("irq = %d moved to cpu = %d\n",
  73.503 +				selected_irq, min_loaded);
  73.504 +		/* mark for change destination */
  73.505 +		spin_lock_irqsave(&desc->lock, flags);
  73.506 +		pending_irq_balance_cpumask[selected_irq] =
  73.507 +					cpumask_of_cpu(min_loaded);
  73.508 +		spin_unlock_irqrestore(&desc->lock, flags);
  73.509 +		/* Since we made a change, come back sooner to 
  73.510 +		 * check for more variation.
  73.511  		 */
  73.512 -		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
  73.513 +		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  73.514 +			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  73.515 +		return;
  73.516  	}
  73.517 -	return 1;
  73.518 +	goto tryanotherirq;
  73.519 +
  73.520 +not_worth_the_effort:
  73.521 +	/*
  73.522 +	 * if we did not find an IRQ to move, then adjust the time interval
  73.523 +	 * upward
  73.524 +	 */
  73.525 +	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
  73.526 +		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
  73.527 +	Dprintk("IRQ worth rotating not found\n");
  73.528 +	return;
  73.529  }
  73.530  
  73.531 -__setup("pirq=", ioapic_pirq_setup);
  73.532 +static int balanced_irq(void *unused)
  73.533 +{
  73.534 +	int i;
  73.535 +	unsigned long prev_balance_time = jiffies;
  73.536 +	long time_remaining = balanced_irq_interval;
  73.537 +
  73.538 +	daemonize("kirqd");
  73.539 +	
  73.540 +	/* push everything to CPU 0 to give us a starting point.  */
  73.541 +	for (i = 0 ; i < NR_IRQS ; i++) {
  73.542 +		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
  73.543 +	}
  73.544 +
  73.545 +	for ( ; ; ) {
  73.546 +		set_current_state(TASK_INTERRUPTIBLE);
  73.547 +		time_remaining = schedule_timeout(time_remaining);
  73.548 +		try_to_freeze(PF_FREEZE);
  73.549 +		if (time_after(jiffies,
  73.550 +				prev_balance_time+balanced_irq_interval)) {
  73.551 +			do_irq_balance();
  73.552 +			prev_balance_time = jiffies;
  73.553 +			time_remaining = balanced_irq_interval;
  73.554 +		}
  73.555 +	}
  73.556 +	return 0;
  73.557 +}
  73.558 +
  73.559 +static int __init balanced_irq_init(void)
  73.560 +{
  73.561 +	int i;
  73.562 +	struct cpuinfo_x86 *c;
  73.563 +	cpumask_t tmp;
  73.564 +
  73.565 +	cpus_shift_right(tmp, cpu_online_map, 2);
  73.566 +        c = &boot_cpu_data;
  73.567 +	/* When not overwritten by the command line ask subarchitecture. */
  73.568 +	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
  73.569 +		irqbalance_disabled = NO_BALANCE_IRQ;
  73.570 +	if (irqbalance_disabled)
  73.571 +		return 0;
  73.572 +	
  73.573 +	 /* disable irqbalance completely if there is only one processor online */
  73.574 +	if (num_online_cpus() < 2) {
  73.575 +		irqbalance_disabled = 1;
  73.576 +		return 0;
  73.577 +	}
  73.578 +	/*
  73.579 +	 * Enable physical balance only if more than 1 physical processor
  73.580 +	 * is present
  73.581 +	 */
  73.582 +	if (smp_num_siblings > 1 && !cpus_empty(tmp))
  73.583 +		physical_balance = 1;
  73.584  
  73.585 -#endif
  73.586 +	for (i = 0; i < NR_CPUS; i++) {
  73.587 +		if (!cpu_online(i))
  73.588 +			continue;
  73.589 +		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  73.590 +		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  73.591 +		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
  73.592 +			printk(KERN_ERR "balanced_irq_init: out of memory");
  73.593 +			goto failed;
  73.594 +		}
  73.595 +		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
  73.596 +		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
  73.597 +	}
  73.598 +	
  73.599 +	printk(KERN_INFO "Starting balanced_irq\n");
  73.600 +	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
  73.601 +		return 0;
  73.602 +	else 
  73.603 +		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
  73.604 +failed:
  73.605 +	for (i = 0; i < NR_CPUS; i++) {
  73.606 +		if(irq_cpu_data[i].irq_delta)
  73.607 +			kfree(irq_cpu_data[i].irq_delta);
  73.608 +		if(irq_cpu_data[i].last_irq)
  73.609 +			kfree(irq_cpu_data[i].last_irq);
  73.610 +	}
  73.611 +	return 0;
  73.612 +}
  73.613 +
  73.614 +int __init irqbalance_disable(char *str)
  73.615 +{
  73.616 +	irqbalance_disabled = 1;
  73.617 +	return 0;
  73.618 +}
  73.619 +
  73.620 +__setup("noirqbalance", irqbalance_disable);
  73.621 +
  73.622 +static inline void move_irq(int irq)
  73.623 +{
  73.624 +	/* note - we hold the desc->lock */
  73.625 +	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
  73.626 +		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
  73.627 +		cpus_clear(pending_irq_balance_cpumask[irq]);
  73.628 +	}
  73.629 +}
  73.630 +
  73.631 +late_initcall(balanced_irq_init);
  73.632 +
  73.633 +#else /* !CONFIG_IRQBALANCE */
  73.634 +static inline void move_irq(int irq) { }
  73.635 +#endif /* CONFIG_IRQBALANCE */
  73.636 +
  73.637 +#ifndef CONFIG_SMP
  73.638 +void fastcall send_IPI_self(int vector)
  73.639 +{
  73.640 +	unsigned int cfg;
  73.641 +
  73.642 +	/*
  73.643 +	 * Wait for idle.
  73.644 +	 */
  73.645 +	apic_wait_icr_idle();
  73.646 +	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
  73.647 +	/*
  73.648 +	 * Send the IPI. The write to APIC_ICR fires this off.
  73.649 +	 */
  73.650 +	apic_write_around(APIC_ICR, cfg);
  73.651 +}
  73.652 +#endif /* !CONFIG_SMP */
  73.653  
  73.654  /*
  73.655   * Find the IRQ entry number of a certain pin.
  73.656   */
  73.657 -static int __init find_irq_entry(int apic, int pin, int type)
  73.658 +static int find_irq_entry(int apic, int pin, int type)
  73.659  {
  73.660  	int i;
  73.661  
  73.662 @@ -289,7 +685,7 @@ static int __init find_irq_entry(int api
  73.663  /*
  73.664   * Find the pin to which IRQ[irq] (ISA) is connected
  73.665   */
  73.666 -static int __init find_isa_irq_pin(int irq, int type)
  73.667 +static int find_isa_irq_pin(int irq, int type)
  73.668  {
  73.669  	int i;
  73.670  
  73.671 @@ -298,7 +694,9 @@ static int __init find_isa_irq_pin(int i
  73.672  
  73.673  		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
  73.674  		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
  73.675 -		     mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
  73.676 +		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
  73.677 +		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
  73.678 +		    ) &&
  73.679  		    (mp_irqs[i].mpc_irqtype == type) &&
  73.680  		    (mp_irqs[i].mpc_srcbusirq == irq))
  73.681  
  73.682 @@ -317,9 +715,9 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
  73.683  {
  73.684  	int apic, i, best_guess = -1;
  73.685  
  73.686 -	Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
  73.687 -		bus, slot, pin);
  73.688 -	if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) {
  73.689 +	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
  73.690 +		"slot:%d, pin:%d.\n", bus, slot, pin);
  73.691 +	if (mp_bus_id_to_pci_bus[bus] == -1) {
  73.692  		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
  73.693  		return -1;
  73.694  	}
  73.695 @@ -354,15 +752,40 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
  73.696  }
  73.697  
  73.698  /*
  73.699 + * This function currently is only a helper for the i386 smp boot process where 
  73.700 + * we need to reprogram the ioredtbls to cater for the cpus which have come online
  73.701 + * so mask in all cases should simply be TARGET_CPUS
  73.702 + */
  73.703 +void __init setup_ioapic_dest(void)
  73.704 +{
  73.705 +	int pin, ioapic, irq, irq_entry;
  73.706 +
  73.707 +	if (skip_ioapic_setup == 1)
  73.708 +		return;
  73.709 +
  73.710 +	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
  73.711 +		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
  73.712 +			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
  73.713 +			if (irq_entry == -1)
  73.714 +				continue;
  73.715 +			irq = pin_2_irq(irq_entry, ioapic, pin);
  73.716 +			set_ioapic_affinity_irq(irq, TARGET_CPUS);
  73.717 +		}
  73.718 +
  73.719 +	}
  73.720 +}
  73.721 +
  73.722 +/*
  73.723   * EISA Edge/Level control register, ELCR
  73.724   */
  73.725 -static int __init EISA_ELCR(unsigned int irq)
  73.726 +static int EISA_ELCR(unsigned int irq)
  73.727  {
  73.728  	if (irq < 16) {
  73.729  		unsigned int port = 0x4d0 + (irq >> 3);
  73.730  		return (inb(port) >> (irq & 7)) & 1;
  73.731  	}
  73.732 -	printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
  73.733 +	apic_printk(APIC_VERBOSE, KERN_INFO
  73.734 +			"Broken MPtable reports ISA irq %d\n", irq);
  73.735  	return 0;
  73.736  }
  73.737  
  73.738 @@ -392,6 +815,12 @@ static int __init EISA_ELCR(unsigned int
  73.739  #define default_MCA_trigger(idx)	(1)
  73.740  #define default_MCA_polarity(idx)	(0)
  73.741  
  73.742 +/* NEC98 interrupts are always polarity zero edge triggered,
  73.743 + * when listed as conforming in the MP table. */
  73.744 +
  73.745 +#define default_NEC98_trigger(idx)     (0)
  73.746 +#define default_NEC98_polarity(idx)    (0)
  73.747 +
  73.748  static int __init MPBIOS_polarity(int idx)
  73.749  {
  73.750  	int bus = mp_irqs[idx].mpc_srcbus;
  73.751 @@ -426,6 +855,11 @@ static int __init MPBIOS_polarity(int id
  73.752  					polarity = default_MCA_polarity(idx);
  73.753  					break;
  73.754  				}
  73.755 +				case MP_BUS_NEC98: /* NEC 98 pin */
  73.756 +				{
  73.757 +					polarity = default_NEC98_polarity(idx);
  73.758 +					break;
  73.759 +				}
  73.760  				default:
  73.761  				{
  73.762  					printk(KERN_WARNING "broken BIOS!!\n");
  73.763 @@ -461,7 +895,7 @@ static int __init MPBIOS_polarity(int id
  73.764  	return polarity;
  73.765  }
  73.766  
  73.767 -static int __init MPBIOS_trigger(int idx)
  73.768 +static int MPBIOS_trigger(int idx)
  73.769  {
  73.770  	int bus = mp_irqs[idx].mpc_srcbus;
  73.771  	int trigger;
  73.772 @@ -495,6 +929,11 @@ static int __init MPBIOS_trigger(int idx
  73.773  					trigger = default_MCA_trigger(idx);
  73.774  					break;
  73.775  				}
  73.776 +				case MP_BUS_NEC98: /* NEC 98 pin */
  73.777 +				{
  73.778 +					trigger = default_NEC98_trigger(idx);
  73.779 +					break;
  73.780 +				}
  73.781  				default:
  73.782  				{
  73.783  					printk(KERN_WARNING "broken BIOS!!\n");
  73.784 @@ -556,6 +995,7 @@ static int pin_2_irq(int idx, int apic, 
  73.785  		case MP_BUS_ISA: /* ISA pin */
  73.786  		case MP_BUS_EISA:
  73.787  		case MP_BUS_MCA:
  73.788 +		case MP_BUS_NEC98:
  73.789  		{
  73.790  			irq = mp_irqs[idx].mpc_srcbusirq;
  73.791  			break;
  73.792 @@ -569,6 +1009,13 @@ static int pin_2_irq(int idx, int apic, 
  73.793  			while (i < apic)
  73.794  				irq += nr_ioapic_registers[i++];
  73.795  			irq += pin;
  73.796 +
  73.797 +			/*
  73.798 +			 * For MPS mode, so far only needed by ES7000 platform
  73.799 +			 */
  73.800 +			if (ioapic_renumber_irq)
  73.801 +				irq = ioapic_renumber_irq(apic, irq);
  73.802 +
  73.803  			break;
  73.804  		}
  73.805  		default:
  73.806 @@ -579,20 +1026,6 @@ static int pin_2_irq(int idx, int apic, 
  73.807  		}
  73.808  	}
  73.809  
  73.810 -	/*
  73.811 -	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
  73.812 -	 */
  73.813 -	if ((pin >= 16) && (pin <= 23)) {
  73.814 -		if (pirq_entries[pin-16] != -1) {
  73.815 -			if (!pirq_entries[pin-16]) {
  73.816 -				printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
  73.817 -			} else {
  73.818 -				irq = pirq_entries[pin-16];
  73.819 -				printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
  73.820 -						pin-16, irq);
  73.821 -			}
  73.822 -		}
  73.823 -	}
  73.824  	return irq;
  73.825  }
  73.826  
  73.827 @@ -613,93 +1046,74 @@ static inline int IO_APIC_irq_trigger(in
  73.828  	return 0;
  73.829  }
  73.830  
  73.831 -int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
  73.832 -
  73.833 -#ifdef CONFIG_VMX
  73.834 -int vector_irq[256];
  73.835 -#endif
  73.836 +/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
  73.837 +u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
  73.838  
  73.839  int assign_irq_vector(int irq)
  73.840  {
  73.841  	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
  73.842 -	if (IO_APIC_VECTOR(irq) > 0)
  73.843 +
  73.844 +	BUG_ON(irq >= NR_IRQ_VECTORS);
  73.845 +	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
  73.846  		return IO_APIC_VECTOR(irq);
  73.847  next:
  73.848  	current_vector += 8;
  73.849  
  73.850 -        /* Skip the hypercall vector. */
  73.851 -	if (current_vector == HYPERCALL_VECTOR) goto next;
  73.852 -        /* Skip the Linux/BSD fast-trap vector. */
  73.853 -        if (current_vector == 0x80) goto next;
  73.854 +	/* Skip the hypercall vector. */
  73.855 +	if (current_vector == HYPERCALL_VECTOR)
  73.856 +		goto next;
  73.857  
  73.858 -	if (current_vector > FIRST_SYSTEM_VECTOR) {
  73.859 +	/* Skip the Linux/BSD fast-trap vector. */
  73.860 +	if (current_vector == 0x80)
  73.861 +		goto next;
  73.862 +
  73.863 +	if (current_vector >= FIRST_SYSTEM_VECTOR) {
  73.864  		offset++;
  73.865 +		if (!(offset%8))
  73.866 +			return -ENOSPC;
  73.867  		current_vector = FIRST_DEVICE_VECTOR + offset;
  73.868  	}
  73.869  
  73.870 -	if (current_vector == FIRST_SYSTEM_VECTOR)
  73.871 -		panic("ran out of interrupt sources!");
  73.872 +	vector_irq[current_vector] = irq;
  73.873 +	if (irq != AUTO_ASSIGN)
  73.874 +		IO_APIC_VECTOR(irq) = current_vector;
  73.875  
  73.876 -	IO_APIC_VECTOR(irq) = current_vector;
  73.877 -#ifdef CONFIG_VMX
  73.878 -        vector_irq[current_vector] = irq;
  73.879 -        printk("vector_irq[%x] = %d\n", current_vector, irq);
  73.880 -#endif
  73.881  	return current_vector;
  73.882  }
  73.883  
  73.884 -extern void (*interrupt[NR_IRQS])(void);
  73.885 +static struct hw_interrupt_type ioapic_level_type;
  73.886 +static struct hw_interrupt_type ioapic_edge_type;
  73.887  
  73.888 -/*
  73.889 - * Level and edge triggered IO-APIC interrupts need different handling,
  73.890 - * so we use two separate IRQ descriptors. Edge triggered IRQs can be
  73.891 - * handled with the level-triggered descriptor, but that one has slightly
  73.892 - * more overhead. Level-triggered interrupts cannot be handled with the
  73.893 - * edge-triggered handler, without risking IRQ storms and other ugly
  73.894 - * races.
  73.895 - */
  73.896 +#define IOAPIC_AUTO	-1
  73.897 +#define IOAPIC_EDGE	0
  73.898 +#define IOAPIC_LEVEL	1
  73.899  
  73.900 -static unsigned int startup_edge_ioapic_irq(unsigned int irq);
  73.901 -#define shutdown_edge_ioapic_irq  disable_edge_ioapic_irq
  73.902 -#define enable_edge_ioapic_irq    unmask_IO_APIC_irq
  73.903 -static void disable_edge_ioapic_irq (unsigned int irq);
  73.904 -static void ack_edge_ioapic_irq(unsigned int irq);
  73.905 -static void end_edge_ioapic_irq (unsigned int i);
  73.906 -static struct hw_interrupt_type ioapic_edge_irq_type = {
  73.907 -	"IO-APIC-edge",
  73.908 -	startup_edge_ioapic_irq,
  73.909 -	shutdown_edge_ioapic_irq,
  73.910 -	enable_edge_ioapic_irq,
  73.911 -	disable_edge_ioapic_irq,
  73.912 -	ack_edge_ioapic_irq,
  73.913 -	end_edge_ioapic_irq,
  73.914 -	set_ioapic_affinity,
  73.915 -};
  73.916 -
  73.917 -static unsigned int startup_level_ioapic_irq (unsigned int irq);
  73.918 -#define shutdown_level_ioapic_irq mask_IO_APIC_irq
  73.919 -#define enable_level_ioapic_irq   unmask_IO_APIC_irq
  73.920 -#define disable_level_ioapic_irq  mask_IO_APIC_irq
  73.921 -static void mask_and_ack_level_ioapic_irq (unsigned int irq);
  73.922 -static void end_level_ioapic_irq (unsigned int irq);
  73.923 -static struct hw_interrupt_type ioapic_level_irq_type = {
  73.924 -	"IO-APIC-level",
  73.925 -	startup_level_ioapic_irq,
  73.926 -	shutdown_level_ioapic_irq,
  73.927 -	enable_level_ioapic_irq,
  73.928 -	disable_level_ioapic_irq,
  73.929 -	mask_and_ack_level_ioapic_irq,
  73.930 -	end_level_ioapic_irq,
  73.931 -	set_ioapic_affinity,
  73.932 -};
  73.933 +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
  73.934 +{
  73.935 +	if (use_pci_vector() && !platform_legacy_irq(irq)) {
  73.936 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
  73.937 +				trigger == IOAPIC_LEVEL)
  73.938 +			irq_desc[vector].handler = &ioapic_level_type;
  73.939 +		else
  73.940 +			irq_desc[vector].handler = &ioapic_edge_type;
  73.941 +		set_intr_gate(vector, interrupt[vector]);
  73.942 +	} else	{
  73.943 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
  73.944 +				trigger == IOAPIC_LEVEL)
  73.945 +			irq_desc[irq].handler = &ioapic_level_type;
  73.946 +		else
  73.947 +			irq_desc[irq].handler = &ioapic_edge_type;
  73.948 +		set_intr_gate(vector, interrupt[irq]);
  73.949 +	}
  73.950 +}
  73.951  
  73.952  void __init setup_IO_APIC_irqs(void)
  73.953  {
  73.954  	struct IO_APIC_route_entry entry;
  73.955 -	int apic, pin, idx, irq, vector;
  73.956 +	int apic, pin, idx, irq, first_notcon = 1, vector;
  73.957  	unsigned long flags;
  73.958  
  73.959 -	printk(KERN_DEBUG "init IO_APIC IRQs\n");
  73.960 +	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
  73.961  
  73.962  	for (apic = 0; apic < nr_ioapics; apic++) {
  73.963  	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
  73.964 @@ -710,13 +1124,24 @@ void __init setup_IO_APIC_irqs(void)
  73.965  		memset(&entry,0,sizeof(entry));
  73.966  
  73.967  		entry.delivery_mode = INT_DELIVERY_MODE;
  73.968 -		entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
  73.969 +		entry.dest_mode = INT_DEST_MODE;
  73.970  		entry.mask = 0;				/* enable IRQ */
  73.971 -		entry.dest.logical.logical_dest = target_cpus();
  73.972 +		entry.dest.logical.logical_dest = 
  73.973 +					cpu_mask_to_apicid(TARGET_CPUS);
  73.974  
  73.975  		idx = find_irq_entry(apic,pin,mp_INT);
  73.976 -		if (idx == -1)
  73.977 +		if (idx == -1) {
  73.978 +			if (first_notcon) {
  73.979 +				apic_printk(APIC_VERBOSE, KERN_DEBUG
  73.980 +						" IO-APIC (apicid-pin) %d-%d",
  73.981 +						mp_ioapics[apic].mpc_apicid,
  73.982 +						pin);
  73.983 +				first_notcon = 0;
  73.984 +			} else
  73.985 +				apic_printk(APIC_VERBOSE, ", %d-%d",
  73.986 +					mp_ioapics[apic].mpc_apicid, pin);
  73.987  			continue;
  73.988 +		}
  73.989  
  73.990  		entry.trigger = irq_trigger(idx);
  73.991  		entry.polarity = irq_polarity(idx);
  73.992 @@ -731,8 +1156,7 @@ void __init setup_IO_APIC_irqs(void)
  73.993  		 * skip adding the timer int on secondary nodes, which causes
  73.994  		 * a small but painful rift in the time-space continuum
  73.995  		 */
  73.996 -		if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) 
  73.997 -			&& (apic != 0) && (irq == 0))
  73.998 +		if (multi_timer_check(apic, irq))
  73.999  			continue;
 73.1000  		else
 73.1001  			add_pin_to_irq(irq, apic, pin);
 73.1002 @@ -743,13 +1167,7 @@ void __init setup_IO_APIC_irqs(void)
 73.1003  		if (IO_APIC_IRQ(irq)) {
 73.1004  			vector = assign_irq_vector(irq);
 73.1005  			entry.vector = vector;
 73.1006 -
 73.1007 -			if (IO_APIC_irq_trigger(irq))
 73.1008 -				irq_desc[irq].handler = &ioapic_level_irq_type;
 73.1009 -			else
 73.1010 -				irq_desc[irq].handler = &ioapic_edge_irq_type;
 73.1011 -
 73.1012 -			set_intr_gate(vector, interrupt[irq]);
 73.1013 +			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
 73.1014  		
 73.1015  			if (!apic && (irq < 16))
 73.1016  				disable_8259A_irq(irq);
 73.1017 @@ -760,11 +1178,13 @@ void __init setup_IO_APIC_irqs(void)
 73.1018  		spin_unlock_irqrestore(&ioapic_lock, flags);
 73.1019  	}
 73.1020  	}
 73.1021 +
 73.1022 +	if (!first_notcon)
 73.1023 +		apic_printk(APIC_VERBOSE, " not connected.\n");
 73.1024  }
 73.1025  
 73.1026  /*
 73.1027 - * Set up the 8259A-master output pin as broadcast to all
 73.1028 - * CPUs.
 73.1029 + * Set up the 8259A-master output pin:
 73.1030   */
 73.1031  void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
 73.1032  {
 73.1033 @@ -782,9 +1202,9 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 73.1034  	 * We use logical delivery to get the timer IRQ
 73.1035  	 * to the first CPU.
 73.1036  	 */
 73.1037 -	entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
 73.1038 +	entry.dest_mode = INT_DEST_MODE;
 73.1039  	entry.mask = 0;					/* unmask IRQ now */
 73.1040 -	entry.dest.logical.logical_dest = target_cpus();
 73.1041 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 73.1042  	entry.delivery_mode = INT_DELIVERY_MODE;
 73.1043  	entry.polarity = 0;
 73.1044  	entry.trigger = 0;
 73.1045 @@ -794,7 +1214,7 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 73.1046  	 * The timer IRQ doesn't have to know that behind the
 73.1047  	 * scene we have a 8259A-master in AEOI mode ...
 73.1048  	 */
 73.1049 -	irq_desc[0].handler = &ioapic_edge_irq_type;
 73.1050 +	irq_desc[0].handler = &ioapic_edge_type;
 73.1051  
 73.1052  	/*
 73.1053  	 * Add it to the IO-APIC irq-routing table:
 73.1054 @@ -807,23 +1227,23 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 73.1055  	enable_8259A_irq(0);
 73.1056  }
 73.1057  
 73.1058 -void __init UNEXPECTED_IO_APIC(void)
 73.1059 +static inline void UNEXPECTED_IO_APIC(void)
 73.1060  {
 73.1061 -	printk(KERN_WARNING 
 73.1062 -		"An unexpected IO-APIC was found. If this kernel release is less than\n"
 73.1063 -		"three months old please report this to linux-smp@vger.kernel.org\n");
 73.1064  }
 73.1065  
 73.1066  void __init print_IO_APIC(void)
 73.1067  {
 73.1068 -#ifdef VERBOSE
 73.1069 +#ifndef NDEBUG
 73.1070  	int apic, i;
 73.1071 -	struct IO_APIC_reg_00 reg_00;
 73.1072 -	struct IO_APIC_reg_01 reg_01;
 73.1073 -	struct IO_APIC_reg_02 reg_02;
 73.1074 -	struct IO_APIC_reg_03 reg_03;
 73.1075 +	union IO_APIC_reg_00 reg_00;
 73.1076 +	union IO_APIC_reg_01 reg_01;
 73.1077 +	union IO_APIC_reg_02 reg_02;
 73.1078 +	union IO_APIC_reg_03 reg_03;
 73.1079  	unsigned long flags;
 73.1080  
 73.1081 +	if (apic_verbosity == APIC_QUIET)
 73.1082 +		return;
 73.1083 +
 73.1084   	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 73.1085  	for (i = 0; i < nr_ioapics; i++)
 73.1086  		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
 73.1087 @@ -838,47 +1258,46 @@ void __init print_IO_APIC(void)
 73.1088  	for (apic = 0; apic < nr_ioapics; apic++) {
 73.1089  
 73.1090  	spin_lock_irqsave(&ioapic_lock, flags);
 73.1091 -	*(int *)&reg_00 = io_apic_read(apic, 0);
 73.1092 -	*(int *)&reg_01 = io_apic_read(apic, 1);
 73.1093 -	if (reg_01.version >= 0x10)
 73.1094 -		*(int *)&reg_02 = io_apic_read(apic, 2);
 73.1095 -	if (reg_01.version >= 0x20)
 73.1096 -		*(int *)&reg_03 = io_apic_read(apic, 3);
 73.1097 +	reg_00.raw = io_apic_read(apic, 0);
 73.1098 +	reg_01.raw = io_apic_read(apic, 1);
 73.1099 +	if (reg_01.bits.version >= 0x10)
 73.1100 +		reg_02.raw = io_apic_read(apic, 2);
 73.1101 +	if (reg_01.bits.version >= 0x20)
 73.1102 +		reg_03.raw = io_apic_read(apic, 3);
 73.1103  	spin_unlock_irqrestore(&ioapic_lock, flags);
 73.1104  
 73.1105 -	printk("\n");
 73.1106  	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
 73.1107 -	printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
 73.1108 -	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.ID);
 73.1109 -	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.delivery_type);
 73.1110 -	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.LTS);
 73.1111 -	if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2)
 73.1112 +	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 73.1113 +	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 73.1114 +	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 73.1115 +	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 73.1116 +	if (reg_00.bits.ID >= get_physical_broadcast())
 73.1117 +		UNEXPECTED_IO_APIC();
 73.1118 +	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
 73.1119  		UNEXPECTED_IO_APIC();
 73.1120  
 73.1121 -	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 73.1122 -	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.entries);
 73.1123 -	if (	(reg_01.entries != 0x0f) && /* older (Neptune) boards */
 73.1124 -		(reg_01.entries != 0x17) && /* typical ISA+PCI boards */
 73.1125 -		(reg_01.entries != 0x1b) && /* Compaq Proliant boards */
 73.1126 -		(reg_01.entries != 0x1f) && /* dual Xeon boards */
 73.1127 -		(reg_01.entries != 0x22) && /* bigger Xeon boards */
 73.1128 -		(reg_01.entries != 0x2E) &&
 73.1129 -		(reg_01.entries != 0x3F)
 73.1130 +	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
 73.1131 +	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
 73.1132 +	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
 73.1133 +		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
 73.1134 +		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
 73.1135 +		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
 73.1136 +		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
 73.1137 +		(reg_01.bits.entries != 0x2E) &&
 73.1138 +		(reg_01.bits.entries != 0x3F)
 73.1139  	)
 73.1140  		UNEXPECTED_IO_APIC();
 73.1141  
 73.1142 -	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.PRQ);
 73.1143 -	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.version);
 73.1144 -	if (	(reg_01.version != 0x01) && /* 82489DX IO-APICs */
 73.1145 -		(reg_01.version != 0x02) && /* VIA */
 73.1146 -		(reg_01.version != 0x03) && /* later VIA */
 73.1147 -		(reg_01.version != 0x10) && /* oldest IO-APICs */
 73.1148 -		(reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
 73.1149 -		(reg_01.version != 0x13) && /* Xeon IO-APICs */
 73.1150 -		(reg_01.version != 0x20)    /* Intel P64H (82806 AA) */
 73.1151 +	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 73.1152 +	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 73.1153 +	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
 73.1154 +		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
 73.1155 +		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
 73.1156 +		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
 73.1157 +		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
 73.1158  	)
 73.1159  		UNEXPECTED_IO_APIC();
 73.1160 -	if (reg_01.__reserved_1 || reg_01.__reserved_2)
 73.1161 +	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
 73.1162  		UNEXPECTED_IO_APIC();
 73.1163  
 73.1164  	/*
 73.1165 @@ -886,10 +1305,10 @@ void __init print_IO_APIC(void)
 73.1166  	 * but the value of reg_02 is read as the previous read register
 73.1167  	 * value, so ignore it if reg_02 == reg_01.
 73.1168  	 */
 73.1169 -	if (reg_01.version >= 0x10 && *(int *)&reg_02 != *(int *)&reg_01) {
 73.1170 -		printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
 73.1171 -		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.arbitration);
 73.1172 -		if (reg_02.__reserved_1 || reg_02.__reserved_2)
 73.1173 +	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 73.1174 +		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 73.1175 +		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 73.1176 +		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
 73.1177  			UNEXPECTED_IO_APIC();
 73.1178  	}
 73.1179  
 73.1180 @@ -898,11 +1317,11 @@ void __init print_IO_APIC(void)
 73.1181  	 * or reg_03, but the value of reg_0[23] is read as the previous read
 73.1182  	 * register value, so ignore it if reg_03 == reg_0[12].
 73.1183  	 */
 73.1184 -	if (reg_01.version >= 0x20 && *(int *)&reg_03 != *(int *)&reg_02 &&
 73.1185 -	    *(int *)&reg_03 != *(int *)&reg_01) {
 73.1186 -		printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)&reg_03);
 73.1187 -		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.boot_DT);
 73.1188 -		if (reg_03.__reserved_1)
 73.1189 +	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
 73.1190 +	    reg_03.raw != reg_01.raw) {
 73.1191 +		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
 73.1192 +		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
 73.1193 +		if (reg_03.bits.__reserved_1)
 73.1194  			UNEXPECTED_IO_APIC();
 73.1195  	}
 73.1196  
 73.1197 @@ -911,7 +1330,7 @@ void __init print_IO_APIC(void)
 73.1198  	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
 73.1199  			  " Stat Dest Deli Vect:   \n");
 73.1200  
 73.1201 -	for (i = 0; i <= reg_01.entries; i++) {
 73.1202 +	for (i = 0; i <= reg_01.bits.entries; i++) {
 73.1203  		struct IO_APIC_route_entry entry;
 73.1204  
 73.1205  		spin_lock_irqsave(&ioapic_lock, flags);
 73.1206 @@ -937,12 +1356,17 @@ void __init print_IO_APIC(void)
 73.1207  		);
 73.1208  	}
 73.1209  	}
 73.1210 +	if (use_pci_vector())
 73.1211 +		printk(KERN_INFO "Using vector-based indexing\n");
 73.1212  	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 73.1213  	for (i = 0; i < NR_IRQS; i++) {
 73.1214  		struct irq_pin_list *entry = irq_2_pin + i;
 73.1215  		if (entry->pin < 0)
 73.1216  			continue;
 73.1217 -		printk(KERN_DEBUG "IRQ%d ", i);
 73.1218 + 		if (use_pci_vector() && !platform_legacy_irq(i))
 73.1219 +			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
 73.1220 +		else
 73.1221 +			printk(KERN_DEBUG "IRQ%d ", i);
 73.1222  		for (;;) {
 73.1223  			printk("-> %d:%d", entry->apic, entry->pin);
 73.1224  			if (!entry->next)
 73.1225 @@ -953,152 +1377,13 @@ void __init print_IO_APIC(void)
 73.1226  	}
 73.1227  
 73.1228  	printk(KERN_INFO ".................................... done.\n");
 73.1229 -#endif
 73.1230 -}
 73.1231 -
 73.1232 -
 73.1233 -#if 0 /* Maybe useful for debugging, but not currently used anywhere. */
 73.1234 -
 73.1235 -static void print_APIC_bitfield (int base)
 73.1236 -{
 73.1237 -	unsigned int v;
 73.1238 -	int i, j;
 73.1239 -
 73.1240 -	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
 73.1241 -	for (i = 0; i < 8; i++) {
 73.1242 -		v = apic_read(base + i*0x10);
 73.1243 -		for (j = 0; j < 32; j++) {
 73.1244 -			if (v & (1<<j))
 73.1245 -				printk("1");
 73.1246 -			else
 73.1247 -				printk("0");
 73.1248 -		}
 73.1249 -		printk("\n");
 73.1250 -	}
 73.1251 +#endif /* !NDEBUG */
 73.1252 +	return;
 73.1253  }
 73.1254  
 73.1255 -
 73.1256 -void /*__init*/ print_local_APIC(void * dummy)
 73.1257 -{
 73.1258 -	unsigned int v, ver, maxlvt;
 73.1259 -
 73.1260 -	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 73.1261 -		smp_processor_id(), hard_smp_processor_id());
 73.1262 -	v = apic_read(APIC_ID);
 73.1263 -	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
 73.1264 -	v = apic_read(APIC_LVR);
 73.1265 -	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 73.1266 -	ver = GET_APIC_VERSION(v);
 73.1267 -	maxlvt = get_maxlvt();
 73.1268 -
 73.1269 -	v = apic_read(APIC_TASKPRI);
 73.1270 -	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 73.1271 -
 73.1272 -	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
 73.1273 -		v = apic_read(APIC_ARBPRI);
 73.1274 -		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
 73.1275 -			v & APIC_ARBPRI_MASK);
 73.1276 -		v = apic_read(APIC_PROCPRI);
 73.1277 -		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 73.1278 -	}
 73.1279 -
 73.1280 -	v = apic_read(APIC_EOI);
 73.1281 -	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
 73.1282 -	v = apic_read(APIC_RRR);
 73.1283 -	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
 73.1284 -	v = apic_read(APIC_LDR);
 73.1285 -	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
 73.1286 -	v = apic_read(APIC_DFR);
 73.1287 -	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
 73.1288 -	v = apic_read(APIC_SPIV);
 73.1289 -	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 73.1290 -
 73.1291 -	printk(KERN_DEBUG "... APIC ISR field:\n");
 73.1292 -	print_APIC_bitfield(APIC_ISR);
 73.1293 -	printk(KERN_DEBUG "... APIC TMR field:\n");
 73.1294 -	print_APIC_bitfield(APIC_TMR);
 73.1295 -	printk(KERN_DEBUG "... APIC IRR field:\n");
 73.1296 -	print_APIC_bitfield(APIC_IRR);
 73.1297 -
 73.1298 -	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
 73.1299 -		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 73.1300 -			apic_write(APIC_ESR, 0);
 73.1301 -		v = apic_read(APIC_ESR);
 73.1302 -		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 73.1303 -	}
 73.1304 -
 73.1305 -	v = apic_read(APIC_ICR);
 73.1306 -	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 73.1307 -	v = apic_read(APIC_ICR2);
 73.1308 -	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 73.1309 -
 73.1310 -	v = apic_read(APIC_LVTT);
 73.1311 -	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
 73.1312 -
 73.1313 -	if (maxlvt > 3) {                       /* PC is LVT#4. */
 73.1314 -		v = apic_read(APIC_LVTPC);
 73.1315 -		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
 73.1316 -	}
 73.1317 -	v = apic_read(APIC_LVT0);
 73.1318 -	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
 73.1319 -	v = apic_read(APIC_LVT1);
 73.1320 -	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
 73.1321 -
 73.1322 -	if (maxlvt > 2) {			/* ERR is LVT#3. */
 73.1323 -		v = apic_read(APIC_LVTERR);
 73.1324 -		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
 73.1325 -	}
 73.1326 -
 73.1327 -	v = apic_read(APIC_TMICT);
 73.1328 -	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
 73.1329 -	v = apic_read(APIC_TMCCT);
 73.1330 -	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 73.1331 -	v = apic_read(APIC_TDCR);
 73.1332 -	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
 73.1333 -	printk("\n");
 73.1334 -}
 73.1335 -
 73.1336 -void print_all_local_APICs (void)
 73.1337 -{
 73.1338 -	smp_call_function(print_local_APIC, NULL, 1, 1);
 73.1339 -	print_local_APIC(NULL);
 73.1340 -}
 73.1341 -
 73.1342 -void /*__init*/ print_PIC(void)
 73.1343 -{
 73.1344 -	extern spinlock_t i8259A_lock;
 73.1345 -	unsigned int v, flags;
 73.1346 -
 73.1347 -	printk(KERN_DEBUG "\nprinting PIC contents\n");
 73.1348 -
 73.1349 -	spin_lock_irqsave(&i8259A_lock, flags);
 73.1350 -
 73.1351 -	v = inb(0xa1) << 8 | inb(0x21);
 73.1352 -	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
 73.1353 -
 73.1354 -	v = inb(0xa0) << 8 | inb(0x20);
 73.1355 -	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
 73.1356 -
 73.1357 -	outb(0x0b,0xa0);
 73.1358 -	outb(0x0b,0x20);
 73.1359 -	v = inb(0xa0) << 8 | inb(0x20);
 73.1360 -	outb(0x0a,0xa0);
 73.1361 -	outb(0x0a,0x20);
 73.1362 -
 73.1363 -	spin_unlock_irqrestore(&i8259A_lock, flags);
 73.1364 -
 73.1365 -	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
 73.1366 -
 73.1367 -	v = inb(0x4d1) << 8 | inb(0x4d0);
 73.1368 -	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 73.1369 -}
 73.1370 -
 73.1371 -#endif /* 0 */
 73.1372 -
 73.1373 -
 73.1374  static void __init enable_IO_APIC(void)
 73.1375  {
 73.1376 -	struct IO_APIC_reg_01 reg_01;
 73.1377 +	union IO_APIC_reg_01 reg_01;
 73.1378  	int i;
 73.1379  	unsigned long flags;
 73.1380  
 73.1381 @@ -1106,18 +1391,15 @@ static void __init enable_IO_APIC(void)
 73.1382  		irq_2_pin[i].pin = -1;
 73.1383  		irq_2_pin[i].next = 0;
 73.1384  	}
 73.1385 -	if (!pirqs_enabled)
 73.1386 -		for (i = 0; i < MAX_PIRQS; i++)
 73.1387 -			pirq_entries[i] = -1;
 73.1388  
 73.1389  	/*
 73.1390  	 * The number of IO-APIC IRQ registers (== #pins):
 73.1391  	 */
 73.1392  	for (i = 0; i < nr_ioapics; i++) {
 73.1393  		spin_lock_irqsave(&ioapic_lock, flags);
 73.1394 -		*(int *)&reg_01 = io_apic_read(i, 1);
 73.1395 +		reg_01.raw = io_apic_read(i, 1);
 73.1396  		spin_unlock_irqrestore(&ioapic_lock, flags);
 73.1397 -		nr_ioapic_registers[i] = reg_01.entries+1;
 73.1398 +		nr_ioapic_registers[i] = reg_01.bits.entries+1;
 73.1399  	}
 73.1400  
 73.1401  	/*
 73.1402 @@ -1146,18 +1428,22 @@ void disable_IO_APIC(void)
 73.1403   * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
 73.1404   */
 73.1405  
 73.1406 -static void __init setup_ioapic_ids_from_mpc (void)
 73.1407 +#ifndef CONFIG_X86_NUMAQ
 73.1408 +static void __init setup_ioapic_ids_from_mpc(void)
 73.1409  {
 73.1410 -	struct IO_APIC_reg_00 reg_00;
 73.1411 -	unsigned long phys_id_present_map = phys_cpu_present_map;
 73.1412 +	union IO_APIC_reg_00 reg_00;
 73.1413 +	physid_mask_t phys_id_present_map;
 73.1414  	int apic;
 73.1415  	int i;
 73.1416  	unsigned char old_id;
 73.1417  	unsigned long flags;
 73.1418  
 73.1419 -	if (clustered_apic_mode)
 73.1420 -		/* We don't have a good way to do this yet - hack */
 73.1421 -		phys_id_present_map = (u_long) 0xf;
 73.1422 +	/*
 73.1423 +	 * This is broken; anything with a real cpu count has to
 73.1424 +	 * circumvent this idiocy regardless.
 73.1425 +	 */
 73.1426 +	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
 73.1427 +
 73.1428  	/*
 73.1429  	 * Set the IOAPIC ID to the value stored in the MPC table.
 73.1430  	 */
 73.1431 @@ -1165,41 +1451,48 @@ static void __init setup_ioapic_ids_from
 73.1432  
 73.1433  		/* Read the register 0 value */
 73.1434  		spin_lock_irqsave(&ioapic_lock, flags);
 73.1435 -		*(int *)&reg_00 = io_apic_read(apic, 0);
 73.1436 +		reg_00.raw = io_apic_read(apic, 0);
 73.1437  		spin_unlock_irqrestore(&ioapic_lock, flags);
 73.1438  		
 73.1439  		old_id = mp_ioapics[apic].mpc_apicid;
 73.1440  
 73.1441 -		if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
 73.1442 +		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
 73.1443  			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
 73.1444  				apic, mp_ioapics[apic].mpc_apicid);
 73.1445  			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 73.1446 -				reg_00.ID);
 73.1447 -			mp_ioapics[apic].mpc_apicid = reg_00.ID;
 73.1448 +				reg_00.bits.ID);
 73.1449 +			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
 73.1450  		}
 73.1451  
 73.1452 +		/* Don't check I/O APIC IDs for some xAPIC systems.  They have
 73.1453 +		 * no meaning without the serial APIC bus. */
 73.1454 +		if (NO_IOAPIC_CHECK)
 73.1455 +			continue;
 73.1456  		/*
 73.1457  		 * Sanity check, is the ID really free? Every APIC in a
 73.1458  		 * system must have a unique ID or we get lots of nice
 73.1459  		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 73.1460 -		 * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
 73.1461  		 */
 73.1462 -		if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
 73.1463 -		    (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
 73.1464 +		if (check_apicid_used(phys_id_present_map,
 73.1465 +					mp_ioapics[apic].mpc_apicid)) {
 73.1466  			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
 73.1467  				apic, mp_ioapics[apic].mpc_apicid);
 73.1468 -			for (i = 0; i < 0xf; i++)
 73.1469 -				if (!(phys_id_present_map & (1 << i)))
 73.1470 +			for (i = 0; i < get_physical_broadcast(); i++)
 73.1471 +				if (!physid_isset(i, phys_id_present_map))
 73.1472  					break;
 73.1473 -			if (i >= apic_broadcast_id)
 73.1474 +			if (i >= get_physical_broadcast())
 73.1475  				panic("Max APIC ID exceeded!\n");
 73.1476  			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 73.1477  				i);
 73.1478 -			phys_id_present_map |= 1 << i;
 73.1479 +			physid_set(i, phys_id_present_map);
 73.1480  			mp_ioapics[apic].mpc_apicid = i;
 73.1481  		} else {
 73.1482 -			printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
 73.1483 -			phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
 73.1484 +			physid_mask_t tmp;
 73.1485 +			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
 73.1486 +			apic_printk(APIC_VERBOSE, "Setting %d in the "
 73.1487 +					"phys_id_present_map\n",
 73.1488 +					mp_ioapics[apic].mpc_apicid);
 73.1489 +			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 73.1490  		}
 73.1491  
 73.1492  
 73.1493 @@ -1217,26 +1510,30 @@ static void __init setup_ioapic_ids_from
 73.1494  		 * Read the right value from the MPC table and
 73.1495  		 * write it into the ID register.
 73.1496  	 	 */
 73.1497 -		printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
 73.1498 -					mp_ioapics[apic].mpc_apicid);
 73.1499 +		apic_printk(APIC_VERBOSE, KERN_INFO
 73.1500 +			"...changing IO-APIC physical APIC ID to %d ...",
 73.1501 +			mp_ioapics[apic].mpc_apicid);
 73.1502  
 73.1503 -		reg_00.ID = mp_ioapics[apic].mpc_apicid;
 73.1504 +		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
 73.1505  		spin_lock_irqsave(&ioapic_lock, flags);
 73.1506 -		io_apic_write(apic, 0, *(int *)&reg_00);
 73.1507 +		io_apic_write(apic, 0, reg_00.raw);
 73.1508  		spin_unlock_irqrestore(&ioapic_lock, flags);
 73.1509  
 73.1510  		/*
 73.1511  		 * Sanity check
 73.1512  		 */
 73.1513  		spin_lock_irqsave(&ioapic_lock, flags);
 73.1514 -		*(int *)&reg_00 = io_apic_read(apic, 0);
 73.1515 +		reg_00.raw = io_apic_read(apic, 0);
 73.1516  		spin_unlock_irqrestore(&ioapic_lock, flags);
 73.1517 -		if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
 73.1518 -			panic("could not set ID!\n");
 73.1519 +		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
 73.1520 +			printk("could not set ID!\n");
 73.1521  		else
 73.1522 -			printk(" ok.\n");
 73.1523 +			apic_printk(APIC_VERBOSE, " ok.\n");
 73.1524  	}
 73.1525  }
 73.1526 +#else
 73.1527 +static void __init setup_ioapic_ids_from_mpc(void) { }
 73.1528 +#endif
 73.1529  
 73.1530  /*
 73.1531   * There is a nasty bug in some older SMP boards, their mptable lies
 73.1532 @@ -1248,9 +1545,9 @@ static void __init setup_ioapic_ids_from
 73.1533   */
 73.1534  static int __init timer_irq_works(void)
 73.1535  {
 73.1536 -	unsigned int t1 = jiffies;
 73.1537 +	unsigned long t1 = jiffies;
 73.1538  
 73.1539 -	__sti();
 73.1540 +	local_irq_enable();
 73.1541  	/* Let ten ticks pass... */
 73.1542  	mdelay((10 * 1000) / HZ);
 73.1543  
 73.1544 @@ -1267,7 +1564,18 @@ static int __init timer_irq_works(void)
 73.1545  	return 0;
 73.1546  }
 73.1547  
 73.1548 -static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
 73.1549 +/*
 73.1550 + * In the SMP+IOAPIC case it might happen that there are an unspecified
 73.1551 + * number of pending IRQ events unhandled. These cases are very rare,
 73.1552 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
 73.1553 + * better to do it this way as thus we do not have to be aware of
 73.1554 + * 'pending' interrupts in the IRQ path, except at this point.
 73.1555 + */
 73.1556 +/*
 73.1557 + * Edge triggered needs to resend any interrupt
 73.1558 + * that was delayed but this is now handled in the device
 73.1559 + * independent code.
 73.1560 + */
 73.1561  
 73.1562  /*
 73.1563   * Starting up a edge-triggered IO-APIC interrupt is
 73.1564 @@ -1278,7 +1586,6 @@ static void disable_edge_ioapic_irq (uns
 73.1565   * This is not complete - we should be able to fake
 73.1566   * an edge even if it isn't on the 8259A...
 73.1567   */
 73.1568 -
 73.1569  static unsigned int startup_edge_ioapic_irq(unsigned int irq)
 73.1570  {
 73.1571  	int was_pending = 0;
 73.1572 @@ -1303,16 +1610,13 @@ static unsigned int startup_edge_ioapic_
 73.1573   */
 73.1574  static void ack_edge_ioapic_irq(unsigned int irq)
 73.1575  {
 73.1576 -	balance_irq(irq);
 73.1577 +	move_irq(irq);
 73.1578  	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
 73.1579  					== (IRQ_PENDING | IRQ_DISABLED))
 73.1580  		mask_IO_APIC_irq(irq);
 73.1581  	ack_APIC_irq();
 73.1582  }
 73.1583  
 73.1584 -static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
 73.1585 -
 73.1586 -
 73.1587  /*
 73.1588   * Level triggered interrupts can just be masked,
 73.1589   * and shutting down and starting up the interrupt
 73.1590 @@ -1334,15 +1638,12 @@ static unsigned int startup_level_ioapic
 73.1591  	return 0; /* don't check for pending */
 73.1592  }
 73.1593  
 73.1594 -static void mask_and_ack_level_ioapic_irq(unsigned int irq)
 73.1595 +static void end_level_ioapic_irq (unsigned int irq)
 73.1596  {
 73.1597  	unsigned long v;
 73.1598  	int i;
 73.1599  
 73.1600 -	balance_irq(irq);
 73.1601 -
 73.1602 -	mask_IO_APIC_irq(irq);
 73.1603 -
 73.1604 +	move_irq(irq);
 73.1605  /*
 73.1606   * It appears there is an erratum which affects at least version 0x11
 73.1607   * of I/O APIC (that's the 82093AA and cores integrated into various
 73.1608 @@ -1363,45 +1664,102 @@ static void mask_and_ack_level_ioapic_ir
 73.1609   * The idea is from Manfred Spraul.  --macro
 73.1610   */
 73.1611  	i = IO_APIC_VECTOR(irq);
 73.1612 +
 73.1613  	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 73.1614  
 73.1615  	ack_APIC_irq();
 73.1616  
 73.1617  	if (!(v & (1 << (i & 0x1f)))) {
 73.1618 -#ifdef APIC_LOCKUP_DEBUG
 73.1619 -		struct irq_pin_list *entry;
 73.1620 -#endif
 73.1621 -
 73.1622 -#ifdef APIC_MISMATCH_DEBUG
 73.1623  		atomic_inc(&irq_mis_count);
 73.1624 -#endif
 73.1625  		spin_lock(&ioapic_lock);
 73.1626 -		__edge_IO_APIC_irq(irq);
 73.1627 -#ifdef APIC_LOCKUP_DEBUG
 73.1628 -		for (entry = irq_2_pin + irq;;) {
 73.1629 -			unsigned int reg;
 73.1630 -
 73.1631 -			if (entry->pin == -1)
 73.1632 -				break;
 73.1633 -			reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
 73.1634 -			if (reg & 0x00004000)
 73.1635 -				printk(KERN_CRIT "Aieee!!!  Remote IRR"
 73.1636 -					" still set after unlock!\n");
 73.1637 -			if (!entry->next)
 73.1638 -				break;
 73.1639 -			entry = irq_2_pin + entry->next;
 73.1640 -		}
 73.1641 -#endif
 73.1642 -		__level_IO_APIC_irq(irq);
 73.1643 +		__mask_and_edge_IO_APIC_irq(irq);
 73.1644 +		__unmask_and_level_IO_APIC_irq(irq);
 73.1645  		spin_unlock(&ioapic_lock);
 73.1646  	}
 73.1647  }
 73.1648  
 73.1649 -static void end_level_ioapic_irq(unsigned int irq)
 73.1650 +#ifdef CONFIG_PCI_MSI
 73.1651 +static unsigned int startup_edge_ioapic_vector(unsigned int vector)
 73.1652 +{
 73.1653 +	int irq = vector_to_irq(vector);
 73.1654 +
 73.1655 +	return startup_edge_ioapic_irq(irq);
 73.1656 +}
 73.1657 +
 73.1658 +static void ack_edge_ioapic_vector(unsigned int vector)
 73.1659 +{
 73.1660 +	int irq = vector_to_irq(vector);
 73.1661 +
 73.1662 +	ack_edge_ioapic_irq(irq);
 73.1663 +}
 73.1664 +
 73.1665 +static unsigned int startup_level_ioapic_vector (unsigned int vector)
 73.1666  {
 73.1667 +	int irq = vector_to_irq(vector);
 73.1668 +
 73.1669 +	return startup_level_ioapic_irq (irq);
 73.1670 +}
 73.1671 +
 73.1672 +static void end_level_ioapic_vector (unsigned int vector)
 73.1673 +{
 73.1674 +	int irq = vector_to_irq(vector);
 73.1675 +
 73.1676 +	end_level_ioapic_irq(irq);
 73.1677 +}
 73.1678 +
 73.1679 +static void mask_IO_APIC_vector (unsigned int vector)
 73.1680 +{
 73.1681 +	int irq = vector_to_irq(vector);
 73.1682 +
 73.1683 +	mask_IO_APIC_irq(irq);
 73.1684 +}
 73.1685 +
 73.1686 +static void unmask_IO_APIC_vector (unsigned int vector)
 73.1687 +{
 73.1688 +	int irq = vector_to_irq(vector);
 73.1689 +
 73.1690  	unmask_IO_APIC_irq(irq);
 73.1691  }
 73.1692  
 73.1693 +static void set_ioapic_affinity_vector (unsigned int vector,
 73.1694 +					cpumask_t cpu_mask)
 73.1695 +{
 73.1696 +	int irq = vector_to_irq(vector);
 73.1697 +
 73.1698 +	set_ioapic_affinity_irq(irq, cpu_mask);
 73.1699 +}
 73.1700 +#endif
 73.1701 +
 73.1702 +/*
 73.1703 + * Level and edge triggered IO-APIC interrupts need different handling,
 73.1704 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be
 73.1705 + * handled with the level-triggered descriptor, but that one has slightly
 73.1706 + * more overhead. Level-triggered interrupts cannot be handled with the
 73.1707 + * edge-triggered handler, without risking IRQ storms and other ugly
 73.1708 + * races.
 73.1709 + */
 73.1710 +static struct hw_interrupt_type ioapic_edge_type = {
 73.1711 +	.typename 	= "IO-APIC-edge",
 73.1712 +	.startup 	= startup_edge_ioapic,
 73.1713 +	.shutdown 	= shutdown_edge_ioapic,
 73.1714 +	.enable 	= enable_edge_ioapic,
 73.1715 +	.disable 	= disable_edge_ioapic,
 73.1716 +	.ack 		= ack_edge_ioapic,
 73.1717 +	.end 		= end_edge_ioapic,
 73.1718 +	.set_affinity 	= set_ioapic_affinity,
 73.1719 +};
 73.1720 +
 73.1721 +static struct hw_interrupt_type ioapic_level_type = {
 73.1722 +	.typename 	= "IO-APIC-level",
 73.1723 +	.startup 	= startup_level_ioapic,
 73.1724 +	.shutdown 	= shutdown_level_ioapic,
 73.1725 +	.enable 	= enable_level_ioapic,
 73.1726 +	.disable 	= disable_level_ioapic,
 73.1727 +	.ack 		= mask_and_ack_level_ioapic,
 73.1728 +	.end 		= end_level_ioapic,
 73.1729 +	.set_affinity 	= set_ioapic_affinity,
 73.1730 +};
 73.1731 +
 73.1732  static inline void init_IO_APIC_traps(void)
 73.1733  {
 73.1734  	int irq;
 73.1735 @@ -1418,7 +1776,13 @@ static inline void init_IO_APIC_traps(vo
 73.1736  	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 73.1737  	 */
 73.1738  	for (irq = 0; irq < NR_IRQS ; irq++) {
 73.1739 -		if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
 73.1740 +		int tmp = irq;
 73.1741 +		if (use_pci_vector()) {
 73.1742 +			if (!platform_legacy_irq(tmp))
 73.1743 +				if ((tmp = vector_to_irq(tmp)) == -1)
 73.1744 +					continue;
 73.1745 +		}
 73.1746 +		if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
 73.1747  			/*
 73.1748  			 * Hmm.. We don't have an entry for this,
 73.1749  			 * so default to an old-fashioned 8259
 73.1750 @@ -1457,15 +1821,35 @@ static void ack_lapic_irq (unsigned int 
 73.1751  static void end_lapic_irq (unsigned int i) { /* nothing */ }
 73.1752  
 73.1753  static struct hw_interrupt_type lapic_irq_type = {
 73.1754 -	"local-APIC-edge",
 73.1755 -	NULL, /* startup_irq() not used for IRQ0 */
 73.1756 -	NULL, /* shutdown_irq() not used for IRQ0 */
 73.1757 -	enable_lapic_irq,
 73.1758 -	disable_lapic_irq,
 73.1759 -	ack_lapic_irq,
 73.1760 -	end_lapic_irq
 73.1761 +	.typename 	= "local-APIC-edge",
 73.1762 +	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
 73.1763 +	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
 73.1764 +	.enable 	= enable_lapic_irq,
 73.1765 +	.disable 	= disable_lapic_irq,
 73.1766 +	.ack 		= ack_lapic_irq,
 73.1767 +	.end 		= end_lapic_irq
 73.1768  };
 73.1769  
 73.1770 +#if 0
 73.1771 +static void setup_nmi (void)
 73.1772 +{
 73.1773 +	/*
 73.1774 + 	 * Dirty trick to enable the NMI watchdog ...
 73.1775 +	 * We put the 8259A master into AEOI mode and
 73.1776 +	 * unmask on all local APICs LVT0 as NMI.
 73.1777 +	 *
 73.1778 +	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
 73.1779 +	 * is from Maciej W. Rozycki - so we do not have to EOI from
 73.1780 +	 * the NMI handler or the timer interrupt.
 73.1781 +	 */ 
 73.1782 +	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 73.1783 +
 73.1784 +	on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
 73.1785 +
 73.1786 +	apic_printk(APIC_VERBOSE, " done.\n");
 73.1787 +}
 73.1788 +#endif
 73.1789 +
 73.1790  /*
 73.1791   * This looks a bit hackish but it's about the only one way of sending
 73.1792   * a few INTA cycles to 8259As and any associated glue logic.  ICR does
 73.1793 @@ -1536,7 +1920,6 @@ static inline void unlock_ExtINT_logic(v
 73.1794   */
 73.1795  static inline void check_timer(void)
 73.1796  {
 73.1797 -	extern int timer_ack;
 73.1798  	int pin1, pin2;
 73.1799  	int vector;
 73.1800  
 73.1801 @@ -1569,8 +1952,17 @@ static inline void check_timer(void)
 73.1802  		 * Ok, does IRQ0 through the IOAPIC work?
 73.1803  		 */
 73.1804  		unmask_IO_APIC_irq(0);
 73.1805 -		if (timer_irq_works())
 73.1806 +		if (timer_irq_works()) {
 73.1807 +#if 0
 73.1808 +			if (nmi_watchdog == NMI_IO_APIC) {
 73.1809 +				disable_8259A_irq(0);
 73.1810 +				setup_nmi();
 73.1811 +				enable_8259A_irq(0);
 73.1812 +				check_nmi_watchdog();
 73.1813 +			}
 73.1814 +#endif
 73.1815  			return;
 73.1816 +		}
 73.1817  		clear_IO_APIC_pin(0, pin1);
 73.1818  		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
 73.1819  	}
 73.1820 @@ -1588,6 +1980,12 @@ static inline void check_timer(void)
 73.1821  				replace_pin_at_irq(0, 0, pin1, 0, pin2);
 73.1822  			else
 73.1823  				add_pin_to_irq(0, 0, pin2);
 73.1824 +#if 0
 73.1825 +			if (nmi_watchdog == NMI_IO_APIC) {
 73.1826 +				setup_nmi();
 73.1827 +				check_nmi_watchdog();
 73.1828 +			}
 73.1829 +#endif
 73.1830  			return;
 73.1831  		}
 73.1832  		/*
 73.1833 @@ -1597,6 +1995,11 @@ static inline void check_timer(void)
 73.1834  	}
 73.1835  	printk(" failed.\n");
 73.1836  
 73.1837 +	if (nmi_watchdog == NMI_IO_APIC) {
 73.1838 +		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
 73.1839 +		nmi_watchdog = 0;
 73.1840 +	}
 73.1841 +
 73.1842  	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 73.1843  
 73.1844  	disable_8259A_irq(0);
 73.1845 @@ -1613,6 +2016,7 @@ static inline void check_timer(void)
 73.1846  
 73.1847  	printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
 73.1848  
 73.1849 +	timer_ack = 0;
 73.1850  	init_8259A(0);
 73.1851  	make_8259A_irq(0);
 73.1852  	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
 73.1853 @@ -1624,31 +2028,40 @@ static inline void check_timer(void)
 73.1854  		return;
 73.1855  	}
 73.1856  	printk(" failed :(.\n");
 73.1857 -	panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
 73.1858 +	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 73.1859 +		"report.  Then try booting with the 'noapic' option");
 73.1860 +}
 73.1861 +
 73.1862 +#define NR_IOAPIC_BIOSIDS 256
 73.1863 +static u8 ioapic_biosid_to_apic_enum[NR_IOAPIC_BIOSIDS];
 73.1864 +static void store_ioapic_biosid_mapping(void)
 73.1865 +{
 73.1866 +    u8 apic;
 73.1867 +    memset(ioapic_biosid_to_apic_enum, ~0, NR_IOAPIC_BIOSIDS);
 73.1868 +    for ( apic = 0; apic < nr_ioapics; apic++ )
 73.1869 +        ioapic_biosid_to_apic_enum[mp_ioapics[apic].mpc_apicid] = apic;
 73.1870  }
 73.1871  
 73.1872  /*
 73.1873   *
 73.1874 - * IRQ's that are handled by the old PIC in all cases:
 73.1875 + * IRQ's that are handled by the PIC in the MPS IOAPIC case.
 73.1876   * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
 73.1877   *   Linux doesn't really care, as it's not actually used
 73.1878   *   for any interrupt handling anyway.
 73.1879 - * - There used to be IRQ13 here as well, but all
 73.1880 - *   MPS-compliant must not use it for FPU coupling and we
 73.1881 - *   want to use exception 16 anyway.  And there are
 73.1882 - *   systems who connect it to an I/O APIC for other uses.
 73.1883 - *   Thus we don't mark it special any longer.
 73.1884 - *
 73.1885 - * Additionally, something is definitely wrong with irq9
 73.1886 - * on PIIX4 boards.
 73.1887   */
 73.1888 -#define PIC_IRQS	(1<<2)
 73.1889 +#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 73.1890  
 73.1891  void __init setup_IO_APIC(void)
 73.1892  {
 73.1893 +	store_ioapic_biosid_mapping();
 73.1894 +
 73.1895  	enable_IO_APIC();
 73.1896  
 73.1897 -	io_apic_irqs = ~PIC_IRQS;
 73.1898 +	if (acpi_ioapic)
 73.1899 +		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
 73.1900 +	else
 73.1901 +		io_apic_irqs = ~PIC_IRQS;
 73.1902 +
 73.1903  	printk("ENABLING IO-APIC IRQs\n");
 73.1904  
 73.1905  	/*
 73.1906 @@ -1660,26 +2073,20 @@ void __init setup_IO_APIC(void)
 73.1907  	setup_IO_APIC_irqs();
 73.1908  	init_IO_APIC_traps();
 73.1909  	check_timer();
 73.1910 -	if (!acpi_ioapic)
 73.1911 -		print_IO_APIC();
 73.1912 +	print_IO_APIC();
 73.1913  }
 73.1914  
 73.1915 -#endif /* CONFIG_X86_IO_APIC */
 73.1916 -
 73.1917 -
 73.1918 -
 73.1919  /* --------------------------------------------------------------------------
 73.1920                            ACPI-based IOAPIC Configuration
 73.1921     -------------------------------------------------------------------------- */
 73.1922  
 73.1923  #ifdef CONFIG_ACPI_BOOT
 73.1924  
 73.1925 -#define IO_APIC_MAX_ID		15
 73.1926 -
 73.1927  int __init io_apic_get_unique_id (int ioapic, int apic_id)
 73.1928  {
 73.1929 -	struct IO_APIC_reg_00 reg_00;
 73.1930 -	static unsigned long apic_id_map = 0;
 73.1931 +	union IO_APIC_reg_00 reg_00;
 73.1932 +	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
 73.1933 +	physid_mask_t tmp;
 73.1934  	unsigned long flags;
 73.1935  	int i = 0;
 73.1936  
 73.1937 @@ -1692,38 +2099,31 @@ int __init io_apic_get_unique_id (int io
 73.1938  	 *      advantage of new APIC bus architecture.
 73.1939  	 */
 73.1940  
 73.1941 -	if (!apic_id_map)
 73.1942 -		apic_id_map = phys_cpu_present_map;
 73.1943 +	if (physids_empty(apic_id_map))
 73.1944 +		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
 73.1945  
 73.1946  	spin_lock_irqsave(&ioapic_lock, flags);
 73.1947 -	*(int *)&reg_00 = io_apic_read(ioapic, 0);
 73.1948 +	reg_00.raw = io_apic_read(ioapic, 0);
 73.1949  	spin_unlock_irqrestore(&ioapic_lock, flags);
 73.1950  
 73.1951 -	if (apic_id >= IO_APIC_MAX_ID) {
 73.1952 +	if (apic_id >= get_physical_broadcast()) {
 73.1953  		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
 73.1954 -			"%d\n", ioapic, apic_id, reg_00.ID);
 73.1955 -		apic_id = reg_00.ID;
 73.1956 -	}
 73.1957 -
 73.1958 -	/* XAPICs do not need unique IDs */
 73.1959 -	if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){
 73.1960 -		printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", 
 73.1961 -			ioapic, apic_id);
 73.1962 -		return apic_id;
 73.1963 +			"%d\n", ioapic, apic_id, reg_00.bits.ID);
 73.1964 +		apic_id = reg_00.bits.ID;
 73.1965  	}
 73.1966  
 73.1967  	/*
 73.1968  	 * Every APIC in a system must have a unique ID or we get lots of nice 
 73.1969  	 * 'stuck on smp_invalidate_needed IPI wait' messages.
 73.1970  	 */
 73.1971 -	if (apic_id_map & (1 << apic_id)) {
 73.1972 +	if (check_apicid_used(apic_id_map, apic_id)) {
 73.1973  
 73.1974 -		for (i = 0; i < IO_APIC_MAX_ID; i++) {
 73.1975 -			if (!(apic_id_map & (1 << i)))
 73.1976 +		for (i = 0; i < get_physical_broadcast(); i++) {
 73.1977 +			if (!check_apicid_used(apic_id_map, i))
 73.1978  				break;
 73.1979  		}
 73.1980  
 73.1981 -		if (i == IO_APIC_MAX_ID)
 73.1982 +		if (i == get_physical_broadcast())
 73.1983  			panic("Max apic_id exceeded!\n");
 73.1984  
 73.1985  		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
 73.1986 @@ -1732,22 +2132,24 @@ int __init io_apic_get_unique_id (int io
 73.1987  		apic_id = i;
 73.1988  	} 
 73.1989  
 73.1990 -	apic_id_map |= (1 << apic_id);
 73.1991 +	tmp = apicid_to_cpu_present(apic_id);
 73.1992 +	physids_or(apic_id_map, apic_id_map, tmp);
 73.1993  
 73.1994 -	if (reg_00.ID != apic_id) {
 73.1995 -		reg_00.ID = apic_id;
 73.1996 +	if (reg_00.bits.ID != apic_id) {
 73.1997 +		reg_00.bits.ID = apic_id;
 73.1998  
 73.1999  		spin_lock_irqsave(&ioapic_lock, flags);
 73.2000 -		io_apic_write(ioapic, 0, *(int *)&reg_00);
 73.2001 -		*(int *)&reg_00 = io_apic_read(ioapic, 0);
 73.2002 +		io_apic_write(ioapic, 0, reg_00.raw);
 73.2003 +		reg_00.raw = io_apic_read(ioapic, 0);
 73.2004  		spin_unlock_irqrestore(&ioapic_lock, flags);
 73.2005  
 73.2006  		/* Sanity check */
 73.2007 -		if (reg_00.ID != apic_id)
 73.2008 +		if (reg_00.bits.ID != apic_id)
 73.2009  			panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
 73.2010  	}
 73.2011  
 73.2012 -	printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 73.2013 +	apic_printk(APIC_VERBOSE, KERN_INFO
 73.2014 +			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 73.2015  
 73.2016  	return apic_id;
 73.2017  }
 73.2018 @@ -1755,27 +2157,27 @@ int __init io_apic_get_unique_id (int io
 73.2019  
 73.2020  int __init io_apic_get_version (int ioapic)
 73.2021  {
 73.2022 -	struct IO_APIC_reg_01	reg_01;
 73.2023 +	union IO_APIC_reg_01	reg_01;
 73.2024  	unsigned long flags;
 73.2025  
 73.2026  	spin_lock_irqsave(&ioapic_lock, flags);
 73.2027 -	*(int *)&reg_01 = io_apic_read(ioapic, 1);
 73.2028 +	reg_01.raw = io_apic_read(ioapic, 1);
 73.2029  	spin_unlock_irqrestore(&ioapic_lock, flags);
 73.2030  
 73.2031 -	return reg_01.version;
 73.2032 +	return reg_01.bits.version;
 73.2033  }
 73.2034  
 73.2035  
 73.2036  int __init io_apic_get_redir_entries (int ioapic)
 73.2037  {
 73.2038 -	struct IO_APIC_reg_01	reg_01;
 73.2039 +	union IO_APIC_reg_01	reg_01;
 73.2040  	unsigned long flags;
 73.2041  
 73.2042  	spin_lock_irqsave(&ioapic_lock, flags);
 73.2043 -	*(int *)&reg_01 = io_apic_read(ioapic, 1);
 73.2044 +	reg_01.raw = io_apic_read(ioapic, 1);
 73.2045  	spin_unlock_irqrestore(&ioapic_lock, flags);
 73.2046  
 73.2047 -	return reg_01.entries;
 73.2048 +	return reg_01.bits.entries;
 73.2049  }
 73.2050  
 73.2051  
 73.2052 @@ -1785,7 +2187,7 @@ int io_apic_set_pci_routing (int ioapic,
 73.2053  	unsigned long flags;
 73.2054  
 73.2055  	if (!IO_APIC_IRQ(irq)) {
 73.2056 -		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n", 
 73.2057 +		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 73.2058  			ioapic);
 73.2059  		return -EINVAL;
 73.2060  	}
 73.2061 @@ -1798,12 +2200,12 @@ int io_apic_set_pci_routing (int ioapic,
 73.2062  
 73.2063  	memset(&entry,0,sizeof(entry));
 73.2064  
 73.2065 -	entry.delivery_mode = dest_LowestPrio;
 73.2066 -	entry.dest_mode = INT_DELIVERY_MODE;
 73.2067 -	entry.dest.logical.logical_dest = target_cpus();
 73.2068 -	entry.mask = 1;					 /* Disabled (masked) */
 73.2069 +	entry.delivery_mode = INT_DELIVERY_MODE;
 73.2070 +	entry.dest_mode = INT_DEST_MODE;
 73.2071 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 73.2072  	entry.trigger = edge_level;
 73.2073  	entry.polarity = active_high_low;
 73.2074 +	entry.mask  = 1;
 73.2075  
 73.2076  	/*
 73.2077  	 * IRQs < 16 are already in the irq_2_pin[] map
 73.2078 @@ -1813,17 +2215,12 @@ int io_apic_set_pci_routing (int ioapic,
 73.2079  
 73.2080  	entry.vector = assign_irq_vector(irq);
 73.2081  
 73.2082 -	printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
 73.2083 -		"IRQ %d Mode:%i Active:%i)\n", ioapic,
 73.2084 -		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
 73.2085 +	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
 73.2086 +		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
 73.2087 +		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
 73.2088 +		edge_level, active_high_low);
 73.2089  
 73.2090 -	if (edge_level) {
 73.2091 -		irq_desc[irq].handler = &ioapic_level_irq_type;
 73.2092 -	} else {
 73.2093 -		irq_desc[irq].handler = &ioapic_edge_irq_type;
 73.2094 -	}
 73.2095 -
 73.2096 -	set_intr_gate(entry.vector, interrupt[irq]);
 73.2097 +	ioapic_register_intr(irq, entry.vector, edge_level);
 73.2098  
 73.2099  	if (!ioapic && (irq < 16))
 73.2100  		disable_8259A_irq(irq);
 73.2101 @@ -1838,114 +2235,82 @@ int io_apic_set_pci_routing (int ioapic,
 73.2102  
 73.2103  #endif /*CONFIG_ACPI_BOOT*/
 73.2104  
 73.2105 -/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
 73.2106 -/*                                    level- or edge-triggered.         */
 73.2107 -/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
 73.2108 -static char opt_leveltrigger[30] = "", opt_edgetrigger[30] = "";
 73.2109 -string_param("leveltrigger", opt_leveltrigger);
 73.2110 -string_param("edgetrigger", opt_edgetrigger);
 73.2111  
 73.2112 -static int __init ioapic_trigger_setup(void)
 73.2113 +int ioapic_guest_read(int apicid, int address, u32 *pval)
 73.2114  {
 73.2115 -    char       *p;
 73.2116 -    irq_desc_t *desc;
 73.2117 -    long        irq;
 73.2118 -
 73.2119 -    p = opt_leveltrigger;
 73.2120 -    while ( *p != '\0' )
 73.2121 -    {
 73.2122 -        irq = simple_strtol(p, &p, 10);
 73.2123 -        if ( (irq <= 0) || (irq >= NR_IRQS) )
 73.2124 -        {
 73.2125 -            printk("IRQ '%ld' out of range in level-trigger list '%s'\n",
 73.2126 -                   irq, opt_leveltrigger);
 73.2127 -            break;
 73.2128 -        }
 73.2129 -
 73.2130 -        printk("Forcing IRQ %ld to level-trigger: ", irq);
 73.2131 -
 73.2132 -        desc = &irq_desc[irq];
 73.2133 -        spin_lock_irq(&desc->lock);
 73.2134 +    u32 val;
 73.2135 +    int apicenum;
 73.2136 +    union IO_APIC_reg_00 reg_00;
 73.2137 +    unsigned long flags;
 73.2138  
 73.2139 -        if ( desc->handler == &ioapic_level_irq_type )
 73.2140 -        {
 73.2141 -            printk("already level-triggered (no force applied).\n");
 73.2142 -        }
 73.2143 -        else if ( desc->handler != &ioapic_edge_irq_type )
 73.2144 -        {
 73.2145 -            printk("cannot force (can only force IO-APIC-edge IRQs).\n");
 73.2146 -        }
 73.2147 -        else
 73.2148 -        {
 73.2149 -            desc->handler = &ioapic_level_irq_type;
 73.2150 -            __mask_IO_APIC_irq(irq);
 73.2151 -            __level_IO_APIC_irq(irq);        
 73.2152 -            printk("done.\n");
 73.2153 -        }
 73.2154 +    if ( (apicid >= NR_IOAPIC_BIOSIDS) ||
 73.2155 +         ((apicenum = ioapic_biosid_to_apic_enum[apicid]) >= nr_ioapics) )
 73.2156 +            return -EINVAL;
 73.2157  
 73.2158 -        spin_unlock_irq(&desc->lock);
 73.2159 -
 73.2160 -        if ( *p == '\0' )
 73.2161 -            break;
 73.2162 +    spin_lock_irqsave(&ioapic_lock, flags);
 73.2163 +    val = io_apic_read(apicenum, address);
 73.2164 +    spin_unlock_irqrestore(&ioapic_lock, flags);
 73.2165  
 73.2166 -        if ( *p != ',' )
 73.2167 -        {
 73.2168 -            printk("Unexpected character '%c' in level-trigger list '%s'\n",
 73.2169 -                   *p, opt_leveltrigger);
 73.2170 -            break;
 73.2171 -        }
 73.2172 -
 73.2173 -        p++;
 73.2174 +    /* Rewrite APIC ID to what the BIOS originally specified. */
 73.2175 +    if ( address == 0 )
 73.2176 +    {
 73.2177 +        reg_00.raw = val;
 73.2178 +        reg_00.bits.ID = apicid;
 73.2179 +        val = reg_00.raw;
 73.2180      }
 73.2181  
 73.2182 -    p = opt_edgetrigger;
 73.2183 -    while ( *p != '\0' )
 73.2184 -    {
 73.2185 -        irq = simple_strtol(p, &p, 10);
 73.2186 -        if ( (irq <= 0) || (irq >= NR_IRQS) )
 73.2187 -        {
 73.2188 -            printk("IRQ '%ld' out of range in edge-trigger list '%s'\n",
 73.2189 -                   irq, opt_edgetrigger);
 73.2190 -            break;
 73.2191 -        }
 73.2192 +    *pval = val;
 73.2193 +    return 0;
 73.2194 +}
 73.2195 +
 73.2196 +int ioapic_guest_write(int apicid, int address, u32 val)
 73.2197 +{
 73.2198 +    int apicenum, pin, irq;
 73.2199 +    struct IO_APIC_route_entry rte = { 0 };
 73.2200 +    struct irq_pin_list *entry;
 73.2201 +    unsigned long flags;
 73.2202 +
 73.2203 +    if ( (apicid >= NR_IOAPIC_BIOSIDS) ||
 73.2204 +         ((apicenum = ioapic_biosid_to_apic_enum[apicid]) >= nr_ioapics) )
 73.2205 +            return -EINVAL;
 73.2206  
 73.2207 -        printk("Forcing IRQ %ld to edge-trigger: ", irq);
 73.2208 +    /* Only write to the first half of a route entry. */
 73.2209 +    if ( (address < 0x10) || (address & 1) )
 73.2210 +        return 0;
 73.2211 +    
 73.2212 +    pin = (address - 0x10) >> 1;
 73.2213  
 73.2214 -        desc = &irq_desc[irq];
 73.2215 -        spin_lock_irq(&desc->lock);
 73.2216 +    rte.dest.logical.logical_dest = target_cpus();
 73.2217 +    *(int *)&rte = val;
 73.2218  
 73.2219 -        if ( desc->handler == &ioapic_edge_irq_type )
 73.2220 -        {
 73.2221 -            printk("already edge-triggered (no force applied).\n");
 73.2222 -        }
 73.2223 -        else if ( desc->handler != &ioapic_level_irq_type )
 73.2224 +    if ( rte.vector >= FIRST_DEVICE_VECTOR )
 73.2225 +    {
 73.2226 +        /* Is there a valid irq mapped to this vector? */
 73.2227 +        irq = vector_irq[rte.vector];
 73.2228 +        if ( !IO_APIC_IRQ(irq) )
 73.2229 +            return 0;
 73.2230 +
 73.2231 +        /* Set the correct irq-handling type. */
 73.2232 +        irq_desc[irq].handler = rte.trigger ? 
 73.2233 +            &ioapic_level_type: &ioapic_edge_type;
 73.2234 +
 73.2235 +        /* Record the pin<->irq mapping. */
 73.2236 +        for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] )
 73.2237          {
 73.2238 -            printk("cannot force (can only force IO-APIC-level IRQs).\n");
 73.2239 +            if ( (entry->apic == apicenum) && (entry->pin == pin) )
 73.2240 +                break;
 73.2241 +            if ( !entry->next )
 73.2242 +            {
 73.2243 +                add_pin_to_irq(irq, apicenum, pin);
 73.2244 +                break;
 73.2245 +            }
 73.2246          }
 73.2247 -        else
 73.2248 -        {
 73.2249 -            desc->handler = &ioapic_edge_irq_type;
 73.2250 -            __edge_IO_APIC_irq(irq);        
 73.2251 -            desc->status |= IRQ_PENDING; /* may have lost a masked edge */
 73.2252 -            printk("done.\n");
 73.2253 -        }
 73.2254 -
 73.2255 -        spin_unlock_irq(&desc->lock);
 73.2256 +    }
 73.2257  
 73.2258 -        if ( *p == '\0' )
 73.2259 -            break;
 73.2260 -
 73.2261 -        if ( *p != ',' )
 73.2262 -        {
 73.2263 -            printk("Unexpected character '%c' in edge-trigger list '%s'\n",
 73.2264 -                   *p, opt_edgetrigger);
 73.2265 -            break;
 73.2266 -        }
 73.2267 -
 73.2268 -        p++;
 73.2269 -    }
 73.2270 +    spin_lock_irqsave(&ioapic_lock, flags);
 73.2271 +    io_apic_write(apicenum, 0x10 + 2 * pin, *(((int *)&rte) + 0));
 73.2272 +    io_apic_write(apicenum, 0x11 + 2 * pin, *(((int *)&rte) + 1));
 73.2273 +    spin_unlock_irqrestore(&ioapic_lock, flags);
 73.2274  
 73.2275      return 0;
 73.2276  }
 73.2277 -
 73.2278 -__initcall(ioapic_trigger_setup);
    74.1 --- a/xen/arch/x86/irq.c	Tue May 03 14:42:00 2005 +0000
    74.2 +++ b/xen/arch/x86/irq.c	Tue May 10 08:49:59 2005 +0000
    74.3 @@ -42,7 +42,6 @@ struct hw_interrupt_type no_irq_type = {
    74.4  };
    74.5  
    74.6  atomic_t irq_err_count;
    74.7 -atomic_t irq_mis_count;
    74.8  
    74.9  inline void disable_irq_nosync(unsigned int irq)
   74.10  {
    75.1 --- a/xen/arch/x86/microcode.c	Tue May 03 14:42:00 2005 +0000
    75.2 +++ b/xen/arch/x86/microcode.c	Tue May 10 08:49:59 2005 +0000
    75.3 @@ -87,13 +87,6 @@
    75.4  #define vmalloc(_s) xmalloc_bytes(_s)
    75.5  #define vfree(_p) xfree(_p)
    75.6  #define num_online_cpus() smp_num_cpus
    75.7 -static inline int on_each_cpu(
    75.8 -    void (*func) (void *info), void *info, int retry, int wait)
    75.9 -{
   75.10 -    int ret = smp_call_function(func, info, retry, wait);
   75.11 -    func(info);
   75.12 -    return ret;
   75.13 -}
   75.14  
   75.15  #if 0
   75.16  MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
    76.1 --- a/xen/arch/x86/mm.c	Tue May 03 14:42:00 2005 +0000
    76.2 +++ b/xen/arch/x86/mm.c	Tue May 10 08:49:59 2005 +0000
    76.3 @@ -316,7 +316,7 @@ int map_ldt_shadow_page(unsigned int off
    76.4  
    76.5      res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
    76.6  
    76.7 -    if ( !res && unlikely(shadow_mode_enabled(d)) )
    76.8 +    if ( !res && unlikely(shadow_mode_refcounts(d)) )
    76.9      {
   76.10          shadow_lock(d);
   76.11          shadow_remove_all_write_access(d, gpfn, gmfn);
   76.12 @@ -392,7 +392,7 @@ get_linear_pagetable(
   76.13      struct pfn_info *page;
   76.14      unsigned long pfn;
   76.15  
   76.16 -    ASSERT( !shadow_mode_enabled(d) );
   76.17 +    ASSERT( !shadow_mode_refcounts(d) );
   76.18  
   76.19      if ( (root_get_flags(re) & _PAGE_RW) )
   76.20      {
   76.21 @@ -482,7 +482,7 @@ get_page_from_l2e(
   76.22  {
   76.23      int rc;
   76.24  
   76.25 -    ASSERT(!shadow_mode_enabled(d));
   76.26 +    ASSERT(!shadow_mode_refcounts(d));
   76.27  
   76.28      if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
   76.29          return 1;
   76.30 @@ -512,6 +512,8 @@ static int
   76.31  get_page_from_l3e(
   76.32      l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
   76.33  {
   76.34 +    ASSERT( !shadow_mode_refcounts(d) );
   76.35 +
   76.36      if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
   76.37          return 1;
   76.38  
   76.39 @@ -533,6 +535,8 @@ get_page_from_l4e(
   76.40  {
   76.41      int rc;
   76.42  
   76.43 +    ASSERT( !shadow_mode_refcounts(d) );
   76.44 +
   76.45      if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
   76.46          return 1;
   76.47  
   76.48 @@ -641,7 +645,7 @@ static int alloc_l1_table(struct pfn_inf
   76.49      l1_pgentry_t  *pl1e;
   76.50      int            i;
   76.51  
   76.52 -    ASSERT(!shadow_mode_enabled(d));
   76.53 +    ASSERT(!shadow_mode_refcounts(d));
   76.54  
   76.55      pl1e = map_domain_mem(pfn << PAGE_SHIFT);
   76.56  
   76.57 @@ -670,10 +674,12 @@ static int alloc_l2_table(struct pfn_inf
   76.58      l2_pgentry_t  *pl2e;
   76.59      int            i;
   76.60  
   76.61 +    // See the code in shadow_promote() to understand why this is here...
   76.62      if ( (PGT_base_page_table == PGT_l2_page_table) &&
   76.63 -         shadow_mode_enabled(d) )
   76.64 +         unlikely(shadow_mode_refcounts(d)) )
   76.65          return 1;
   76.66 -    ASSERT( !shadow_mode_enabled(d) );
   76.67 +
   76.68 +    ASSERT( !shadow_mode_refcounts(d) );
   76.69     
   76.70      pl2e = map_domain_mem(pfn << PAGE_SHIFT);
   76.71  
   76.72 @@ -716,7 +722,7 @@ static int alloc_l3_table(struct pfn_inf
   76.73      l3_pgentry_t  *pl3e = page_to_virt(page);
   76.74      int            i;
   76.75  
   76.76 -    ASSERT( !shadow_mode_enabled(d) );
   76.77 +    ASSERT( !shadow_mode_refcounts(d) );
   76.78  
   76.79      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
   76.80          if ( is_guest_l3_slot(i) &&
   76.81 @@ -741,10 +747,12 @@ static int alloc_l4_table(struct pfn_inf
   76.82      l4_pgentry_t  *pl4e = page_to_virt(page);
   76.83      int            i;
   76.84  
   76.85 +    // See the code in shadow_promote() to understand why this is here...
   76.86      if ( (PGT_base_page_table == PGT_l4_page_table) &&
   76.87 -         shadow_mode_enabled(d) )
   76.88 +         shadow_mode_refcounts(d) )
   76.89          return 1;
   76.90 -    ASSERT( !shadow_mode_enabled(d) );
   76.91 +
   76.92 +    ASSERT( !shadow_mode_refcounts(d) );
   76.93  
   76.94      for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
   76.95          if ( is_guest_l4_slot(i) &&
   76.96 @@ -861,11 +869,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
   76.97      l1_pgentry_t ol1e;
   76.98      struct domain *d = current->domain;
   76.99  
  76.100 -    ASSERT( !shadow_mode_enabled(d) );
  76.101 -
  76.102      if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
  76.103          return 0;
  76.104  
  76.105 +    if ( unlikely(shadow_mode_refcounts(d)) )
  76.106 +        return update_l1e(pl1e, ol1e, nl1e);
  76.107 +
  76.108      if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
  76.109      {
  76.110          if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
  76.111 @@ -893,7 +902,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
  76.112          if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
  76.113              return 0;
  76.114      }
  76.115 -    
  76.116 +
  76.117      put_page_from_l1e(ol1e, d);
  76.118      return 1;
  76.119  }
  76.120 @@ -1095,8 +1104,19 @@ int alloc_page_type(struct pfn_info *pag
  76.121  void free_page_type(struct pfn_info *page, unsigned int type)
  76.122  {
  76.123      struct domain *owner = page_get_owner(page);
  76.124 -    if ( likely(owner != NULL) && unlikely(shadow_mode_enabled(owner)) )
  76.125 -        return;
  76.126 +    unsigned long gpfn;
  76.127 +
  76.128 +    if ( owner != NULL )
  76.129 +    {
  76.130 +        if ( unlikely(shadow_mode_refcounts(owner)) )
  76.131 +            return;
  76.132 +        if ( unlikely(shadow_mode_enabled(owner)) )
  76.133 +        {
  76.134 +            gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
  76.135 +            ASSERT(VALID_M2P(gpfn));
  76.136 +            remove_shadow(owner, gpfn, type);
  76.137 +        }
  76.138 +    }
  76.139  
  76.140      switch ( type )
  76.141      {
  76.142 @@ -1142,7 +1162,7 @@ void put_page_type(struct pfn_info *page
  76.143           * See domain.c:relinquish_list().
  76.144           */
  76.145          ASSERT((x & PGT_validated) || 
  76.146 -               test_bit(DF_DYING, &page_get_owner(page)->d_flags));
  76.147 +               test_bit(DF_DYING, &page_get_owner(page)->flags));
  76.148  
  76.149          if ( unlikely((nx & PGT_count_mask) == 0) )
  76.150          {
  76.151 @@ -1287,7 +1307,7 @@ int new_guest_cr3(unsigned long mfn)
  76.152      int okay;
  76.153      unsigned long old_base_mfn;
  76.154  
  76.155 -    if ( shadow_mode_enabled(d) )
  76.156 +    if ( shadow_mode_refcounts(d) )
  76.157          okay = get_page_from_pagenr(mfn, d);
  76.158      else
  76.159          okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
  76.160 @@ -1296,24 +1316,24 @@ int new_guest_cr3(unsigned long mfn)
  76.161      {
  76.162          invalidate_shadow_ldt(ed);
  76.163  
  76.164 -        old_base_mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
  76.165 +        old_base_mfn = pagetable_get_pfn(ed->arch.guest_table);
  76.166          ed->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
  76.167          update_pagetables(ed); /* update shadow_table and monitor_table */
  76.168  
  76.169          write_ptbase(ed);
  76.170  
  76.171 -        if ( shadow_mode_enabled(d) )
  76.172 +        if ( shadow_mode_refcounts(d) )
  76.173              put_page(&frame_table[old_base_mfn]);
  76.174          else
  76.175              put_page_and_type(&frame_table[old_base_mfn]);
  76.176  
  76.177 -        /* CR3 holds its own ref to its shadow. */
  76.178 +        /* CR3 also holds a ref to its shadow... */
  76.179          if ( shadow_mode_enabled(d) )
  76.180          {
  76.181              if ( ed->arch.monitor_shadow_ref )
  76.182                  put_shadow_ref(ed->arch.monitor_shadow_ref);
  76.183              ed->arch.monitor_shadow_ref =
  76.184 -                pagetable_val(ed->arch.monitor_table) >> PAGE_SHIFT;
  76.185 +                pagetable_get_pfn(ed->arch.monitor_table);
  76.186              ASSERT(!page_get_owner(&frame_table[ed->arch.monitor_shadow_ref]));
  76.187              get_shadow_ref(ed->arch.monitor_shadow_ref);
  76.188          }
  76.189 @@ -1486,7 +1506,7 @@ int do_mmuext_op(
  76.190              type = PGT_l1_page_table | PGT_va_mutable;
  76.191  
  76.192          pin_page:
  76.193 -            if ( shadow_mode_enabled(FOREIGNDOM) )
  76.194 +            if ( shadow_mode_refcounts(FOREIGNDOM) )
  76.195                  type = PGT_writable_page;
  76.196  
  76.197              okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
  76.198 @@ -1557,7 +1577,7 @@ int do_mmuext_op(
  76.199              else
  76.200              {
  76.201                  unsigned long old_mfn =
  76.202 -                    pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
  76.203 +                    pagetable_get_pfn(ed->arch.guest_table_user);
  76.204                  ed->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT);