ia64/xen-unstable

changeset 4846:ddd290cc8f0d

bitkeeper revision 1.1399 (42820c16ZkkGzrd1OssY0unvIvY5Cw)

Merge http://xen.bkbits.net:8080/xeno-unstable.bk
into gandalf.hpl.hp.com:/var/bk/xeno-unstable.bk
author xenbk@gandalf.hpl.hp.com
date Wed May 11 13:43:50 2005 +0000 (2005-05-11)
parents 39bfbd5ae9b8 cd6c145a8f97
children d16ae85cb89e
files .hgtags .rootkeys BitKeeper/etc/logging_ok docs/misc/sedf_scheduler_mini-HOWTO.txt freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h freebsd-5.3-xen-sparse/i386-xen/include/pmap.h freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig linux-2.6.11-xen-sparse/arch/xen/i386/Makefile linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c tools/libxc/Makefile tools/libxc/xc.h tools/libxc/xc_sedf.c tools/libxutil/sxpr_parser.h tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/XendClient.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xm/main.py xen/Rules.mk xen/arch/ia64/asm-offsets.c xen/arch/ia64/domain.c xen/arch/ia64/idle0_task.c xen/arch/ia64/irq.c xen/arch/ia64/ivt.S xen/arch/ia64/mm_init.c xen/arch/ia64/patch/linux-2.6.11/unaligned.c xen/arch/ia64/privop.c xen/arch/ia64/process.c xen/arch/ia64/vcpu.c xen/arch/ia64/xenmisc.c xen/arch/ia64/xensetup.c xen/arch/ia64/xentime.c xen/arch/x86/Makefile xen/arch/x86/Rules.mk xen/arch/x86/acpi.c xen/arch/x86/acpi/boot.c xen/arch/x86/apic.c xen/arch/x86/audit.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/i387.c xen/arch/x86/i8259.c xen/arch/x86/idle0_task.c xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/microcode.c xen/arch/x86/mm.c xen/arch/x86/mpparse.c xen/arch/x86/mtrr/main.c xen/arch/x86/nmi.c xen/arch/x86/physdev.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/smpboot.c xen/arch/x86/string.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/mm.c xen/common/bitmap.c xen/common/dom0_ops.c xen/common/domain.c xen/common/event_channel.c xen/common/keyhandler.c xen/common/page_alloc.c xen/common/perfc.c xen/common/resource.c xen/common/sched_bvt.c xen/common/sched_sedf.c xen/common/schedule.c xen/common/xmalloc.c xen/drivers/acpi/tables.c xen/drivers/char/console.c xen/drivers/char/serial.c xen/include/acpi/acconfig.h xen/include/acpi/acexcep.h xen/include/acpi/acglobal.h xen/include/acpi/achware.h xen/include/acpi/aclocal.h xen/include/acpi/acmacros.h xen/include/acpi/acobject.h xen/include/acpi/acoutput.h xen/include/acpi/acpi.h 
xen/include/acpi/acpi_bus.h xen/include/acpi/acpi_drivers.h xen/include/acpi/acpiosxf.h xen/include/acpi/acpixf.h xen/include/acpi/acstruct.h xen/include/acpi/actbl.h xen/include/acpi/actbl1.h xen/include/acpi/actbl2.h xen/include/acpi/actypes.h xen/include/acpi/acutils.h xen/include/acpi/platform/acenv.h xen/include/acpi/platform/acgcc.h xen/include/acpi/platform/aclinux.h xen/include/asm-ia64/config.h xen/include/asm-ia64/domain.h xen/include/asm-ia64/serial.h xen/include/asm-ia64/vcpu.h xen/include/asm-x86/apic.h xen/include/asm-x86/apicdef.h xen/include/asm-x86/bitops.h xen/include/asm-x86/config.h xen/include/asm-x86/debugger.h xen/include/asm-x86/domain.h xen/include/asm-x86/fixmap.h xen/include/asm-x86/i387.h xen/include/asm-x86/io_apic.h xen/include/asm-x86/io_ports.h xen/include/asm-x86/irq.h xen/include/asm-x86/mach-default/apm.h xen/include/asm-x86/mach-default/bios_ebda.h xen/include/asm-x86/mach-default/do_timer.h xen/include/asm-x86/mach-default/entry_arch.h xen/include/asm-x86/mach-default/io_ports.h xen/include/asm-x86/mach-default/irq_vectors.h xen/include/asm-x86/mach-default/irq_vectors_limits.h xen/include/asm-x86/mach-default/mach_apic.h xen/include/asm-x86/mach-default/mach_apicdef.h xen/include/asm-x86/mach-default/mach_ipi.h xen/include/asm-x86/mach-default/mach_mpparse.h xen/include/asm-x86/mach-default/mach_mpspec.h xen/include/asm-x86/mach-default/mach_reboot.h xen/include/asm-x86/mach-default/mach_time.h xen/include/asm-x86/mach-default/mach_timer.h xen/include/asm-x86/mach-default/mach_traps.h xen/include/asm-x86/mach-default/mach_wakecpu.h xen/include/asm-x86/mach-default/pci-functions.h xen/include/asm-x86/mach-default/setup_arch_post.h xen/include/asm-x86/mach-default/setup_arch_pre.h xen/include/asm-x86/mach-default/smpboot_hooks.h xen/include/asm-x86/mach_apic.h xen/include/asm-x86/mm.h xen/include/asm-x86/mpspec.h xen/include/asm-x86/mpspec_def.h xen/include/asm-x86/page.h xen/include/asm-x86/pci.h xen/include/asm-x86/processor.h xen/include/asm-x86/shadow.h xen/include/asm-x86/smp.h xen/include/asm-x86/smpboot.h xen/include/asm-x86/system.h xen/include/asm-x86/time.h xen/include/asm-x86/x86_32/domain_page.h xen/include/asm-x86/x86_64/domain_page.h xen/include/asm-x86/x86_64/string.h xen/include/public/arch-ia64.h xen/include/public/sched_ctl.h xen/include/xen/acpi.h xen/include/xen/adv_sched_hist.h xen/include/xen/bitmap.h xen/include/xen/bitops.h xen/include/xen/config.h xen/include/xen/cpumask.h xen/include/xen/event.h xen/include/xen/inttypes.h xen/include/xen/ioport.h xen/include/xen/irq.h xen/include/xen/lib.h xen/include/xen/pci.h xen/include/xen/perfc.h xen/include/xen/perfc_defn.h xen/include/xen/sched-if.h xen/include/xen/sched.h xen/include/xen/serial.h xen/include/xen/slab.h xen/include/xen/smp.h xen/include/xen/spinlock.h xen/include/xen/types.h
line diff
     1.1 --- a/.hgtags	Fri May 06 17:04:27 2005 +0000
     1.2 +++ b/.hgtags	Wed May 11 13:43:50 2005 +0000
     1.3 @@ -5,6 +5,7 @@ 6e1bbc13911751efa0b1c018425c1b085820fa02
     1.4  fb875591fd72e15c31879c0e9034d99b80225595 RELEASE-2.0.4
     1.5  1a522944f76540ea9d73fcc1b0d13d0f670183f0 RELEASE-2.0.5
     1.6  487b2ee37d1cecb5f3e7a546b05ad097a0226f2f beta1
     1.7 +1f84d0497a5901b9f8d1a051b87871d140b7e23f ia64-stable
     1.8  3d330e41f41ce1bc118c02346e18949ad5d67f6b latest-semistable
     1.9  30c521db4c71960b0cf1d9c9e1b658e77b535a3e latest-stable
    1.10  9afec5bc14aeb197ef37ea54a57eacd427463fc3 semistable
     2.1 --- a/.rootkeys	Fri May 06 17:04:27 2005 +0000
     2.2 +++ b/.rootkeys	Wed May 11 13:43:50 2005 +0000
     2.3 @@ -21,6 +21,7 @@ 4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc
     2.4  412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt
     2.5  420b949cy9ZGzED74Fz_DaWlK7tT4g docs/misc/crashdb.txt
     2.6  4251a1f82AexscYEiF4Iku8Gc_kWfQ docs/misc/grant-tables.txt
     2.7 +424d462b5GuApQ_NyMsRFt9LbrsWow docs/misc/sedf_scheduler_mini-HOWTO.txt
     2.8  40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html
     2.9  410a4c2bAO_m_l4RsiiPHnZ4ixHWbQ docs/misc/xend.tex
    2.10  3f9e7d564bWFB-Czjv1qdmE6o0GqNg docs/src/interface.tex
    2.11 @@ -248,6 +249,7 @@ 40f56238BMqG5PuSHufpjbvp_helBw linux-2.6
    2.12  40f562389xNa78YBZciUibQjyRU_Lg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c
    2.13  40f56238JypKAUG01ZojFwH7qnZ5uA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.S
    2.14  40f56238wi6AdNQjm0RT57bSkwb6hg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.lds
    2.15 +427245dboQBkhq841wIPqlRD-AG9Jw linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile
    2.16  40f56238a3w6-byOzexIlMgni76Lcg linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile
    2.17  40f56238ILx8xlbywNbzTdv5Zr4xXQ linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c
    2.18  4118cc35CbY8rfGVspF5O-7EkXBEAA linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c
    2.19 @@ -465,6 +467,7 @@ 422e4430vKaHLOOGS7X-SUUe3EBCgw netbsd-2.
    2.20  422e4430-gOD358H8nGGnNWes08Nng netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c
    2.21  413cb3b53nyOv1OIeDSsCXhBFDXvJA netbsd-2.0-xen-sparse/sys/nfs/files.nfs
    2.22  413aa1d0oNP8HXLvfPuMe6cSroUfSA patches/linux-2.6.11/agpgart.patch
    2.23 +427261074Iy1MkbbqIV6zdZDWWx_Jg patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch
    2.24  42372652KCUP-IOH9RN19YQmGhs4aA patches/linux-2.6.11/iomap.patch
    2.25  424f001e_M1Tnxc52rDrmCLelnDWMQ patches/linux-2.6.11/x86_64-linux.patch
    2.26  3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
    2.27 @@ -742,6 +745,7 @@ 3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/lib
    2.28  3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxc/xc_private.h
    2.29  42337174PxyzzPk62raDiYCIsfStDg tools/libxc/xc_ptrace.c
    2.30  4273458duzL--nsTfT6e_q6Kfij48g tools/libxc/xc_ptrace_core.c
    2.31 +41ebbfe9U0b0kI-HgjK7VEY4EvW7_w tools/libxc/xc_sedf.c
    2.32  41dde8b0pLfAKMs_L9Uri2hnzHiCRQ tools/libxc/xc_vmx_build.c
    2.33  40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/libxutil/Makefile
    2.34  40e033325Sjqs-_4TuzeUEprP_gYFg tools/libxutil/allocate.c
    2.35 @@ -1202,7 +1206,7 @@ 421098b6lY2JzrV1oFDbrt7XQhtElg xen/arch/
    2.36  427664f5eygrc3nEhI3RKf0Y37PzyA xen/arch/ia64/xentime.c
    2.37  3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/x86/Makefile
    2.38  3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk
    2.39 -3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c
    2.40 +3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi/boot.c
    2.41  3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c
    2.42  42360b3244-Q6BpEKhR_A1YtG1wPNQ xen/arch/x86/audit.c
    2.43  3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen/arch/x86/boot/mkelf32.c
    2.44 @@ -1264,6 +1268,7 @@ 40e96d3akN3Hu_J5Bk-WXD8OGscrYQ xen/arch/
    2.45  422f27c8J9DQfCpegccMid59XhSmGA xen/arch/x86/x86_emulate.c
    2.46  3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile
    2.47  3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c
    2.48 +427fa2d0J0LU2s5oKbsM0nTZ2iyd2Q xen/common/bitmap.c
    2.49  3ddb79bdLX_P6iB7ILiblRLWvebapg xen/common/dom0_ops.c
    2.50  3e6377e4i0c9GtKN65e99OtRbw3AZw xen/common/dom_mem_ops.c
    2.51  3ddb79bdYO5D8Av12NHqPeSviav7cg xen/common/domain.c
    2.52 @@ -1276,8 +1281,8 @@ 3ddb79bduhSEZI8xa7IbGQCpap5y2A xen/commo
    2.53  41a61536SZbR6cj1ukWTb0DYU-vz9w xen/common/multicall.c
    2.54  3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c
    2.55  3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c
    2.56 -3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
    2.57  40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c
    2.58 +41ebbfe9oF1BF3cH5v7yE3eOL9uPbA xen/common/sched_sedf.c
    2.59  3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
    2.60  3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c
    2.61  3e7f358awXBC3Vw-wFRwPw18qL1khg xen/common/string.c
    2.62 @@ -1358,18 +1363,37 @@ 3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/inclu
    2.63  4204e7acwXDo-5iAAiO2eQbtDeYZXA xen/include/asm-x86/init.h
    2.64  3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-x86/io.h
    2.65  3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-x86/io_apic.h
    2.66 -42605109qxLSrHVE2SRhGXmgk907iw xen/include/asm-x86/io_ports.h
    2.67  3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-x86/irq.h
    2.68  404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-x86/ldt.h
    2.69 -4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach_apic.h
    2.70 +427fa2d0m8MOSSXT13zgb-q0fGA_Dw xen/include/asm-x86/mach-default/apm.h
    2.71 +427fa2d0suK9Av7vsAXhsQxZjqpc_Q xen/include/asm-x86/mach-default/bios_ebda.h
    2.72 +427fa2d0yC3KzLozoeK3Xa3uGVfIdw xen/include/asm-x86/mach-default/do_timer.h
    2.73 +427fa2d0bWQkR1mW5OBYxn07AN-bDw xen/include/asm-x86/mach-default/entry_arch.h
    2.74 +427fa2d0-SWcuwbdSypo4953bc2JdQ xen/include/asm-x86/mach-default/io_ports.h
    2.75 +427fa2d0eyAl7LAeO-SVV4IW7lZPGQ xen/include/asm-x86/mach-default/irq_vectors.h
    2.76 +427fa2d0df7VWG4KKpnKbKR2Cbd1_w xen/include/asm-x86/mach-default/irq_vectors_limits.h
    2.77 +4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach-default/mach_apic.h
    2.78 +427fa2d0I3FWjE2tWdOhlEOJn7stcg xen/include/asm-x86/mach-default/mach_apicdef.h
    2.79 +427fa2d093fDS2gOBLcl7Yndzl7HmA xen/include/asm-x86/mach-default/mach_ipi.h
    2.80 +427fa2d0Y7bD35d-FvDAeiJDIdRw2A xen/include/asm-x86/mach-default/mach_mpparse.h
    2.81 +427fa2d0aLQgE9e1GY9ZP5jrMOC8pQ xen/include/asm-x86/mach-default/mach_mpspec.h
    2.82 +427fa2d0fJ5nNn5ydJuOaZIL6F2fjQ xen/include/asm-x86/mach-default/mach_reboot.h
    2.83 +427fa2d0VlN555TE68TjKMsrOoFXNA xen/include/asm-x86/mach-default/mach_time.h
    2.84 +427fa2d0C0jWTKYjy7WJjGKeujSpSg xen/include/asm-x86/mach-default/mach_timer.h
    2.85 +427fa2d0UXLiS1scpNrK26ZT6Oes3g xen/include/asm-x86/mach-default/mach_traps.h
    2.86 +427fa2d0OfglYyfpDTD5DII4M0uZRw xen/include/asm-x86/mach-default/mach_wakecpu.h
    2.87 +427fa2d0_OBPxdi5Qo04JWgZhz7BFA xen/include/asm-x86/mach-default/pci-functions.h
    2.88 +427fa2d0mrTtXrliqDfLuJc5LLVXaA xen/include/asm-x86/mach-default/setup_arch_post.h
    2.89 +427fa2d0Uoo7gC61Kep6Yy7Os367Hg xen/include/asm-x86/mach-default/setup_arch_pre.h
    2.90 +427fa2d1EKnA8zCq2QLHiGOUqOgszg xen/include/asm-x86/mach-default/smpboot_hooks.h
    2.91  3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-x86/mc146818rtc.h
    2.92  40ec25fd7cSvbP7Biw91zaU_g0xsEQ xen/include/asm-x86/mm.h
    2.93  3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h
    2.94 +427fa2d1eJRenftJJnRyLsHKl1ghtA xen/include/asm-x86/mpspec_def.h
    2.95  3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h
    2.96  41aaf567Mi3OishhvrCtET1y-mxQBg xen/include/asm-x86/mtrr.h
    2.97  41a61536MFhNalgbVmYGXAhQsPTZNw xen/include/asm-x86/multicall.h
    2.98  3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h
    2.99 -3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-x86/pci.h
   2.100  42422fb0FVX-TJkSvAXnbfwMf19XFA xen/include/asm-x86/physdev.h
   2.101  3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-x86/processor.h
   2.102  40cf1596bim9F9DNdV75klgRSZ6Y2A xen/include/asm-x86/regs.h
   2.103 @@ -1426,11 +1450,15 @@ 4266bd01Ul-pC01ZVvBkhBnv5eqzvw xen/inclu
   2.104  3ddb79c25UE59iu4JJcbRalx95mvcg xen/include/public/xen.h
   2.105  3e397e66m2tO3s-J8Jnr7Ws_tGoPTg xen/include/xen/ac_timer.h
   2.106  40715b2epYl2jBbxzz9CI2rgIca7Zg xen/include/xen/acpi.h
   2.107 +422f0995xCgnbsVhTjSncnqIABs64g xen/include/xen/adv_sched_hist.h
   2.108 +427fa2d1wyoVbvCyZRLposYjA_D_4g xen/include/xen/bitmap.h
   2.109 +427fa2d1ItcC_yWuBUkhc7adedP5ow xen/include/xen/bitops.h
   2.110  3ddb79c0c0cX_DZE209-Bb-Rx1v-Aw xen/include/xen/cache.h
   2.111  41f2cea7Yna7xc0X9fyavIjoSFFeVg xen/include/xen/compile.h.in
   2.112  3f840f12CkbYSlwMrY2S11Mpyxg7Nw xen/include/xen/compiler.h
   2.113  3ddb79c259jh8hE7vre_8NuE7nwNSA xen/include/xen/config.h
   2.114  3eb165e0eawr3R-p2ZQtSdLWtLRN_A xen/include/xen/console.h
   2.115 +427fa2d1bQCWgEQqTTh5MjG4MPEH9g xen/include/xen/cpumask.h
   2.116  3ddb79c1V44RD26YqCUm-kqIupM37A xen/include/xen/ctype.h
   2.117  3ddb79c05DdHQ0UxX_jKsXdR4QlMCA xen/include/xen/delay.h
   2.118  40f2b4a2hC3HtChu-ArD8LyojxWMjg xen/include/xen/domain.h
   2.119 @@ -1439,7 +1467,7 @@ 3ddb79c0HIghfBF8zFUdmXhOU8i6hA xen/inclu
   2.120  3ddb79c1W0lQca8gRV7sN6j3iY4Luw xen/include/xen/event.h
   2.121  41262590CyJy4vd42dnqzsn8-eeGvw xen/include/xen/grant_table.h
   2.122  3ddb79c0GurNF9tDWqQbAwJFH8ugfA xen/include/xen/init.h
   2.123 -3ddb79c1nzaWu8NoF4xCCMSFJR4MlA xen/include/xen/ioport.h
   2.124 +428084e41zemtCAtYLcD9bUzwE35SA xen/include/xen/inttypes.h
   2.125  3ddb79c2qAxCOABlkKtD8Txohe-qEw xen/include/xen/irq.h
   2.126  3ddb79c2b3qe-6Ann09FqZBF4IrJaQ xen/include/xen/irq_cpustat.h
   2.127  3e4540ccPHqIIv2pvnQ1gV8LUnoHIg xen/include/xen/kernel.h
   2.128 @@ -1449,7 +1477,6 @@ 3ddb79c18Ajy7micDGQQfJ0zWgEHtA xen/inclu
   2.129  3ddb79c1gs2VbLbQlw0dcDUXYIepDA xen/include/xen/mm.h
   2.130  3ddb79c1ieLZfGSFwfvvSQ2NK1BMSg xen/include/xen/multiboot.h
   2.131  41a61536ii6j2lJ2rXwMOLaG1CHPvw xen/include/xen/multicall.h
   2.132 -3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen/include/xen/pci.h
   2.133  3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen/include/xen/pci_ids.h
   2.134  3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xen/perfc.h
   2.135  3e54c38de9SUSYSAwxDf_DwkpAnQFA xen/include/xen/perfc_defn.h
     3.1 --- a/BitKeeper/etc/logging_ok	Fri May 06 17:04:27 2005 +0000
     3.2 +++ b/BitKeeper/etc/logging_ok	Wed May 11 13:43:50 2005 +0000
     3.3 @@ -60,6 +60,7 @@ kmacy@shemp.lab.netapp.com
     3.4  kraxel@bytesex.org
     3.5  laudney@eclipse.(none)
     3.6  leendert@watson.ibm.com
     3.7 +lists-xen@pimb.org
     3.8  lynx@idefix.cl.cam.ac.uk
     3.9  maf46@burn.cl.cam.ac.uk
    3.10  mafetter@fleming.research
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/docs/misc/sedf_scheduler_mini-HOWTO.txt	Wed May 11 13:43:50 2005 +0000
     4.3 @@ -0,0 +1,44 @@
     4.4 +sEDF scheduler
     4.5 +--------------
     4.6 +Author:
     4.7 +   Stephan.Diestelhorst@{cl.cam.ac.uk, inf.tu-dresden.de}
     4.8 +   
     4.9 +Overview:
    4.10 +  This scheduler provides weighted CPU sharing in an intuitive way and
     4.11 +  uses real-time algorithms to ensure time guarantees.
    4.12 +
    4.13 +Usage:
    4.14 +   -add "sched=sedf" on Xen's boot command-line
    4.15 +   -create domains as usual
    4.16 +   -use "xm sedf <dom-id> <period> <slice> <latency-hint> <extra> <weight>"
    4.17 +    Where:
     4.18 +      -period/slice are the normal EDF scheduling parameters in nanoseconds
    4.19 +      -latency-hint is the scaled period in case the domain is doing heavy I/O
    4.20 +         (unused by the currently compiled version)
    4.21 +      -extra is a flag (0/1), which controls whether the domain can run in
    4.22 +       extra-time
    4.23 +      -weight is mutually exclusive with period/slice and specifies another
     4.24 +       way of setting a domain's CPU slice
    4.25 +
    4.26 +Examples:
    4.27 + normal EDF (20ms/5ms):
    4.28 +  xm sedf <dom-id> 20000000 5000000 0 0 0
    4.29 +  
    4.30 + best-effort domains (i.e. non-realtime):
    4.31 +  xm sedf <dom-id> 20000000 0 0 1 0
     4.32 +  
     4.33 + normal EDF (20ms/5ms) + share of extra-time:
    4.34 +  xm sedf <dom-id> 20000000 5000000 0 1 0
    4.35 +  
    4.36 + 4 domains with weights 2:3:4:2
    4.37 +  xm sedf <d1> 0 0 0 0 2
    4.38 +  xm sedf <d2> 0 0 0 0 3
    4.39 +  xm sedf <d3> 0 0 0 0 4
    4.40 +  xm sedf <d4> 0 0 0 0 2
    4.41 +  
    4.42 + 1 fully-specified (10ms/3ms) domain, 3 other domains share
     4.43 + the available rest in a 2:7:3 ratio:
    4.44 +  xm sedf <d1> 10000000 3000000 0 0 0
    4.45 +  xm sedf <d2> 0 0 0 0 2
    4.46 +  xm sedf <d3> 0 0 0 0 7
    4.47 +  xm sedf <d4> 0 0 0 0 3
    4.48 \ No newline at end of file
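For reference, the reservations in the HOWTO above work out as follows: a slice S
within a period P guarantees the domain S/P of one CPU per period. So the
20ms/5ms example reserves 5000000/20000000 = 25% of a CPU, the 10ms/3ms domain
in the last example reserves 30%, and the three weighted domains split the
remaining 70% in the 2:7:3 ratio.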
     5.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c	Fri May 06 17:04:27 2005 +0000
     5.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c	Wed May 11 13:43:50 2005 +0000
     5.3 @@ -87,6 +87,12 @@
     5.4  
     5.5  /* XEN specific defines */
     5.6  #include <machine/xen_intr.h>
     5.7 +#include <vm/vm.h>   /* needed by machine/pmap.h */
     5.8 +#include <vm/pmap.h> /* needed by machine/pmap.h */
     5.9 +#include <machine/pmap.h> /* needed by xen-os.h */
    5.10 +#include <machine/hypervisor-ifs.h>
    5.11 +#include <machine/xen-os.h> /* needed by xenfunc.h */
    5.12 +#include <machine/xenfunc.h>
    5.13  
    5.14  /*
    5.15   * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
    5.16 @@ -129,7 +135,15 @@ static uint64_t shadow_system_time;
    5.17  static uint32_t shadow_time_version;
    5.18  static struct timeval shadow_tv;
    5.19  
    5.20 +#define DEFINE_PER_CPU(type, name) \
    5.21 +    __typeof__(type) per_cpu__##name
    5.22 +
    5.23 +#define per_cpu(var, cpu)           (*((void)cpu, &per_cpu__##var))
    5.24 +
    5.25 +
    5.26  static uint64_t processed_system_time;/* System time (ns) at last processing. */
    5.27 +static DEFINE_PER_CPU(uint64_t, processed_system_time);
    5.28 +
    5.29  
    5.30  #define NS_PER_TICK (1000000000ULL/hz)
    5.31  
    5.32 @@ -202,18 +216,19 @@ static struct timecounter xen_timecounte
    5.33  static void 
    5.34  clkintr(struct clockframe *frame)
    5.35  {
    5.36 -    int64_t delta;
    5.37 +    int64_t cpu_delta, delta;
    5.38 +    int cpu = smp_processor_id();
    5.39      long ticks = 0;
    5.40  
    5.41 -
    5.42      do {
    5.43      	__get_time_values_from_xen();
    5.44 -    	delta = (int64_t)(shadow_system_time + 
    5.45 -			  xen_get_offset() * 1000 - 
    5.46 -			  processed_system_time);
    5.47 +    	delta = cpu_delta = (int64_t)shadow_system_time + 
    5.48 +		(int64_t)xen_get_offset() * 1000;
    5.49 +	delta -= processed_system_time;
    5.50 +	cpu_delta -= per_cpu(processed_system_time, cpu);
    5.51      } while (!TIME_VALUES_UP_TO_DATE);
    5.52  
    5.53 -    if (unlikely(delta < 0)) {
    5.54 +    if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) {
    5.55          printk("Timer ISR: Time went backwards: %lld\n", delta);
    5.56          return;
    5.57      }
    5.58 @@ -225,15 +240,28 @@ clkintr(struct clockframe *frame)
    5.59          delta -= NS_PER_TICK;
    5.60          processed_system_time += NS_PER_TICK;
    5.61      }
    5.62 -
    5.63 -    if (ticks > 0) {
    5.64 -	if (frame)
    5.65 -		timer_func(frame);
    5.66 -#ifdef SMP
    5.67 -	if (timer_func == hardclock && frame)
    5.68 -		forward_hardclock();
    5.69 +    /* Local CPU jiffy work. */
    5.70 +    while (cpu_delta >= NS_PER_TICK) {
    5.71 +	    cpu_delta -= NS_PER_TICK;
    5.72 +	    per_cpu(processed_system_time, cpu) += NS_PER_TICK;
    5.73 +#if 0
    5.74 +	    update_process_times(user_mode(regs));
    5.75 +	    profile_tick(CPU_PROFILING, regs);
    5.76  #endif
    5.77      }
    5.78 +    if (ticks > 0) {
    5.79 +	if (frame) timer_func(frame);
    5.80 +    }
    5.81 +    
    5.82 +    if (cpu != 0)
    5.83 +	    return;
    5.84 +    /*
    5.85 +     * Take synchronised time from Xen once a minute if we're not
    5.86 +     * synchronised ourselves, and we haven't chosen to keep an independent
    5.87 +     * time base.
    5.88 +     */
    5.89 +    
    5.90 +    /* XXX TODO */
    5.91  }
    5.92  
    5.93  #include "opt_ddb.h"
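The reworked clkintr() above now maintains two running counters, the global
processed_system_time (driving timer_func()) and a per-CPU copy for local tick
work, both advanced one NS_PER_TICK quantum at a time. A minimal sketch of that
accounting (assuming NS_PER_TICK = 10^9/hz, as defined earlier in this file):

    /* Consume whole ticks from a nanosecond delta, advancing the
     * last-processed timestamp in lock step; returns the number of
     * elapsed ticks (clkintr() invokes timer_func() once if any). */
    static long
    consume_ticks(int64_t *delta, uint64_t *processed, uint64_t ns_per_tick)
    {
            long ticks = 0;

            while (*delta >= (int64_t)ns_per_tick) {
                    *delta -= ns_per_tick;
                    *processed += ns_per_tick;
                    ticks++;
            }
            return (ticks);
    }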
    5.94 @@ -429,7 +457,7 @@ resettodr()
    5.95   * Start clocks running.
    5.96   */
    5.97  void
    5.98 -cpu_initclocks()
    5.99 +cpu_initclocks(void)
   5.100  {
   5.101  	int diag;
   5.102  	int time_irq = bind_virq_to_irq(VIRQ_TIMER);
   5.103 @@ -445,8 +473,26 @@ cpu_initclocks()
   5.104  	/* initialize xen values */
   5.105  	__get_time_values_from_xen();
   5.106  	processed_system_time = shadow_system_time;
   5.107 +	per_cpu(processed_system_time, 0) = processed_system_time;
   5.108 +
   5.109  }
   5.110  
   5.111 +#ifdef SMP 
   5.112 +void
   5.113 +ap_cpu_initclocks(void)
   5.114 +{
   5.115 +	int irq;
   5.116 +	int cpu = smp_processor_id();
   5.117 +
   5.118 +	per_cpu(processed_system_time, cpu) = shadow_system_time;
   5.119 +	
   5.120 +	irq = bind_virq_to_irq(VIRQ_TIMER);
   5.121 +	PCPU_SET(time_irq, irq);
   5.122 +	PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr, 
   5.123 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   5.124 +}
   5.125 +#endif
   5.126 +
   5.127  void
   5.128  cpu_startprofclock(void)
   5.129  {
     6.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c	Fri May 06 17:04:27 2005 +0000
     6.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c	Wed May 11 13:43:50 2005 +0000
     6.3 @@ -79,9 +79,14 @@ evtchn_do_upcall(struct intrframe *frame
     6.4                  l2 &= ~(1 << l2i);
     6.5              
     6.6                  port = (l1i << 5) + l2i;
     6.7 +		irq = evtchn_to_irq[port];
     6.8 +#ifdef SMP		
     6.9 +		if (irq == PCPU_GET(cpuast)) 
    6.10 +			continue;
    6.11 +#endif
    6.12                  if ( (owned = mtx_owned(&sched_lock)) != 0 )
    6.13                      mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
    6.14 -                if ( (irq = evtchn_to_irq[port]) != -1 ) {
    6.15 +                if ( irq != -1 ) {
    6.16  		    struct intsrc *isrc = intr_lookup_source(irq);
    6.17  		    intr_execute_handlers(isrc, frame);
    6.18  		} else {
    6.19 @@ -584,6 +589,7 @@ ap_evtchn_init(int cpu)
    6.20          PCPU_GET(virq_to_irq)[i] = -1;
    6.21  }
    6.22  
    6.23 +
    6.24  static void 
    6.25  evtchn_init(void *dummy __unused)
    6.26  {
    6.27 @@ -591,13 +597,6 @@ evtchn_init(void *dummy __unused)
    6.28      struct xenpic *xp;
    6.29      struct xenpic_intsrc *pin;
    6.30  
    6.31 -    /*
    6.32 -     * xenpic_lock: in order to allow an interrupt to occur in a critical
    6.33 -     * 	        section, to set pcpu->ipending (etc...) properly, we
    6.34 -     *	        must be able to get the icu lock, so it can't be
    6.35 -     *	        under witness.
    6.36 -     */
    6.37 -    mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF);
    6.38  
    6.39      /* XXX -- expedience hack */
    6.40      PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]);
    6.41 @@ -657,3 +656,11 @@ evtchn_init(void *dummy __unused)
    6.42  }
    6.43  
    6.44  SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
    6.45 +    /*
    6.46 +     * xenpic_lock: in order to allow an interrupt to occur in a critical
    6.47 +     * 	        section, to set pcpu->ipending (etc...) properly, we
    6.48 +     *	        must be able to get the icu lock, so it can't be
    6.49 +     *	        under witness.
    6.50 +     */
    6.51 +
    6.52 +MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_DEF|MTX_NOWITNESS);
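The MTX_SYSINIT line above replaces the mtx_init() call removed from
evtchn_init(), deferring lock setup to boot-time SYSINIT processing and adding
MTX_NOWITNESS so the lock is exempt from witness ordering, per the relocated
comment. In effect (a sketch of the standard FreeBSD macro, not code from this
changeset):

    /* MTX_SYSINIT(name, mtxp, desc, opts) registers a SYSINIT hook that
     * runs mtx_init(mtxp, desc, NULL, opts) during boot, so the lock is
     * initialized before any event-channel interrupt can take it. */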
     7.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c	Fri May 06 17:04:27 2005 +0000
     7.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c	Wed May 11 13:43:50 2005 +0000
     7.3 @@ -78,6 +78,7 @@
     7.4  #include <sys/sched.h>
     7.5  #include <sys/sysent.h>
     7.6  #include <sys/sysctl.h>
     7.7 +#include <sys/smp.h>
     7.8  #include <sys/ucontext.h>
     7.9  #include <sys/vmmeter.h>
    7.10  #include <sys/bus.h>
    7.11 @@ -883,14 +884,6 @@ SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_
    7.12  static void
    7.13  cpu_idle_default(void)
    7.14  {
    7.15 -#if 0
    7.16 -	/*
    7.17 -	 * we must absolutely guarentee that hlt is the
    7.18 -	 * absolute next instruction after sti or we
    7.19 -	 * introduce a timing window.
    7.20 -	 */
    7.21 -	__asm __volatile("sti; hlt");
    7.22 -#endif
    7.23  	idle_block();
    7.24  	enable_intr();
    7.25  }
    7.26 @@ -1376,6 +1369,7 @@ pteinfo_t *pteinfo_list;
    7.27  unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
    7.28  int preemptable;
    7.29  int gdt_set;
    7.30 +static int ncpus;
    7.31  
    7.32  /* Linux infection */
    7.33  #define PAGE_OFFSET  KERNBASE
    7.34 @@ -1387,6 +1381,10 @@ initvalues(start_info_t *startinfo)
    7.35      int i;
    7.36      vm_paddr_t pdir_shadow_ma, KPTphys;
    7.37      vm_offset_t *pdir_shadow;
    7.38 +#ifdef SMP
    7.39 +    int j;
    7.40 +#endif
    7.41 +
    7.42  #ifdef WRITABLE_PAGETABLES
    7.43      printk("using writable pagetables\n");
    7.44      HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
    7.45 @@ -1447,18 +1445,19 @@ initvalues(start_info_t *startinfo)
    7.46  
    7.47  
    7.48  #ifdef SMP
    7.49 +#if 0
    7.50      /* allocate cpu0 private page */
    7.51      cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT));
    7.52      tmpindex++; 
    7.53 -
    7.54 +#endif
    7.55      /* allocate SMP page table */
    7.56      SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT));
    7.57 -
    7.58 +#if 0
    7.59      /* Map the private page into the SMP page table */
    7.60      SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;
    7.61 -
    7.62 +#endif
    7.63      /* map SMP page table RO */
    7.64 -    PT_SET_MA(SMPpt, vtomach(SMPpt) & ~PG_RW);
    7.65 +    PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW);
    7.66  
    7.67      /* put the page table into the page directory */
    7.68      xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), 
    7.69 @@ -1496,44 +1495,61 @@ initvalues(start_info_t *startinfo)
    7.70      tmpindex++;
    7.71  
    7.72      HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine;
    7.73 +    ncpus = HYPERVISOR_shared_info->n_vcpu; 
    7.74 +#ifdef SMP
    7.75 +    for (i = 0; i < ncpus; i++) {
    7.76 +	    int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE;
    7.77 +	    for (j = 0; j < npages; j++) {
    7.78 +		    vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
    7.79 +		    tmpindex++;
    7.80 +		    PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW | PG_M, FALSE);
    7.81 +	    }
    7.82 +    }
    7.83 +    xen_flush_queue();
    7.84 +#endif
    7.85      
    7.86      init_first = tmpindex;
    7.87      
    7.88  }
    7.89  
    7.90 +
    7.91 +trap_info_t trap_table[] = {
    7.92 +	{ 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
    7.93 +	{ 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
    7.94 +	{ 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
    7.95 +	{ 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
    7.96 +	/* This is UPL on Linux and KPL on BSD */
    7.97 +	{ 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
    7.98 +	{ 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
    7.99 +	{ 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
   7.100 +	/*
   7.101 +	 * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
   7.102 +	 *   no handler for double fault
   7.103 +	 */
   7.104 +	{ 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
   7.105 +	{10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
   7.106 +	{11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
   7.107 +	{12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
   7.108 +	{13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
   7.109 +	{14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
   7.110 +	{15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
   7.111 +	{16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
   7.112 +	{17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
   7.113 +	{18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
   7.114 +	{19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
   7.115 +	{0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
   7.116 +	{  0, 0,           0, 0 }
   7.117 +};
   7.118 +
   7.119  void
   7.120  init386(void)
   7.121  {
   7.122  	int gsel_tss, metadata_missing, off, x, error;
   7.123  	struct pcpu *pc;
   7.124  	unsigned long gdtmachpfn;
   7.125 -	trap_info_t trap_table[] = {
   7.126 -	    { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
   7.127 -	    { 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
   7.128 -	    { 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
   7.129 -	    { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
   7.130 -	    /* This is UPL on Linux and KPL on BSD */
   7.131 -	    { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
   7.132 -	    { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
   7.133 -	    { 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
   7.134 -	    /*
   7.135 -	     * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
   7.136 -	     *   no handler for double fault
   7.137 -	     */
   7.138 -	    { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
   7.139 -	    {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
   7.140 -	    {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
   7.141 -	    {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
   7.142 -	    {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
   7.143 -	    {14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
   7.144 -	    {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
   7.145 -	    {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
   7.146 -	    {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
   7.147 -	    {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
   7.148 -	    {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
   7.149 -	    {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
   7.150 -	    {  0, 0,           0, 0 }
   7.151 -        };
   7.152 +#ifdef SMP
   7.153 +	int i;
   7.154 +#endif
   7.155  	proc0.p_uarea = proc0uarea;
   7.156  	thread0.td_kstack = proc0kstack;
   7.157  	thread0.td_pcb = (struct pcb *)
   7.158 @@ -1583,26 +1599,42 @@ init386(void)
   7.159  	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); 
   7.160  #endif
   7.161  #ifdef SMP
   7.162 -	/* this correspond to the cpu private page as mapped into the SMP page 
   7.163 -	 * table in initvalues
   7.164 +	/* XXX this will blow up if there are more than 512/NGDT vcpus - will never 
   7.165 +	 * be an issue in the real world but should add an assert on general principles
   7.166 +	 * we'll likely blow up when we hit LAST_RESERVED_GDT_ENTRY, at which point we
   7.167 +	 * would need to start allocating more pages for the GDT
   7.168  	 */
   7.169  	pc = &SMP_prvspace[0].pcpu;
   7.170 -	gdt_segs[GPRIV_SEL].ssd_limit =
   7.171 -		atop(sizeof(struct privatespace) - 1);
   7.172 +	for (i = 0; i < ncpus; i++) {
   7.173 +		cpu_add(i, (i == 0));
   7.174 +
   7.175 +		gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i];
   7.176 +		gdt_segs[GPRIV_SEL].ssd_limit =
   7.177 +			atop(sizeof(struct privatespace) - 1);
   7.178 +		gdt_segs[GPROC0_SEL].ssd_base =
   7.179 +			(int) &SMP_prvspace[i].pcpu.pc_common_tss;
   7.180 +		SMP_prvspace[i].pcpu.pc_prvspace =
   7.181 +			&SMP_prvspace[i].pcpu;
   7.182 +		
   7.183 +		for (x = 0; x < NGDT; x++) {
   7.184 +			ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd);
   7.185 +		}
   7.186 +	}
   7.187  #else
   7.188  	pc = &__pcpu;
   7.189  	gdt_segs[GPRIV_SEL].ssd_limit =
   7.190  		atop(sizeof(struct pcpu) - 1);
   7.191 -#endif
   7.192  	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
   7.193  	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
   7.194  	for (x = 0; x < NGDT; x++)
   7.195  	    ssdtosd(&gdt_segs[x], &gdt[x].sd);
   7.196 +#endif
   7.197 +
   7.198  
   7.199  	PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW);
   7.200  	gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
   7.201 -	if ((error = HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1))) 
   7.202 -	    panic("set_gdt failed");
   7.203 +	PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0);
   7.204 +
   7.205  	
   7.206  	lgdt_finish();
   7.207  	gdt_set = 1;
     8.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c	Fri May 06 17:04:27 2005 +0000
     8.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c	Wed May 11 13:43:50 2005 +0000
     8.3 @@ -83,7 +83,16 @@
     8.4  #include <machine/specialreg.h>
     8.5  #include <machine/privatespace.h>
     8.6  
     8.7 +
     8.8 +/* XEN includes */
     8.9  #include <machine/xenfunc.h>
    8.10 +#include <machine/xen_intr.h>
    8.11 +
    8.12 +void Xhypervisor_callback(void);
    8.13 +void failsafe_callback(void);
    8.14 +
    8.15 +/***************/
    8.16 +
    8.17  
    8.18  #define WARMBOOT_TARGET		0
    8.19  #define WARMBOOT_OFF		(KERNBASE + 0x0467)
    8.20 @@ -94,6 +103,10 @@
    8.21  #define BIOS_RESET		(0x0f)
    8.22  #define BIOS_WARM		(0x0a)
    8.23  
    8.24 +
    8.25 +#undef POSTCODE
    8.26 +#define POSTCODE(x)
    8.27 +
    8.28  /*
    8.29   * this code MUST be enabled here and in mpboot.s.
    8.30   * it follows the very early stages of AP boot by placing values in CMOS ram.
    8.31 @@ -175,6 +188,8 @@ extern pt_entry_t *KPTphys;
    8.32  /* SMP page table page */
    8.33  extern pt_entry_t *SMPpt;
    8.34  
    8.35 +extern trap_info_t trap_table[];
    8.36 +
    8.37  struct pcb stoppcbs[MAXCPU];
    8.38  
    8.39  /* Variables needed for SMP tlb shootdown. */
    8.40 @@ -208,7 +223,9 @@ static u_int boot_address;
    8.41  
    8.42  static void	set_logical_apic_ids(void);
    8.43  static int	start_all_aps(void);
    8.44 +#if 0
    8.45  static void	install_ap_tramp(void);
    8.46 +#endif
    8.47  static int	start_ap(int apic_id);
    8.48  static void	release_aps(void *dummy);
    8.49  
    8.50 @@ -314,6 +331,7 @@ int
    8.51  cpu_mp_probe(void)
    8.52  {
    8.53  
    8.54 +	mp_ncpus = HYPERVISOR_shared_info->n_vcpu;
    8.55  	/*
    8.56  	 * Always record BSP in CPU map so that the mbuf init code works
    8.57  	 * correctly.
    8.58 @@ -342,20 +360,24 @@ cpu_mp_probe(void)
    8.59  	return (1);
    8.60  }
    8.61  
    8.62 -/*
    8.63 - * Initialize the IPI handlers and start up the AP's.
    8.64 - */
    8.65 -void
    8.66 -cpu_mp_start(void)
    8.67 +static void
    8.68 +cpu_mp_ipi_init(void)
    8.69  {
    8.70 -	int i;
    8.71 -
    8.72 -	POSTCODE(MP_START_POST);
    8.73 -
    8.74 -	/* Initialize the logical ID to APIC ID table. */
    8.75 -	for (i = 0; i < MAXCPU; i++)
    8.76 -		cpu_apic_ids[i] = -1;
    8.77 -
    8.78 +	int irq;
    8.79 +	int cpu = smp_processor_id();
    8.80 +	/* 
    8.81 +	 * these are not needed by XenFreeBSD - from Keir:
    8.82 +	 * For TLB-flush related IPIs, Xen has hypercalls 
    8.83 +	 * you should use instead. You can pass a pointer 
    8.84 +	 * to a vcpu bitmap to update_va_mapping(), and to
     8.85 +	 * MMUEXT_flush_tlb_multi and MMUEXT_invlpg_multi. 
    8.86 +	 * Xen will then make sure that those vcpus get 
    8.87 +	 * flushed appropriately before returning to the
    8.88 +	 * caller.
    8.89 +	 * There is also no indication that we need to forward
    8.90 +	 * clock interrupts.
    8.91 +	 */
    8.92 +#if 0 
    8.93  	/* Install an inter-CPU IPI for TLB invalidation */
    8.94  	setidt(IPI_INVLTLB, IDTVEC(invltlb),
    8.95  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
    8.96 @@ -371,22 +393,69 @@ cpu_mp_start(void)
    8.97  	/* Install an inter-CPU IPI for forwarding statclock() */
    8.98  	setidt(IPI_STATCLOCK, IDTVEC(statclock),
    8.99  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.100 -	
   8.101 +#endif
   8.102 +
   8.103 +	/* 
   8.104 +	 * These can all be consolidated. For now leaving 
   8.105 +	 * as individual IPIs.
   8.106 +	 *
   8.107 +	 */
   8.108 +#if 0
   8.109  	/* Install an inter-CPU IPI for lazy pmap release */
   8.110  	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
   8.111  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.112 +#else
   8.113 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP);
   8.114 +	PCPU_SET(lazypmap, irq);
   8.115 +	PANIC_IF(intr_add_handler("pmap_lazyfix", irq, 
   8.116 +				  (driver_intr_t *)pmap_lazyfix_action, 
   8.117 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   8.118 +#endif
   8.119  
   8.120 +#if 0
   8.121  	/* Install an inter-CPU IPI for all-CPU rendezvous */
   8.122  	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
   8.123  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.124 +#else 
   8.125 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS);
   8.126 +	PCPU_SET(rendezvous, irq);
   8.127 +	PANIC_IF(intr_add_handler("smp_rendezvous", irq, 
   8.128 +				  (driver_intr_t *)smp_rendezvous_action, 
   8.129 +				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
   8.130 +#endif
   8.131  
   8.132 +#if 0
   8.133  	/* Install an inter-CPU IPI for forcing an additional software trap */
   8.134  	setidt(IPI_AST, IDTVEC(cpuast),
   8.135  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.136 -
   8.137 +#else
   8.138 +	irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST);
   8.139 +	PCPU_SET(cpuast, irq);
   8.140 +#endif
   8.141 +	/* XXX ignore for now */
   8.142 +#if 0 
   8.143  	/* Install an inter-CPU IPI for CPU stop/restart */
   8.144  	setidt(IPI_STOP, IDTVEC(cpustop),
   8.145  	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   8.146 +#endif
   8.147 +
   8.148 +}
   8.149 +
   8.150 +SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL);
   8.151 +
   8.152 +/*
   8.153 + * Initialize the IPI handlers and start up the AP's.
   8.154 + */
   8.155 +void
   8.156 +cpu_mp_start(void) /* --- Start here --- */
   8.157 +{
   8.158 +	int i;
   8.159 +
   8.160 +	POSTCODE(MP_START_POST);
   8.161 +
   8.162 +	/* Initialize the logical ID to APIC ID table. */
   8.163 +	for (i = 0; i < MAXCPU; i++)
   8.164 +		cpu_apic_ids[i] = -1;
   8.165  
   8.166  
   8.167  	/* Set boot_cpu_id if needed. */
   8.168 @@ -437,35 +506,44 @@ cpu_mp_announce(void)
   8.169  void
   8.170  init_secondary(void)
   8.171  {
   8.172 -	int	gsel_tss;
   8.173 -	int	x, myid;
   8.174 +	int	myid;
   8.175 +	unsigned long gdtmachpfn;
   8.176 +	printk("MADE IT!!");
   8.177 +
   8.178  #if 0
   8.179  	u_int	cr0;
   8.180  #endif
   8.181 +	/* Steps to booting SMP on xen as gleaned from XenLinux:
   8.182 +	 * - cpu_init() - processor specific initialization
   8.183 +	 * - smp_callin() 
   8.184 +	 *    - wait 2s for BP to finish its startup sequence
   8.185 +	 *    - map_cpu_to_logical_apicid()
   8.186 +	 *    - save cpuid info
   8.187 +	 *    - set bit in callin map to let master (BP?) continue
   8.188 +	 * - local setup timer() - per cpu timer initialization
   8.189 +	 * - ldebug_setup() - bind debug IRQ to local CPU.
   8.190 +	 * - smp_intr_init() - IPI setup that we do in cpu_mp_start
   8.191 +	 * - local_irq_enable() - enable interrupts locally
   8.192 +	 * - cpu_set(id, map) - announce that we're up
   8.193 +	 * - cpu_idle() - make us schedulable
   8.194 +	 */
   8.195 +
   8.196 +
   8.197  	/* bootAP is set in start_ap() to our ID. */
   8.198  	myid = bootAP;
   8.199 -	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
   8.200 -	gdt_segs[GPROC0_SEL].ssd_base =
   8.201 -		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
   8.202 -	SMP_prvspace[myid].pcpu.pc_prvspace =
   8.203 -		&SMP_prvspace[myid].pcpu;
   8.204  
   8.205 -	for (x = 0; x < NGDT; x++) {
   8.206 -		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
   8.207 -	}
   8.208 +	gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
   8.209 +	PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0); 
   8.210  
   8.211 -#if 0
   8.212 -	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
   8.213 -	r_gdt.rd_base = (int) &gdt[myid * NGDT];
   8.214 -	lgdt(&r_gdt);			/* does magic intra-segment return */
   8.215 +	
   8.216 +	lgdt_finish();
   8.217  
   8.218 -	lidt(&r_idt);
   8.219 -	lldt(_default_ldt);
   8.220 -#endif
   8.221 +	PCPU_SET(cpuid, myid);
   8.222 +
   8.223 +
   8.224 +	set_user_ldt((struct mdproc *)_default_ldt);
   8.225  	PCPU_SET(currentldt, _default_ldt);
   8.226  
   8.227 -	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
   8.228 -	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
   8.229  	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
   8.230  	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
   8.231  	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
   8.232 @@ -557,6 +635,13 @@ init_secondary(void)
   8.233  	while (smp_started == 0)
   8.234  		ia32_pause();
   8.235  
   8.236 +	/* need to wait until now to setup the IPIs as SI_SUB_CPU is
   8.237 +	 * much earlier than SI_SUB_INTR
   8.238 +	 */  
   8.239 +	ap_evtchn_init(myid);
   8.240 +	ap_cpu_initclocks();
   8.241 +	cpu_mp_ipi_init();
   8.242 +
   8.243  	/* ok, now grab sched_lock and enter the scheduler */
   8.244  	mtx_lock_spin(&sched_lock);
   8.245  
   8.246 @@ -610,28 +695,35 @@ set_logical_apic_ids(void)
   8.247  static int
   8.248  start_all_aps(void)
   8.249  {
   8.250 -#ifndef PC98
   8.251 -	u_char mpbiosreason;
   8.252 -#endif
   8.253 -	u_long mpbioswarmvec;
   8.254  	struct pcpu *pc;
   8.255  	char *stack;
   8.256 -	uintptr_t kptbase;
   8.257 -	int i, pg, apic_id, cpu;
   8.258 +	int i, apic_id, cpu;
   8.259 +
   8.260 +	/* 
   8.261 +	 * This function corresponds most closely to 
   8.262 +	 * smp_boot_cpus in XenLinux - the sequence there 
   8.263 +	 * is:
   8.264 +	 * - check if SMP config is found - if not:
   8.265 +	 *     - clear the I/O APIC IRQs
   8.266 +	 *     - map cpu to logical apicid
   8.267 +	 *     - exit
   8.268 +	 * - smp_intr_init - IPI initialization
   8.269 +	 * - map cpu to logical apicid
   8.270 +	 * - boot each of the vcpus
   8.271 +	 * - clear and then construct the cpu sibling [logical CPUs] map.
   8.272 +	 *
   8.273 +	 */
   8.274  
   8.275  	POSTCODE(START_ALL_APS_POST);
   8.276  
   8.277  	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
   8.278 -
   8.279 +#if 0
   8.280  	/* install the AP 1st level boot code */
   8.281  	install_ap_tramp();
   8.282  
   8.283  	/* save the current value of the warm-start vector */
   8.284  	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
   8.285 -#ifndef PC98
   8.286 -	outb(CMOS_REG, BIOS_RESET);
   8.287 -	mpbiosreason = inb(CMOS_DATA);
   8.288 -#endif
   8.289 +
   8.290  
   8.291  	/* set up temporary P==V mapping for AP boot */
   8.292  	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
   8.293 @@ -640,7 +732,7 @@ start_all_aps(void)
   8.294  		PTD[i] = (pd_entry_t)(PG_V | PG_RW |
   8.295  		    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
   8.296  	invltlb();
   8.297 -
   8.298 +#endif
   8.299  	/* start each AP */
   8.300  	for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
   8.301  		if (!cpu_info[apic_id].cpu_present ||
   8.302 @@ -650,7 +742,7 @@ start_all_aps(void)
   8.303  
   8.304  		/* save APIC ID for this logical ID */
   8.305  		cpu_apic_ids[cpu] = apic_id;
   8.306 -
   8.307 +#if 0
   8.308  		/* first page of AP's private space */
   8.309  		pg = cpu * i386_btop(sizeof(struct privatespace));
   8.310  
   8.311 @@ -665,11 +757,14 @@ start_all_aps(void)
   8.312  		for (i = 0; i < KSTACK_PAGES; i++)
   8.313  			SMPpt[pg + 1 + i] = (pt_entry_t)
   8.314  			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
   8.315 +#endif
   8.316 +		pc = &SMP_prvspace[cpu].pcpu;
   8.317  
   8.318  		/* prime data page for it to use */
   8.319  		pcpu_init(pc, cpu, sizeof(struct pcpu));
   8.320  		pc->pc_apic_id = apic_id;
   8.321  
   8.322 +#if 0
   8.323  		/* setup a vector to our boot code */
   8.324  		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
   8.325  		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
   8.326 @@ -677,7 +772,7 @@ start_all_aps(void)
   8.327  		outb(CMOS_REG, BIOS_RESET);
   8.328  		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
   8.329  #endif
   8.330 -
   8.331 +#endif
   8.332  		bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
   8.333  		    PAGE_SIZE];
   8.334  		bootAP = cpu;
   8.335 @@ -700,13 +795,10 @@ start_all_aps(void)
   8.336  	/* build our map of 'other' CPUs */
   8.337  	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
   8.338  
   8.339 +#if 0
   8.340  	/* restore the warmstart vector */
   8.341  	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
   8.342 -#ifndef PC98
   8.343 -	outb(CMOS_REG, BIOS_RESET);
   8.344 -	outb(CMOS_DATA, mpbiosreason);
   8.345  #endif
   8.346 -
   8.347  	/*
   8.348  	 * Set up the idle context for the BSP.  Similar to above except
   8.349  	 * that some was done by locore, some by pmap.c and some is implicit
   8.350 @@ -739,7 +831,7 @@ extern void bootDataSeg(void);
   8.351  extern void MPentry(void);
   8.352  extern u_int MP_GDT;
   8.353  extern u_int mp_gdtbase;
   8.354 -
   8.355 +#if 0
   8.356  static void
   8.357  install_ap_tramp(void)
   8.358  {
   8.359 @@ -791,6 +883,21 @@ install_ap_tramp(void)
   8.360  	*dst16 = (u_int) boot_address & 0xffff;
   8.361  	*dst8 = ((u_int) boot_address >> 16) & 0xff;
   8.362  }
   8.363 +#endif
   8.364 +
   8.365 +static int 
   8.366 +cpu_mp_trap_init(trap_info_t *trap_ctxt)
   8.367 +{
   8.368 +
   8.369 +        trap_info_t *t = trap_table;
   8.370 +
   8.371 +        for (t = trap_table; t->address; t++) {
   8.372 +                trap_ctxt[t->vector].flags = t->flags;
   8.373 +                trap_ctxt[t->vector].cs = t->cs;
   8.374 +                trap_ctxt[t->vector].address = t->address;
   8.375 +        }
   8.376 +        return 0x80 /*SYSCALL_VECTOR*/;
   8.377 +}
   8.378  
   8.379  /*
   8.380   * This function starts the AP (application processor) identified
   8.381 @@ -802,8 +909,25 @@ install_ap_tramp(void)
   8.382  static int
   8.383  start_ap(int apic_id)
   8.384  {
   8.385 -	int vector, ms;
   8.386 -	int cpus;
   8.387 +	int vector, ms, i;
   8.388 +	int cpus, boot_error;
   8.389 +	vcpu_guest_context_t ctxt;
   8.390 +
   8.391 +	/* 
   8.392 +	 * This is the FreeBSD equivalent to do_boot_cpu(apicid) in
   8.393 +	 * smpboot.c. 
   8.394 +	 * its initialization sequence consists of:
   8.395 +	 * - fork_idle(cpu) to create separate idle context
   8.396 +	 * - initialization of idle's context to start_secondary
   8.397 +	 * - initialization of cpu ctxt to start in startup_32_smp
   8.398 +	 * - then we call HYPERVISOR_boot_vcpu with the cpu index and
   8.399 +	 *   a pointer to the context.
   8.400 +	 * - on boot success we:
   8.401 +	 *   - set ourselves in the callout_map
   8.402 +	 *   - wait up to 5 seconds for us to be set in the callin map
   8.403 +	 * - set x86_cpu_to_apicid[cpu] = apicid;
   8.404 +	 *
   8.405 +	 */
   8.406  
   8.407  	POSTCODE(START_AP_POST);
   8.408  
   8.409 @@ -813,6 +937,55 @@ start_ap(int apic_id)
   8.410  	/* used as a watchpoint to signal AP startup */
   8.411  	cpus = mp_naps;
   8.412  
   8.413 +	memset(&ctxt, 0, sizeof(ctxt));
   8.414 +
   8.415 +	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
   8.416 +	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
   8.417 +	ctxt.user_regs.fs = 0;
   8.418 +	ctxt.user_regs.gs = 0;
   8.419 +	ctxt.user_regs.ss = __KERNEL_DS;
   8.420 +	ctxt.user_regs.cs = __KERNEL_CS;
   8.421 +	ctxt.user_regs.eip = (unsigned long)init_secondary;
   8.422 +	ctxt.user_regs.esp = (unsigned long)bootSTK;
   8.423 +#ifdef notyet
   8.424 +	ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
   8.425 +#else
   8.426 +	ctxt.user_regs.eflags = (1<<9) | (1<<2);
   8.427 +#endif
   8.428 +	/* FPU is set up to default initial state. */
   8.429 +	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
   8.430 +
   8.431 +	/* Virtual IDT is empty at start-of-day. */
   8.432 +	for ( i = 0; i < 256; i++ )
   8.433 +	{
   8.434 +		ctxt.trap_ctxt[i].vector = i;
   8.435 +		ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
   8.436 +	}
   8.437 +	ctxt.fast_trap_idx = cpu_mp_trap_init(ctxt.trap_ctxt);
   8.438 +
   8.439 +	/* No LDT. */
   8.440 +	ctxt.ldt_ents = 0;
   8.441 +
   8.442 +	/* Ring 1 stack is the initial stack. */
   8.443 +	ctxt.kernel_ss = __KERNEL_DS;
   8.444 +	ctxt.kernel_sp = (unsigned long)bootSTK;
   8.445 +
   8.446 +	/* Callback handlers. */
   8.447 +	ctxt.event_callback_cs     = __KERNEL_CS;
   8.448 +	ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
   8.449 +	ctxt.failsafe_callback_cs  = __KERNEL_CS;
   8.450 +	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
   8.451 +
   8.452 +	ctxt.pt_base = (vm_paddr_t)IdlePTD;
   8.453 +
   8.454 +	boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt);
   8.455 +
   8.456 +	
   8.457 +	if (boot_error) 
   8.458 +		printk("Houston we have a problem\n");
   8.459 +	else
   8.460 +		printk("boot_vcpu succeeded\n");
   8.461 +#if 0
   8.462  	/*
   8.463  	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
   8.464  	 * and running the target CPU. OR this INIT IPI might be latched (P5
   8.465 @@ -862,6 +1035,7 @@ start_ap(int apic_id)
   8.466  	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
   8.467  	    vector, apic_id);
   8.468  	lapic_ipi_wait(-1);
   8.469 +#endif
   8.470  	DELAY(200);		/* wait ~200uS */
   8.471  
   8.472  	/* Wait up to 5 seconds for it to start. */
     9.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c	Fri May 06 17:04:27 2005 +0000
     9.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c	Wed May 11 13:43:50 2005 +0000
     9.3 @@ -1374,7 +1374,6 @@ static u_int *lazymask;
     9.4  static u_int lazyptd;
     9.5  static volatile u_int lazywait;
     9.6  
     9.7 -void pmap_lazyfix_action(void);
     9.8  
     9.9  void
    9.10  pmap_lazyfix_action(void)
    10.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Fri May 06 17:04:27 2005 +0000
    10.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Wed May 11 13:43:50 2005 +0000
    10.3 @@ -380,8 +380,6 @@ printk(const char *fmt, ...)
    10.4          (void)HYPERVISOR_console_write(buf, ret);
    10.5  }
    10.6  
    10.7 -#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
    10.8 -
    10.9  
   10.10  #define XPQUEUE_SIZE 128
   10.11  #ifdef SMP
    11.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h	Fri May 06 17:04:27 2005 +0000
    11.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h	Wed May 11 13:43:50 2005 +0000
    11.3 @@ -441,4 +441,20 @@ HYPERVISOR_vm_assist(unsigned int cmd, u
    11.4      return ret;
    11.5  }
    11.6  
    11.7 +static inline int
    11.8 +HYPERVISOR_boot_vcpu(
    11.9 +    unsigned long vcpu, vcpu_guest_context_t *ctxt)
   11.10 +{
   11.11 +    int ret;
   11.12 +    unsigned long ign1, ign2;
   11.13 +
   11.14 +    __asm__ __volatile__ (
   11.15 +        TRAP_INSTR
   11.16 +        : "=a" (ret), "=b" (ign1), "=c" (ign2)
   11.17 +	: "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
   11.18 +	: "memory");
   11.19 +
   11.20 +    return ret;
   11.21 +}
   11.22 +
   11.23  #endif /* __HYPERVISOR_H__ */
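HYPERVISOR_boot_vcpu follows the same TRAP_INSTR calling convention as the
other hypercall wrappers in this header (hypercall number in %eax, arguments in
%ebx and %ecx). Its caller in this changeset is start_ap() in mp_machdep.c;
condensed usage sketch:

    vcpu_guest_context_t ctxt;

    memset(&ctxt, 0, sizeof(ctxt));
    /* Fill in user_regs (eip = init_secondary, esp = bootSTK), the
     * virtual IDT via cpu_mp_trap_init(), kernel_ss/kernel_sp, the
     * event and failsafe callbacks, and pt_base = IdlePTD. */
    if (HYPERVISOR_boot_vcpu(bootAP, &ctxt) != 0)
            printk("boot_vcpu failed\n");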
    12.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h	Fri May 06 17:04:27 2005 +0000
    12.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h	Wed May 11 13:43:50 2005 +0000
    12.3 @@ -53,7 +53,12 @@
    12.4          int     *pc_ipi_to_evtchn;                                      \
    12.5          int     *pc_virq_to_irq;                                        \
    12.6          u_int   pc_cr2;                                                 \
    12.7 -        u_int   pc_pdir                                        
    12.8 +        u_int   pc_pdir;                                                \
    12.9 +        u_int   pc_lazypmap;                                            \
   12.10 +        u_int   pc_rendezvous;                                          \
   12.11 +        u_int   pc_cpuast;                                              \
   12.12 +        u_int   pc_time_irq;                                              \
   12.13 +        uint64_t pc_processed_system_time;  
   12.14  
   12.15  #if defined(lint)
   12.16   
    13.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h	Fri May 06 17:04:27 2005 +0000
    13.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h	Wed May 11 13:43:50 2005 +0000
    13.3 @@ -343,6 +343,7 @@ void	pmap_set_pg(void);
    13.4  void	pmap_invalidate_page(pmap_t, vm_offset_t);
    13.5  void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
    13.6  void	pmap_invalidate_all(pmap_t);
    13.7 +void    pmap_lazyfix_action(void);
    13.8  
    13.9  void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
   13.10  void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
    14.1 --- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h	Fri May 06 17:04:27 2005 +0000
    14.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h	Wed May 11 13:43:50 2005 +0000
    14.3 @@ -61,6 +61,9 @@ void load_cr3(uint32_t val);
    14.4  void xen_machphys_update(unsigned long, unsigned long);
    14.5  void xen_update_descriptor(union descriptor *, union descriptor *);
    14.6  void lldt(u_short sel);
    14.7 +void ap_cpu_initclocks(void);
    14.8 +
    14.9 +
   14.10  /*
   14.11   * Invalidate a particular VA on all cpus
   14.12   *
   14.13 @@ -79,5 +82,6 @@ invltlb(void)
   14.14  	
   14.15  }
   14.16  
   14.17 +#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
   14.18  
   14.19  #endif /* _XEN_XENFUNC_H_ */
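
PANIC_IF moves here from xen_machdep.c (removed above) so that any
Xen-specific file including xenfunc.h can use it. A usage sketch; the
particular hypercall shown is illustrative, not taken from this changeset:

    /* Abort loudly if a must-not-fail operation fails: the expression
     * is stringified once for the printk and once for the panic. */
    PANIC_IF(HYPERVISOR_update_va_mapping(va, pte, UVMF_INVLPG) < 0);
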
    15.1 --- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers	Fri May 06 17:04:27 2005 +0000
    15.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers	Wed May 11 13:43:50 2005 +0000
    15.3 @@ -49,9 +49,46 @@ source "drivers/infiniband/Kconfig"
    15.4  endif
    15.5  
    15.6  if !XEN_PHYSDEV_ACCESS
    15.7 +
    15.8 +menu "Character devices"
    15.9 +
   15.10  config UNIX98_PTYS
   15.11  	bool
   15.12  	default y
   15.13 +
   15.14 +config LEGACY_PTYS
   15.15 +	bool "Legacy (BSD) PTY support"
   15.16 +	default y
   15.17 +	---help---
   15.18 +	  A pseudo terminal (PTY) is a software device consisting of two
   15.19 +	  halves: a master and a slave. The slave device behaves identically to
   15.20 +	  a physical terminal; the master device is used by a process to
   15.21 +	  read data from and write data to the slave, thereby emulating a
   15.22 +	  terminal. Typical programs for the master side are telnet servers
   15.23 +	  and xterms.
   15.24 +
   15.25 +	  Linux has traditionally used the BSD-like names /dev/ptyxx
   15.26 +	  for masters and /dev/ttyxx for slaves of pseudo
   15.27 +	  terminals. This scheme has a number of problems, including
   15.28 +	  security.  This option enables these legacy devices; on most
   15.29 +	  systems, it is safe to say N.
   15.30 +
   15.31 +
   15.32 +config LEGACY_PTY_COUNT
   15.33 +	int "Maximum number of legacy PTYs in use"
   15.34 +	depends on LEGACY_PTYS
   15.35 +	range 1 256
   15.36 +	default "256"
   15.37 +	---help---
   15.38 +	  The maximum number of legacy PTYs that can be used at any one time.
   15.39 +	  The default is 256, and should be more than enough.  Embedded
   15.40 +	  systems may want to reduce this to save memory.
   15.41 +
   15.42 +	  When not in use, each legacy PTY occupies 12 bytes on 32-bit
   15.43 +	  architectures and 24 bytes on 64-bit architectures.
   15.44 +
   15.45 +endmenu
   15.46 +
   15.47  endif
   15.48  
   15.49  endmenu
    16.1 --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Fri May 06 17:04:27 2005 +0000
    16.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Wed May 11 13:43:50 2005 +0000
    16.3 @@ -340,6 +340,12 @@ CONFIG_NETDEVICES=y
    16.4  # CONFIG_SHAPER is not set
    16.5  # CONFIG_NETCONSOLE is not set
    16.6  CONFIG_UNIX98_PTYS=y
    16.7 +CONFIG_LEGACY_PTYS=y
    16.8 +CONFIG_LEGACY_PTY_COUNT=256
    16.9 +
   16.10 +#
   16.11 +# Character devices
   16.12 +#
   16.13  
   16.14  #
   16.15  # File systems
    17.1 --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Fri May 06 17:04:27 2005 +0000
    17.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Wed May 11 13:43:50 2005 +0000
    17.3 @@ -316,6 +316,12 @@ CONFIG_NETDEVICES=y
    17.4  # CONFIG_SHAPER is not set
    17.5  # CONFIG_NETCONSOLE is not set
    17.6  CONFIG_UNIX98_PTYS=y
    17.7 +CONFIG_LEGACY_PTYS=y
    17.8 +CONFIG_LEGACY_PTY_COUNT=256
    17.9 +
   17.10 +#
   17.11 +# Character devices
   17.12 +#
   17.13  
   17.14  #
   17.15  # File systems
    18.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig	Fri May 06 17:04:27 2005 +0000
    18.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig	Wed May 11 13:43:50 2005 +0000
    18.3 @@ -636,7 +636,7 @@ config MTRR
    18.4  
    18.5  config IRQBALANCE
    18.6   	bool "Enable kernel irq balancing"
    18.7 -	depends on SMP && X86_IO_APIC
    18.8 +	depends on SMP && X86_IO_APIC && !XEN
    18.9  	default y
   18.10  	help
   18.11   	  The default yes will allow the kernel to do irq load balancing.
   18.12 @@ -677,6 +677,16 @@ config X86_IO_APIC
   18.13  	depends on !SMP && X86_UP_IOAPIC
   18.14  	default y
   18.15  
   18.16 +config HOTPLUG_CPU
   18.17 +	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
   18.18 +	depends on SMP && HOTPLUG && EXPERIMENTAL
   18.19 +	---help---
   18.20 +	  Say Y here to experiment with turning CPUs off and on.  CPUs
   18.21 +	  can be controlled through /sys/devices/system/cpu.
   18.22 +
   18.23 +	  Say N.
   18.24 +
   18.25 +
   18.26  if XEN_PHYSDEV_ACCESS
   18.27  
   18.28  menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
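
Per the CONFIG_HOTPLUG_CPU help text above, hot-plugged CPUs are driven
through /sys/devices/system/cpu. A hedged userspace sketch (the sysfs path
comes from the help text; everything else is illustrative):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Write "0" to cpuN/online to offline a CPU, "1" to bring it back;
     * this exercises __cpu_disable()/__cpu_die() added later in this
     * changeset. */
    static int set_cpu_online(int cpu, int online)
    {
        char path[64];
        int fd, ok;

        snprintf(path, sizeof(path),
                 "/sys/devices/system/cpu/cpu%d/online", cpu);
        fd = open(path, O_WRONLY);
        if (fd < 0)
            return -1;
        ok = (write(fd, online ? "1" : "0", 1) == 1);
        close(fd);
        return ok ? 0 : -1;
    }
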
    19.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile	Fri May 06 17:04:27 2005 +0000
    19.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile	Wed May 11 13:43:50 2005 +0000
    19.3 @@ -72,6 +72,7 @@ head-y := arch/xen/i386/kernel/head.o ar
    19.4  libs-y 					+= arch/i386/lib/
    19.5  core-y					+= arch/xen/i386/kernel/ \
    19.6  					   arch/xen/i386/mm/ \
    19.7 +					   arch/xen/i386/mach-default/ \
    19.8  					   arch/i386/crypto/
    19.9  # \
   19.10  #					   arch/xen/$(mcore-y)/
    20.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c	Fri May 06 17:04:27 2005 +0000
    20.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c	Wed May 11 13:43:50 2005 +0000
    20.3 @@ -726,6 +726,7 @@ void fastcall send_IPI_self(int vector)
    20.4  }
    20.5  #endif /* !CONFIG_SMP */
    20.6  
    20.7 +
    20.8  /*
    20.9   * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
   20.10   * specific CPU-side IRQs.
    21.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c	Fri May 06 17:04:27 2005 +0000
    21.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c	Wed May 11 13:43:50 2005 +0000
    21.3 @@ -15,6 +15,9 @@
    21.4  #include <linux/seq_file.h>
    21.5  #include <linux/interrupt.h>
    21.6  #include <linux/kernel_stat.h>
    21.7 +#include <linux/notifier.h>
    21.8 +#include <linux/cpu.h>
    21.9 +#include <linux/delay.h>
   21.10  
   21.11  #ifndef CONFIG_X86_LOCAL_APIC
   21.12  /*
   21.13 @@ -207,9 +210,8 @@ int show_interrupts(struct seq_file *p, 
   21.14  
   21.15  	if (i == 0) {
   21.16  		seq_printf(p, "           ");
   21.17 -		for (j=0; j<NR_CPUS; j++)
   21.18 -			if (cpu_online(j))
   21.19 -				seq_printf(p, "CPU%d       ",j);
   21.20 +		for_each_cpu(j)
   21.21 +			seq_printf(p, "CPU%d       ",j);
   21.22  		seq_putc(p, '\n');
   21.23  	}
   21.24  
   21.25 @@ -222,9 +224,8 @@ int show_interrupts(struct seq_file *p, 
   21.26  #ifndef CONFIG_SMP
   21.27  		seq_printf(p, "%10u ", kstat_irqs(i));
   21.28  #else
   21.29 -		for (j = 0; j < NR_CPUS; j++)
   21.30 -			if (cpu_online(j))
   21.31 -				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
   21.32 +		for_each_cpu(j)
   21.33 +			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
   21.34  #endif
   21.35  		seq_printf(p, " %14s", irq_desc[i].handler->typename);
   21.36  		seq_printf(p, "  %s", action->name);
   21.37 @@ -237,16 +238,13 @@ skip:
   21.38  		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
   21.39  	} else if (i == NR_IRQS) {
   21.40  		seq_printf(p, "NMI: ");
   21.41 -		for (j = 0; j < NR_CPUS; j++)
   21.42 -			if (cpu_online(j))
   21.43 -				seq_printf(p, "%10u ", nmi_count(j));
   21.44 +		for_each_cpu(j)
   21.45 +			seq_printf(p, "%10u ", nmi_count(j));
   21.46  		seq_putc(p, '\n');
   21.47  #ifdef CONFIG_X86_LOCAL_APIC
   21.48  		seq_printf(p, "LOC: ");
   21.49 -		for (j = 0; j < NR_CPUS; j++)
   21.50 -			if (cpu_online(j))
   21.51 -				seq_printf(p, "%10u ",
   21.52 -					irq_stat[j].apic_timer_irqs);
   21.53 +		for_each_cpu(j)
   21.54 +			seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
   21.55  		seq_putc(p, '\n');
   21.56  #endif
   21.57  		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
   21.58 @@ -256,3 +254,44 @@ skip:
   21.59  	}
   21.60  	return 0;
   21.61  }
   21.62 +
   21.63 +#ifdef CONFIG_HOTPLUG_CPU
   21.64 +
   21.65 +void fixup_irqs(cpumask_t map)
   21.66 +{
   21.67 +	unsigned int irq;
   21.68 +	static int warned;
   21.69 +
   21.70 +	for (irq = 0; irq < NR_IRQS; irq++) {
   21.71 +		cpumask_t mask;
   21.72 +		if (irq == 2)
   21.73 +			continue;
   21.74 +
   21.75 +		cpus_and(mask, irq_affinity[irq], map);
   21.76 +		if (any_online_cpu(mask) == NR_CPUS) {
   21.77 +			printk("Breaking affinity for irq %i\n", irq);
   21.78 +			mask = map;
   21.79 +		}
   21.80 +		if (irq_desc[irq].handler->set_affinity)
   21.81 +			irq_desc[irq].handler->set_affinity(irq, mask);
   21.82 +		else if (irq_desc[irq].action && !(warned++))
   21.83 +			printk("Cannot set affinity for irq %i\n", irq);
   21.84 +	}
   21.85 +
   21.86 +#if 0
   21.87 +	barrier();
   21.88 +	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
   21.89 +	   [note the nop - the interrupt-enable boundary on x86 is two
   21.90 +	   instructions from sti] - to flush out pending hardirqs and
   21.91 +	   IPIs. After this point nothing is supposed to reach this CPU." */
   21.92 +	__asm__ __volatile__("sti; nop; cli");
   21.93 +	barrier();
   21.94 +#else
   21.95 +	/* That doesn't seem sufficient.  Give it 1ms. */
   21.96 +	local_irq_enable();
   21.97 +	mdelay(1);
   21.98 +	local_irq_disable();
   21.99 +#endif
  21.100 +}
  21.101 +#endif
  21.102 +
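
The show_interrupts() loops are not just shorter: in 2.6.11,
for_each_cpu(j) iterates cpu_possible_map, so /proc/interrupts now prints
a column for every possible CPU, online or not ("print irq stats for all
possible cpus" in the patch description below). Roughly what the macro
expands to:

    /* for_each_cpu(j) is approximately: */
    for (j = first_cpu(cpu_possible_map);
         j < NR_CPUS;
         j = next_cpu(j, cpu_possible_map))
            seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
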
    22.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c	Fri May 06 17:04:27 2005 +0000
    22.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c	Wed May 11 13:43:50 2005 +0000
    22.3 @@ -13,6 +13,7 @@
    22.4  
    22.5  #include <stdarg.h>
    22.6  
    22.7 +#include <linux/cpu.h>
    22.8  #include <linux/errno.h>
    22.9  #include <linux/sched.h>
   22.10  #include <linux/fs.h>
   22.11 @@ -54,6 +55,9 @@
   22.12  #include <linux/irq.h>
   22.13  #include <linux/err.h>
   22.14  
   22.15 +#include <asm/tlbflush.h>
   22.16 +#include <asm/cpu.h>
   22.17 +
   22.18  asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
   22.19  
   22.20  int hlt_counter;
   22.21 @@ -112,6 +116,33 @@ void xen_idle(void)
   22.22  	}
   22.23  }
   22.24  
   22.25 +#ifdef CONFIG_HOTPLUG_CPU
   22.26 +#include <asm/nmi.h>
   22.27 +/* We don't actually take CPU down, just spin without interrupts. */
   22.28 +static inline void play_dead(void)
   22.29 +{
   22.30 +	/* Ack it */
   22.31 +	__get_cpu_var(cpu_state) = CPU_DEAD;
   22.32 +
   22.33 +	/* We shouldn't have to disable interrupts while dead, but
   22.34 +	 * some interrupts just don't seem to go away, and this makes
   22.35 +	 * it "work" for testing purposes. */
   22.36 +	/* Death loop */
   22.37 +	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
   22.38 +		HYPERVISOR_yield();
   22.39 +
   22.40 +	local_irq_disable();
   22.41 +	__flush_tlb_all();
   22.42 +	cpu_set(smp_processor_id(), cpu_online_map);
   22.43 +	local_irq_enable();
   22.44 +}
   22.45 +#else
   22.46 +static inline void play_dead(void)
   22.47 +{
   22.48 +	BUG();
   22.49 +}
   22.50 +#endif /* CONFIG_HOTPLUG_CPU */
   22.51 +
   22.52  /*
   22.53   * The idle thread. There's no useful work to be
   22.54   * done, so just try to conserve power and have a
   22.55 @@ -130,6 +161,9 @@ void cpu_idle (void)
   22.56  				cpu_clear(cpu, cpu_idle_map);
   22.57  			rmb();
   22.58  
   22.59 +			if (cpu_is_offline(cpu))
   22.60 +				play_dead();
   22.61 +
   22.62  			irq_stat[cpu].idle_timestamp = jiffies;
   22.63  			xen_idle();
   22.64  		}
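
play_dead() is one half of a handshake completed by cpu_enable() in the
smpboot.c hunk below: the dying CPU parks after publishing CPU_DEAD, and a
later online request flips its per-CPU state back to CPU_UP_PREPARE. Note
that the Xen port yields to the hypervisor in the death loop where the
upstream patch (included further down) spins with cpu_relax(). In outline:

    /* dying CPU (play_dead)            controlling CPU (cpu_enable)
     *
     * cpu_state = CPU_DEAD;            cpu_state = CPU_UP_PREPARE;
     * while (cpu_state !=              while (!cpu_online(cpu))
     *        CPU_UP_PREPARE)               cpu_relax();
     *     HYPERVISOR_yield();          fixup_irqs(cpu_online_map);
     * cpu_set(me, cpu_online_map);
     */
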
    23.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c	Fri May 06 17:04:27 2005 +0000
    23.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c	Wed May 11 13:43:50 2005 +0000
    23.3 @@ -19,6 +19,7 @@
    23.4  #include <linux/mc146818rtc.h>
    23.5  #include <linux/cache.h>
    23.6  #include <linux/interrupt.h>
    23.7 +#include <linux/cpu.h>
    23.8  
    23.9  #include <asm/mtrr.h>
   23.10  #include <asm/tlbflush.h>
   23.11 @@ -185,6 +186,7 @@ void send_IPI_mask_bitmask(cpumask_t mas
   23.12  	unsigned int cpu;
   23.13  
   23.14  	local_irq_save(flags);
   23.15 +	WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
   23.16  
   23.17  	for (cpu = 0; cpu < NR_CPUS; ++cpu) {
   23.18  		if (cpu_isset(cpu, mask)) {
   23.19 @@ -320,21 +322,21 @@ out:
   23.20  static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
   23.21  						unsigned long va)
   23.22  {
   23.23 -	cpumask_t tmp;
   23.24  	/*
   23.25  	 * A couple of (to be removed) sanity checks:
   23.26  	 *
   23.27 -	 * - we do not send IPIs to not-yet booted CPUs.
   23.28  	 * - current CPU must not be in mask
   23.29  	 * - mask must exist :)
   23.30  	 */
   23.31  	BUG_ON(cpus_empty(cpumask));
   23.32 -
   23.33 -	cpus_and(tmp, cpumask, cpu_online_map);
   23.34 -	BUG_ON(!cpus_equal(cpumask, tmp));
   23.35  	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
   23.36  	BUG_ON(!mm);
   23.37  
   23.38 +	/* If a CPU which we ran on has gone down, OK. */
   23.39 +	cpus_and(cpumask, cpumask, cpu_online_map);
   23.40 +	if (cpus_empty(cpumask))
   23.41 +		return;
   23.42 +
   23.43  	/*
   23.44  	 * i'm not happy about this global shared spinlock in the
   23.45  	 * MM hot path, but we'll see how contended it is.
   23.46 @@ -465,6 +467,7 @@ void flush_tlb_all(void)
   23.47   */
   23.48  void smp_send_reschedule(int cpu)
   23.49  {
   23.50 +	WARN_ON(cpu_is_offline(cpu));
   23.51  	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
   23.52  }
   23.53  
   23.54 @@ -505,10 +508,16 @@ int smp_call_function (void (*func) (voi
   23.55   */
   23.56  {
   23.57  	struct call_data_struct data;
   23.58 -	int cpus = num_online_cpus()-1;
   23.59 +	int cpus;
   23.60  
   23.61 -	if (!cpus)
   23.62 +	/* Holding any lock stops cpus from going down. */
   23.63 +	spin_lock(&call_lock);
   23.64 +	cpus = num_online_cpus()-1;
   23.65 +
   23.66 +	if (!cpus) {
   23.67 +		spin_unlock(&call_lock);
   23.68  		return 0;
   23.69 +	}
   23.70  
   23.71  	/* Can deadlock when called with interrupts disabled */
   23.72  	WARN_ON(irqs_disabled());
   23.73 @@ -520,7 +529,6 @@ int smp_call_function (void (*func) (voi
   23.74  	if (wait)
   23.75  		atomic_set(&data.finished, 0);
   23.76  
   23.77 -	spin_lock(&call_lock);
   23.78  	call_data = &data;
   23.79  	mb();
   23.80  	
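
Taking call_lock before reading num_online_cpus() closes a hotplug race:
holding the lock pins the set of online CPUs, so the CPU count and the
subsequent IPI broadcast see the same cpu_online_map, and the IPI can
never target a CPU that completed __cpu_disable() in between. A hedged
caller sketch (some_flush_fn is hypothetical):

    /* Run some_flush_fn on every other online CPU and wait, then run
     * it locally; safe against concurrent CPU offlining because
     * smp_call_function() holds call_lock across count and broadcast. */
    static void flush_everywhere(void)
    {
        smp_call_function(some_flush_fn, NULL, /*nonatomic=*/0, /*wait=*/1);
        some_flush_fn(NULL);
    }
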
    24.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c	Fri May 06 17:04:27 2005 +0000
    24.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c	Wed May 11 13:43:50 2005 +0000
    24.3 @@ -44,6 +44,9 @@
    24.4  #include <linux/smp_lock.h>
    24.5  #include <linux/irq.h>
    24.6  #include <linux/bootmem.h>
    24.7 +#include <linux/notifier.h>
    24.8 +#include <linux/cpu.h>
    24.9 +#include <linux/percpu.h>
   24.10  
   24.11  #include <linux/delay.h>
   24.12  #include <linux/mc146818rtc.h>
   24.13 @@ -51,11 +54,7 @@
   24.14  #include <asm/desc.h>
   24.15  #include <asm/arch_hooks.h>
   24.16  
   24.17 -#if 1
   24.18 -#define Dprintk(args...)
   24.19 -#else
   24.20  #include <mach_apic.h>
   24.21 -#endif
   24.22  #include <mach_wakecpu.h>
   24.23  #include <smpboot_hooks.h>
   24.24  
   24.25 @@ -93,7 +92,14 @@ extern unsigned char trampoline_data [];
   24.26  extern unsigned char trampoline_end  [];
   24.27  static unsigned char *trampoline_base;
   24.28  static int trampoline_exec;
   24.29 +#endif
   24.30  
   24.31 +#ifdef CONFIG_HOTPLUG_CPU
   24.32 +/* State of each CPU. */
   24.33 +DEFINE_PER_CPU(int, cpu_state) = { 0 };
   24.34 +#endif
   24.35 +
   24.36 +#if 0
   24.37  /*
   24.38   * Currently trivial. Write the real->protected mode
   24.39   * bootstrap into the page concerned. The caller
   24.40 @@ -462,7 +468,7 @@ extern void local_setup_timer(void);
   24.41  /*
   24.42   * Activate a secondary processor.
   24.43   */
   24.44 -static int __init start_secondary(void *unused)
   24.45 +static void __init start_secondary(void *unused)
   24.46  {
   24.47  	/*
    24.48  	 * Don't put anything before smp_callin(), SMP
   24.49 @@ -488,17 +494,6 @@ static int __init start_secondary(void *
   24.50  	local_irq_enable();
   24.51  
   24.52  	wmb();
   24.53 -	if (0) {
   24.54 -		char *msg2 = "delay2\n";
   24.55 -		int timeout;
   24.56 -		for (timeout = 0; timeout < 50000; timeout++) {
   24.57 -			udelay(1000);
   24.58 -			if (timeout == 2000) {
   24.59 -				(void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
   24.60 -				timeout = 0;
   24.61 -			}
   24.62 -		}
   24.63 -	}
   24.64  	cpu_idle();
   24.65  }
   24.66  
   24.67 @@ -1105,15 +1100,15 @@ static void __init smp_boot_cpus(unsigne
   24.68  	 * If we couldn't find an SMP configuration at boot time,
   24.69  	 * get out of here now!
   24.70  	 */
   24.71 -	if (!smp_found_config /* && !acpi_lapic) */) {
   24.72 +	if (!smp_found_config && !acpi_lapic) {
   24.73  		printk(KERN_NOTICE "SMP motherboard not detected.\n");
   24.74  		smpboot_clear_io_apic_irqs();
   24.75  #if 0
   24.76  		phys_cpu_present_map = physid_mask_of_physid(0);
   24.77 +#endif
   24.78  		if (APIC_init_uniprocessor())
   24.79  			printk(KERN_NOTICE "Local APIC not detected."
   24.80  					   " Using dummy APIC emulation.\n");
   24.81 -#endif
   24.82  		map_cpu_to_logical_apicid();
   24.83  		return;
   24.84  	}
   24.85 @@ -1267,9 +1262,11 @@ static void __init smp_boot_cpus(unsigne
   24.86  #if 0
   24.87  	if (nmi_watchdog == NMI_LOCAL_APIC)
   24.88  		check_nmi_watchdog();
   24.89 +#endif
   24.90  
   24.91  	smpboot_setup_io_apic();
   24.92  
   24.93 +#if 0
   24.94  	setup_boot_APIC_clock();
   24.95  
   24.96  	/*
   24.97 @@ -1284,6 +1281,9 @@ static void __init smp_boot_cpus(unsigne
   24.98     who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
   24.99  void __init smp_prepare_cpus(unsigned int max_cpus)
  24.100  {
  24.101 +	smp_commenced_mask = cpumask_of_cpu(0);
  24.102 +	cpu_callin_map = cpumask_of_cpu(0);
  24.103 +	mb();
  24.104  	smp_boot_cpus(max_cpus);
  24.105  }
  24.106  
  24.107 @@ -1293,20 +1293,97 @@ void __devinit smp_prepare_boot_cpu(void
  24.108  	cpu_set(smp_processor_id(), cpu_callout_map);
  24.109  }
  24.110  
  24.111 +#ifdef CONFIG_HOTPLUG_CPU
  24.112 +
  24.113 +/* must be called with the cpucontrol mutex held */
  24.114 +static int __devinit cpu_enable(unsigned int cpu)
  24.115 +{
  24.116 +	/* get the target out of its holding state */
  24.117 +	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
  24.118 +	wmb();
  24.119 +
  24.120 +	/* wait for the processor to ack it. timeout? */
  24.121 +	while (!cpu_online(cpu))
  24.122 +		cpu_relax();
  24.123 +
  24.124 +	fixup_irqs(cpu_online_map);
  24.125 +	/* counter the disable in fixup_irqs() */
  24.126 +	local_irq_enable();
  24.127 +	return 0;
  24.128 +}
  24.129 +
  24.130 +int __cpu_disable(void)
  24.131 +{
  24.132 +	cpumask_t map = cpu_online_map;
  24.133 +	int cpu = smp_processor_id();
  24.134 +
  24.135 +	/*
  24.136 +	 * Perhaps use cpufreq to drop frequency, but that could go
  24.137 +	 * into generic code.
  24.138 + 	 *
  24.139 +	 * We won't take down the boot processor on i386 due to some
  24.140 +	 * interrupts only being able to be serviced by the BSP.
  24.141 +	 * Especially so if we're not using an IOAPIC	-zwane
  24.142 +	 */
  24.143 +	if (cpu == 0)
  24.144 +		return -EBUSY;
  24.145 +
  24.146 +	/* Allow any queued timer interrupts to get serviced */
  24.147 +	local_irq_enable();
  24.148 +	mdelay(1);
  24.149 +	local_irq_disable();
  24.150 +
  24.151 +	cpu_clear(cpu, map);
  24.152 +	fixup_irqs(map);
  24.153 +	/* It's now safe to remove this processor from the online map */
  24.154 +	cpu_clear(cpu, cpu_online_map);
  24.155 +	return 0;
  24.156 +}
  24.157 +
  24.158 +void __cpu_die(unsigned int cpu)
  24.159 +{
  24.160 +	/* We don't do anything here: idle task is faking death itself. */
  24.161 +	unsigned int i;
  24.162 +
  24.163 +	for (i = 0; i < 10; i++) {
  24.164 +		/* They ack this in play_dead by setting CPU_DEAD */
  24.165 +		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
  24.166 +			return;
  24.167 +		current->state = TASK_UNINTERRUPTIBLE;
  24.168 +		schedule_timeout(HZ/10);
  24.169 +	}
  24.170 + 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
  24.171 +}
  24.172 +#else /* ... !CONFIG_HOTPLUG_CPU */
  24.173 +int __cpu_disable(void)
  24.174 +{
  24.175 +	return -ENOSYS;
  24.176 +}
  24.177 +
  24.178 +void __cpu_die(unsigned int cpu)
  24.179 +{
  24.180 +	/* We said "no" in __cpu_disable */
  24.181 +	BUG();
  24.182 +}
  24.183 +#endif /* CONFIG_HOTPLUG_CPU */
  24.184 +
  24.185  int __devinit __cpu_up(unsigned int cpu)
  24.186  {
  24.187 -	/* This only works at boot for x86.  See "rewrite" above. */
  24.188 -	if (cpu_isset(cpu, smp_commenced_mask)) {
  24.189 -		local_irq_enable();
  24.190 -		return -ENOSYS;
  24.191 -	}
  24.192 -
  24.193  	/* In case one didn't come up */
  24.194  	if (!cpu_isset(cpu, cpu_callin_map)) {
  24.195 +		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
  24.196  		local_irq_enable();
  24.197  		return -EIO;
  24.198  	}
  24.199  
  24.200 +#ifdef CONFIG_HOTPLUG_CPU
  24.201 +	/* Already up, and in cpu_quiescent now? */
  24.202 +	if (cpu_isset(cpu, smp_commenced_mask)) {
  24.203 +		cpu_enable(cpu);
  24.204 +		return 0;
  24.205 +	}
  24.206 +#endif
  24.207 +
  24.208  	local_irq_enable();
  24.209  	/* Unleash the CPU! */
  24.210  	cpu_set(cpu, smp_commenced_mask);
    25.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c	Fri May 06 17:04:27 2005 +0000
    25.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c	Wed May 11 13:43:50 2005 +0000
    25.3 @@ -609,6 +609,14 @@ fastcall void do_nmi(struct pt_regs * re
    25.4  	nmi_enter();
    25.5  
    25.6  	cpu = smp_processor_id();
    25.7 +
    25.8 +#ifdef CONFIG_HOTPLUG_CPU
    25.9 +	if (!cpu_online(cpu)) {
   25.10 +		nmi_exit();
   25.11 +		return;
   25.12 +	}
   25.13 +#endif
   25.14 +
   25.15  	++nmi_count(cpu);
   25.16  
   25.17  	if (!nmi_callback(regs, cpu))
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile	Wed May 11 13:43:50 2005 +0000
    26.3 @@ -0,0 +1,12 @@
    26.4 +#
    26.5 +# Makefile for the linux kernel.
    26.6 +#
    26.7 +
    26.8 +c-obj-y				:= topology.o
    26.9 +
   26.10 +$(patsubst %.o,$(obj)/%.c,$(c-obj-y)):
   26.11 +	@ln -fsn $(srctree)/arch/i386/mach-default/$(notdir $@) $@
   26.12 +
   26.13 +obj-y	+= $(c-obj-y)
   26.14 +
   26.15 +clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-))
    27.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c	Fri May 06 17:04:27 2005 +0000
    27.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c	Wed May 11 13:43:50 2005 +0000
    27.3 @@ -361,6 +361,7 @@ static void __init pagetable_init (void)
    27.4  	make_page_writable(old_pgd);
    27.5  	__flush_tlb_all();
    27.6  	free_bootmem(__pa(old_pgd), PAGE_SIZE);
    27.7 +	init_mm.context.pinned = 1;
    27.8  
    27.9  	kernel_physical_mapping_init(pgd_base);
   27.10  	remap_numa_kva();
    28.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c	Fri May 06 17:04:27 2005 +0000
    28.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c	Wed May 11 13:43:50 2005 +0000
    28.3 @@ -22,6 +22,7 @@
    28.4  #include <asm/tlb.h>
    28.5  #include <asm/tlbflush.h>
    28.6  #include <asm/io.h>
    28.7 +#include <asm/mmu_context.h>
    28.8  
    28.9  #include <asm-xen/foreign_page.h>
   28.10  
   28.11 @@ -480,7 +481,6 @@ void mm_unpin(struct mm_struct *mm)
   28.12  
   28.13  void _arch_exit_mmap(struct mm_struct *mm)
   28.14  {
   28.15 -    unsigned int cpu = smp_processor_id();
   28.16      struct task_struct *tsk = current;
   28.17  
   28.18      task_lock(tsk);
   28.19 @@ -494,9 +494,7 @@ void _arch_exit_mmap(struct mm_struct *m
   28.20          tsk->active_mm = &init_mm;
   28.21          atomic_inc(&init_mm.mm_count);
   28.22  
   28.23 -        cpu_set(cpu, init_mm.cpu_vm_mask);
   28.24 -        load_cr3(swapper_pg_dir);
   28.25 -        cpu_clear(cpu, mm->cpu_vm_mask);
   28.26 +        switch_mm(mm, &init_mm, tsk);
   28.27  
   28.28          atomic_dec(&mm->mm_count);
   28.29          BUG_ON(atomic_read(&mm->mm_count) == 0);
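
Replacing the open-coded cr3 reload with switch_mm() sends the lazy-mm
teardown through the same path as every other context switch, so the Xen
pgd pin/unpin bookkeeping added elsewhere in this changeset is applied
consistently. What the three removed lines now delegate to, in outline
(generic i386 semantics, not the exact Xen implementation):

    /* switch_mm(prev, next, tsk), roughly: */
    cpu_clear(cpu, prev->cpu_vm_mask);   /* stop flush IPIs for prev   */
    cpu_set(cpu, next->cpu_vm_mask);     /* we now use next's tables   */
    load_cr3(next->pgd);                 /* swapper_pg_dir for init_mm */
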
    29.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile	Fri May 06 17:04:27 2005 +0000
    29.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile	Wed May 11 13:43:50 2005 +0000
    29.3 @@ -12,7 +12,7 @@ c-obj-y		:= i386.o
    29.4  c-obj-y		+= fixup.o
    29.5  c-obj-$(CONFIG_ACPI_PCI)	+= acpi.o
    29.6  c-obj-y			+= legacy.o common.o
    29.7 -c-xen-obj-$(CONFIG_PCI_DIRECT)+= direct.o
    29.8 +c-obj-$(CONFIG_PCI_DIRECT)+= direct.o
    29.9  c-xen-obj-y		+= irq.o
   29.10  # mmconfig has a 64bit special
   29.11  c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
    30.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h	Fri May 06 17:04:27 2005 +0000
    30.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h	Wed May 11 13:43:50 2005 +0000
    30.3 @@ -3,11 +3,7 @@
    30.4  
    30.5  static inline void smpboot_clear_io_apic_irqs(void)
    30.6  {
    30.7 -#if 1
    30.8 -	printk("smpboot_clear_io_apic_irqs\n");
    30.9 -#else
   30.10  	io_apic_irqs = 0;
   30.11 -#endif
   30.12  }
   30.13  
   30.14  static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
   30.15 @@ -43,16 +39,12 @@ static inline void smpboot_restore_warm_
   30.16  
   30.17  static inline void smpboot_setup_io_apic(void)
   30.18  {
   30.19 -#if 1
   30.20 -	printk("smpboot_setup_io_apic\n");
   30.21 -#else
   30.22  	/*
   30.23  	 * Here we can be sure that there is an IO-APIC in the system. Let's
   30.24  	 * go and set it up:
   30.25  	 */
   30.26  	if (!skip_ioapic_setup && nr_ioapics)
   30.27  		setup_IO_APIC();
   30.28 -#endif
   30.29  }
   30.30  
   30.31  
    31.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h	Fri May 06 17:04:27 2005 +0000
    31.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h	Wed May 11 13:43:50 2005 +0000
    31.3 @@ -3,11 +3,7 @@
    31.4  
    31.5  static inline void smpboot_clear_io_apic_irqs(void)
    31.6  {
    31.7 -#if 1
    31.8 -	printk("smpboot_clear_io_apic_irqs\n");
    31.9 -#else
   31.10  	io_apic_irqs = 0;
   31.11 -#endif
   31.12  }
   31.13  
   31.14  static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
   31.15 @@ -43,16 +39,12 @@ static inline void smpboot_restore_warm_
   31.16  
   31.17  static inline void smpboot_setup_io_apic(void)
   31.18  {
   31.19 -#if 1
   31.20 -	printk("smpboot_setup_io_apic\n");
   31.21 -#else
   31.22  	/*
   31.23  	 * Here we can be sure that there is an IO-APIC in the system. Let's
   31.24  	 * go and set it up:
   31.25  	 */
   31.26  	if (!skip_ioapic_setup && nr_ioapics)
   31.27  		setup_IO_APIC();
   31.28 -#endif
   31.29  }
   31.30  
   31.31  
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch	Wed May 11 13:43:50 2005 +0000
    32.3 @@ -0,0 +1,656 @@
    32.4 +
    32.5 +From: Zwane Mwaikambo <zwane@linuxpower.ca>
    32.6 +
    32.7 +Find attached the i386 cpu hotplug patch updated for Ingo's latest round of
    32.8 +goodies.  In order to avoid dumping cpu hotplug code into kernel/irq/* I
    32.9 +dropped the cpu_online check in do_IRQ() by modifying fixup_irqs().  The
   32.10 +difference being that on cpu offline, fixup_irqs() is called before we
   32.11 +clear the cpu from cpu_online_map and a long delay in order to ensure that
   32.12 +we never have any queued external interrupts on the APICs.  Due to my usual
   32.13 +test victims being in boxes a continent away this hasn't been tested, but
   32.14 +I'll cover bug reports (nudge, Nathan!  ;)
   32.15 +
   32.16 +1) Add CONFIG_HOTPLUG_CPU
   32.17 +2) disable local APIC timer on dead cpus.
   32.18 +3) Disable preempt around irq balancing to prevent CPUs going down.
   32.19 +4) Print irq stats for all possible cpus.
   32.20 +5) Debugging check for interrupts on offline cpus.
   32.21 +6) Hacky fixup_irqs() to redirect irqs when cpus go off/online.
   32.22 +7) play_dead() for offline cpus to spin inside.
   32.23 +8) Handle offline cpus set in flush_tlb_others().
   32.24 +9) Grab lock earlier in smp_call_function() to prevent CPUs going down.
   32.25 +10) Implement __cpu_disable() and __cpu_die().
   32.26 +11) Enable local interrupts in cpu_enable() after fixup_irqs()
   32.27 +12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus.
   32.28 +13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline.
   32.29 +
   32.30 +Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>
   32.31 +DESC
   32.32 +ppc64: fix hotplug cpu
   32.33 +EDESC
   32.34 +From: Zwane Mwaikambo <zwane@fsmlabs.com>
   32.35 +
   32.36 +I seem to have broken this when I moved the clearing of the dying cpu to 
   32.37 +arch specific code.
   32.38 +
   32.39 +Signed-off-by: Zwane Mwaikambo <zwane@fsmlabs.com>
   32.40 +Signed-off-by: Andrew Morton <akpm@osdl.org>
   32.41 +---
   32.42 +
   32.43 + 25-akpm/arch/i386/Kconfig               |    9 ++
   32.44 + 25-akpm/arch/i386/kernel/apic.c         |    3 
   32.45 + 25-akpm/arch/i386/kernel/io_apic.c      |    2 
   32.46 + 25-akpm/arch/i386/kernel/irq.c          |   66 +++++++++++++++++----
   32.47 + 25-akpm/arch/i386/kernel/msr.c          |    2 
   32.48 + 25-akpm/arch/i386/kernel/process.c      |   35 +++++++++++
   32.49 + 25-akpm/arch/i386/kernel/smp.c          |   25 +++++---
   32.50 + 25-akpm/arch/i386/kernel/smpboot.c      |   98 ++++++++++++++++++++++++++++++--
   32.51 + 25-akpm/arch/i386/kernel/traps.c        |    8 ++
   32.52 + 25-akpm/arch/ia64/kernel/smpboot.c      |    3 
   32.53 + 25-akpm/arch/ppc64/kernel/pSeries_smp.c |    5 +
   32.54 + 25-akpm/arch/s390/kernel/smp.c          |    4 -
   32.55 + 25-akpm/include/asm-i386/cpu.h          |    2 
   32.56 + 25-akpm/include/asm-i386/irq.h          |    4 +
   32.57 + 25-akpm/include/asm-i386/smp.h          |    3 
   32.58 + 25-akpm/kernel/cpu.c                    |   14 +---
   32.59 + arch/ppc64/kernel/smp.c                 |    0 
   32.60 + 17 files changed, 242 insertions(+), 41 deletions(-)
   32.61 +
   32.62 +diff -puN arch/i386/Kconfig~i386-cpu-hotplug-updated-for-mm arch/i386/Kconfig
   32.63 +--- 25/arch/i386/Kconfig~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
   32.64 ++++ 25-akpm/arch/i386/Kconfig	2005-02-23 02:20:06.000000000 -0800
   32.65 +@@ -1205,6 +1205,15 @@ config SCx200
   32.66 + 	  This support is also available as a module.  If compiled as a
   32.67 + 	  module, it will be called scx200.
   32.68 + 
   32.69 ++config HOTPLUG_CPU
   32.70 ++	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
   32.71 ++	depends on SMP && HOTPLUG && EXPERIMENTAL
   32.72 ++	---help---
   32.73 ++	  Say Y here to experiment with turning CPUs off and on.  CPUs
   32.74 ++	  can be controlled through /sys/devices/system/cpu.
   32.75 ++
   32.76 ++	  Say N.
   32.77 ++
   32.78 + source "drivers/pcmcia/Kconfig"
   32.79 + 
   32.80 + source "drivers/pci/hotplug/Kconfig"
   32.81 +diff -puN arch/i386/kernel/apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/apic.c
   32.82 +--- 25/arch/i386/kernel/apic.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
   32.83 ++++ 25-akpm/arch/i386/kernel/apic.c	2005-02-23 02:20:06.000000000 -0800
   32.84 +@@ -26,6 +26,7 @@
   32.85 + #include <linux/mc146818rtc.h>
   32.86 + #include <linux/kernel_stat.h>
   32.87 + #include <linux/sysdev.h>
   32.88 ++#include <linux/cpu.h>
   32.89 + 
   32.90 + #include <asm/atomic.h>
   32.91 + #include <asm/smp.h>
   32.92 +@@ -1048,7 +1049,7 @@ void __init setup_secondary_APIC_clock(v
   32.93 + 	setup_APIC_timer(calibration_result);
   32.94 + }
   32.95 + 
   32.96 +-void __init disable_APIC_timer(void)
   32.97 ++void __devinit disable_APIC_timer(void)
   32.98 + {
   32.99 + 	if (using_apic_timer) {
  32.100 + 		unsigned long v;
  32.101 +diff -puN arch/i386/kernel/io_apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/io_apic.c
  32.102 +--- 25/arch/i386/kernel/io_apic.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.103 ++++ 25-akpm/arch/i386/kernel/io_apic.c	2005-02-23 02:20:06.000000000 -0800
  32.104 +@@ -576,9 +576,11 @@ static int balanced_irq(void *unused)
  32.105 + 		try_to_freeze(PF_FREEZE);
  32.106 + 		if (time_after(jiffies,
  32.107 + 				prev_balance_time+balanced_irq_interval)) {
  32.108 ++			preempt_disable();
  32.109 + 			do_irq_balance();
  32.110 + 			prev_balance_time = jiffies;
  32.111 + 			time_remaining = balanced_irq_interval;
  32.112 ++			preempt_enable();
  32.113 + 		}
  32.114 + 	}
  32.115 + 	return 0;
  32.116 +diff -puN arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/irq.c
  32.117 +--- 25/arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.118 ++++ 25-akpm/arch/i386/kernel/irq.c	2005-02-23 02:20:06.000000000 -0800
  32.119 +@@ -15,6 +15,9 @@
  32.120 + #include <linux/seq_file.h>
  32.121 + #include <linux/interrupt.h>
  32.122 + #include <linux/kernel_stat.h>
  32.123 ++#include <linux/notifier.h>
  32.124 ++#include <linux/cpu.h>
  32.125 ++#include <linux/delay.h>
  32.126 + 
  32.127 + #ifndef CONFIG_X86_LOCAL_APIC
  32.128 + /*
  32.129 +@@ -209,9 +212,8 @@ int show_interrupts(struct seq_file *p, 
  32.130 + 
  32.131 + 	if (i == 0) {
  32.132 + 		seq_printf(p, "           ");
  32.133 +-		for (j=0; j<NR_CPUS; j++)
  32.134 +-			if (cpu_online(j))
  32.135 +-				seq_printf(p, "CPU%d       ",j);
  32.136 ++		for_each_cpu(j)
  32.137 ++			seq_printf(p, "CPU%d       ",j);
  32.138 + 		seq_putc(p, '\n');
  32.139 + 	}
  32.140 + 
  32.141 +@@ -224,9 +226,8 @@ int show_interrupts(struct seq_file *p, 
  32.142 + #ifndef CONFIG_SMP
  32.143 + 		seq_printf(p, "%10u ", kstat_irqs(i));
  32.144 + #else
  32.145 +-		for (j = 0; j < NR_CPUS; j++)
  32.146 +-			if (cpu_online(j))
  32.147 +-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
  32.148 ++		for_each_cpu(j)
  32.149 ++			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
  32.150 + #endif
  32.151 + 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
  32.152 + 		seq_printf(p, "  %s", action->name);
  32.153 +@@ -239,16 +240,13 @@ skip:
  32.154 + 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
  32.155 + 	} else if (i == NR_IRQS) {
  32.156 + 		seq_printf(p, "NMI: ");
  32.157 +-		for (j = 0; j < NR_CPUS; j++)
  32.158 +-			if (cpu_online(j))
  32.159 +-				seq_printf(p, "%10u ", nmi_count(j));
  32.160 ++		for_each_cpu(j)
  32.161 ++			seq_printf(p, "%10u ", nmi_count(j));
  32.162 + 		seq_putc(p, '\n');
  32.163 + #ifdef CONFIG_X86_LOCAL_APIC
  32.164 + 		seq_printf(p, "LOC: ");
  32.165 +-		for (j = 0; j < NR_CPUS; j++)
  32.166 +-			if (cpu_online(j))
  32.167 +-				seq_printf(p, "%10u ",
  32.168 +-					irq_stat[j].apic_timer_irqs);
  32.169 ++		for_each_cpu(j)
  32.170 ++			seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
  32.171 + 		seq_putc(p, '\n');
  32.172 + #endif
  32.173 + 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
  32.174 +@@ -258,3 +256,45 @@ skip:
  32.175 + 	}
  32.176 + 	return 0;
  32.177 + }
  32.178 ++
  32.179 ++#ifdef CONFIG_HOTPLUG_CPU
  32.180 ++#include <mach_apic.h>
  32.181 ++
  32.182 ++void fixup_irqs(cpumask_t map)
  32.183 ++{
  32.184 ++	unsigned int irq;
  32.185 ++	static int warned;
  32.186 ++
  32.187 ++	for (irq = 0; irq < NR_IRQS; irq++) {
  32.188 ++		cpumask_t mask;
  32.189 ++		if (irq == 2)
  32.190 ++			continue;
  32.191 ++
  32.192 ++		cpus_and(mask, irq_affinity[irq], map);
  32.193 ++		if (any_online_cpu(mask) == NR_CPUS) {
  32.194 ++			printk("Breaking affinity for irq %i\n", irq);
  32.195 ++			mask = map;
  32.196 ++		}
  32.197 ++		if (irq_desc[irq].handler->set_affinity)
  32.198 ++			irq_desc[irq].handler->set_affinity(irq, mask);
  32.199 ++		else if (irq_desc[irq].action && !(warned++))
  32.200 ++			printk("Cannot set affinity for irq %i\n", irq);
  32.201 ++	}
  32.202 ++
  32.203 ++#if 0
  32.204 ++	barrier();
  32.205 ++	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
  32.206 ++	   [note the nop - the interrupt-enable boundary on x86 is two
  32.207 ++	   instructions from sti] - to flush out pending hardirqs and
  32.208 ++	   IPIs. After this point nothing is supposed to reach this CPU." */
  32.209 ++	__asm__ __volatile__("sti; nop; cli");
  32.210 ++	barrier();
  32.211 ++#else
  32.212 ++	/* That doesn't seem sufficient.  Give it 1ms. */
  32.213 ++	local_irq_enable();
  32.214 ++	mdelay(1);
  32.215 ++	local_irq_disable();
  32.216 ++#endif
  32.217 ++}
  32.218 ++#endif
  32.219 ++
  32.220 +diff -puN arch/i386/kernel/msr.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/msr.c
  32.221 +--- 25/arch/i386/kernel/msr.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.222 ++++ 25-akpm/arch/i386/kernel/msr.c	2005-02-23 02:20:06.000000000 -0800
  32.223 +@@ -260,7 +260,7 @@ static struct file_operations msr_fops =
  32.224 + 	.open = msr_open,
  32.225 + };
  32.226 + 
  32.227 +-static int msr_class_simple_device_add(int i)
  32.228 ++static int __devinit msr_class_simple_device_add(int i)
  32.229 + {
  32.230 + 	int err = 0;
  32.231 + 	struct class_device *class_err;
  32.232 +diff -puN arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/process.c
  32.233 +--- 25/arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.234 ++++ 25-akpm/arch/i386/kernel/process.c	2005-02-23 02:20:06.000000000 -0800
  32.235 +@@ -13,6 +13,7 @@
  32.236 + 
  32.237 + #include <stdarg.h>
  32.238 + 
  32.239 ++#include <linux/cpu.h>
  32.240 + #include <linux/errno.h>
  32.241 + #include <linux/sched.h>
  32.242 + #include <linux/fs.h>
  32.243 +@@ -55,6 +56,9 @@
  32.244 + #include <linux/irq.h>
  32.245 + #include <linux/err.h>
  32.246 + 
  32.247 ++#include <asm/tlbflush.h>
  32.248 ++#include <asm/cpu.h>
  32.249 ++
  32.250 + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
  32.251 + 
  32.252 + int hlt_counter;
  32.253 +@@ -139,6 +143,34 @@ static void poll_idle (void)
  32.254 + 	}
  32.255 + }
  32.256 + 
  32.257 ++#ifdef CONFIG_HOTPLUG_CPU
  32.258 ++#include <asm/nmi.h>
  32.259 ++/* We don't actually take CPU down, just spin without interrupts. */
  32.260 ++static inline void play_dead(void)
  32.261 ++{
  32.262 ++	/* Ack it */
  32.263 ++	__get_cpu_var(cpu_state) = CPU_DEAD;
  32.264 ++
  32.265 ++	/* We shouldn't have to disable interrupts while dead, but
  32.266 ++	 * some interrupts just don't seem to go away, and this makes
  32.267 ++	 * it "work" for testing purposes. */
  32.268 ++	/* Death loop */
  32.269 ++	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
  32.270 ++		cpu_relax();
  32.271 ++
  32.272 ++	local_irq_disable();
  32.273 ++	__flush_tlb_all();
  32.274 ++	cpu_set(smp_processor_id(), cpu_online_map);
  32.275 ++	enable_APIC_timer();
  32.276 ++	local_irq_enable();
  32.277 ++}
  32.278 ++#else
  32.279 ++static inline void play_dead(void)
  32.280 ++{
  32.281 ++	BUG();
  32.282 ++}
  32.283 ++#endif /* CONFIG_HOTPLUG_CPU */
  32.284 ++
  32.285 + /*
  32.286 +  * The idle thread. There's no useful work to be
  32.287 +  * done, so just try to conserve power and have a
  32.288 +@@ -162,6 +194,9 @@ void cpu_idle (void)
  32.289 + 			if (!idle)
  32.290 + 				idle = default_idle;
  32.291 + 
  32.292 ++			if (cpu_is_offline(cpu))
  32.293 ++				play_dead();
  32.294 ++
  32.295 + 			irq_stat[cpu].idle_timestamp = jiffies;
  32.296 + 			idle();
  32.297 + 		}
  32.298 +diff -puN arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/smpboot.c
  32.299 +--- 25/arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.300 ++++ 25-akpm/arch/i386/kernel/smpboot.c	2005-02-23 02:20:06.000000000 -0800
  32.301 +@@ -44,6 +44,9 @@
  32.302 + #include <linux/smp_lock.h>
  32.303 + #include <linux/irq.h>
  32.304 + #include <linux/bootmem.h>
  32.305 ++#include <linux/notifier.h>
  32.306 ++#include <linux/cpu.h>
  32.307 ++#include <linux/percpu.h>
  32.308 + 
  32.309 + #include <linux/delay.h>
  32.310 + #include <linux/mc146818rtc.h>
  32.311 +@@ -89,6 +92,9 @@ extern unsigned char trampoline_end  [];
  32.312 + static unsigned char *trampoline_base;
  32.313 + static int trampoline_exec;
  32.314 + 
  32.315 ++/* State of each CPU. */
  32.316 ++DEFINE_PER_CPU(int, cpu_state) = { 0 };
  32.317 ++
  32.318 + /*
  32.319 +  * Currently trivial. Write the real->protected mode
  32.320 +  * bootstrap into the page concerned. The caller
  32.321 +@@ -1095,6 +1101,9 @@ static void __init smp_boot_cpus(unsigne
  32.322 +    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
  32.323 + void __init smp_prepare_cpus(unsigned int max_cpus)
  32.324 + {
  32.325 ++	smp_commenced_mask = cpumask_of_cpu(0);
  32.326 ++	cpu_callin_map = cpumask_of_cpu(0);
  32.327 ++	mb();
  32.328 + 	smp_boot_cpus(max_cpus);
  32.329 + }
  32.330 + 
  32.331 +@@ -1104,20 +1113,99 @@ void __devinit smp_prepare_boot_cpu(void
  32.332 + 	cpu_set(smp_processor_id(), cpu_callout_map);
  32.333 + }
  32.334 + 
  32.335 +-int __devinit __cpu_up(unsigned int cpu)
  32.336 ++#ifdef CONFIG_HOTPLUG_CPU
  32.337 ++
  32.338 ++/* must be called with the cpucontrol mutex held */
  32.339 ++static int __devinit cpu_enable(unsigned int cpu)
  32.340 + {
  32.341 +-	/* This only works at boot for x86.  See "rewrite" above. */
  32.342 +-	if (cpu_isset(cpu, smp_commenced_mask)) {
  32.343 +-		local_irq_enable();
  32.344 +-		return -ENOSYS;
  32.345 ++	/* get the target out of its holding state */
  32.346 ++	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
  32.347 ++	wmb();
  32.348 ++
  32.349 ++	/* wait for the processor to ack it. timeout? */
  32.350 ++	while (!cpu_online(cpu))
  32.351 ++		cpu_relax();
  32.352 ++
  32.353 ++	fixup_irqs(cpu_online_map);
  32.354 ++	/* counter the disable in fixup_irqs() */
  32.355 ++	local_irq_enable();
  32.356 ++	return 0;
  32.357 ++}
  32.358 ++
  32.359 ++int __cpu_disable(void)
  32.360 ++{
  32.361 ++	cpumask_t map = cpu_online_map;
  32.362 ++	int cpu = smp_processor_id();
  32.363 ++
  32.364 ++	/*
  32.365 ++	 * Perhaps use cpufreq to drop frequency, but that could go
  32.366 ++	 * into generic code.
  32.367 ++ 	 *
  32.368 ++	 * We won't take down the boot processor on i386 due to some
  32.369 ++	 * interrupts only being able to be serviced by the BSP.
  32.370 ++	 * Especially so if we're not using an IOAPIC	-zwane
  32.371 ++	 */
  32.372 ++	if (cpu == 0)
  32.373 ++		return -EBUSY;
  32.374 ++
  32.375 ++	/* We enable the timer again on the exit path of the death loop */
  32.376 ++	disable_APIC_timer();
  32.377 ++	/* Allow any queued timer interrupts to get serviced */
  32.378 ++	local_irq_enable();
  32.379 ++	mdelay(1);
  32.380 ++	local_irq_disable();
  32.381 ++
  32.382 ++	cpu_clear(cpu, map);
  32.383 ++	fixup_irqs(map);
  32.384 ++	/* It's now safe to remove this processor from the online map */
  32.385 ++	cpu_clear(cpu, cpu_online_map);
  32.386 ++	return 0;
  32.387 ++}
  32.388 ++
  32.389 ++void __cpu_die(unsigned int cpu)
  32.390 ++{
  32.391 ++	/* We don't do anything here: idle task is faking death itself. */
  32.392 ++	unsigned int i;
  32.393 ++
  32.394 ++	for (i = 0; i < 10; i++) {
  32.395 ++		/* They ack this in play_dead by setting CPU_DEAD */
  32.396 ++		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
  32.397 ++			return;
  32.398 ++		current->state = TASK_UNINTERRUPTIBLE;
  32.399 ++		schedule_timeout(HZ/10);
  32.400 + 	}
  32.401 ++ 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
  32.402 ++}
  32.403 ++#else /* ... !CONFIG_HOTPLUG_CPU */
  32.404 ++int __cpu_disable(void)
  32.405 ++{
  32.406 ++	return -ENOSYS;
  32.407 ++}
  32.408 + 
  32.409 ++void __cpu_die(unsigned int cpu)
  32.410 ++{
  32.411 ++	/* We said "no" in __cpu_disable */
  32.412 ++	BUG();
  32.413 ++}
  32.414 ++#endif /* CONFIG_HOTPLUG_CPU */
  32.415 ++
  32.416 ++int __devinit __cpu_up(unsigned int cpu)
  32.417 ++{
  32.418 + 	/* In case one didn't come up */
  32.419 + 	if (!cpu_isset(cpu, cpu_callin_map)) {
  32.420 ++		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
  32.421 + 		local_irq_enable();
  32.422 + 		return -EIO;
  32.423 + 	}
  32.424 + 
  32.425 ++#ifdef CONFIG_HOTPLUG_CPU
  32.426 ++	/* Already up, and in cpu_quiescent now? */
  32.427 ++	if (cpu_isset(cpu, smp_commenced_mask)) {
  32.428 ++		cpu_enable(cpu);
  32.429 ++		return 0;
  32.430 ++	}
  32.431 ++#endif
  32.432 ++
  32.433 + 	local_irq_enable();
  32.434 + 	/* Unleash the CPU! */
  32.435 + 	cpu_set(cpu, smp_commenced_mask);
  32.436 +diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/smp.c
  32.437 +--- 25/arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.438 ++++ 25-akpm/arch/i386/kernel/smp.c	2005-02-23 02:20:06.000000000 -0800
  32.439 +@@ -19,6 +19,7 @@
  32.440 + #include <linux/mc146818rtc.h>
  32.441 + #include <linux/cache.h>
  32.442 + #include <linux/interrupt.h>
  32.443 ++#include <linux/cpu.h>
  32.444 + 
  32.445 + #include <asm/mtrr.h>
  32.446 + #include <asm/tlbflush.h>
  32.447 +@@ -163,7 +164,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
  32.448 + 	unsigned long flags;
  32.449 + 
  32.450 + 	local_irq_save(flags);
  32.451 +-		
  32.452 ++	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
  32.453 + 	/*
  32.454 + 	 * Wait for idle.
  32.455 + 	 */
  32.456 +@@ -345,21 +346,21 @@ out:
  32.457 + static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
  32.458 + 						unsigned long va)
  32.459 + {
  32.460 +-	cpumask_t tmp;
  32.461 + 	/*
  32.462 + 	 * A couple of (to be removed) sanity checks:
  32.463 + 	 *
  32.464 +-	 * - we do not send IPIs to not-yet booted CPUs.
  32.465 + 	 * - current CPU must not be in mask
  32.466 + 	 * - mask must exist :)
  32.467 + 	 */
  32.468 + 	BUG_ON(cpus_empty(cpumask));
  32.469 +-
  32.470 +-	cpus_and(tmp, cpumask, cpu_online_map);
  32.471 +-	BUG_ON(!cpus_equal(cpumask, tmp));
  32.472 + 	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
  32.473 + 	BUG_ON(!mm);
  32.474 + 
  32.475 ++	/* If a CPU which we ran on has gone down, OK. */
  32.476 ++	cpus_and(cpumask, cpumask, cpu_online_map);
  32.477 ++	if (cpus_empty(cpumask))
  32.478 ++		return;
  32.479 ++
  32.480 + 	/*
  32.481 + 	 * i'm not happy about this global shared spinlock in the
  32.482 + 	 * MM hot path, but we'll see how contended it is.
  32.483 +@@ -484,6 +485,7 @@ void smp_send_nmi_allbutself(void)
  32.484 +  */
  32.485 + void smp_send_reschedule(int cpu)
  32.486 + {
  32.487 ++	WARN_ON(cpu_is_offline(cpu));
  32.488 + 	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
  32.489 + }
  32.490 + 
  32.491 +@@ -524,10 +526,16 @@ int smp_call_function (void (*func) (voi
  32.492 +  */
  32.493 + {
  32.494 + 	struct call_data_struct data;
  32.495 +-	int cpus = num_online_cpus()-1;
  32.496 ++	int cpus;
  32.497 + 
  32.498 +-	if (!cpus)
  32.499 ++	/* Holding any lock stops cpus from going down. */
  32.500 ++	spin_lock(&call_lock);
  32.501 ++	cpus = num_online_cpus()-1;
  32.502 ++
  32.503 ++	if (!cpus) {
  32.504 ++		spin_unlock(&call_lock);
  32.505 + 		return 0;
  32.506 ++	}
  32.507 + 
  32.508 + 	/* Can deadlock when called with interrupts disabled */
  32.509 + 	WARN_ON(irqs_disabled());
  32.510 +@@ -539,7 +547,6 @@ int smp_call_function (void (*func) (voi
  32.511 + 	if (wait)
  32.512 + 		atomic_set(&data.finished, 0);
  32.513 + 
  32.514 +-	spin_lock(&call_lock);
  32.515 + 	call_data = &data;
  32.516 + 	mb();
  32.517 + 	
  32.518 +diff -puN arch/i386/kernel/traps.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/traps.c
  32.519 +--- 25/arch/i386/kernel/traps.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.520 ++++ 25-akpm/arch/i386/kernel/traps.c	2005-02-23 02:20:06.000000000 -0800
  32.521 +@@ -669,6 +669,14 @@ fastcall void do_nmi(struct pt_regs * re
  32.522 + 	nmi_enter();
  32.523 + 
  32.524 + 	cpu = smp_processor_id();
  32.525 ++
  32.526 ++#ifdef CONFIG_HOTPLUG_CPU
  32.527 ++	if (!cpu_online(cpu)) {
  32.528 ++		nmi_exit();
  32.529 ++		return;
  32.530 ++	}
  32.531 ++#endif
  32.532 ++
  32.533 + 	++nmi_count(cpu);
  32.534 + 
  32.535 + 	if (!nmi_callback(regs, cpu))
  32.536 +diff -puN arch/ia64/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/ia64/kernel/smpboot.c
  32.537 +--- 25/arch/ia64/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.538 ++++ 25-akpm/arch/ia64/kernel/smpboot.c	2005-02-23 02:20:06.000000000 -0800
  32.539 +@@ -590,9 +590,10 @@ int __cpu_disable(void)
  32.540 + 	if (cpu == 0)
  32.541 + 		return -EBUSY;
  32.542 + 
  32.543 ++	cpu_clear(cpu, cpu_online_map);
  32.544 + 	fixup_irqs();
  32.545 + 	local_flush_tlb_all();
  32.546 +-	printk ("Disabled cpu %u\n", smp_processor_id());
  32.547 ++	printk("Disabled cpu %u\n", cpu);
  32.548 + 	return 0;
  32.549 + }
  32.550 + 
  32.551 +diff -puN arch/ppc64/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/ppc64/kernel/smp.c
  32.552 +diff -puN arch/s390/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/s390/kernel/smp.c
  32.553 +--- 25/arch/s390/kernel/smp.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.554 ++++ 25-akpm/arch/s390/kernel/smp.c	2005-02-23 02:20:06.000000000 -0800
  32.555 +@@ -679,12 +679,14 @@ __cpu_disable(void)
  32.556 + {
  32.557 + 	unsigned long flags;
  32.558 + 	ec_creg_mask_parms cr_parms;
  32.559 ++	int cpu = smp_processor_id();
  32.560 + 
  32.561 + 	spin_lock_irqsave(&smp_reserve_lock, flags);
  32.562 +-	if (smp_cpu_reserved[smp_processor_id()] != 0) {
  32.563 ++	if (smp_cpu_reserved[cpu] != 0) {
  32.564 + 		spin_unlock_irqrestore(&smp_reserve_lock, flags);
  32.565 + 		return -EBUSY;
  32.566 + 	}
  32.567 ++	cpu_clear(cpu, cpu_online_map);
  32.568 + 
  32.569 + #ifdef CONFIG_PFAULT
  32.570 + 	/* Disable pfault pseudo page faults on this cpu. */
  32.571 +diff -puN include/asm-i386/cpu.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/cpu.h
  32.572 +--- 25/include/asm-i386/cpu.h~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.573 ++++ 25-akpm/include/asm-i386/cpu.h	2005-02-23 02:20:06.000000000 -0800
  32.574 +@@ -5,6 +5,7 @@
  32.575 + #include <linux/cpu.h>
  32.576 + #include <linux/topology.h>
  32.577 + #include <linux/nodemask.h>
  32.578 ++#include <linux/percpu.h>
  32.579 + 
  32.580 + #include <asm/node.h>
  32.581 + 
  32.582 +@@ -17,4 +18,5 @@ extern int arch_register_cpu(int num);
  32.583 + extern void arch_unregister_cpu(int);
  32.584 + #endif
  32.585 + 
  32.586 ++DECLARE_PER_CPU(int, cpu_state);
  32.587 + #endif /* _ASM_I386_CPU_H_ */
  32.588 +diff -puN include/asm-i386/irq.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/irq.h
  32.589 +--- 25/include/asm-i386/irq.h~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.590 ++++ 25-akpm/include/asm-i386/irq.h	2005-02-23 02:20:06.000000000 -0800
  32.591 +@@ -38,4 +38,8 @@ extern void release_vm86_irqs(struct tas
  32.592 + extern int irqbalance_disable(char *str);
  32.593 + #endif
  32.594 + 
  32.595 ++#ifdef CONFIG_HOTPLUG_CPU
  32.596 ++extern void fixup_irqs(cpumask_t map);
  32.597 ++#endif
  32.598 ++
  32.599 + #endif /* _ASM_IRQ_H */
  32.600 +diff -puN include/asm-i386/smp.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/smp.h
  32.601 +--- 25/include/asm-i386/smp.h~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.602 ++++ 25-akpm/include/asm-i386/smp.h	2005-02-23 02:20:06.000000000 -0800
  32.603 +@@ -85,6 +85,9 @@ static __inline int logical_smp_processo
  32.604 + }
  32.605 + 
  32.606 + #endif
  32.607 ++
  32.608 ++extern int __cpu_disable(void);
  32.609 ++extern void __cpu_die(unsigned int cpu);
  32.610 + #endif /* !__ASSEMBLY__ */
  32.611 + 
  32.612 + #define NO_PROC_ID		0xFF		/* No processor magic marker */
  32.613 +diff -puN kernel/cpu.c~i386-cpu-hotplug-updated-for-mm kernel/cpu.c
  32.614 +--- 25/kernel/cpu.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:06.000000000 -0800
  32.615 ++++ 25-akpm/kernel/cpu.c	2005-02-23 02:20:06.000000000 -0800
  32.616 +@@ -63,19 +63,15 @@ static int take_cpu_down(void *unused)
  32.617 + {
  32.618 + 	int err;
  32.619 + 
  32.620 +-	/* Take offline: makes arch_cpu_down somewhat easier. */
  32.621 +-	cpu_clear(smp_processor_id(), cpu_online_map);
  32.622 +-
  32.623 + 	/* Ensure this CPU doesn't handle any more interrupts. */
  32.624 + 	err = __cpu_disable();
  32.625 + 	if (err < 0)
  32.626 +-		cpu_set(smp_processor_id(), cpu_online_map);
  32.627 +-	else
  32.628 +-		/* Force idle task to run as soon as we yield: it should
  32.629 +-		   immediately notice cpu is offline and die quickly. */
  32.630 +-		sched_idle_next();
  32.631 ++		return err;
  32.632 + 
  32.633 +-	return err;
  32.634 ++	/* Force idle task to run as soon as we yield: it should
  32.635 ++	   immediately notice cpu is offline and die quickly. */
  32.636 ++	sched_idle_next();
  32.637 ++	return 0;
  32.638 + }
  32.639 + 
  32.640 + int cpu_down(unsigned int cpu)
  32.641 +diff -puN arch/ppc64/kernel/pSeries_smp.c~i386-cpu-hotplug-updated-for-mm arch/ppc64/kernel/pSeries_smp.c
  32.642 +--- 25/arch/ppc64/kernel/pSeries_smp.c~i386-cpu-hotplug-updated-for-mm	2005-02-23 02:20:08.000000000 -0800
  32.643 ++++ 25-akpm/arch/ppc64/kernel/pSeries_smp.c	2005-02-23 02:20:08.000000000 -0800
  32.644 +@@ -86,10 +86,13 @@ static int query_cpu_stopped(unsigned in
  32.645 + 
  32.646 + int pSeries_cpu_disable(void)
  32.647 + {
  32.648 ++	int cpu = smp_processor_id();
  32.649 ++
  32.650 ++	cpu_clear(cpu, cpu_online_map);
  32.651 + 	systemcfg->processorCount--;
  32.652 + 
  32.653 + 	/*fix boot_cpuid here*/
  32.654 +-	if (smp_processor_id() == boot_cpuid)
  32.655 ++	if (cpu == boot_cpuid)
  32.656 + 		boot_cpuid = any_online_cpu(cpu_online_map);
  32.657 + 
  32.658 + 	/* FIXME: abstract this to not be platform specific later on */
  32.659 +_
    33.1 --- a/tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c	Fri May 06 17:04:27 2005 +0000
    33.2 +++ b/tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c	Wed May 11 13:43:50 2005 +0000
    33.3 @@ -338,7 +338,9 @@ regsets_store_inferior_registers ()
    33.4  	    }
    33.5  	  else
    33.6  	    {
    33.7 +#ifdef DEBUG
    33.8  	      perror ("Warning: ptrace(regsets_store_inferior_registers)");
    33.9 +#endif
   33.10  	    }
   33.11  	}
   33.12        regset ++;
   33.13 @@ -364,7 +366,6 @@ linux_fetch_registers (int regno)
   33.14  void
   33.15  linux_store_registers (int regno)
   33.16  {
   33.17 -    printf("store %d\n", regno);
   33.18    if (use_regsets_p)
   33.19      {
   33.20        if (regsets_store_inferior_registers () == 0)
    34.1 --- a/tools/libxc/Makefile	Fri May 06 17:04:27 2005 +0000
    34.2 +++ b/tools/libxc/Makefile	Wed May 11 13:43:50 2005 +0000
    34.3 @@ -16,6 +16,7 @@ vpath %c       $(XEN_LIBXUTIL)
    34.4  INCLUDES += -I $(XEN_LIBXUTIL)
    34.5  
    34.6  SRCS     :=
    34.7 +SRCS     += xc_sedf.c
    34.8  SRCS     += xc_bvtsched.c
    34.9  SRCS     += xc_core.c
   34.10  SRCS     += xc_domain.c
    35.1 --- a/tools/libxc/xc.h	Fri May 06 17:04:27 2005 +0000
    35.2 +++ b/tools/libxc/xc.h	Wed May 11 13:43:50 2005 +0000
    35.3 @@ -296,6 +296,14 @@ int xc_bvtsched_domain_get(int xc_handle
    35.4                             long long *warpl,
    35.5                             long long *warpu);
    35.6  
    35.7 +int xc_sedf_domain_set(int xc_handle,
    35.8 +                          u32 domid,
    35.9 +                          u64 period, u64 slice, u64 latency, u16 extratime, u16 weight);
   35.10 +
   35.11 +int xc_sedf_domain_get(int xc_handle,
   35.12 +                          u32 domid,
   35.13 +                          u64* period, u64 *slice, u64 *latency, u16 *extratime, u16* weight);
   35.14 +
   35.15  typedef evtchn_status_t xc_evtchn_status_t;
   35.16  
   35.17  /*
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/tools/libxc/xc_sedf.c	Wed May 11 13:43:50 2005 +0000
    36.3 @@ -0,0 +1,51 @@
    36.4 +/******************************************************************************
    36.5 + * xc_sedf.c
    36.6 + * 
    36.7 + * API for manipulating parameters of the Simple EDF scheduler.
    36.8 + * 
    36.9 + * changes by Stephan Diestelhorst
   36.10 + * based on code
   36.11 + * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge.
   36.12 + */
   36.13 +
   36.14 +#include "xc_private.h"
   36.15 +
   36.16 +int xc_sedf_domain_set(int xc_handle,
    36.17 +                          u32 domid, u64 period, u64 slice, u64 latency, u16 extratime, u16 weight)
   36.18 +{
   36.19 +    dom0_op_t op;
   36.20 +    struct sedf_adjdom *p = &op.u.adjustdom.u.sedf;
   36.21 +
   36.22 +    op.cmd = DOM0_ADJUSTDOM;
   36.23 +    op.u.adjustdom.domain  = (domid_t)domid;
   36.24 +    op.u.adjustdom.sched_id = SCHED_SEDF;
   36.25 +    op.u.adjustdom.direction = SCHED_INFO_PUT;
   36.26 +
   36.27 +    p->period    = period;
   36.28 +    p->slice     = slice;
   36.29 +    p->latency   = latency;
   36.30 +    p->extratime = extratime;
   36.31 +    p->weight    = weight;
   36.32 +    return do_dom0_op(xc_handle, &op);
   36.33 +}
   36.34 +
    36.35 +int xc_sedf_domain_get(int xc_handle, u32 domid, u64 *period, u64 *slice, u64 *latency, u16 *extratime, u16 *weight)
   36.36 +{
   36.37 +    dom0_op_t op;
   36.38 +    int ret;
   36.39 +    struct sedf_adjdom *p = &op.u.adjustdom.u.sedf;
   36.40 +
   36.41 +    op.cmd = DOM0_ADJUSTDOM;    
   36.42 +    op.u.adjustdom.domain = (domid_t)domid;
   36.43 +    op.u.adjustdom.sched_id = SCHED_SEDF;
   36.44 +    op.u.adjustdom.direction = SCHED_INFO_GET;
   36.45 +
   36.46 +    ret = do_dom0_op(xc_handle, &op);
   36.47 +
   36.48 +    *period    = p->period;
   36.49 +    *slice     = p->slice;
   36.50 +    *latency   = p->latency;
   36.51 +    *extratime = p->extratime;
   36.52 +    *weight    = p->weight;
   36.53 +    return ret;
   36.54 +}
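
The two wrappers above are symmetric: both issue a DOM0_ADJUSTDOM dom0 op, differing only in SCHED_INFO_PUT versus SCHED_INFO_GET. A minimal caller sketch follows; it assumes a built libxc with the usual xc_interface_open()/xc_interface_close() handle calls from xc.h, and the nanosecond interpretation of period/slice/latency follows the SEDF mini-HOWTO elsewhere in this tree, so the literal values are illustrative only.

    #include <stdio.h>
    #include "xc.h"

    int main(void)
    {
        int xc_handle = xc_interface_open();
        u64 period, slice, latency;
        u16 extratime, weight;

        if ( xc_handle < 0 )
            return 1;
        /* Give domain 1 a 5ms slice of every 20ms period, extratime on. */
        if ( xc_sedf_domain_set(xc_handle, 1, 20000000ULL, 5000000ULL,
                                0ULL, 1, 0) != 0 )
            fprintf(stderr, "xc_sedf_domain_set failed\n");
        /* Read the parameters back; on success the out-params are filled. */
        if ( xc_sedf_domain_get(xc_handle, 1, &period, &slice, &latency,
                                &extratime, &weight) == 0 )
            printf("dom1: period=%llu slice=%llu extratime=%u\n",
                   (unsigned long long)period, (unsigned long long)slice,
                   (unsigned int)extratime);
        xc_interface_close(xc_handle);
        return 0;
    }
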
    37.1 --- a/tools/libxutil/sxpr_parser.h	Fri May 06 17:04:27 2005 +0000
    37.2 +++ b/tools/libxutil/sxpr_parser.h	Wed May 11 13:43:50 2005 +0000
    37.3 @@ -28,7 +28,7 @@
    37.4  /** Size of a parser input buffer.
    37.5   * Tokens read must fit into this size (including trailing null).
    37.6   */
    37.7 -#define PARSER_BUF_SIZE 1024
    37.8 +#define PARSER_BUF_SIZE 4096
    37.9  
   37.10  struct Parser;
   37.11  typedef int ParserStateFn(struct Parser *, char c);
    38.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Fri May 06 17:04:27 2005 +0000
    38.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Wed May 11 13:43:50 2005 +0000
    38.3 @@ -827,6 +827,52 @@ static PyObject *pyxc_physinfo(PyObject 
    38.4                           "cpu_khz",     info.cpu_khz);
    38.5  }
    38.6  
    38.7 +static PyObject *pyxc_sedf_domain_set(PyObject *self,
    38.8 +                                         PyObject *args,
    38.9 +                                         PyObject *kwds)
   38.10 +{
   38.11 +    XcObject *xc = (XcObject *)self;
   38.12 +    u32 domid;
   38.13 +    u64 period, slice, latency;
   38.14 +    u16 extratime, weight;
   38.15 +    static char *kwd_list[] = { "dom", "period", "slice", "latency", "extratime", "weight",NULL };
   38.16 +    
   38.17 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLLhh", kwd_list, &domid,
   38.18 +                                     &period, &slice, &latency, &extratime, &weight) )
   38.19 +        return NULL;
    38.20 +    if ( xc_sedf_domain_set(xc->xc_handle, domid, period, slice, latency, extratime, weight) != 0 )
   38.21 +        return PyErr_SetFromErrno(xc_error);
   38.22 +
   38.23 +    Py_INCREF(zero);
   38.24 +    return zero;
   38.25 +}
   38.26 +
   38.27 +static PyObject *pyxc_sedf_domain_get(PyObject *self,
   38.28 +                                         PyObject *args,
   38.29 +                                         PyObject *kwds)
   38.30 +{
   38.31 +    XcObject *xc = (XcObject *)self;
   38.32 +    u32 domid;
    38.33 +    u64 period, slice, latency;
   38.34 +    u16 weight, extratime;
   38.35 +    
   38.36 +    static char *kwd_list[] = { "dom", NULL };
   38.37 +
   38.38 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &domid) )
   38.39 +        return NULL;
   38.40 +    
    38.41 +    if ( xc_sedf_domain_get(xc->xc_handle, domid, &period,
    38.42 +                            &slice, &latency, &extratime, &weight) )
   38.43 +        return PyErr_SetFromErrno(xc_error);
   38.44 +
    38.45 +    return Py_BuildValue("{s:i,s:L,s:L,s:L,s:i,s:i}",
    38.46 +                         "domain",    domid,
    38.47 +                         "period",    period,
    38.48 +                         "slice",     slice,
    38.49 +                         "latency",   latency,
    38.50 +                         "extratime", extratime, "weight", weight);
    38.51 +}
   38.52 +
   38.53  static PyObject *pyxc_shadow_control(PyObject *self,
   38.54                                       PyObject *args,
   38.55                                       PyObject *kwds)
   38.56 @@ -1027,6 +1073,30 @@ static PyMethodDef pyxc_methods[] = {
   38.57        " warpu  [long]: Unwarp requirement.\n"
   38.58        " warpl  [long]: Warp limit,\n"
   38.59      },
   38.60 +    
   38.61 +    { "sedf_domain_set",
   38.62 +      (PyCFunction)pyxc_sedf_domain_set,
   38.63 +      METH_KEYWORDS, "\n"
   38.64 +      "Set the scheduling parameters for a domain when running with Atropos.\n"
   38.65 +      " dom       [int]:  domain to set\n"
   38.66 +      " period    [long]: domain's scheduling period\n"
   38.67 +      " slice     [long]: domain's slice per period\n"
   38.68 +      " latency   [long]: domain's wakeup latency hint\n"
   38.69 +      " extratime [int]:  domain aware of extratime?\n"
   38.70 +      "Returns: [int] 0 on success; -1 on error.\n" },
   38.71 +
   38.72 +    { "sedf_domain_get",
   38.73 +      (PyCFunction)pyxc_sedf_domain_get,
   38.74 +      METH_KEYWORDS, "\n"
   38.75 +      "Get the current scheduling parameters for a domain when running with\n"
   38.76 +      "the Atropos scheduler."
   38.77 +      " dom       [int]: domain to query\n"
   38.78 +      "Returns:   [dict]\n"
   38.79 +      " domain    [int]: domain ID\n"
   38.80 +      " period    [long]: scheduler period\n"
   38.81 +      " slice     [long]: CPU reservation per period\n"
   38.82 +      " latency   [long]: domain's wakeup latency hint\n"
   38.83 +      " extratime [int]:  domain aware of extratime?\n"},
   38.84  
   38.85      { "evtchn_alloc_unbound", 
   38.86        (PyCFunction)pyxc_evtchn_alloc_unbound,
    39.1 --- a/tools/python/xen/xend/XendClient.py	Fri May 06 17:04:27 2005 +0000
    39.2 +++ b/tools/python/xen/xend/XendClient.py	Wed May 11 13:43:50 2005 +0000
    39.3 @@ -250,6 +250,15 @@ class Xend:
    39.4                                'warpl'    : warpl,
    39.5                                'warpu'    : warpu })
    39.6  
    39.7 +    def xend_domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
    39.8 +        return self.xendPost(self.domainurl(id),
    39.9 +                             {'op'        : 'cpu_sedf_set',
   39.10 +                              'period'    : period,
   39.11 +                              'slice'     : slice,
    39.12 +                              'latency'   : latency,
    39.13 +                              'extratime' : extratime,
    39.14 +                              'weight'    : weight })
   39.15 +
   39.16      def xend_domain_maxmem_set(self, id, memory):
   39.17          return self.xendPost(self.domainurl(id),
   39.18                               { 'op'      : 'maxmem_set',
    40.1 --- a/tools/python/xen/xend/XendDomain.py	Fri May 06 17:04:27 2005 +0000
    40.2 +++ b/tools/python/xen/xend/XendDomain.py	Wed May 11 13:43:50 2005 +0000
    40.3 @@ -707,6 +707,24 @@ class XendDomain:
    40.4          except Exception, ex:
    40.5              raise XendError(str(ex))
    40.6      
    40.7 +    
    40.8 +    def domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
    40.9 +        """Set Simple EDF scheduler parameters for a domain.
   40.10 +        """
    40.11 +        dominfo = self.domain_lookup(id)
   40.12 +        try:
   40.13 +            return xc.sedf_domain_set(dominfo.dom, period, slice, latency, extratime, weight)
   40.14 +        except Exception, ex:
   40.15 +            raise XendError(str(ex))
   40.16 +
   40.17 +    def domain_cpu_sedf_get(self, id):
   40.18 +        """Get Atropos scheduler parameters for a domain.
   40.19 +        """
   40.20 +        dominfo = self.domain_lookup(id)
   40.21 +        try:
   40.22 +            return xc.sedf_domain_get(dominfo.dom)
   40.23 +        except Exception, ex:
   40.24 +            raise XendError(str(ex))
   40.25      def domain_device_create(self, id, devconfig):
   40.26          """Create a new device for a domain.
   40.27  
    41.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Fri May 06 17:04:27 2005 +0000
    41.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Wed May 11 13:43:50 2005 +0000
    41.3 @@ -103,6 +103,18 @@ class SrvDomain(SrvDir):
    41.4          val = fn(req.args, {'dom': self.dom.id})
    41.5          return val
    41.6      
    41.7 +    
    41.8 +    def op_cpu_sedf_set(self, op, req):
    41.9 +        fn = FormFn(self.xd.domain_cpu_sedf_set,
   41.10 +                    [['dom', 'str'],
   41.11 +                     ['period', 'int'],
   41.12 +                     ['slice', 'int'],
    41.13 +                     ['latency', 'int'],
    41.14 +                     ['extratime', 'int'],
    41.15 +                     ['weight', 'int']])
   41.16 +        val = fn(req.args, {'dom': self.dom.id})
   41.17 +        return val
   41.18 +
   41.19      def op_maxmem_set(self, op, req):
   41.20          fn = FormFn(self.xd.domain_maxmem_set,
   41.21                      [['dom',    'str'],
    42.1 --- a/tools/python/xen/xm/main.py	Fri May 06 17:04:27 2005 +0000
    42.2 +++ b/tools/python/xen/xm/main.py	Wed May 11 13:43:50 2005 +0000
    42.3 @@ -591,6 +591,23 @@ class ProgBvtslice(Prog):
    42.4  
    42.5  xm.prog(ProgBvtslice)
    42.6  
    42.7 +class ProgSedf(Prog):
    42.8 +    group = 'scheduler'
    42.9 +    name= "sedf"
   42.10 +    info = """Set simple EDF parameters."""
   42.11 +
   42.12 +    def help(self, args):
   42.13 +        print args[0], "DOM PERIOD SLICE LATENCY EXTRATIME WEIGHT"
   42.14 +        print "\nSet simple EDF parameters."
   42.15 +
   42.16 +    def main(self, args):
    42.17 +        if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0])
    42.18 +        dom = args[1]
    42.19 +        v = map(int, args[2:7])
    42.20 +        server.xend_domain_cpu_sedf_set(dom, *v)
   42.21 +
   42.22 +xm.prog(ProgSedf)
   42.23 +
   42.24  class ProgInfo(Prog):
   42.25      group = 'host'
   42.26      name = "info"
    43.1 --- a/xen/Rules.mk	Fri May 06 17:04:27 2005 +0000
    43.2 +++ b/xen/Rules.mk	Wed May 11 13:43:50 2005 +0000
    43.3 @@ -2,6 +2,7 @@
    43.4  verbose     ?= n
    43.5  debug       ?= n
    43.6  perfc       ?= n
    43.7 +perfc_arrays?= n
    43.8  trace       ?= n
    43.9  optimize    ?= y
   43.10  domu_debug  ?= n
   43.11 @@ -54,6 +55,9 @@ endif
   43.12  
   43.13  ifeq ($(perfc),y)
   43.14  CFLAGS += -DPERF_COUNTERS
   43.15 +ifeq ($(perfc_arrays),y)
   43.16 +CFLAGS += -DPERF_ARRAYS
   43.17 +endif
   43.18  endif
   43.19  
   43.20  ifeq ($(trace),y)
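
Note that perfc_arrays only takes effect when perfc=y is also given: PERF_ARRAYS is defined in addition to, never instead of, PERF_COUNTERS. A hypothetical illustration of code keyed off the two macros (the identifiers below are invented for the example, not taken from xen/common/perfc.c):

    /* Built with: make perfc=y perfc_arrays=y */
    #ifdef PERF_COUNTERS
    static unsigned long demo_count;         /* cheap scalar counter */
    #ifdef PERF_ARRAYS
    static unsigned long demo_bucket[16];    /* costlier per-bucket histogram */
    #endif

    static void demo_account(unsigned int n)
    {
        demo_count++;
    #ifdef PERF_ARRAYS
        if ( n < 16 )
            demo_bucket[n]++;
    #endif
    }
    #endif /* PERF_COUNTERS */
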
    44.1 --- a/xen/arch/ia64/asm-offsets.c	Fri May 06 17:04:27 2005 +0000
    44.2 +++ b/xen/arch/ia64/asm-offsets.c	Wed May 11 13:43:50 2005 +0000
    44.3 @@ -8,6 +8,7 @@
    44.4  #include <xen/sched.h>
    44.5  #include <asm/processor.h>
    44.6  #include <asm/ptrace.h>
    44.7 +#include <public/xen.h>
    44.8  
    44.9  #define task_struct exec_domain
   44.10  
   44.11 @@ -37,6 +38,9 @@ void foo(void)
   44.12  
   44.13  	BLANK();
   44.14  
   44.15 +	DEFINE(XSI_PSR_IC_OFS, offsetof(vcpu_info_t, arch.interrupt_collection_enabled));
   44.16 +	DEFINE(XSI_PSR_IC, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.interrupt_collection_enabled)));
   44.17 +	DEFINE(XSI_PSR_I_OFS, offsetof(vcpu_info_t, arch.interrupt_delivery_enabled));
   44.18  	//DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
   44.19  	//DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
   44.20  	//DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
   44.21 @@ -46,8 +50,8 @@ void foo(void)
   44.22  	//DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
   44.23  	//DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
   44.24  	//DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
   44.25 -	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
   44.26 -	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
   44.27 +	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct exec_domain, arch._thread.ksp));
   44.28 +	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct exec_domain, arch._thread.on_ustack));
   44.29  
   44.30  	BLANK();
   44.31  
    45.1 --- a/xen/arch/ia64/domain.c	Fri May 06 17:04:27 2005 +0000
    45.2 +++ b/xen/arch/ia64/domain.c	Wed May 11 13:43:50 2005 +0000
    45.3 @@ -191,7 +191,7 @@ void arch_do_createdomain(struct exec_do
    45.4  	// stay on kernel stack because may get interrupts!
    45.5  	// ia64_ret_from_clone (which b0 gets in new_thread) switches
    45.6  	// to user stack
    45.7 -	ed->thread.on_ustack = 0;
    45.8 +	ed->arch._thread.on_ustack = 0;
    45.9  }
   45.10  
   45.11  void arch_do_boot_vcpu(struct exec_domain *p)
   45.12 @@ -261,7 +261,7 @@ void new_thread(struct exec_domain *ed,
   45.13  printf("new_thread: ed=%p, start_pc=%p, regs=%p, sw=%p, new_rbs=%p, IA64_STK_OFFSET=%p, &r8=%p\n",
   45.14  ed,start_pc,regs,sw,new_rbs,IA64_STK_OFFSET,&regs->r8);
   45.15  	sw->b0 = (unsigned long) &ia64_ret_from_clone;
   45.16 -	ed->thread.ksp = (unsigned long) sw - 16;
   45.17 +	ed->arch._thread.ksp = (unsigned long) sw - 16;
   45.18  	//ed->thread_info->flags = 0;
   45.19  printk("new_thread, about to call init_all_rr\n");
   45.20  	init_all_rr(ed);
   45.21 @@ -626,7 +626,7 @@ int construct_dom0(struct domain *d,
   45.22  #ifndef CLONE_DOMAIN0
   45.23  	if ( d != dom0 ) 
   45.24  	    BUG();
   45.25 -	if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
   45.26 +	if ( test_bit(DF_CONSTRUCTED, &d->flags) ) 
   45.27  	    BUG();
   45.28  #endif
   45.29  
   45.30 @@ -753,7 +753,7 @@ int construct_dom0(struct domain *d,
   45.31  #endif
   45.32  	console_endboot(strstr(cmdline, "tty0") != NULL);
   45.33  
   45.34 -	set_bit(DF_CONSTRUCTED, &d->d_flags);
   45.35 +	set_bit(DF_CONSTRUCTED, &d->flags);
   45.36  
   45.37  	new_thread(ed, pkern_entry, 0, 0);
   45.38  	// FIXME: Hack for keyboard input
   45.39 @@ -783,7 +783,7 @@ int construct_domU(struct domain *d,
   45.40  	unsigned long pkern_entry;
   45.41  
   45.42  #ifndef DOMU_AUTO_RESTART
   45.43 -	if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) BUG();
   45.44 +	if ( test_bit(DF_CONSTRUCTED, &d->flags) ) BUG();
   45.45  #endif
   45.46  
   45.47  	printk("*** LOADING DOMAIN %d ***\n",d->id);
   45.48 @@ -816,7 +816,7 @@ int construct_domU(struct domain *d,
   45.49  	loaddomainelfimage(d,image_start);
   45.50  	printk("loaddomainelfimage returns\n");
   45.51  
   45.52 -	set_bit(DF_CONSTRUCTED, &d->d_flags);
   45.53 +	set_bit(DF_CONSTRUCTED, &d->flags);
   45.54  
   45.55  	printk("calling new_thread, entry=%p\n",pkern_entry);
   45.56  #ifdef DOMU_AUTO_RESTART
    46.1 --- a/xen/arch/ia64/idle0_task.c	Fri May 06 17:04:27 2005 +0000
    46.2 +++ b/xen/arch/ia64/idle0_task.c	Wed May 11 13:43:50 2005 +0000
    46.3 @@ -22,7 +22,7 @@
    46.4  #define IDLE0_DOMAIN(_t)             \
    46.5  {                                    \
    46.6      id:          IDLE_DOMAIN_ID,     \
    46.7 -    d_flags:     1<<DF_IDLETASK,     \
    46.8 +    flags:     1<<DF_IDLETASK,       \
    46.9      refcnt:      ATOMIC_INIT(1)      \
   46.10  }
   46.11  
    47.1 --- a/xen/arch/ia64/irq.c	Fri May 06 17:04:27 2005 +0000
    47.2 +++ b/xen/arch/ia64/irq.c	Wed May 11 13:43:50 2005 +0000
    47.3 @@ -1406,9 +1406,11 @@ int pirq_guest_bind(struct exec_domain *
    47.4          desc->handler->startup(irq);
    47.5  
    47.6          /* Attempt to bind the interrupt target to the correct CPU. */
    47.7 +#if 0 /* FIXME CONFIG_SMP ??? */
    47.8          if ( desc->handler->set_affinity != NULL )
    47.9              desc->handler->set_affinity(
   47.10                  irq, apicid_to_phys_cpu_present(d->processor));
   47.11 +#endif
   47.12      }
   47.13      else if ( !will_share || !action->shareable )
   47.14      {
    48.1 --- a/xen/arch/ia64/ivt.S	Fri May 06 17:04:27 2005 +0000
    48.2 +++ b/xen/arch/ia64/ivt.S	Wed May 11 13:43:50 2005 +0000
    48.3 @@ -778,10 +778,22 @@ ENTRY(break_fault)
    48.4  	mov r17=cr.iim
    48.5  	mov r31=pr
    48.6  	;;
    48.7 +	movl r18=XSI_PSR_IC
    48.8 +	;;
    48.9 +	ld8 r19=[r18]
   48.10 +	;;
    48.11  	cmp.eq p7,p0=r0,r17			// is this a pseudo-cover?
   48.12 -	// FIXME: may also need to check slot==2?
   48.13  (p7)	br.sptk.many dispatch_privop_fault
   48.14 +	;;
   48.15 +	cmp.ne p7,p0=r0,r19
   48.16 +(p7)	br.sptk.many dispatch_break_fault
   48.17 +	// If we get to here, we have a hyperprivop
   48.18 +	// For now, hyperprivops are handled through the break mechanism
   48.19 +	// Later, they will be fast hand-coded assembly with psr.ic off
   48.20 +	// which means no calls, no use of r1-r15 and no memory accesses
   48.21 +	// except to pinned addresses!
   48.22  	br.sptk.many dispatch_break_fault
   48.23 +	;;
   48.24  #endif
   48.25  	mov r16=IA64_KR(CURRENT)		// r16 = current task; 12 cycle read lat.
   48.26  	mov r17=cr.iim
    49.1 --- a/xen/arch/ia64/mm_init.c	Fri May 06 17:04:27 2005 +0000
    49.2 +++ b/xen/arch/ia64/mm_init.c	Wed May 11 13:43:50 2005 +0000
    49.3 @@ -227,7 +227,7 @@ ia64_set_rbs_bot (void)
    49.4  
    49.5  	if (stack_size > MAX_USER_STACK_SIZE)
    49.6  		stack_size = MAX_USER_STACK_SIZE;
    49.7 -	current->thread.rbs_bot = STACK_TOP - stack_size;
    49.8 +	current->arch._thread.rbs_bot = STACK_TOP - stack_size;
    49.9  }
   49.10  
   49.11  /*
   49.12 @@ -255,7 +255,7 @@ printf("ia64_init_addr_space: called, no
   49.13  	if (vma) {
   49.14  		memset(vma, 0, sizeof(*vma));
   49.15  		vma->vm_mm = current->mm;
   49.16 -		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
   49.17 +		vma->vm_start = current->arch._thread.rbs_bot & PAGE_MASK;
   49.18  		vma->vm_end = vma->vm_start + PAGE_SIZE;
   49.19  		vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
   49.20  		vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
    50.1 --- a/xen/arch/ia64/patch/linux-2.6.11/unaligned.c	Fri May 06 17:04:27 2005 +0000
    50.2 +++ b/xen/arch/ia64/patch/linux-2.6.11/unaligned.c	Wed May 11 13:43:50 2005 +0000
    50.3 @@ -1,5 +1,5 @@
    50.4  --- ../../linux-2.6.11/arch/ia64/kernel/unaligned.c	2005-03-02 00:38:25.000000000 -0700
    50.5 -+++ arch/ia64/unaligned.c	2005-04-28 15:40:13.000000000 -0600
    50.6 ++++ arch/ia64/unaligned.c	2005-05-10 15:46:09.000000000 -0600
    50.7  @@ -437,7 +437,11 @@
    50.8   }
    50.9   
   50.10 @@ -12,7 +12,31 @@
   50.11   setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
   50.12   {
   50.13   	struct switch_stack *sw = (struct switch_stack *) regs - 1;
   50.14 -@@ -611,7 +615,11 @@
   50.15 +@@ -522,7 +526,11 @@
   50.16 + 	 */
   50.17 + 	if (regnum >= IA64_FIRST_ROTATING_FR) {
   50.18 + 		ia64_sync_fph(current);
   50.19 ++#ifdef XEN
   50.20 ++		current->arch._thread.fph[fph_index(regs, regnum)] = *fpval;
   50.21 ++#else
   50.22 + 		current->thread.fph[fph_index(regs, regnum)] = *fpval;
   50.23 ++#endif
   50.24 + 	} else {
   50.25 + 		/*
   50.26 + 		 * pt_regs or switch_stack ?
   50.27 +@@ -581,7 +589,11 @@
   50.28 + 	 */
   50.29 + 	if (regnum >= IA64_FIRST_ROTATING_FR) {
   50.30 + 		ia64_flush_fph(current);
   50.31 ++#ifdef XEN
   50.32 ++		*fpval = current->arch._thread.fph[fph_index(regs, regnum)];
   50.33 ++#else
   50.34 + 		*fpval = current->thread.fph[fph_index(regs, regnum)];
   50.35 ++#endif
   50.36 + 	} else {
   50.37 + 		/*
   50.38 + 		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
   50.39 +@@ -611,7 +623,11 @@
   50.40   }
   50.41   
   50.42   
   50.43 @@ -24,7 +48,7 @@
   50.44   getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
   50.45   {
   50.46   	struct switch_stack *sw = (struct switch_stack *) regs - 1;
   50.47 -@@ -1294,6 +1302,9 @@
   50.48 +@@ -1294,6 +1310,9 @@
   50.49   void
   50.50   ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
   50.51   {
   50.52 @@ -34,7 +58,7 @@
   50.53   	struct ia64_psr *ipsr = ia64_psr(regs);
   50.54   	mm_segment_t old_fs = get_fs();
   50.55   	unsigned long bundle[2];
   50.56 -@@ -1502,4 +1513,5 @@
   50.57 +@@ -1502,4 +1521,5 @@
   50.58   	si.si_imm = 0;
   50.59   	force_sig_info(SIGBUS, &si, current);
   50.60   	goto done;
    51.1 --- a/xen/arch/ia64/privop.c	Fri May 06 17:04:27 2005 +0000
    51.2 +++ b/xen/arch/ia64/privop.c	Wed May 11 13:43:50 2005 +0000
    51.3 @@ -205,7 +205,8 @@ IA64FAULT priv_itc_d(VCPU *vcpu, INST64 
    51.4  		return(IA64_ILLOP_FAULT);
    51.5  	if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
    51.6  		return(IA64_ILLOP_FAULT);
    51.7 -	pte = vcpu_get_gr(vcpu,inst.M41.r2);
    51.8 +	if (!inst.inst) pte = vcpu_get_tmp(vcpu,0);
    51.9 +	else pte = vcpu_get_gr(vcpu,inst.M41.r2);
   51.10  
   51.11  	return (vcpu_itc_d(vcpu,pte,itir,ifa));
   51.12  }
   51.13 @@ -219,7 +220,8 @@ IA64FAULT priv_itc_i(VCPU *vcpu, INST64 
   51.14  		return(IA64_ILLOP_FAULT);
   51.15  	if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
   51.16  		return(IA64_ILLOP_FAULT);
   51.17 -	pte = vcpu_get_gr(vcpu,inst.M41.r2);
   51.18 +	if (!inst.inst) pte = vcpu_get_tmp(vcpu,0);
   51.19 +	else pte = vcpu_get_gr(vcpu,inst.M41.r2);
   51.20  
   51.21  	return (vcpu_itc_i(vcpu,pte,itir,ifa));
   51.22  }
   51.23 @@ -417,10 +419,17 @@ IA64FAULT priv_mov_from_pmc(VCPU *vcpu, 
   51.24  	UINT64 val;
   51.25  	IA64FAULT fault;
   51.26  	
   51.27 -	fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
   51.28 -	if (fault == IA64_NO_FAULT)
   51.29 -		return vcpu_set_gr(vcpu, inst.M43.r1, val);
   51.30 -	else return fault;
   51.31 +	if (inst.M43.r1 > 63) { // privified mov from pmd
   51.32 +		fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
   51.33 +		if (fault == IA64_NO_FAULT)
   51.34 +			return vcpu_set_gr(vcpu, inst.M43.r1-64, val);
   51.35 +	}
   51.36 +	else {
   51.37 +		fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
   51.38 +		if (fault == IA64_NO_FAULT)
   51.39 +			return vcpu_set_gr(vcpu, inst.M43.r1, val);
   51.40 +	}
   51.41 +	return fault;
   51.42  }
   51.43  
   51.44  unsigned long from_cr_cnt[128] = { 0 };
   51.45 @@ -531,6 +540,8 @@ struct {
   51.46  	unsigned long bsw0;
   51.47  	unsigned long bsw1;
   51.48  	unsigned long cover;
   51.49 +	unsigned long fc;
   51.50 +	unsigned long cpuid;
   51.51  	unsigned long Mpriv_cnt[64];
   51.52  } privcnt = { 0 };
   51.53  
   51.54 @@ -631,7 +642,11 @@ priv_handle_op(VCPU *vcpu, REGS *regs, i
   51.55  				else x6 = 0x1a;
   51.56  			}
   51.57  		}
   51.58 -		privcnt.Mpriv_cnt[x6]++;
   51.59 +		if (x6 == 52 && inst.M28.r3 > 63)
   51.60 +			privcnt.fc++;
   51.61 +		else if (x6 == 16 && inst.M43.r3 > 63)
   51.62 +			privcnt.cpuid++;
   51.63 +		else privcnt.Mpriv_cnt[x6]++;
   51.64  		return (*pfunc)(vcpu,inst);
   51.65  		break;
   51.66  	    case B:
   51.67 @@ -682,7 +697,7 @@ priv_handle_op(VCPU *vcpu, REGS *regs, i
    51.68          //printf("We who are about to die salute you\n");
   51.69  	printf("handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d)\n",
   51.70  		 iip, (UINT64)inst.inst, slot, slot_type);
   51.71 -        //printf("vtop(0x%lx)==0x%lx\r\n", iip, tr_vtop(iip));
   51.72 +        //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip));
   51.73          //thread_mozambique("privop fault\n");
   51.74  	return (IA64_ILLOP_FAULT);
   51.75  }
   51.76 @@ -745,6 +760,64 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN
   51.77  }
   51.78  
   51.79  
   51.80 +// FIXME: Move these to include/public/arch-ia64?
   51.81 +#define HYPERPRIVOP_RFI			0x1
   51.82 +#define HYPERPRIVOP_RSM_DT		0x2
   51.83 +#define HYPERPRIVOP_SSM_DT		0x3
   51.84 +#define HYPERPRIVOP_COVER		0x4
   51.85 +#define HYPERPRIVOP_ITC_D		0x5
   51.86 +#define HYPERPRIVOP_ITC_I		0x6
   51.87 +#define HYPERPRIVOP_MAX			0x6
   51.88 +
    51.89 +char *hyperpriv_str[HYPERPRIVOP_MAX+1] = {
    51.90 +	0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d",
    51.91 +	"itc.i"
    51.92 +};
   51.93 +
   51.94 +unsigned long hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
   51.95 +
   51.96 +/* hyperprivops are generally executed in assembly (with physical psr.ic off)
   51.97 + * so this code is primarily used for debugging them */
   51.98 +int
   51.99 +ia64_hyperprivop(unsigned long iim, REGS *regs)
  51.100 +{
   51.101 +	struct exec_domain *ed = (struct exec_domain *) current;
  51.102 +	INST64 inst;
  51.103 +	UINT64 val;
  51.104 +
  51.105 +// FIXME: Add instrumentation for these
  51.106 +// FIXME: Handle faults appropriately for these
  51.107 +	if (!iim || iim > HYPERPRIVOP_MAX) {
  51.108 +		printf("bad hyperprivop; ignored\n");
  51.109 +		return 1;
  51.110 +	}
  51.111 +	hyperpriv_cnt[iim]++;
  51.112 +	switch(iim) {
  51.113 +	    case HYPERPRIVOP_RFI:
  51.114 +		(void)vcpu_rfi(ed);
  51.115 +		return 0;	// don't update iip
  51.116 +	    case HYPERPRIVOP_RSM_DT:
  51.117 +		(void)vcpu_reset_psr_dt(ed);
  51.118 +		return 1;
  51.119 +	    case HYPERPRIVOP_SSM_DT:
  51.120 +		(void)vcpu_set_psr_dt(ed);
  51.121 +		return 1;
  51.122 +	    case HYPERPRIVOP_COVER:
  51.123 +		(void)vcpu_cover(ed);
  51.124 +		return 1;
  51.125 +	    case HYPERPRIVOP_ITC_D:
  51.126 +		inst.inst = 0;
  51.127 +		(void)priv_itc_d(ed,inst);
  51.128 +		return 1;
  51.129 +	    case HYPERPRIVOP_ITC_I:
  51.130 +		inst.inst = 0;
  51.131 +		(void)priv_itc_i(ed,inst);
  51.132 +		return 1;
  51.133 +	}
  51.134 +	return 0;
  51.135 +}
  51.136 +
  51.137 +
  51.138  /**************************************************************************
  51.139  Privileged operation instrumentation routines
  51.140  **************************************************************************/
  51.141 @@ -798,55 +871,61 @@ int dump_privop_counts(char *buf)
  51.142  	sum += privcnt.rfi; sum += privcnt.bsw0;
  51.143  	sum += privcnt.bsw1; sum += privcnt.cover;
  51.144  	for (i=0; i < 64; i++) sum += privcnt.Mpriv_cnt[i];
  51.145 -	s += sprintf(s,"Privop statistics: (Total privops: %ld)\r\n",sum);
  51.146 +	s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum);
  51.147  	if (privcnt.mov_to_ar_imm)
  51.148 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.mov_to_ar_imm,
  51.149 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.mov_to_ar_imm,
  51.150  			"mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum);
  51.151  	if (privcnt.mov_to_ar_reg)
  51.152 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.mov_to_ar_reg,
  51.153 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.mov_to_ar_reg,
  51.154  			"mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum);
  51.155  	if (privcnt.mov_from_ar)
  51.156 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.mov_from_ar,
  51.157 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.mov_from_ar,
  51.158  			"privified-mov_from_ar", (privcnt.mov_from_ar*100L)/sum);
  51.159  	if (privcnt.ssm)
  51.160 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.ssm,
  51.161 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.ssm,
  51.162  			"ssm", (privcnt.ssm*100L)/sum);
  51.163  	if (privcnt.rsm)
  51.164 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.rsm,
  51.165 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.rsm,
  51.166  			"rsm", (privcnt.rsm*100L)/sum);
  51.167  	if (privcnt.rfi)
  51.168 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.rfi,
  51.169 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.rfi,
  51.170  			"rfi", (privcnt.rfi*100L)/sum);
  51.171  	if (privcnt.bsw0)
  51.172 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.bsw0,
  51.173 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.bsw0,
  51.174  			"bsw0", (privcnt.bsw0*100L)/sum);
  51.175  	if (privcnt.bsw1)
  51.176 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.bsw1,
  51.177 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.bsw1,
  51.178  			"bsw1", (privcnt.bsw1*100L)/sum);
  51.179  	if (privcnt.cover)
  51.180 -		s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.cover,
  51.181 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.cover,
  51.182  			"cover", (privcnt.cover*100L)/sum);
  51.183 +	if (privcnt.fc)
  51.184 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.fc,
  51.185 +			"privified-fc", (privcnt.fc*100L)/sum);
  51.186 +	if (privcnt.cpuid)
  51.187 +		s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.cpuid,
  51.188 +			"privified-getcpuid", (privcnt.cpuid*100L)/sum);
  51.189  	for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) {
  51.190 -		if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\r\n");
  51.191 -		else s += sprintf(s,"%10d  %s [%d%%]\r\n", privcnt.Mpriv_cnt[i],
  51.192 +		if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n");
  51.193 +		else s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.Mpriv_cnt[i],
  51.194  			Mpriv_str[i], (privcnt.Mpriv_cnt[i]*100L)/sum);
  51.195  		if (i == 0x24) { // mov from CR
  51.196  			s += sprintf(s,"            [");
  51.197  			for (j=0; j < 128; j++) if (from_cr_cnt[j]) {
  51.198  				if (!cr_str[j])
  51.199 -					s += sprintf(s,"PRIVSTRING NULL!!\r\n");
  51.200 +					s += sprintf(s,"PRIVSTRING NULL!!\n");
  51.201  				s += sprintf(s,"%s(%d),",cr_str[j],from_cr_cnt[j]);
  51.202  			}
  51.203 -			s += sprintf(s,"]\r\n");
  51.204 +			s += sprintf(s,"]\n");
  51.205  		}
  51.206  		else if (i == 0x2c) { // mov to CR
  51.207  			s += sprintf(s,"            [");
  51.208  			for (j=0; j < 128; j++) if (to_cr_cnt[j]) {
  51.209  				if (!cr_str[j])
  51.210 -					s += sprintf(s,"PRIVSTRING NULL!!\r\n");
  51.211 +					s += sprintf(s,"PRIVSTRING NULL!!\n");
  51.212  				s += sprintf(s,"%s(%d),",cr_str[j],to_cr_cnt[j]);
  51.213  			}
  51.214 -			s += sprintf(s,"]\r\n");
  51.215 +			s += sprintf(s,"]\n");
  51.216  		}
  51.217  	}
  51.218  	return s - buf;
  51.219 @@ -864,19 +943,88 @@ int zero_privop_counts(char *buf)
  51.220  	privcnt.ssm = 0; privcnt.rsm = 0;
  51.221  	privcnt.rfi = 0; privcnt.bsw0 = 0;
  51.222  	privcnt.bsw1 = 0; privcnt.cover = 0;
  51.223 +	privcnt.fc = 0; privcnt.cpuid = 0;
  51.224  	for (i=0; i < 64; i++) privcnt.Mpriv_cnt[i] = 0;
  51.225  	for (j=0; j < 128; j++) from_cr_cnt[j] = 0;
  51.226  	for (j=0; j < 128; j++) to_cr_cnt[j] = 0;
  51.227 -	s += sprintf(s,"All privop statistics zeroed\r\n");
  51.228 +	s += sprintf(s,"All privop statistics zeroed\n");
  51.229  	return s - buf;
  51.230  }
  51.231  
  51.232 +#ifdef PRIVOP_ADDR_COUNT
  51.233 +
  51.234 +extern struct privop_addr_count privop_addr_counter[];
  51.235 +
  51.236 +void privop_count_addr(unsigned long iip, int inst)
  51.237 +{
  51.238 +	struct privop_addr_count *v = &privop_addr_counter[inst];
  51.239 +	int i;
  51.240 +
  51.241 +	for (i = 0; i < PRIVOP_COUNT_NADDRS; i++) {
  51.242 +		if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; }
  51.243 +		else if (v->addr[i] == iip)  { v->count[i]++; return; }
  51.244 +	}
   51.245 +	v->overflow++;
  51.246 +}
  51.247 +
  51.248 +int dump_privop_addrs(char *buf)
  51.249 +{
  51.250 +	int i,j;
  51.251 +	char *s = buf;
  51.252 +	s += sprintf(s,"Privop addresses:\n");
  51.253 +	for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
  51.254 +		struct privop_addr_count *v = &privop_addr_counter[i];
  51.255 +		s += sprintf(s,"%s:\n",v->instname);
  51.256 +		for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) {
  51.257 +			if (!v->addr[j]) break;
  51.258 +			s += sprintf(s," @%p #%ld\n",v->addr[j],v->count[j]);
  51.259 +		}
  51.260 +		if (v->overflow) 
  51.261 +			s += sprintf(s," other #%ld\n",v->overflow);
  51.262 +	}
  51.263 +	return s - buf;
  51.264 +}
  51.265 +
  51.266 +void zero_privop_addrs(void)
  51.267 +{
  51.268 +	int i,j;
  51.269 +	for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
  51.270 +		struct privop_addr_count *v = &privop_addr_counter[i];
  51.271 +		for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
  51.272 +			v->addr[j] = v->count[j] = 0;
  51.273 +		v->overflow = 0;
  51.274 +	}
  51.275 +}
  51.276 +#endif
  51.277 +
  51.278 +int dump_hyperprivop_counts(char *buf)
  51.279 +{
  51.280 +	int i;
  51.281 +	char *s = buf;
  51.282 +	s += sprintf(s,"Hyperprivops:\n");
  51.283 +	for (i = 1; i <= HYPERPRIVOP_MAX; i++)
  51.284 +		if (hyperpriv_cnt[i])
  51.285 +			s += sprintf(s,"%10d %s\n",
  51.286 +				hyperpriv_cnt[i], hyperpriv_str[i]);
  51.287 +	return s - buf;
  51.288 +}
  51.289 +
  51.290 +void zero_hyperprivop_counts(void)
  51.291 +{
  51.292 +	int i;
  51.293 +	for (i = 0; i <= HYPERPRIVOP_MAX; i++) hyperpriv_cnt[i] = 0;
  51.294 +}
  51.295 +
  51.296  #define TMPBUFLEN 8*1024
  51.297  int dump_privop_counts_to_user(char __user *ubuf, int len)
  51.298  {
  51.299  	char buf[TMPBUFLEN];
  51.300  	int n = dump_privop_counts(buf);
  51.301  
  51.302 +	n += dump_hyperprivop_counts(buf + n);
  51.303 +#ifdef PRIVOP_ADDR_COUNT
  51.304 +	n += dump_privop_addrs(buf + n);
  51.305 +#endif
  51.306  	if (len < TMPBUFLEN) return -1;
  51.307  	if (__copy_to_user(ubuf,buf,n)) return -1;
  51.308  	return n;
  51.309 @@ -887,6 +1035,10 @@ int zero_privop_counts_to_user(char __us
  51.310  	char buf[TMPBUFLEN];
  51.311  	int n = zero_privop_counts(buf);
  51.312  
  51.313 +	zero_hyperprivop_counts();
  51.314 +#ifdef PRIVOP_ADDR_COUNT
  51.315 +	zero_privop_addrs();
  51.316 +#endif
  51.317  	if (len < TMPBUFLEN) return -1;
  51.318  	if (__copy_to_user(ubuf,buf,n)) return -1;
  51.319  	return n;
    52.1 --- a/xen/arch/ia64/process.c	Fri May 06 17:04:27 2005 +0000
    52.2 +++ b/xen/arch/ia64/process.c	Wed May 11 13:43:50 2005 +0000
    52.3 @@ -51,6 +51,7 @@ extern unsigned long dom0_start, dom0_si
    52.4  			IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
    52.5  
    52.6  #define PSCB(x,y)	x->vcpu_info->arch.y
    52.7 +#define PSCBX(x,y)	x->arch.y
    52.8  
    52.9  extern unsigned long vcpu_verbose;
   52.10  
   52.11 @@ -154,7 +155,7 @@ panic_domain(regs,"psr.ic off, deliverin
   52.12  		}
   52.13  //printf("Delivering NESTED DATA TLB fault\n");
   52.14  		vector = IA64_DATA_NESTED_TLB_VECTOR;
   52.15 -		regs->cr_iip = ((unsigned long) PSCB(ed,iva) + vector) & ~0xffUL;
   52.16 +		regs->cr_iip = ((unsigned long) PSCBX(ed,iva) + vector) & ~0xffUL;
   52.17  		regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
   52.18  // NOTE: nested trap must NOT pass PSCB address
   52.19  		//regs->r31 = (unsigned long) &PSCB(ed);
   52.20 @@ -187,7 +188,7 @@ panic_domain(regs,"psr.ic off, deliverin
   52.21  	PSCB(ed,ifs) = 0;
   52.22  	PSCB(ed,incomplete_regframe) = 0;
   52.23  
   52.24 -	regs->cr_iip = ((unsigned long) PSCB(ed,iva) + vector) & ~0xffUL;
   52.25 +	regs->cr_iip = ((unsigned long) PSCBX(ed,iva) + vector) & ~0xffUL;
   52.26  	regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
   52.27  #ifdef CONFIG_SMP
   52.28  #error "sharedinfo doesn't handle smp yet"
   52.29 @@ -516,7 +517,7 @@ printf("ia64_fault, vector=0x%p, ifa=%p,
   52.30  	      case 32: /* fp fault */
   52.31  	      case 33: /* fp trap */
   52.32  		//result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
   52.33 -		if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
   52.34 +		//if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
   52.35  			//siginfo.si_signo = SIGFPE;
   52.36  			//siginfo.si_errno = 0;
   52.37  			//siginfo.si_code = FPE_FLTINV;
   52.38 @@ -525,7 +526,7 @@ printf("ia64_fault, vector=0x%p, ifa=%p,
   52.39  			//siginfo.si_isr = isr;
   52.40  			//siginfo.si_imm = 0;
   52.41  			//force_sig_info(SIGFPE, &siginfo, current);
   52.42 -		}
   52.43 +		//}
   52.44  		//return;
   52.45  		sprintf(buf, "FP fault/trap");
   52.46  		break;
   52.47 @@ -722,6 +723,10 @@ ia64_handle_break (unsigned long ifa, st
   52.48  		if (ia64_hypercall(regs))
   52.49  			vcpu_increment_iip(current);
   52.50  	}
   52.51 +	else if (!PSCB(ed,interrupt_collection_enabled)) {
   52.52 +		if (ia64_hyperprivop(iim,regs))
   52.53 +			vcpu_increment_iip(current);
   52.54 +	}
   52.55  	else reflect_interruption(ifa,isr,iim,regs,IA64_BREAK_VECTOR);
   52.56  }
   52.57  
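
Together with the privop.c changes, the dispatch rule in ia64_handle_break becomes: a break with virtual psr.ic enabled is a hypercall, a break with interrupt collection disabled is a hyperprivop whose number arrives in cr.iim, and anything else is reflected to the guest. A hypothetical guest-side stub (not part of this changeset; the HYPERPRIVOP_COVER value matches the table in privop.c, but the inline-asm form is only a sketch):

    /* Sketch of how a paravirtualized guest might issue a hyperprivop:
     * execute a break whose immediate lands in cr.iim while interrupt
     * collection (psr.ic) is off, so Xen's break_fault path takes the
     * hyperprivop branch rather than the hypercall one. */
    #define HYPERPRIVOP_COVER	0x4

    static inline void xen_cover(void)
    {
    	asm volatile ("break %0" : : "i" (HYPERPRIVOP_COVER) : "memory");
    }
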
    53.1 --- a/xen/arch/ia64/vcpu.c	Fri May 06 17:04:27 2005 +0000
    53.2 +++ b/xen/arch/ia64/vcpu.c	Wed May 11 13:43:50 2005 +0000
    53.3 @@ -1,6 +1,6 @@
    53.4  /*
    53.5   * Virtualized CPU functions
    53.6 - * 
    53.7 + *
    53.8   * Copyright (C) 2004 Hewlett-Packard Co.
    53.9   *	Dan Magenheimer (dan.magenheimer@hp.com)
   53.10   *
   53.11 @@ -26,6 +26,7 @@ typedef	union {
   53.12  // this def for vcpu_regs won't work if kernel stack is present
   53.13  #define	vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs)
   53.14  #define	PSCB(x,y)	x->vcpu_info->arch.y
   53.15 +#define	PSCBX(x,y)	x->arch.y
   53.16  
   53.17  #define	TRUE	1
   53.18  #define	FALSE	0
   53.19 @@ -37,6 +38,17 @@ typedef	union {
   53.20  
   53.21  #define STATIC
   53.22  
   53.23 +#ifdef PRIVOP_ADDR_COUNT
   53.24 +struct privop_addr_count privop_addr_counter[PRIVOP_COUNT_NINSTS] = {
   53.25 +	{ "rsm", { 0 }, { 0 }, 0 },
   53.26 +	{ "ssm", { 0 }, { 0 }, 0 }
   53.27 +};
   53.28 +extern void privop_count_addr(unsigned long addr, int inst);
   53.29 +#define	PRIVOP_COUNT_ADDR(regs,inst) privop_count_addr(regs->cr_iip,inst)
   53.30 +#else
   53.31 +#define	PRIVOP_COUNT_ADDR(x,y) do {} while (0)
   53.32 +#endif
   53.33 +
   53.34  unsigned long vcpu_verbose = 0;
   53.35  #define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0)
   53.36  
   53.37 @@ -77,30 +89,20 @@ vcpu_set_gr(VCPU *vcpu, unsigned reg, UI
   53.38  IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val)
   53.39  {
   53.40  	if (reg == 44) return (vcpu_set_itc(vcpu,val));
   53.41 -	if (reg == 27) return (IA64_ILLOP_FAULT);
   53.42 -	if (reg > 7) return (IA64_ILLOP_FAULT);
   53.43 -	PSCB(vcpu,krs[reg]) = val;
   53.44 -#if 0
   53.45 -// for now, privify kr read's so all kr accesses are privileged
   53.46 -	switch (reg) {
   53.47 -	      case 0: asm volatile ("mov ar.k0=%0" :: "r"(val)); break;
   53.48 -	      case 1: asm volatile ("mov ar.k1=%0" :: "r"(val)); break;
   53.49 -	      case 2: asm volatile ("mov ar.k2=%0" :: "r"(val)); break;
   53.50 -	      case 3: asm volatile ("mov ar.k3=%0" :: "r"(val)); break;
   53.51 -	      case 4: asm volatile ("mov ar.k4=%0" :: "r"(val)); break;
   53.52 -	      case 5: asm volatile ("mov ar.k5=%0" :: "r"(val)); break;
   53.53 -	      case 6: asm volatile ("mov ar.k6=%0" :: "r"(val)); break;
   53.54 -	      case 7: asm volatile ("mov ar.k7=%0" :: "r"(val)); break;
   53.55 -	      case 27: asm volatile ("mov ar.cflg=%0" :: "r"(val)); break;
   53.56 -	}
   53.57 -#endif
   53.58 +	else if (reg == 27) return (IA64_ILLOP_FAULT);
   53.59 +	else if (reg == 24)
   53.60 +	    printf("warning: setting ar.eflg is a no-op; no IA-32 support\n");
   53.61 +	else if (reg > 7) return (IA64_ILLOP_FAULT);
   53.62 +	else PSCB(vcpu,krs[reg]) = val;
   53.63  	return IA64_NO_FAULT;
   53.64  }
   53.65  
   53.66  IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val)
   53.67  {
   53.68 -	if (reg > 7) return (IA64_ILLOP_FAULT);
   53.69 -	*val = PSCB(vcpu,krs[reg]);
   53.70 +	if (reg == 24)
   53.71 +	    printf("warning: getting ar.eflg is a no-op; no IA-32 support\n");
   53.72 +	else if (reg > 7) return (IA64_ILLOP_FAULT);
   53.73 +	else *val = PSCB(vcpu,krs[reg]);
   53.74  	return IA64_NO_FAULT;
   53.75  }
   53.76  
   53.77 @@ -119,11 +121,18 @@ void vcpu_set_metaphysical_mode(VCPU *vc
   53.78  	}
   53.79  }
   53.80  
   53.81 +IA64FAULT vcpu_reset_psr_dt(VCPU *vcpu)
   53.82 +{
   53.83 +	vcpu_set_metaphysical_mode(vcpu,TRUE);
   53.84 +	return IA64_NO_FAULT;
   53.85 +}
   53.86 +
   53.87  IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
   53.88  {
   53.89  	struct ia64_psr psr, imm, *ipsr;
   53.90  	REGS *regs = vcpu_regs(vcpu);
   53.91  
   53.92 +	PRIVOP_COUNT_ADDR(regs,_RSM);
   53.93  	// TODO: All of these bits need to be virtualized
   53.94  	// TODO: Only allowed for current vcpu
   53.95  	__asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
   53.96 @@ -152,12 +161,19 @@ IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, 
   53.97  extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu);
   53.98  #define SPURIOUS_VECTOR 0xf
   53.99  
  53.100 +IA64FAULT vcpu_set_psr_dt(VCPU *vcpu)
  53.101 +{
  53.102 +	vcpu_set_metaphysical_mode(vcpu,FALSE);
  53.103 +	return IA64_NO_FAULT;
  53.104 +}
  53.105 +
  53.106  IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
  53.107  {
  53.108  	struct ia64_psr psr, imm, *ipsr;
  53.109  	REGS *regs = vcpu_regs(vcpu);
  53.110  	UINT64 mask, enabling_interrupts = 0;
  53.111  
  53.112 +	PRIVOP_COUNT_ADDR(regs,_SSM);
  53.113  	// TODO: All of these bits need to be virtualized
  53.114  	__asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
  53.115  	imm = *(struct ia64_psr *)&imm24;
  53.116 @@ -274,9 +290,9 @@ BOOLEAN vcpu_get_psr_i(VCPU *vcpu)
  53.117  
  53.118  UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr)
  53.119  {
  53.120 -	UINT64 dcr = PSCB(vcpu,dcr);
  53.121 +	UINT64 dcr = PSCBX(vcpu,dcr);
  53.122  	PSR psr = {0};
  53.123 -	
  53.124 +
  53.125  	//printf("*** vcpu_get_ipsr_int_state (0x%016lx)...",prevpsr);
  53.126  	psr.i64 = prevpsr;
  53.127  	psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1;
  53.128 @@ -302,13 +318,13 @@ extern unsigned long privop_trace;
  53.129  //verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu,iip));
  53.130  	// Reads of cr.dcr on Xen always have the sign bit set, so
  53.131  	// a domain can differentiate whether it is running on SP or not
  53.132 -	*pval = PSCB(vcpu,dcr) | 0x8000000000000000L;
  53.133 +	*pval = PSCBX(vcpu,dcr) | 0x8000000000000000L;
  53.134  	return (IA64_NO_FAULT);
  53.135  }
  53.136  
  53.137  IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval)
  53.138  {
  53.139 -	*pval = PSCB(vcpu,iva) & ~0x7fffL;
  53.140 +	*pval = PSCBX(vcpu,iva) & ~0x7fffL;
  53.141  	return (IA64_NO_FAULT);
  53.142  }
  53.143  
  53.144 @@ -408,13 +424,13 @@ extern unsigned long privop_trace;
  53.145  	// a domain can differentiate whether it is running on SP or not
  53.146  	// Thus, writes of DCR should ignore the sign bit
  53.147  //verbose("vcpu_set_dcr: called\n");
  53.148 -	PSCB(vcpu,dcr) = val & ~0x8000000000000000L;
  53.149 +	PSCBX(vcpu,dcr) = val & ~0x8000000000000000L;
  53.150  	return (IA64_NO_FAULT);
  53.151  }
  53.152  
  53.153  IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val)
  53.154  {
  53.155 -	PSCB(vcpu,iva) = val & ~0x7fffL;
  53.156 +	PSCBX(vcpu,iva) = val & ~0x7fffL;
  53.157  	return (IA64_NO_FAULT);
  53.158  }
  53.159  
  53.160 @@ -508,16 +524,16 @@ void vcpu_pend_interrupt(VCPU *vcpu, UIN
  53.161  		return;
  53.162  	}
  53.163  	if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return;
  53.164 -	if (test_bit(vector,PSCB(vcpu,irr))) {
  53.165 +	if (test_bit(vector,PSCBX(vcpu,irr))) {
  53.166  //printf("vcpu_pend_interrupt: overrun\n");
  53.167  	}
  53.168 -	set_bit(vector,PSCB(vcpu,irr));
  53.169 +	set_bit(vector,PSCBX(vcpu,irr));
  53.170  	PSCB(vcpu,pending_interruption) = 1;
  53.171  }
  53.172  
  53.173  void early_tick(VCPU *vcpu)
  53.174  {
  53.175 -	UINT64 *p = &PSCB(vcpu,irr[3]);
  53.176 +	UINT64 *p = &PSCBX(vcpu,irr[3]);
  53.177  	printf("vcpu_check_pending: about to deliver early tick\n");
  53.178  	printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p);
  53.179  }
  53.180 @@ -535,9 +551,9 @@ UINT64 vcpu_check_pending_interrupts(VCP
  53.181  {
  53.182  	UINT64 *p, *q, *r, bits, bitnum, mask, i, vector;
  53.183  
  53.184 -	p = &PSCB(vcpu,irr[3]);
  53.185 +	p = &PSCBX(vcpu,irr[3]);
  53.186  	q = &PSCB(vcpu,delivery_mask[3]);
  53.187 -	r = &PSCB(vcpu,insvc[3]);
  53.188 +	r = &PSCBX(vcpu,insvc[3]);
  53.189  	for (i = 3; ; p--, q--, r--, i--) {
  53.190  		bits = *p & *q;
  53.191  		if (bits) break; // got a potential interrupt
  53.192 @@ -577,9 +593,9 @@ UINT64 vcpu_check_pending_interrupts(VCP
  53.193  #if 0
  53.194  if (vector == (PSCB(vcpu,itv) & 0xff)) {
  53.195  	UINT64 now = ia64_get_itc();
  53.196 -	UINT64 itm = PSCB(vcpu,domain_itm);
  53.197 +	UINT64 itm = PSCBX(vcpu,domain_itm);
  53.198  	if (now < itm) early_tick(vcpu);
  53.199 -	
  53.200 +
  53.201  }
  53.202  #endif
  53.203  	return vector;
  53.204 @@ -639,13 +655,13 @@ IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT6
  53.205  	i = vector >> 6;
  53.206  	mask = 1L << (vector & 0x3f);
  53.207  //printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %ld\n",vector);
  53.208 -	PSCB(vcpu,insvc[i]) |= mask;
  53.209 -	PSCB(vcpu,irr[i]) &= ~mask;
  53.210 +	PSCBX(vcpu,insvc[i]) |= mask;
  53.211 +	PSCBX(vcpu,irr[i]) &= ~mask;
  53.212  	//PSCB(vcpu,pending_interruption)--;
  53.213  	*pval = vector;
  53.214  	// if delivering a timer interrupt, remember domain_itm
  53.215  	if (vector == (PSCB(vcpu,itv) & 0xff)) {
  53.216 -		PSCB(vcpu,domain_itm_last) = PSCB(vcpu,domain_itm);
  53.217 +		PSCBX(vcpu,domain_itm_last) = PSCBX(vcpu,domain_itm);
  53.218  	}
  53.219  	return IA64_NO_FAULT;
  53.220  }
  53.221 @@ -760,7 +776,7 @@ IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT6
  53.222  	UINT64 *p, bits, vec, bitnum;
  53.223  	int i;
  53.224  
  53.225 -	p = &PSCB(vcpu,insvc[3]);
  53.226 +	p = &PSCBX(vcpu,insvc[3]);
  53.227  	for (i = 3; (i >= 0) && !(bits = *p); i--, p--);
  53.228  	if (i < 0) {
  53.229  		printf("Trying to EOI interrupt when none are in-service.\r\n");
  53.230 @@ -811,8 +827,8 @@ extern unsigned long privop_trace;
  53.231  	if (val & 0xef00) return (IA64_ILLOP_FAULT);
  53.232  	PSCB(vcpu,itv) = val;
  53.233  	if (val & 0x10000) {
  53.234 -printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCB(vcpu,domain_itm));
  53.235 -		PSCB(vcpu,domain_itm) = 0;
  53.236 +printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCBX(vcpu,domain_itm));
  53.237 +		PSCBX(vcpu,domain_itm) = 0;
  53.238  	}
  53.239  	else vcpu_enable_timer(vcpu,1000000L);
  53.240  	return (IA64_NO_FAULT);
  53.241 @@ -833,6 +849,20 @@ IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT
  53.242  }
  53.243  
  53.244  /**************************************************************************
  53.245 + VCPU temporary register access routines
  53.246 +**************************************************************************/
  53.247 +UINT64 vcpu_get_tmp(VCPU *vcpu, UINT64 index)
  53.248 +{
  53.249 +	if (index > 7) return 0;
  53.250 +	return PSCB(vcpu,tmp[index]);
  53.251 +}
  53.252 +
  53.253 +void vcpu_set_tmp(VCPU *vcpu, UINT64 index, UINT64 val)
  53.254 +{
  53.255 +	if (index <= 7) PSCB(vcpu,tmp[index]) = val;
  53.256 +}
  53.257 +
  53.258 +/**************************************************************************
  53.259  Interval timer routines
  53.260  **************************************************************************/
  53.261  
  53.262 @@ -845,14 +875,14 @@ BOOLEAN vcpu_timer_disabled(VCPU *vcpu)
  53.263  BOOLEAN vcpu_timer_inservice(VCPU *vcpu)
  53.264  {
  53.265  	UINT64 itv = PSCB(vcpu,itv);
  53.266 -	return (test_bit(itv, PSCB(vcpu,insvc)));
  53.267 +	return (test_bit(itv, PSCBX(vcpu,insvc)));
  53.268  }
  53.269  
  53.270  BOOLEAN vcpu_timer_expired(VCPU *vcpu)
  53.271  {
  53.272 -	unsigned long domain_itm = PSCB(vcpu,domain_itm);
  53.273 +	unsigned long domain_itm = PSCBX(vcpu,domain_itm);
  53.274  	unsigned long now = ia64_get_itc();
  53.275 - 
  53.276 +
  53.277  	if (!domain_itm) return FALSE;
  53.278  	if (now < domain_itm) return FALSE;
  53.279  	if (vcpu_timer_disabled(vcpu)) return FALSE;
  53.280 @@ -877,36 +907,36 @@ void vcpu_safe_set_itm(unsigned long val
  53.281  
  53.282  void vcpu_set_next_timer(VCPU *vcpu)
  53.283  {
  53.284 -	UINT64 d = PSCB(vcpu,domain_itm);
  53.285 -	//UINT64 s = PSCB(vcpu,xen_itm);
  53.286 +	UINT64 d = PSCBX(vcpu,domain_itm);
  53.287 +	//UINT64 s = PSCBX(vcpu,xen_itm);
  53.288  	UINT64 s = local_cpu_data->itm_next;
  53.289  	UINT64 now = ia64_get_itc();
  53.290 -	//UINT64 interval = PSCB(vcpu,xen_timer_interval);
  53.291 +	//UINT64 interval = PSCBX(vcpu,xen_timer_interval);
  53.292  
  53.293  	/* gloss over the wraparound problem for now... we know it exists
  53.294  	 * but it doesn't matter right now */
  53.295  
  53.296  #if 0
  53.297  	/* ensure at least next SP tick is in the future */
  53.298 -	if (!interval) PSCB(vcpu,xen_itm) = now +
  53.299 +	if (!interval) PSCBX(vcpu,xen_itm) = now +
  53.300  #if 0
  53.301  		(running_on_sim() ? SIM_DEFAULT_CLOCK_RATE :
  53.302 -		 			DEFAULT_CLOCK_RATE);
  53.303 +					DEFAULT_CLOCK_RATE);
  53.304  #else
  53.305  	3000000;
  53.306  //printf("vcpu_set_next_timer: HACK!\n");
  53.307  #endif
  53.308  #if 0
  53.309 -	if (PSCB(vcpu,xen_itm) < now)
  53.310 -		while (PSCB(vcpu,xen_itm) < now + (interval>>1))
  53.311 -			PSCB(vcpu,xen_itm) += interval;
  53.312 +	if (PSCBX(vcpu,xen_itm) < now)
  53.313 +		while (PSCBX(vcpu,xen_itm) < now + (interval>>1))
  53.314 +			PSCBX(vcpu,xen_itm) += interval;
  53.315  #endif
  53.316  #endif
  53.317  
  53.318  	if (is_idle_task(vcpu->domain)) {
  53.319  		printf("****** vcpu_set_next_timer called during idle!!\n");
  53.320  	}
  53.321 -	//s = PSCB(vcpu,xen_itm);
  53.322 +	//s = PSCBX(vcpu,xen_itm);
  53.323  	if (d && (d > now) && (d < s)) {
  53.324  		vcpu_safe_set_itm(d);
  53.325  		//using_domain_as_itm++;
  53.326 @@ -920,10 +950,10 @@ void vcpu_set_next_timer(VCPU *vcpu)
  53.327  // parameter is a time interval specified in cycles
  53.328  void vcpu_enable_timer(VCPU *vcpu,UINT64 cycles)
  53.329  {
  53.330 -    PSCB(vcpu,xen_timer_interval) = cycles;
  53.331 +    PSCBX(vcpu,xen_timer_interval) = cycles;
  53.332      vcpu_set_next_timer(vcpu);
  53.333      printf("vcpu_enable_timer(%d): interval set to %d cycles\n",
  53.334 -             PSCB(vcpu,xen_timer_interval));
  53.335 +             PSCBX(vcpu,xen_timer_interval));
  53.336      __set_bit(PSCB(vcpu,itv), PSCB(vcpu,delivery_mask));
  53.337  }
  53.338  
  53.339 @@ -933,30 +963,30 @@ IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT6
  53.340  
  53.341  	//if (val < now) val = now + 1000;
  53.342  //printf("*** vcpu_set_itm: called with %lx\n",val);
  53.343 -	PSCB(vcpu,domain_itm) = val;
  53.344 +	PSCBX(vcpu,domain_itm) = val;
  53.345  	vcpu_set_next_timer(vcpu);
  53.346  	return (IA64_NO_FAULT);
  53.347  }
  53.348  
  53.349  IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val)
  53.350  {
  53.351 -	
  53.352 +
  53.353  	UINT64 oldnow = ia64_get_itc();
  53.354 -	UINT64 olditm = PSCB(vcpu,domain_itm);
  53.355 +	UINT64 olditm = PSCBX(vcpu,domain_itm);
  53.356  	unsigned long d = olditm - oldnow;
  53.357  	unsigned long x = local_cpu_data->itm_next - oldnow;
  53.358 -	
  53.359 +
  53.360  	UINT64 newnow = val, min_delta;
  53.361  
  53.362  	local_irq_disable();
  53.363  	if (olditm) {
  53.364  printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d);
  53.365 -		PSCB(vcpu,domain_itm) = newnow + d;
  53.366 +		PSCBX(vcpu,domain_itm) = newnow + d;
  53.367  	}
  53.368  	local_cpu_data->itm_next = newnow + x;
  53.369 -	d = PSCB(vcpu,domain_itm);
  53.370 +	d = PSCBX(vcpu,domain_itm);
  53.371  	x = local_cpu_data->itm_next;
  53.372 -	
  53.373 +
  53.374  	ia64_set_itc(newnow);
  53.375  	if (d && (d > newnow) && (d < x)) {
  53.376  		vcpu_safe_set_itm(d);
  53.377 @@ -991,7 +1021,7 @@ void vcpu_pend_timer(VCPU *vcpu)
  53.378  
  53.379  	if (vcpu_timer_disabled(vcpu)) return;
  53.380  	//if (vcpu_timer_inservice(vcpu)) return;
  53.381 -	if (PSCB(vcpu,domain_itm_last) == PSCB(vcpu,domain_itm)) {
  53.382 +	if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) {
  53.383  		// already delivered an interrupt for this so
  53.384  		// don't deliver another
  53.385  		return;
  53.386 @@ -999,7 +1029,7 @@ void vcpu_pend_timer(VCPU *vcpu)
  53.387  #if 0
  53.388  	// attempt to flag "timer tick before its due" source
  53.389  	{
  53.390 -	UINT64 itm = PSCB(vcpu,domain_itm);
  53.391 +	UINT64 itm = PSCBX(vcpu,domain_itm);
  53.392  	UINT64 now = ia64_get_itc();
  53.393  	if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n");
  53.394  	}
  53.395 @@ -1011,7 +1041,7 @@ void vcpu_pend_timer(VCPU *vcpu)
  53.396  UINT64 vcpu_timer_pending_early(VCPU *vcpu)
  53.397  {
  53.398  	UINT64 now = ia64_get_itc();
  53.399 -	UINT64 itm = PSCB(vcpu,domain_itm);
  53.400 +	UINT64 itm = PSCBX(vcpu,domain_itm);
  53.401  
  53.402  	if (vcpu_timer_disabled(vcpu)) return 0;
  53.403  	if (!itm) return 0;
  53.404 @@ -1023,7 +1053,7 @@ void vcpu_poke_timer(VCPU *vcpu)
  53.405  {
  53.406  	UINT64 itv = PSCB(vcpu,itv) & 0xff;
  53.407  	UINT64 now = ia64_get_itc();
  53.408 -	UINT64 itm = PSCB(vcpu,domain_itm);
  53.409 +	UINT64 itm = PSCBX(vcpu,domain_itm);
  53.410  	UINT64 irr;
  53.411  
  53.412  	if (vcpu_timer_disabled(vcpu)) return;
  53.413 @@ -1033,8 +1063,8 @@ void vcpu_poke_timer(VCPU *vcpu)
  53.414  		while(1);
  53.415  	}
  53.416  	// using 0xef instead of itv so can get real irr
  53.417 -	if (now > itm && !test_bit(0xefL, PSCB(vcpu,insvc))) {
  53.418 -		if (!test_bit(0xefL,PSCB(vcpu,irr))) {
  53.419 +	if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) {
  53.420 +		if (!test_bit(0xefL,PSCBX(vcpu,irr))) {
  53.421  			irr = ia64_getreg(_IA64_REG_CR_IRR3);
  53.422  			if (irr & (1L<<(0xef-0xc0))) return;
  53.423  if (now-itm>0x800000)
  53.424 @@ -1091,7 +1121,7 @@ printf("SI_CR_IIP/IPSR/IFS_OFFSET CHANGE
  53.425  printf("SI_CR_IIP=0x%x,IPSR=0x%x,IFS_OFFSET=0x%x\n",SI_OFS(iip),SI_OFS(ipsr),SI_OFS(ifs));
  53.426  while(1);
  53.427  }
  53.428 -		// TODO: validate PSCB(vcpu,iip) 
  53.429 +		// TODO: validate PSCB(vcpu,iip)
  53.430  		// TODO: PSCB(vcpu,ipsr) = psr;
  53.431  		PSCB(vcpu,ipsr) = psr.i64;
  53.432  		// now set up the trampoline
  53.433 @@ -1338,7 +1368,6 @@ IA64FAULT vcpu_get_cpuid(VCPU *vcpu, UIN
  53.434  
  53.435  unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr)
  53.436  {
  53.437 -	
  53.438  	ia64_rr rr;
  53.439  
  53.440  	rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
  53.441 @@ -1348,7 +1377,6 @@ unsigned long vcpu_get_rr_ve(VCPU *vcpu,
  53.442  
  53.443  unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr)
  53.444  {
  53.445 -	
  53.446  	ia64_rr rr;
  53.447  
  53.448  	rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
  53.449 @@ -1358,7 +1386,6 @@ unsigned long vcpu_get_rr_ps(VCPU *vcpu,
  53.450  
  53.451  unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr)
  53.452  {
  53.453 -	
  53.454  	ia64_rr rr;
  53.455  
  53.456  	rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
  53.457 @@ -1445,8 +1472,8 @@ TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu
  53.458  	for (i = 0; i < count; i++, trp++) {
  53.459  		if (!trp->p) continue;
  53.460  		if (physicalize_rid(vcpu,trp->rid) != rid) continue;
  53.461 -        	if (ifa < trp->vadr) continue;
  53.462 -        	if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue;
  53.463 +		if (ifa < trp->vadr) continue;
  53.464 +		if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue;
  53.465  		//if (trp->key && !match_pkr(vcpu,trp->key)) continue;
  53.466  		return trp;
  53.467  	}
  53.468 @@ -1457,9 +1484,9 @@ TR_ENTRY *match_tr(VCPU *vcpu, unsigned 
  53.469  {
  53.470  	TR_ENTRY *trp;
  53.471  
  53.472 -	trp = vcpu_match_tr_entry(vcpu,vcpu->vcpu_info->arch.dtrs,ifa,NDTRS);
  53.473 +	trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS);
  53.474  	if (trp) return trp;
  53.475 -	trp = vcpu_match_tr_entry(vcpu,vcpu->vcpu_info->arch.itrs,ifa,NITRS);
  53.476 +	trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS);
  53.477  	if (trp) return trp;
  53.478  	return 0;
  53.479  }
  53.480 @@ -1470,7 +1497,8 @@ IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 
  53.481  	TR_ENTRY *trp;
  53.482  
  53.483  	if (slot >= NDTRS) return IA64_RSVDREG_FAULT;
  53.484 -	trp = &PSCB(vcpu,dtrs[slot]);
  53.485 +	trp = &PSCBX(vcpu,dtrs[slot]);
  53.486 +//printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa);
  53.487  	vcpu_set_tr_entry(trp,pte,itir,ifa);
  53.488  	return IA64_NO_FAULT;
  53.489  }
  53.490 @@ -1481,7 +1509,8 @@ IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 
  53.491  	TR_ENTRY *trp;
  53.492  
  53.493  	if (slot >= NITRS) return IA64_RSVDREG_FAULT;
  53.494 -	trp = &PSCB(vcpu,itrs[slot]);
  53.495 +	trp = &PSCBX(vcpu,itrs[slot]);
  53.496 +//printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa);
  53.497  	vcpu_set_tr_entry(trp,pte,itir,ifa);
  53.498  	return IA64_NO_FAULT;
  53.499  }
  53.500 @@ -1524,12 +1553,12 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64
  53.501  #endif
  53.502  	if (IorD & 0x4) return;  // don't place in 1-entry TLB
  53.503  	if (IorD & 0x1) {
  53.504 -		vcpu_set_tr_entry(&PSCB(vcpu,itlb),pte,ps<<2,vaddr);
  53.505 -		PSCB(vcpu,itlb_pte) = mp_pte;
  53.506 +		vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr);
  53.507 +		PSCBX(vcpu,itlb_pte) = mp_pte;
  53.508  	}
  53.509  	if (IorD & 0x2) {
  53.510 -		vcpu_set_tr_entry(&PSCB(vcpu,dtlb),pte,ps<<2,vaddr);
  53.511 -		PSCB(vcpu,dtlb_pte) = mp_pte;
  53.512 +		vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr);
  53.513 +		PSCBX(vcpu,dtlb_pte) = mp_pte;
  53.514  	}
  53.515  }
  53.516  
  53.517 @@ -1539,9 +1568,9 @@ unsigned long match_dtlb(VCPU *vcpu, uns
  53.518  {
  53.519  	TR_ENTRY *trp;
  53.520  
  53.521 -	if (trp = vcpu_match_tr_entry(vcpu,&vcpu->vcpu_info->arch.dtlb,ifa,1)) {
  53.522 +	if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1)) {
  53.523  		if (ps) *ps = trp->ps;
  53.524 -		if (mp_pte) *mp_pte = vcpu->vcpu_info->arch.dtlb_pte;
  53.525 +		if (mp_pte) *mp_pte = vcpu->arch.dtlb_pte;
  53.526  		return (trp->page_flags);
  53.527  	}
  53.528  	return 0UL;
  53.529 @@ -1645,8 +1674,8 @@ IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 
  53.530  #endif
  53.531  	local_flush_tlb_all();
  53.532  	// just invalidate the "whole" tlb
  53.533 -	vcpu_purge_tr_entry(&PSCB(vcpu,dtlb));
  53.534 -	vcpu_purge_tr_entry(&PSCB(vcpu,itlb));
  53.535 +	vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
  53.536 +	vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
  53.537  	return IA64_NO_FAULT;
  53.538  }
  53.539  
  53.540 @@ -1666,8 +1695,8 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 
  53.541  	vhpt_flush_address(vadr,addr_range);
  53.542  #endif
  53.543  	ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
  53.544 -	vcpu_purge_tr_entry(&PSCB(vcpu,dtlb));
  53.545 -	vcpu_purge_tr_entry(&PSCB(vcpu,itlb));
  53.546 +	vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
  53.547 +	vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
  53.548  	return IA64_NO_FAULT;
  53.549  }
  53.550  
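
The vcpu.c hunks above are largely one mechanical change: hypervisor-private
per-vcpu state (domain_itm, the dtrs/itrs arrays, the one-entry itlb/dtlb and
their saved ptes) moves out of the guest-visible shared area, so its accessor
switches from PSCB() to PSCBX(), matching the direct vcpu->arch references
that replace vcpu->vcpu_info->arch in match_tr() and match_dtlb(). A minimal
sketch of the two accessors, assuming the shapes implied by those hunks (the
struct definitions themselves are outside this diff):

    /* PSCB:  state kept in the guest-visible shared area (vcpu_info). */
    /* PSCBX: hypervisor-private state, now reached via vcpu->arch so  */
    /*        the guest can neither read nor clobber it.               */
    #define PSCB(vcpu, field)   ((vcpu)->vcpu_info->arch.field)
    #define PSCBX(vcpu, field)  ((vcpu)->arch.field)
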
    54.1 --- a/xen/arch/ia64/xenmisc.c	Fri May 06 17:04:27 2005 +0000
    54.2 +++ b/xen/arch/ia64/xenmisc.c	Wed May 11 13:43:50 2005 +0000
    54.3 @@ -79,6 +79,14 @@ void raise_actimer_softirq(void)
    54.4  	raise_softirq(AC_TIMER_SOFTIRQ);
    54.5  }
    54.6  
    54.7 +unsigned long __hypercall_create_continuation(
    54.8 +	unsigned int op, unsigned int nr_args, ...)
    54.9 +{
   54.10 +	printf("__hypercall_create_continuation: not implemented!!!\n"); return 0;	/* stub: no continuation is created */
   54.11 +}
   54.12 +
   54.13 +///////////////////////////////
   54.14 +
   54.15  ///////////////////////////////
   54.16  // from arch/x86/apic.c
   54.17  ///////////////////////////////
   54.18 @@ -139,7 +147,7 @@ void free_page_type(struct pfn_info *pag
   54.19  void show_registers(struct pt_regs *regs)
   54.20  {
   54.21  	printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n");
   54.22 -}	
   54.23 +}
   54.24  
   54.25  ///////////////////////////////
   54.26  // from common/keyhandler.c
   54.27 @@ -240,7 +248,7 @@ int id = ((struct exec_domain *)current)
   54.28  if (!cnt[id]--) { printk("%x",id); cnt[id] = 50; }
   54.29  if (!i--) { printk("+",id); cnt[id] = 100; }
   54.30  }
   54.31 -	clear_bit(EDF_RUNNING, &prev->ed_flags);
   54.32 +	clear_bit(EDF_RUNNING, &prev->flags);
   54.33  	//if (!is_idle_task(next->domain) )
   54.34  		//send_guest_virq(next, VIRQ_TIMER);
   54.35  	load_region_regs(current);
   54.36 @@ -270,7 +278,7 @@ loop:
   54.37  	printf(buf);
   54.38  	if (regs) show_registers(regs);
   54.39  	domain_pause_by_systemcontroller(current->domain);
   54.40 -	set_bit(DF_CRASHED, ed->domain->d_flags);
   54.41 +	set_bit(DF_CRASHED, &ed->domain->flags);
   54.42  	if (ed->domain->id == 0) {
   54.43  		int i = 1000000000L;
   54.44  		// if domain0 crashes, just periodically print out panic
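
__hypercall_create_continuation() is the mechanism by which a long-running
hypercall arranges to be preempted and transparently restarted; the ia64
version added above is only a stub that warns and returns 0. A hedged sketch
of the calling pattern it exists to serve; the helper name, op, and arguments
below are illustrative assumptions drawn from the x86 side of the tree, where
the function rewrites the guest's registers and instruction pointer so the
hypercall re-executes:

    /* Inside a long-running hypercall body: */
    if ( hypercall_preempt_check() )        /* assumed preemption test */
        return __hypercall_create_continuation(
            __HYPERVISOR_dom_mem_op, 5,     /* illustrative op/arity   */
            op, extent_list, nr_extents, extent_order, domid);

Until the ia64 stub is filled in, any caller reaching it on ia64 will simply
not be continued.
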
    55.1 --- a/xen/arch/ia64/xensetup.c	Fri May 06 17:04:27 2005 +0000
    55.2 +++ b/xen/arch/ia64/xensetup.c	Wed May 11 13:43:50 2005 +0000
    55.3 @@ -267,7 +267,7 @@ printk("About to call init_idle_task()\n
    55.4      if ( dom0 == NULL )
    55.5          panic("Error creating domain 0\n");
    55.6  
    55.7 -    set_bit(DF_PRIVILEGED, &dom0->d_flags);
    55.8 +    set_bit(DF_PRIVILEGED, &dom0->flags);
    55.9  
   55.10      /*
   55.11       * We're going to setup domain0 using the module(s) that we stashed safely
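
The renames in the two files above (ed_flags to flags on struct exec_domain,
d_flags to flags on struct domain) track a tree-wide consolidation to a
single atomic flag word per structure; the bit names themselves (EDF_RUNNING,
DF_CRASHED, DF_PRIVILEGED) are unchanged. A sketch of the resulting usage,
assuming the conventional layout (the struct definitions are outside this
diff):

    struct domain      { unsigned long flags;  /* DF_*  bits */ /* ... */ };
    struct exec_domain { unsigned long flags;  /* EDF_* bits */ /* ... */ };

    set_bit(DF_PRIVILEGED, &dom0->flags);   /* dom0 may issue dom0_ops */
    clear_bit(EDF_RUNNING, &prev->flags);   /* prev has left the cpu   */
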
    56.1 --- a/xen/arch/ia64/xentime.c	Fri May 06 17:04:27 2005 +0000
    56.2 +++ b/xen/arch/ia64/xentime.c	Wed May 11 13:43:50 2005 +0000
    56.3 @@ -84,6 +84,17 @@ xen_timer_interrupt (int irq, void *dev_
    56.4  {
    56.5  	unsigned long new_itm;
    56.6  
    56.7 +#define HEARTBEAT_FREQ 16	// heartbeat period in seconds (assumes a ~1024 Hz timer tick)
    56.8 +#ifdef HEARTBEAT_FREQ
    56.9 +	static long count = 0;
   56.10 +	if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) {
   56.11 +		printf("Heartbeat... iip=%p,psr.i=%d,pend=%d\n",
   56.12 +			regs->cr_iip,
   56.13 +			current->vcpu_info->arch.interrupt_delivery_enabled,
   56.14 +			current->vcpu_info->arch.pending_interruption);
   56.15 +		count = 0;
   56.16 +	}
   56.17 +#endif
   56.18  #ifndef XEN
   56.19  	if (unlikely(cpu_is_offline(smp_processor_id()))) {
   56.20  		return IRQ_HANDLED;
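
The heartbeat added above prints one line per HEARTBEAT_FREQ seconds only
under the assumption of roughly 1024 timer interrupts per second, and the
masking trick requires the tick count per period to be a power of two. A
worked check of the arithmetic:

    /* HEARTBEAT_FREQ * 1024 = 16 * 1024 = 16384 = 2^14 ticks.
     * (++count & (16384 - 1)) == 0 exactly once every 16384 calls,
     * i.e. about every 16 s at the assumed 1024 Hz tick; at a
     * different tick rate HZ the period is 16*1024/HZ seconds. */
    if (!(++count & ((HEARTBEAT_FREQ * 1024) - 1))) {
        /* ... print the heartbeat line ... */
        count = 0;
    }
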
    57.1 --- a/xen/arch/x86/Makefile	Fri May 06 17:04:27 2005 +0000
    57.2 +++ b/xen/arch/x86/Makefile	Wed May 11 13:43:50 2005 +0000
    57.3 @@ -3,6 +3,7 @@ include $(BASEDIR)/Rules.mk
    57.4  
    57.5  OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S))
    57.6  OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c))
    57.7 +OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c))
    57.8  OBJS += $(patsubst %.c,%.o,$(wildcard mtrr/*.c))
    57.9  
   57.10  OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
   57.11 @@ -34,6 +35,7 @@ clean:
   57.12  	rm -f x86_32/*.o x86_32/*~ x86_32/core
   57.13  	rm -f x86_64/*.o x86_64/*~ x86_64/core
   57.14  	rm -f mtrr/*.o mtrr/*~ mtrr/core
   57.15 +	rm -f acpi/*.o acpi/*~ acpi/core
   57.16  
   57.17  delete-unfresh-files:
   57.18  	# nothing
    58.1 --- a/xen/arch/x86/Rules.mk	Fri May 06 17:04:27 2005 +0000
    58.2 +++ b/xen/arch/x86/Rules.mk	Wed May 11 13:43:50 2005 +0000
    58.3 @@ -2,8 +2,8 @@
    58.4  # x86-specific definitions
    58.5  
    58.6  CFLAGS  += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
    58.7 -CFLAGS  += -iwithprefix include -Wall -Werror -pipe
    58.8 -CFLAGS  += -I$(BASEDIR)/include -Wno-pointer-arith -Wredundant-decls
    58.9 +CFLAGS  += -iwithprefix include -Wall -Werror -Wno-pointer-arith -pipe
   58.10 +CFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-x86/mach-default
   58.11  
   58.12  ifeq ($(optimize),y)
   58.13  CFLAGS  += -O3 -fomit-frame-pointer
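
The Rules.mk change above is what lets the imported ACPI code build:
-Wredundant-decls is dropped (presumably because the Linux-derived ACPI
headers trip it under -Werror), -Wno-pointer-arith simply moves onto the
warnings line, and $(BASEDIR)/include/asm-x86/mach-default joins the include
path so that the bare subarch includes in the new boot.c resolve. The
dependency, copied from the new file below:

    #ifdef  CONFIG_X86_LOCAL_APIC
    #include <mach_apic.h>      /* resolved via the new -I mach-default path */
    #include <mach_mpparse.h>
    #endif  /* CONFIG_X86_LOCAL_APIC */
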
    59.1 --- a/xen/arch/x86/acpi.c	Fri May 06 17:04:27 2005 +0000
    59.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    59.3 @@ -1,729 +0,0 @@
    59.4 -/*
    59.5 - *  acpi.c - Architecture-Specific Low-Level ACPI Support
    59.6 - *
    59.7 - *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
    59.8 - *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
    59.9 - *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
   59.10 - *
   59.11 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   59.12 - *
   59.13 - *  This program is free software; you can redistribute it and/or modify
   59.14 - *  it under the terms of the GNU General Public License as published by
   59.15 - *  the Free Software Foundation; either version 2 of the License, or
   59.16 - *  (at your option) any later version.
   59.17 - *
   59.18 - *  This program is distributed in the hope that it will be useful,
   59.19 - *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   59.20 - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   59.21 - *  GNU General Public License for more details.
   59.22 - *
   59.23 - *  You should have received a copy of the GNU General Public License
   59.24 - *  along with this program; if not, write to the Free Software
   59.25 - *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   59.26 - *
   59.27 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   59.28 - */
   59.29 -
   59.30 -#include <xen/config.h>
   59.31 -#include <xen/kernel.h>
   59.32 -#include <xen/init.h>
   59.33 -#include <xen/types.h>
   59.34 -#include <xen/slab.h>
   59.35 -#include <xen/pci.h>
   59.36 -#include <xen/irq.h>
   59.37 -#include <xen/acpi.h>
   59.38 -#include <asm/mpspec.h>
   59.39 -#include <asm/io.h>
   59.40 -#include <asm/apic.h>
   59.41 -#include <asm/apicdef.h>
   59.42 -#include <asm/page.h>
   59.43 -#include <asm/io_apic.h>
   59.44 -#include <asm/acpi.h>
   59.45 -#include <asm/smpboot.h>
   59.46 -
   59.47 -
   59.48 -#define PREFIX			"ACPI: "
   59.49 -
   59.50 -int acpi_lapic;
   59.51 -int acpi_ioapic;
   59.52 -int acpi_strict;
   59.53 -
   59.54 -acpi_interrupt_flags acpi_sci_flags __initdata;
   59.55 -int acpi_sci_override_gsi __initdata;
   59.56 -/* --------------------------------------------------------------------------
   59.57 -                              Boot-time Configuration
   59.58 -   -------------------------------------------------------------------------- */
   59.59 -
   59.60 -int acpi_noirq __initdata = 0;  /* skip ACPI IRQ initialization */
   59.61 -int acpi_ht __initdata = 1;     /* enable HT */
   59.62 -
   59.63 -enum acpi_irq_model_id		acpi_irq_model;
   59.64 -
   59.65 -
   59.66 -/*
   59.67 - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
   59.68 - * to map the target physical address. The problem is that set_fixmap()
   59.69 - * provides a single page, and it is possible that the page is not
   59.70 - * sufficient.
   59.71 - * By using this area, we can map up to MAX_IO_APICS pages temporarily,
   59.72 - * i.e. until the next __va_range() call.
   59.73 - *
   59.74 - * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
   59.75 - * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
   59.76 - * count idx down while incrementing the phys address.
   59.77 - */
   59.78 -char *__acpi_map_table(unsigned long phys, unsigned long size)
   59.79 -{
   59.80 -	unsigned long base, offset, mapped_size;
   59.81 -	int idx;
   59.82 -
   59.83 -	if (phys + size < 8*1024*1024) 
   59.84 -		return __va(phys); 
   59.85 -
   59.86 -	offset = phys & (PAGE_SIZE - 1);
   59.87 -	mapped_size = PAGE_SIZE - offset;
   59.88 -	set_fixmap(FIX_ACPI_END, phys);
   59.89 -	base = fix_to_virt(FIX_ACPI_END);
   59.90 -
   59.91 -	/*
   59.92 -	 * Most cases can be covered by the below.
   59.93 -	 */
   59.94 -	idx = FIX_ACPI_END;
   59.95 -	while (mapped_size < size) {
   59.96 -		if (--idx < FIX_ACPI_BEGIN)
   59.97 -			return 0;	/* cannot handle this */
   59.98 -		phys += PAGE_SIZE;
   59.99 -		set_fixmap(idx, phys);
  59.100 -		mapped_size += PAGE_SIZE;
  59.101 -	}
  59.102 -
  59.103 -	return ((char *) base + offset);
  59.104 -}
  59.105 -
  59.106 -
  59.107 -#ifdef CONFIG_X86_LOCAL_APIC
  59.108 -
  59.109 -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
  59.110 -
  59.111 -
  59.112 -static int __init
  59.113 -acpi_parse_madt (
  59.114 -	unsigned long		phys_addr,
  59.115 -	unsigned long		size)
  59.116 -{
  59.117 -	struct acpi_table_madt	*madt = NULL;
  59.118 -
  59.119 -	if (!phys_addr || !size)
  59.120 -		return -EINVAL;
  59.121 -
  59.122 -	madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
  59.123 -	if (!madt) {
  59.124 -		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
  59.125 -		return -ENODEV;
  59.126 -	}
  59.127 -
  59.128 -	if (madt->lapic_address)
  59.129 -		acpi_lapic_addr = (u64) madt->lapic_address;
  59.130 -
  59.131 -	printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n",
  59.132 -		madt->lapic_address);
  59.133 -
  59.134 -	detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id);
  59.135 -
  59.136 -	return 0;
  59.137 -}
  59.138 -
  59.139 -
  59.140 -static int __init
  59.141 -acpi_parse_lapic (
  59.142 -	acpi_table_entry_header *header)
  59.143 -{
  59.144 -	struct acpi_table_lapic	*processor = NULL;
  59.145 -
  59.146 -	processor = (struct acpi_table_lapic*) header;
  59.147 -	if (!processor)
  59.148 -		return -EINVAL;
  59.149 -
  59.150 -	acpi_table_print_madt_entry(header);
  59.151 -
  59.152 -	mp_register_lapic (
  59.153 -		processor->id,					   /* APIC ID */
  59.154 -		processor->flags.enabled);			  /* Enabled? */
  59.155 -
  59.156 -	return 0;
  59.157 -}
  59.158 -
  59.159 -
  59.160 -static int __init
  59.161 -acpi_parse_lapic_addr_ovr (
  59.162 -	acpi_table_entry_header *header)
  59.163 -{
  59.164 -	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
  59.165 -
  59.166 -	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
  59.167 -	if (!lapic_addr_ovr)
  59.168 -		return -EINVAL;
  59.169 -
  59.170 -	acpi_lapic_addr = lapic_addr_ovr->address;
  59.171 -
  59.172 -	return 0;
  59.173 -}
  59.174 -
  59.175 -static int __init
  59.176 -acpi_parse_lapic_nmi (
  59.177 -	acpi_table_entry_header *header)
  59.178 -{
  59.179 -	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
  59.180 -
  59.181 -	lapic_nmi = (struct acpi_table_lapic_nmi*) header;
  59.182 -	if (!lapic_nmi)
  59.183 -		return -EINVAL;
  59.184 -
  59.185 -	acpi_table_print_madt_entry(header);
  59.186 -
  59.187 -	if (lapic_nmi->lint != 1)
  59.188 -		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
  59.189 -
  59.190 -	return 0;
  59.191 -}
  59.192 -
  59.193 -#endif /*CONFIG_X86_LOCAL_APIC*/
  59.194 -
  59.195 -#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/
  59.196 -
  59.197 -static int __init
  59.198 -acpi_parse_ioapic (
  59.199 -	acpi_table_entry_header *header)
  59.200 -{
  59.201 -	struct acpi_table_ioapic *ioapic = NULL;
  59.202 -
  59.203 -	ioapic = (struct acpi_table_ioapic*) header;
  59.204 -	if (!ioapic)
  59.205 -		return -EINVAL;
  59.206 - 
  59.207 -	acpi_table_print_madt_entry(header);
  59.208 -
  59.209 -	mp_register_ioapic (
  59.210 -		ioapic->id,
  59.211 -		ioapic->address,
  59.212 -		ioapic->global_irq_base);
  59.213 - 
  59.214 -	return 0;
  59.215 -}
  59.216 -
  59.217 -#ifdef CONFIG_ACPI_INTERPRETER
  59.218 -/*
  59.219 - * Parse Interrupt Source Override for the ACPI SCI
  59.220 - */
  59.221 -static void
  59.222 -acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
  59.223 -{
  59.224 -	if (trigger == 0)	/* compatible SCI trigger is level */
  59.225 -		trigger = 3;
  59.226 -
  59.227 -	if (polarity == 0)	/* compatible SCI polarity is low */
  59.228 -		polarity = 3;
  59.229 -
  59.230 -	/* Command-line over-ride via acpi_sci= */
  59.231 -	if (acpi_sci_flags.trigger)
  59.232 -		trigger = acpi_sci_flags.trigger;
  59.233 -
  59.234 -	if (acpi_sci_flags.polarity)
  59.235 -		polarity = acpi_sci_flags.polarity;
  59.236 -
  59.237 -	/*
  59.238 - 	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
  59.239 -	 * If GSI is < 16, this will update its flags,
  59.240 -	 * else it will create a new mp_irqs[] entry.
  59.241 -	 */
  59.242 -	mp_override_legacy_irq(gsi, polarity, trigger, gsi);
  59.243 -
  59.244 -	/*
  59.245 -	 * stash over-ride to indicate we've been here
  59.246 -	 * and for later update of acpi_fadt
  59.247 -	 */
  59.248 -	acpi_sci_override_gsi = gsi;
  59.249 -	return;
  59.250 -}
  59.251 -#endif
  59.252 -
  59.253 -static int __init
  59.254 -acpi_parse_fadt(unsigned long phys, unsigned long size)
  59.255 -{
  59.256 -        struct fadt_descriptor_rev2 *fadt =0;
  59.257 -
  59.258 -        fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
  59.259 -        if (!fadt) {
  59.260 -                printk(KERN_WARNING PREFIX "Unable to map FADT\n");
  59.261 -                return 0;
  59.262 -        }
  59.263 -
  59.264 -#ifdef  CONFIG_ACPI_INTERPRETER
  59.265 -        /* initialize sci_int early for INT_SRC_OVR MADT parsing */
  59.266 -        acpi_fadt.sci_int = fadt->sci_int;
  59.267 -#endif
  59.268 -
  59.269 -        return 0;
  59.270 -}
  59.271 -
  59.272 -
  59.273 -static int __init
  59.274 -acpi_parse_int_src_ovr (
  59.275 -	acpi_table_entry_header *header)
  59.276 -{
  59.277 -	struct acpi_table_int_src_ovr *intsrc = NULL;
  59.278 -
  59.279 -	intsrc = (struct acpi_table_int_src_ovr*) header;
  59.280 -	if (!intsrc)
  59.281 -		return -EINVAL;
  59.282 -
  59.283 -	acpi_table_print_madt_entry(header);
  59.284 -
  59.285 -#ifdef CONFIG_ACPI_INTERPRETER
  59.286 -	if (intsrc->bus_irq == acpi_fadt.sci_int) {
  59.287 -		acpi_sci_ioapic_setup(intsrc->global_irq,
  59.288 -			intsrc->flags.polarity, intsrc->flags.trigger);
  59.289 -		return 0;
  59.290 -	}
  59.291 -#endif
  59.292 -
  59.293 -	mp_override_legacy_irq (
  59.294 -		intsrc->bus_irq,
  59.295 -		intsrc->flags.polarity,
  59.296 -		intsrc->flags.trigger,
  59.297 -		intsrc->global_irq);
  59.298 -
  59.299 -	return 0;
  59.300 -}
  59.301 -
  59.302 -
  59.303 -static int __init
  59.304 -acpi_parse_nmi_src (
  59.305 -	acpi_table_entry_header *header)
  59.306 -{
  59.307 -	struct acpi_table_nmi_src *nmi_src = NULL;
  59.308 -
  59.309 -	nmi_src = (struct acpi_table_nmi_src*) header;
  59.310 -	if (!nmi_src)
  59.311 -		return -EINVAL;
  59.312 -
  59.313 -	acpi_table_print_madt_entry(header);
  59.314 -
  59.315 -	/* TBD: Support nimsrc entries? */
  59.316 -
  59.317 -	return 0;
  59.318 -}
  59.319 -
  59.320 -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
  59.321 -
  59.322 -
  59.323 -static unsigned long __init
  59.324 -acpi_scan_rsdp (
  59.325 -	unsigned long		start,
  59.326 -	unsigned long		length)
  59.327 -{
  59.328 -	unsigned long		offset = 0;
  59.329 -	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
  59.330 -
  59.331 -	/*
  59.332 -	 * Scan all 16-byte boundaries of the physical memory region for the
  59.333 -	 * RSDP signature.
  59.334 -	 */
  59.335 -	for (offset = 0; offset < length; offset += 16) {
  59.336 -		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
  59.337 -			continue;
  59.338 -		return (start + offset);
  59.339 -	}
  59.340 -
  59.341 -	return 0;
  59.342 -}
  59.343 -
  59.344 -
  59.345 -unsigned long __init
  59.346 -acpi_find_rsdp (void)
  59.347 -{
  59.348 -	unsigned long		rsdp_phys = 0;
  59.349 -
  59.350 -	/*
  59.351 -	 * Scan memory looking for the RSDP signature. First search EBDA (low
  59.352 -	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
  59.353 -	 */
  59.354 -	rsdp_phys = acpi_scan_rsdp (0, 0x400);
  59.355 -	if (!rsdp_phys)
  59.356 -		rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
  59.357 -
  59.358 -	return rsdp_phys;
  59.359 -}
  59.360 -
  59.361 -
  59.362 -/*
  59.363 - * acpi_boot_init()
  59.364 - *  called from setup_arch(), always.
  59.365 - *	1. maps ACPI tables for later use
  59.366 - *	2. enumerates lapics
  59.367 - *	3. enumerates io-apics
  59.368 - *
  59.369 - * side effects:
  59.370 - * 	acpi_lapic = 1 if LAPIC found
  59.371 - *	acpi_ioapic = 1 if IOAPIC found
  59.372 - *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
  59.373 - *	if acpi_blacklisted() disable_acpi()
  59.374 - *	acpi_irq_model=...
  59.375 - *	...
  59.376 - *
  59.377 - * return value: (currently ignored)
  59.378 - *	0: success
  59.379 - *	!0: failure
  59.380 - */
  59.381 -int __init
  59.382 -acpi_boot_init (void)
  59.383 -{
  59.384 -	int			result = 0;
  59.385 -
  59.386 -	if (acpi_disabled && !acpi_ht)
  59.387 -		return(1);
  59.388 -
  59.389 -	/*
  59.390 -	 * The default interrupt routing model is PIC (8259).  This gets
  59.391 -	 * overriden if IOAPICs are enumerated (below).
  59.392 -	 */
  59.393 -	acpi_irq_model = ACPI_IRQ_MODEL_PIC;
  59.394 -
  59.395 -	/* 
  59.396 -	 * Initialize the ACPI boot-time table parser.
  59.397 -	 */
  59.398 -	result = acpi_table_init();
  59.399 -	if (result) {
  59.400 -		disable_acpi();
  59.401 -		return result;
  59.402 -	}
  59.403 -
  59.404 -	result = acpi_blacklisted();
  59.405 -	if (result) {
  59.406 -		printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
  59.407 -		disable_acpi();
  59.408 -		return result;
  59.409 -	}
  59.410 -
  59.411 -#ifdef CONFIG_X86_LOCAL_APIC
  59.412 -
  59.413 -	/* 
  59.414 -	 * MADT
  59.415 -	 * ----
  59.416 -	 * Parse the Multiple APIC Description Table (MADT), if exists.
  59.417 -	 * Note that this table provides platform SMP configuration 
  59.418 -	 * information -- the successor to MPS tables.
  59.419 -	 */
  59.420 -
  59.421 -	result = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
  59.422 -	if (!result) {
  59.423 -		return 0;
  59.424 -	}
  59.425 -	else if (result < 0) {
  59.426 -		printk(KERN_ERR PREFIX "Error parsing MADT\n");
  59.427 -		return result;
  59.428 -	}
  59.429 -	else if (result > 1) 
  59.430 -		printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n");
  59.431 -
  59.432 -	/* 
  59.433 -	 * Local APIC
  59.434 -	 * ----------
  59.435 -	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
  59.436 -	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
  59.437 -	 */
  59.438 -
  59.439 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr);
  59.440 -	if (result < 0) {
  59.441 -		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
  59.442 -		return result;
  59.443 -	}
  59.444 -
  59.445 -	mp_register_lapic_address(acpi_lapic_addr);
  59.446 -
  59.447 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic);
  59.448 -	if (!result) { 
  59.449 -		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
  59.450 -		/* TBD: Cleanup to allow fallback to MPS */
  59.451 -		return -ENODEV;
  59.452 -	}
  59.453 -	else if (result < 0) {
  59.454 -		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
  59.455 -		/* TBD: Cleanup to allow fallback to MPS */
  59.456 -		return result;
  59.457 -	}
  59.458 -
  59.459 -	result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi);
  59.460 -	if (result < 0) {
  59.461 -		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
  59.462 -		/* TBD: Cleanup to allow fallback to MPS */
  59.463 -		return result;
  59.464 -	}
  59.465 -
  59.466 -	acpi_lapic = 1;
  59.467 -
  59.468 -#endif /*CONFIG_X86_LOCAL_APIC*/
  59.469 -
  59.470 -#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/
  59.471 -
  59.472 -	/* 
  59.473 -	 * I/O APIC 
  59.474 -	 * --------
  59.475 -	 */
  59.476 -
  59.477 -#if 0
  59.478 -	/*
  59.479 -	 * ACPI interpreter is required to complete interrupt setup,
  59.480 -	 * so if it is off, don't enumerate the io-apics with ACPI.
  59.481 -	 * If MPS is present, it will handle them,
  59.482 -	 * otherwise the system will stay in PIC mode
  59.483 -	 */
  59.484 -	if (acpi_disabled || acpi_noirq) {
  59.485 -		return 1;
  59.486 -	}
  59.487 -#endif
  59.488 -
  59.489 -	/*
  59.490 -	 * if "noapic" boot option, don't look for IO-APICs
  59.491 -	 */
  59.492 -	if (ioapic_setup_disabled()) {
  59.493 -		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
  59.494 -			"due to 'noapic' option.\n");
  59.495 -		return 1;
  59.496 -        }
  59.497 -
  59.498 -
  59.499 -	result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic);
  59.500 -	if (!result) { 
  59.501 -		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
  59.502 -		return -ENODEV;
  59.503 -	}
  59.504 -	else if (result < 0) {
  59.505 -		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
  59.506 -		return result;
  59.507 -	}
  59.508 -
  59.509 -	/* Build a default routing table for legacy (ISA) interrupts. */
  59.510 -	mp_config_acpi_legacy_irqs();
  59.511 -
  59.512 -	/* Record sci_int for use when looking for MADT sci_int override */
  59.513 -	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
  59.514 -
  59.515 -	result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr);
  59.516 -	if (result < 0) {
  59.517 -		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
  59.518 -		/* TBD: Cleanup to allow fallback to MPS */
  59.519 -		return result;
  59.520 -	}
  59.521 -
  59.522 -#ifdef CONFIG_ACPI_INTERPRETER
  59.523 -	/*
  59.524 -	 * If BIOS did not supply an INT_SRC_OVR for the SCI
  59.525 -	 * pretend we got one so we can set the SCI flags.
  59.526 -	 */
  59.527 -	if (!acpi_sci_override_gsi)
  59.528 -		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
  59.529 -#endif
  59.530 -
  59.531 -	result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src);
  59.532 -	if (result < 0) {
  59.533 -		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
  59.534 -		/* TBD: Cleanup to allow fallback to MPS */
  59.535 -		return result;
  59.536 -	}
  59.537 -
  59.538 -	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
  59.539 -
  59.540 -	acpi_irq_balance_set(NULL);
  59.541 -
  59.542 -	acpi_ioapic = 1;
  59.543 -
  59.544 -	if (acpi_lapic && acpi_ioapic)
  59.545 -		smp_found_config = 1;
  59.546 -
  59.547 -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
  59.548 -
  59.549 -	return 0;
  59.550 -}
  59.551 -
  59.552 -
  59.553 -#ifdef	CONFIG_ACPI_BUS
  59.554 -/*
  59.555 - * acpi_pic_sci_set_trigger()
  59.556 - *
  59.557 - * use ELCR to set PIC-mode trigger type for SCI
  59.558 - *
  59.559 - * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
  59.560 - * it may require Edge Trigger -- use "acpi_sci=edge"
  59.561 - *
  59.562 - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
  59.563 - * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
  59.564 - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
  59.565 - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
  59.566 - */
  59.567 -
  59.568 -void __init
  59.569 -acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
  59.570 -{
  59.571 -	unsigned char mask = 1 << (irq & 7);
  59.572 -	unsigned int port = 0x4d0 + (irq >> 3);
  59.573 -	unsigned char val = inb(port);
  59.574 -
  59.575 -
  59.576 -	printk(PREFIX "IRQ%d SCI:", irq);
  59.577 -	if (!(val & mask)) {
  59.578 -		printk(" Edge");
  59.579 -
  59.580 -		if (trigger == 3) {
  59.581 -			printk(" set to Level");
  59.582 -			outb(val | mask, port);
  59.583 -		}
  59.584 -	} else {
  59.585 -		printk(" Level");
  59.586 -
  59.587 -		if (trigger == 1) {
  59.588 -			printk(" set to Edge");
  59.589 -			outb(val & ~mask, port);
  59.590 -		}
  59.591 -	}
  59.592 -	printk(" Trigger.\n");
  59.593 -}
  59.594 -
  59.595 -#endif /* CONFIG_ACPI_BUS */
  59.596 -
  59.597 -
  59.598 -/* --------------------------------------------------------------------------
  59.599 -                              Low-Level Sleep Support
  59.600 -   -------------------------------------------------------------------------- */
  59.601 -
  59.602 -#ifdef CONFIG_ACPI_SLEEP
  59.603 -
  59.604 -#define DEBUG
  59.605 -
  59.606 -#ifdef DEBUG
  59.607 -#include <xen/serial.h>
  59.608 -#endif
  59.609 -
  59.610 -/* address in low memory of the wakeup routine. */
  59.611 -unsigned long acpi_wakeup_address = 0;
  59.612 -
  59.613 -/* new page directory that we will be using */
  59.614 -static pmd_t *pmd;
  59.615 -
  59.616 -/* saved page directory */
  59.617 -static pmd_t saved_pmd;
  59.618 -
  59.619 -/* page which we'll use for the new page directory */
  59.620 -static pte_t *ptep;
  59.621 -
  59.622 -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
  59.623 -
  59.624 -/*
  59.625 - * acpi_create_identity_pmd
  59.626 - *
  59.627 - * Create a new, identity mapped pmd.
  59.628 - *
  59.629 - * Do this by creating new page directory, and marking all the pages as R/W
  59.630 - * Then set it as the new Page Middle Directory.
  59.631 - * And, of course, flush the TLB so it takes effect.
  59.632 - *
  59.633 - * We save the address of the old one, for later restoration.
  59.634 - */
  59.635 -static void acpi_create_identity_pmd (void)
  59.636 -{
  59.637 -	pgd_t *pgd;
  59.638 -	int i;
  59.639 -
  59.640 -	ptep = (pte_t*)__get_free_page(GFP_KERNEL);
  59.641 -
  59.642 -	/* fill page with low mapping */
  59.643 -	for (i = 0; i < PTRS_PER_PTE; i++)
  59.644 -		set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED));
  59.645 -
  59.646 -	pgd = pgd_offset(current->active_mm, 0);
  59.647 -	pmd = pmd_alloc(current->mm,pgd, 0);
  59.648 -
  59.649 -	/* save the old pmd */
  59.650 -	saved_pmd = *pmd;
  59.651 -
  59.652 -	/* set the new one */
  59.653 -	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep)));
  59.654 -
  59.655 -	/* flush the TLB */
  59.656 -	local_flush_tlb();
  59.657 -}
  59.658 -
  59.659 -/*
  59.660 - * acpi_restore_pmd
  59.661 - *
  59.662 - * Restore the old pmd saved by acpi_create_identity_pmd and
  59.663 - * free the page that said function alloc'd
  59.664 - */
  59.665 -static void acpi_restore_pmd (void)
  59.666 -{
  59.667 -	set_pmd(pmd, saved_pmd);
  59.668 -	local_flush_tlb();
  59.669 -	free_page((unsigned long)ptep);
  59.670 -}
  59.671 -
  59.672 -/**
  59.673 - * acpi_save_state_mem - save kernel state
  59.674 - *
  59.675 - * Create an identity mapped page table and copy the wakeup routine to
  59.676 - * low memory.
  59.677 - */
  59.678 -int acpi_save_state_mem (void)
  59.679 -{
  59.680 -	acpi_create_identity_pmd();
  59.681 -	acpi_copy_wakeup_routine(acpi_wakeup_address);
  59.682 -
  59.683 -	return 0;
  59.684 -}
  59.685 -
  59.686 -/**
  59.687 - * acpi_save_state_disk - save kernel state to disk
  59.688 - *
  59.689 - */
  59.690 -int acpi_save_state_disk (void)
  59.691 -{
  59.692 -	return 1;
  59.693 -}
  59.694 -
  59.695 -/*
  59.696 - * acpi_restore_state
  59.697 - */
  59.698 -void acpi_restore_state_mem (void)
  59.699 -{
  59.700 -	acpi_restore_pmd();
  59.701 -}
  59.702 -
  59.703 -/**
  59.704 - * acpi_reserve_bootmem - do _very_ early ACPI initialisation
  59.705 - *
  59.706 - * We allocate a page in low memory for the wakeup
  59.707 - * routine for when we come back from a sleep state. The
  59.708 - * runtime allocator allows specification of <16M pages, but not
  59.709 - * <1M pages.
  59.710 - */
  59.711 -void __init acpi_reserve_bootmem(void)
  59.712 -{
  59.713 -	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
  59.714 -	if (!acpi_wakeup_address)
  59.715 -		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
  59.716 -}
  59.717 -
  59.718 -void do_suspend_lowlevel_s4bios(int resume)
  59.719 -{
  59.720 -	if (!resume) {
  59.721 -		save_processor_context();
  59.722 -		acpi_save_register_state((unsigned long)&&acpi_sleep_done);
  59.723 -		acpi_enter_sleep_state_s4bios();
  59.724 -		return;
  59.725 -	}
  59.726 -acpi_sleep_done:
  59.727 -	restore_processor_context();
  59.728 -}
  59.729 -
  59.730 -
  59.731 -#endif /*CONFIG_ACPI_SLEEP*/
  59.732 -
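
Everything deleted above returns, extended, as xen/arch/x86/acpi/boot.c in
the next section (the CONFIG_ACPI_SLEEP block does not reappear in the
portion of boot.c shown here). The most significant hardening in the rewrite
is that every MADT sub-table handler now takes the table end and
bounds-checks its entry before use, via the BAD_MADT_ENTRY() macro (copied
from the new file):

    #define BAD_MADT_ENTRY(entry, end) (                                   \
                (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
                ((acpi_table_entry_header *)entry)->length != sizeof(*entry))

    /* e.g. in acpi_parse_lapic_nmi(): reject NULL, truncated, or
     * wrongly-sized entries instead of parsing garbage: */
    if (BAD_MADT_ENTRY(lapic_nmi, end))
            return -EINVAL;
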
    60.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    60.2 +++ b/xen/arch/x86/acpi/boot.c	Wed May 11 13:43:50 2005 +0000
    60.3 @@ -0,0 +1,915 @@
    60.4 +/*
    60.5 + *  boot.c - Architecture-Specific Low-Level ACPI Boot Support
    60.6 + *
    60.7 + *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
    60.8 + *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
    60.9 + *
   60.10 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   60.11 + *
   60.12 + *  This program is free software; you can redistribute it and/or modify
   60.13 + *  it under the terms of the GNU General Public License as published by
   60.14 + *  the Free Software Foundation; either version 2 of the License, or
   60.15 + *  (at your option) any later version.
   60.16 + *
   60.17 + *  This program is distributed in the hope that it will be useful,
   60.18 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   60.19 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   60.20 + *  GNU General Public License for more details.
   60.21 + *
   60.22 + *  You should have received a copy of the GNU General Public License
   60.23 + *  along with this program; if not, write to the Free Software
   60.24 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   60.25 + *
   60.26 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   60.27 + */
   60.28 +
   60.29 +#include <xen/config.h>
   60.30 +#include <xen/errno.h>
   60.31 +#include <xen/init.h>
   60.32 +#include <xen/acpi.h>
   60.33 +#include <xen/irq.h>
   60.34 +#include <xen/sched.h>
   60.35 +#include <asm/page.h>
   60.36 +#include <asm/apic.h>
   60.37 +#include <asm/io_apic.h>
   60.38 +#include <asm/apic.h>
   60.39 +#include <asm/io.h>
   60.40 +#include <asm/irq.h>
   60.41 +#include <asm/mpspec.h>
   60.42 +
   60.43 +int sbf_port;
   60.44 +#define end_pfn_map max_page
   60.45 +#define CONFIG_ACPI_PCI
   60.46 +
   60.47 +#ifdef	CONFIG_X86_64
   60.48 +
   60.49 +static inline void  acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
   60.50 +extern void __init clustered_apic_check(void);
   60.51 +static inline int ioapic_setup_disabled(void) { return 0; }
   60.52 +
   60.53 +#else	/* X86 */
   60.54 +
   60.55 +#ifdef	CONFIG_X86_LOCAL_APIC
   60.56 +#include <mach_apic.h>
   60.57 +#include <mach_mpparse.h>
   60.58 +#endif	/* CONFIG_X86_LOCAL_APIC */
   60.59 +
   60.60 +#endif	/* X86 */
   60.61 +
   60.62 +#define BAD_MADT_ENTRY(entry, end) (					    \
   60.63 +		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
   60.64 +		((acpi_table_entry_header *)entry)->length != sizeof(*entry))
   60.65 +
   60.66 +#define PREFIX			"ACPI: "
   60.67 +
   60.68 +#ifdef CONFIG_ACPI_PCI
   60.69 +int acpi_noirq __initdata;	/* skip ACPI IRQ initialization */
   60.70 +int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
   60.71 +#else
   60.72 +int acpi_noirq __initdata = 1;
   60.73 +int acpi_pci_disabled __initdata = 1;
   60.74 +#endif
   60.75 +int acpi_ht __initdata = 1;	/* enable HT */
   60.76 +
   60.77 +int acpi_lapic;
   60.78 +int acpi_ioapic;
   60.79 +int acpi_strict;
   60.80 +EXPORT_SYMBOL(acpi_strict);
   60.81 +
   60.82 +acpi_interrupt_flags acpi_sci_flags __initdata;
   60.83 +int acpi_sci_override_gsi __initdata;
   60.84 +int acpi_skip_timer_override __initdata;
   60.85 +
   60.86 +#ifdef CONFIG_X86_LOCAL_APIC
   60.87 +static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
   60.88 +#endif
   60.89 +
   60.90 +#ifndef __HAVE_ARCH_CMPXCHG
   60.91 +#warning ACPI uses CMPXCHG, i486 and later hardware
   60.92 +#endif
   60.93 +
   60.94 +#define MAX_MADT_ENTRIES	256
   60.95 +u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
   60.96 +			{ [0 ... MAX_MADT_ENTRIES-1] = 0xff };
   60.97 +EXPORT_SYMBOL(x86_acpiid_to_apicid);
   60.98 +
   60.99 +/* --------------------------------------------------------------------------
  60.100 +                              Boot-time Configuration
  60.101 +   -------------------------------------------------------------------------- */
  60.102 +
  60.103 +/*
  60.104 + * The default interrupt routing model is PIC (8259).  This gets
  60.105 + * overridden if IOAPICs are enumerated (below).
  60.106 + */
  60.107 +enum acpi_irq_model_id		acpi_irq_model = ACPI_IRQ_MODEL_PIC;
  60.108 +
  60.109 +#ifdef	CONFIG_X86_64
  60.110 +
  60.111 +/* rely on all ACPI tables being in the direct mapping */
  60.112 +char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
  60.113 +{
  60.114 +	if (!phys_addr || !size)
  60.115 +		return NULL;
  60.116 +
  60.117 +	if (phys_addr < (end_pfn_map << PAGE_SHIFT))
  60.118 +		return __va(phys_addr);
  60.119 +
  60.120 +	return NULL;
  60.121 +}
  60.122 +
  60.123 +#else
  60.124 +
  60.125 +/*
  60.126 + * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
  60.127 + * to map the target physical address. The problem is that set_fixmap()
  60.128 + * provides a single page, and it is possible that the page is not
  60.129 + * sufficient.
  60.130 + * By using this area, we can map up to MAX_IO_APICS pages temporarily,
  60.131 + * i.e. until the next __va_range() call.
  60.132 + *
  60.133 + * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
  60.134 + * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
  60.135 + * count idx down while incrementing the phys address.
  60.136 + */
  60.137 +char *__acpi_map_table(unsigned long phys, unsigned long size)
  60.138 +{
  60.139 +	unsigned long base, offset, mapped_size;
  60.140 +	int idx;
  60.141 +
  60.142 +	if (phys + size < 8*1024*1024) 
  60.143 +		return __va(phys); 
  60.144 +
  60.145 +	offset = phys & (PAGE_SIZE - 1);
  60.146 +	mapped_size = PAGE_SIZE - offset;
  60.147 +	set_fixmap(FIX_ACPI_END, phys);
  60.148 +	base = fix_to_virt(FIX_ACPI_END);
  60.149 +
  60.150 +	/*
  60.151 +	 * Most cases can be covered by the below.
  60.152 +	 */
  60.153 +	idx = FIX_ACPI_END;
  60.154 +	while (mapped_size < size) {
  60.155 +		if (--idx < FIX_ACPI_BEGIN)
  60.156 +			return NULL;	/* cannot handle this */
  60.157 +		phys += PAGE_SIZE;
  60.158 +		set_fixmap(idx, phys);
  60.159 +		mapped_size += PAGE_SIZE;
  60.160 +	}
  60.161 +
  60.162 +	return ((char *) base + offset);
  60.163 +}
  60.164 +#endif
  60.165 +
  60.166 +#ifdef CONFIG_PCI_MMCONFIG
  60.167 +static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
  60.168 +{
  60.169 +	struct acpi_table_mcfg *mcfg;
  60.170 +
  60.171 +	if (!phys_addr || !size)
  60.172 +		return -EINVAL;
  60.173 +
  60.174 +	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
  60.175 +	if (!mcfg) {
  60.176 +		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
  60.177 +		return -ENODEV;
  60.178 +	}
  60.179 +
  60.180 +	if (mcfg->base_reserved) {
  60.181 +		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
  60.182 +		return -ENODEV;
  60.183 +	}
  60.184 +
  60.185 +	pci_mmcfg_base_addr = mcfg->base_address;
  60.186 +
  60.187 +	return 0;
  60.188 +}
  60.189 +#else
  60.190 +#define	acpi_parse_mcfg NULL
  60.191 +#endif /* !CONFIG_PCI_MMCONFIG */
  60.192 +
  60.193 +#ifdef CONFIG_X86_LOCAL_APIC
  60.194 +static int __init
  60.195 +acpi_parse_madt (
  60.196 +	unsigned long		phys_addr,
  60.197 +	unsigned long		size)
  60.198 +{
  60.199 +	struct acpi_table_madt	*madt = NULL;
  60.200 +
  60.201 +	if (!phys_addr || !size)
  60.202 +		return -EINVAL;
  60.203 +
  60.204 +	madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
  60.205 +	if (!madt) {
  60.206 +		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
  60.207 +		return -ENODEV;
  60.208 +	}
  60.209 +
  60.210 +	if (madt->lapic_address) {
  60.211 +		acpi_lapic_addr = (u64) madt->lapic_address;
  60.212 +
  60.213 +		printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
  60.214 +			madt->lapic_address);
  60.215 +	}
  60.216 +
  60.217 +	acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
  60.218 +	
  60.219 +	return 0;
  60.220 +}
  60.221 +
  60.222 +
  60.223 +static int __init
  60.224 +acpi_parse_lapic (
  60.225 +	acpi_table_entry_header *header, const unsigned long end)
  60.226 +{
  60.227 +	struct acpi_table_lapic	*processor = NULL;
  60.228 +
  60.229 +	processor = (struct acpi_table_lapic*) header;
  60.230 +
  60.231 +	if (BAD_MADT_ENTRY(processor, end))
  60.232 +		return -EINVAL;
  60.233 +
  60.234 +	acpi_table_print_madt_entry(header);
  60.235 +
  60.236 +	/* no utility in registering a disabled processor */
  60.237 +	if (processor->flags.enabled == 0)
  60.238 +		return 0;
  60.239 +
  60.240 +	x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
  60.241 +
  60.242 +	mp_register_lapic (
  60.243 +		processor->id,					   /* APIC ID */
  60.244 +		processor->flags.enabled);			  /* Enabled? */
  60.245 +
  60.246 +	return 0;
  60.247 +}
  60.248 +
  60.249 +static int __init
  60.250 +acpi_parse_lapic_addr_ovr (
  60.251 +	acpi_table_entry_header *header, const unsigned long end)
  60.252 +{
  60.253 +	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
  60.254 +
  60.255 +	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
  60.256 +
  60.257 +	if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
  60.258 +		return -EINVAL;
  60.259 +
  60.260 +	acpi_lapic_addr = lapic_addr_ovr->address;
  60.261 +
  60.262 +	return 0;
  60.263 +}
  60.264 +
  60.265 +static int __init
  60.266 +acpi_parse_lapic_nmi (
  60.267 +	acpi_table_entry_header *header, const unsigned long end)
  60.268 +{
  60.269 +	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
  60.270 +
  60.271 +	lapic_nmi = (struct acpi_table_lapic_nmi*) header;
  60.272 +
  60.273 +	if (BAD_MADT_ENTRY(lapic_nmi, end))
  60.274 +		return -EINVAL;
  60.275 +
  60.276 +	acpi_table_print_madt_entry(header);
  60.277 +
  60.278 +	if (lapic_nmi->lint != 1)
  60.279 +		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
  60.280 +
  60.281 +	return 0;
  60.282 +}
  60.283 +
  60.284 +
  60.285 +#endif /*CONFIG_X86_LOCAL_APIC*/
  60.286 +
  60.287 +#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/
  60.288 +
  60.289 +static int __init
  60.290 +acpi_parse_ioapic (
  60.291 +	acpi_table_entry_header *header, const unsigned long end)
  60.292 +{
  60.293 +	struct acpi_table_ioapic *ioapic = NULL;
  60.294 +
  60.295 +	ioapic = (struct acpi_table_ioapic*) header;
  60.296 +
  60.297 +	if (BAD_MADT_ENTRY(ioapic, end))
  60.298 +		return -EINVAL;
  60.299 + 
  60.300 +	acpi_table_print_madt_entry(header);
  60.301 +
  60.302 +	mp_register_ioapic (
  60.303 +		ioapic->id,
  60.304 +		ioapic->address,
  60.305 +		ioapic->global_irq_base);
  60.306 + 
  60.307 +	return 0;
  60.308 +}
  60.309 +
  60.310 +#ifdef CONFIG_ACPI_INTERPRETER
  60.311 +/*
  60.312 + * Parse Interrupt Source Override for the ACPI SCI
  60.313 + */
  60.314 +static void
  60.315 +acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
  60.316 +{
  60.317 +	if (trigger == 0)	/* compatible SCI trigger is level */
  60.318 +		trigger = 3;
  60.319 +
  60.320 +	if (polarity == 0)	/* compatible SCI polarity is low */
  60.321 +		polarity = 3;
  60.322 +
  60.323 +	/* Command-line over-ride via acpi_sci= */
  60.324 +	if (acpi_sci_flags.trigger)
  60.325 +		trigger = acpi_sci_flags.trigger;
  60.326 +
  60.327 +	if (acpi_sci_flags.polarity)
  60.328 +		polarity = acpi_sci_flags.polarity;
  60.329 +
  60.330 +	/*
  60.331 + 	 * mp_config_acpi_legacy_irqs() already set up IRQs < 16
  60.332 +	 * If GSI is < 16, this will update its flags,
  60.333 +	 * else it will create a new mp_irqs[] entry.
  60.334 +	 */
  60.335 +	mp_override_legacy_irq(gsi, polarity, trigger, gsi);
  60.336 +
  60.337 +	/*
  60.338 +	 * stash over-ride to indicate we've been here
  60.339 +	 * and for later update of acpi_fadt
  60.340 +	 */
  60.341 +	acpi_sci_override_gsi = gsi;
  60.342 +	return;
  60.343 +}
  60.344 +#endif
  60.345 +
  60.346 +static int __init
  60.347 +acpi_parse_int_src_ovr (
  60.348 +	acpi_table_entry_header *header, const unsigned long end)
  60.349 +{
  60.350 +	struct acpi_table_int_src_ovr *intsrc = NULL;
  60.351 +
  60.352 +	intsrc = (struct acpi_table_int_src_ovr*) header;
  60.353 +
  60.354 +	if (BAD_MADT_ENTRY(intsrc, end))
  60.355 +		return -EINVAL;
  60.356 +
  60.357 +	acpi_table_print_madt_entry(header);
  60.358 +
  60.359 +#ifdef CONFIG_ACPI_INTERPRETER
  60.360 +	if (intsrc->bus_irq == acpi_fadt.sci_int) {
  60.361 +		acpi_sci_ioapic_setup(intsrc->global_irq,
  60.362 +			intsrc->flags.polarity, intsrc->flags.trigger);
  60.363 +		return 0;
  60.364 +	}
  60.365 +#endif
  60.366 +
  60.367 +	if (acpi_skip_timer_override &&
  60.368 +		intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
  60.369 +			printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
  60.370 +			return 0;
  60.371 +	}
  60.372 +
  60.373 +	mp_override_legacy_irq (
  60.374 +		intsrc->bus_irq,
  60.375 +		intsrc->flags.polarity,
  60.376 +		intsrc->flags.trigger,
  60.377 +		intsrc->global_irq);
  60.378 +
  60.379 +	return 0;
  60.380 +}
  60.381 +
  60.382 +
  60.383 +static int __init
  60.384 +acpi_parse_nmi_src (
  60.385 +	acpi_table_entry_header *header, const unsigned long end)
  60.386 +{
  60.387 +	struct acpi_table_nmi_src *nmi_src = NULL;
  60.388 +
  60.389 +	nmi_src = (struct acpi_table_nmi_src*) header;
  60.390 +
  60.391 +	if (BAD_MADT_ENTRY(nmi_src, end))
  60.392 +		return -EINVAL;
  60.393 +
  60.394 +	acpi_table_print_madt_entry(header);
  60.395 +
  60.396 +	/* TBD: Support nmi_src entries? */
  60.397 +
  60.398 +	return 0;
  60.399 +}
  60.400 +
  60.401 +#endif /* CONFIG_X86_IO_APIC */
  60.402 +
  60.403 +#ifdef	CONFIG_ACPI_BUS
  60.404 +
  60.405 +/*
  60.406 + * acpi_pic_sci_set_trigger()
  60.407 + * 
  60.408 + * use ELCR to set PIC-mode trigger type for SCI
  60.409 + *
  60.410 + * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
  60.411 + * it may require Edge Trigger -- use "acpi_sci=edge"
  60.412 + *
  60.413 + * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
  60.414 + * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
  60.415 + * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
  60.416 + * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
  60.417 + */
  60.418 +
  60.419 +void __init
  60.420 +acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
  60.421 +{
  60.422 +	unsigned int mask = 1 << irq;
  60.423 +	unsigned int old, new;
  60.424 +
  60.425 +	/* Real old ELCR mask */
  60.426 +	old = inb(0x4d0) | (inb(0x4d1) << 8);
  60.427 +
  60.428 +	/*
  60.429 +	 * If we use ACPI to set PCI irq's, then we should clear ELCR
  60.430 +	 * since we will set it correctly as we enable the PCI irq
  60.431 +	 * routing.
  60.432 +	 */
  60.433 +	new = acpi_noirq ? old : 0;
  60.434 +
  60.435 +	/*
  60.436 +	 * Update SCI information in the ELCR, it isn't in the PCI
  60.437 +	 * routing tables..
  60.438 +	 */
  60.439 +	switch (trigger) {
  60.440 +	case 1:	/* Edge - clear */
  60.441 +		new &= ~mask;
  60.442 +		break;
  60.443 +	case 3: /* Level - set */
  60.444 +		new |= mask;
  60.445 +		break;
  60.446 +	}
  60.447 +
  60.448 +	if (old == new)
  60.449 +		return;
  60.450 +
  60.451 +	printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
  60.452 +	outb(new, 0x4d0);
  60.453 +	outb(new >> 8, 0x4d1);
  60.454 +}
  60.455 +
  60.456 +
  60.457 +#endif /* CONFIG_ACPI_BUS */
  60.458 +
  60.459 +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  60.460 +{
  60.461 +#ifdef CONFIG_X86_IO_APIC
  60.462 +	if (use_pci_vector() && !platform_legacy_irq(gsi))
  60.463 + 		*irq = IO_APIC_VECTOR(gsi);
  60.464 +	else
  60.465 +#endif
  60.466 +		*irq = gsi;
  60.467 +	return 0;
  60.468 +}
  60.469 +
  60.470 +unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
  60.471 +{
  60.472 +	unsigned int irq;
  60.473 +	unsigned int plat_gsi = gsi;
  60.474 +
  60.475 +#ifdef CONFIG_PCI
  60.476 +	/*
  60.477 +	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
  60.478 +	 */
  60.479 +	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
  60.480 +		extern void eisa_set_level_irq(unsigned int irq);
  60.481 +
  60.482 +		if (edge_level == ACPI_LEVEL_SENSITIVE)
  60.483 +				eisa_set_level_irq(gsi);
  60.484 +	}
  60.485 +#endif
  60.486 +
  60.487 +#ifdef CONFIG_X86_IO_APIC
  60.488 +	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
  60.489 +		plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
  60.490 +	}
  60.491 +#endif
  60.492 +	acpi_gsi_to_irq(plat_gsi, &irq);
  60.493 +	return irq;
  60.494 +}
  60.495 +EXPORT_SYMBOL(acpi_register_gsi);
  60.496 +
  60.497 +/*
  60.498 + *  ACPI based hotplug support for CPU
  60.499 + */
  60.500 +#ifdef CONFIG_ACPI_HOTPLUG_CPU
  60.501 +int
  60.502 +acpi_map_lsapic(acpi_handle handle, int *pcpu)
  60.503 +{
  60.504 +	/* TBD */
  60.505 +	return -EINVAL;
  60.506 +}
  60.507 +EXPORT_SYMBOL(acpi_map_lsapic);
  60.508 +
  60.509 +
  60.510 +int
  60.511 +acpi_unmap_lsapic(int cpu)
  60.512 +{
  60.513 +	/* TBD */
  60.514 +	return -EINVAL;
  60.515 +}
  60.516 +EXPORT_SYMBOL(acpi_unmap_lsapic);
  60.517 +#endif /* CONFIG_ACPI_HOTPLUG_CPU */
  60.518 +
  60.519 +static unsigned long __init
  60.520 +acpi_scan_rsdp (
  60.521 +	unsigned long		start,
  60.522 +	unsigned long		length)
  60.523 +{
  60.524 +	unsigned long		offset = 0;
  60.525 +	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
  60.526 +
  60.527 +	/*
  60.528 +	 * Scan all 16-byte boundaries of the physical memory region for the
  60.529 +	 * RSDP signature.
  60.530 +	 */
  60.531 +	for (offset = 0; offset < length; offset += 16) {
  60.532 +		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
  60.533 +			continue;
  60.534 +		return (start + offset);
  60.535 +	}
  60.536 +
  60.537 +	return 0;
  60.538 +}
  60.539 +
  60.540 +static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
  60.541 +{
  60.542 +	struct acpi_table_sbf *sb;
  60.543 +
  60.544 +	if (!phys_addr || !size)
  60.545 +		return -EINVAL;
  60.546 +
  60.547 +	sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
  60.548 +	if (!sb) {
  60.549 +		printk(KERN_WARNING PREFIX "Unable to map SBF\n");
  60.550 +		return -ENODEV;
  60.551 +	}
  60.552 +
  60.553 +	sbf_port = sb->sbf_cmos; /* Save CMOS port */
  60.554 +
  60.555 +	return 0;
  60.556 +}
  60.557 +
  60.558 +
  60.559 +#ifdef CONFIG_HPET_TIMER
  60.560 +
  60.561 +static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
  60.562 +{
  60.563 +	struct acpi_table_hpet *hpet_tbl;
  60.564 +
  60.565 +	if (!phys || !size)
  60.566 +		return -EINVAL;
  60.567 +
  60.568 +	hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
  60.569 +	if (!hpet_tbl) {
  60.570 +		printk(KERN_WARNING PREFIX "Unable to map HPET\n");
  60.571 +		return -ENODEV;
  60.572 +	}
  60.573 +
  60.574 +	if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
  60.575 +		printk(KERN_WARNING PREFIX "HPET timers must be located in "
  60.576 +		       "memory.\n");
  60.577 +		return -1;
  60.578 +	}
  60.579 +
  60.580 +#ifdef	CONFIG_X86_64
  60.581 +        vxtime.hpet_address = hpet_tbl->addr.addrl |
  60.582 +                ((long) hpet_tbl->addr.addrh << 32);
  60.583 +
  60.584 +        printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
  60.585 +               hpet_tbl->id, vxtime.hpet_address);
  60.586 +#else	/* X86 */
  60.587 +	{
  60.588 +		extern unsigned long hpet_address;
  60.589 +
  60.590 +		hpet_address = hpet_tbl->addr.addrl;
  60.591 +		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
  60.592 +			hpet_tbl->id, hpet_address);
  60.593 +	}
  60.594 +#endif	/* X86 */
  60.595 +
  60.596 +	return 0;
  60.597 +}
  60.598 +#else
  60.599 +#define	acpi_parse_hpet	NULL
  60.600 +#endif
  60.601 +
  60.602 +#ifdef CONFIG_X86_PM_TIMER
  60.603 +extern u32 pmtmr_ioport;
  60.604 +#endif
  60.605 +
  60.606 +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
  60.607 +{
  60.608 +	struct fadt_descriptor_rev2 *fadt = NULL;
  60.609 +
  60.610 +	fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
  60.611 +	if(!fadt) {
  60.612 +		printk(KERN_WARNING PREFIX "Unable to map FADT\n");
  60.613 +		return 0;
  60.614 +	}
  60.615 +
  60.616 +#ifdef	CONFIG_ACPI_INTERPRETER
  60.617 +	/* initialize sci_int early for INT_SRC_OVR MADT parsing */
  60.618 +	acpi_fadt.sci_int = fadt->sci_int;
  60.619 +#endif
  60.620 +
  60.621 +#ifdef CONFIG_X86_PM_TIMER
  60.622 +	/* detect the location of the ACPI PM Timer */
  60.623 +	if (fadt->revision >= FADT2_REVISION_ID) {
  60.624 +		/* FADT rev. 2 */
  60.625 +		if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
  60.626 +			return 0;
  60.627 +
  60.628 +		pmtmr_ioport = fadt->xpm_tmr_blk.address;
  60.629 +	} else {
  60.630 +		/* FADT rev. 1 */
  60.631 +		pmtmr_ioport = fadt->V1_pm_tmr_blk;
  60.632 +	}
  60.633 +	if (pmtmr_ioport)
  60.634 +		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
  60.635 +#endif
  60.636 +	return 0;
  60.637 +}
  60.638 +
  60.639 +
  60.640 +unsigned long __init
  60.641 +acpi_find_rsdp (void)
  60.642 +{
  60.643 +	unsigned long		rsdp_phys = 0;
  60.644 +
  60.645 +#if 0
  60.646 +	if (efi_enabled) {
  60.647 +		if (efi.acpi20)
  60.648 +			return __pa(efi.acpi20);
  60.649 +		else if (efi.acpi)
  60.650 +			return __pa(efi.acpi);
  60.651 +	}
  60.652 +#endif
  60.653 +	/*
  60.654 +	 * Scan memory looking for the RSDP signature. First search EBDA (low
  60.655 +	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
  60.656 +	 */
  60.657 +	rsdp_phys = acpi_scan_rsdp (0, 0x400);
  60.658 +	if (!rsdp_phys)
  60.659 +		rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
  60.660 +
  60.661 +	return rsdp_phys;
  60.662 +}
  60.663 +
  60.664 +#ifdef	CONFIG_X86_LOCAL_APIC
  60.665 +/*
  60.666 + * Parse LAPIC entries in MADT
  60.667 + * returns 0 on success, < 0 on error
  60.668 + */
  60.669 +static int __init
  60.670 +acpi_parse_madt_lapic_entries(void)
  60.671 +{
  60.672 +	int count;
  60.673 +
  60.674 +	/* 
  60.675 +	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
  60.676 +	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
  60.677 +	 */
  60.678 +
  60.679 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
  60.680 +	if (count < 0) {
  60.681 +		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
  60.682 +		return count;
  60.683 +	}
  60.684 +
  60.685 +	mp_register_lapic_address(acpi_lapic_addr);
  60.686 +
  60.687 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
  60.688 +				       MAX_APICS);
  60.689 +	if (!count) { 
  60.690 +		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
  60.691 +		/* TBD: Cleanup to allow fallback to MPS */
  60.692 +		return -ENODEV;
  60.693 +	}
  60.694 +	else if (count < 0) {
  60.695 +		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
  60.696 +		/* TBD: Cleanup to allow fallback to MPS */
  60.697 +		return count;
  60.698 +	}
  60.699 +
  60.700 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
  60.701 +	if (count < 0) {
  60.702 +		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
  60.703 +		/* TBD: Cleanup to allow fallback to MPS */
  60.704 +		return count;
  60.705 +	}
  60.706 +	return 0;
  60.707 +}
  60.708 +#endif /* CONFIG_X86_LOCAL_APIC */
  60.709 +
  60.710 +#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/
  60.711 +/*
  60.712 + * Parse IOAPIC related entries in MADT
  60.713 + * returns 0 on success, < 0 on error
  60.714 + */
  60.715 +static int __init
  60.716 +acpi_parse_madt_ioapic_entries(void)
  60.717 +{
  60.718 +	int count;
  60.719 +
  60.720 +	/*
  60.721 +	 * ACPI interpreter is required to complete interrupt setup,
  60.722 +	 * so if it is off, don't enumerate the io-apics with ACPI.
  60.723 +	 * If MPS is present, it will handle them,
  60.724 +	 * otherwise the system will stay in PIC mode
  60.725 +	 */
  60.726 +	if (acpi_disabled || acpi_noirq) {
  60.727 +		return -ENODEV;
  60.728 +        }
  60.729 +
  60.730 +	/*
  60.731 + 	 * if "noapic" boot option, don't look for IO-APICs
  60.732 +	 */
  60.733 +	if (skip_ioapic_setup) {
  60.734 +		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
  60.735 +			"due to 'noapic' option.\n");
  60.736 +		return -ENODEV;
  60.737 +	}
  60.738 +
  60.739 +	count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
  60.740 +	if (!count) {
  60.741 +		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
  60.742 +		return -ENODEV;
  60.743 +	}
  60.744 +	else if (count < 0) {
  60.745 +		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
  60.746 +		return count;
  60.747 +	}
  60.748 +
  60.749 +	count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
  60.750 +	if (count < 0) {
  60.751 +		printk(KERN_ERR PREFIX "Error parsing interrupt source override entry\n");
  60.752 +		/* TBD: Cleanup to allow fallback to MPS */
  60.753 +		return count;
  60.754 +	}
  60.755 +
  60.756 +#ifdef CONFIG_ACPI_INTERPRETER
  60.757 +	/*
  60.758 +	 * If BIOS did not supply an INT_SRC_OVR for the SCI
  60.759 +	 * pretend we got one so we can set the SCI flags.
  60.760 +	 */
  60.761 +	if (!acpi_sci_override_gsi)
  60.762 +		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
  60.763 +#endif
  60.764 +
  60.765 +	/* Fill in identity legacy mappings where there is no override */
  60.766 +	mp_config_acpi_legacy_irqs();
  60.767 +
  60.768 +	count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
  60.769 +	if (count < 0) {
  60.770 +		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
  60.771 +		/* TBD: Cleanup to allow fallback to MPS */
  60.772 +		return count;
  60.773 +	}
  60.774 +
  60.775 +	return 0;
  60.776 +}
  60.777 +#else
  60.778 +static inline int acpi_parse_madt_ioapic_entries(void)
  60.779 +{
  60.780 +	return -1;
  60.781 +}
  60.782 +#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */
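
For the INT_SRC_OVR parsing a few hunks above: an interrupt source override redirects a fixed ISA IRQ to a different global system interrupt and may change its polarity and trigger mode, which is why the SCI handling just above pretends an override exists even when the BIOS omits one. A hedged sketch of the record, with illustrative names:

    /* Hedged sketch of an MADT interrupt source override record
     * (ACPI spec layout; names are illustrative). */
    struct madt_int_src_ovr_sketch {
        u8  type;        /* 2 = interrupt source override */
        u8  length;      /* 10 */
        u8  bus;         /* 0 = ISA */
        u8  source_irq;  /* original ISA IRQ */
        u32 global_irq;  /* GSI it is routed to */
        u16 flags;       /* polarity / trigger mode */
    } __attribute__ ((packed));
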
  60.783 +
  60.784 +
  60.785 +static void __init
  60.786 +acpi_process_madt(void)
  60.787 +{
  60.788 +#ifdef CONFIG_X86_LOCAL_APIC
  60.789 +	int count, error;
  60.790 +
  60.791 +	count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
  60.792 +	if (count >= 1) {
  60.793 +
  60.794 +		/*
  60.795 +		 * Parse MADT LAPIC entries
  60.796 +		 */
  60.797 +		error = acpi_parse_madt_lapic_entries();
  60.798 +		if (!error) {
  60.799 +			acpi_lapic = 1;
  60.800 +
  60.801 +			/*
  60.802 +			 * Parse MADT IO-APIC entries
  60.803 +			 */
  60.804 +			error = acpi_parse_madt_ioapic_entries();
  60.805 +			if (!error) {
  60.806 +				acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
  60.807 +				acpi_irq_balance_set(NULL);
  60.808 +				acpi_ioapic = 1;
  60.809 +
  60.810 +				smp_found_config = 1;
  60.811 +				clustered_apic_check();
  60.812 +			}
  60.813 +		}
  60.814 +		if (error == -EINVAL) {
  60.815 +			/*
  60.816 +			 * Dell Precision Workstation 410, 610 come here.
  60.817 +			 */
  60.818 +			printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n");
  60.819 +			disable_acpi();
  60.820 +		}
  60.821 +	}
  60.822 +#endif
  60.823 +	return;
  60.824 +}
  60.825 +
  60.826 +/*
  60.827 + * acpi_boot_table_init() and acpi_boot_init()
  60.828 + *  called from setup_arch(), always.
  60.829 + *	1. checksums all tables
  60.830 + *	2. enumerates lapics
  60.831 + *	3. enumerates io-apics
  60.832 + *
  60.833 + * acpi_table_init() is separate to allow reading SRAT without
  60.834 + * other side effects.
  60.835 + *
  60.836 + * side effects of acpi_boot_init:
  60.837 + *	acpi_lapic = 1 if LAPIC found
  60.838 + *	acpi_ioapic = 1 if IOAPIC found
  60.839 + *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
  60.840 + *	if acpi_blacklisted() acpi_disabled = 1;
  60.841 + *	acpi_irq_model=...
  60.842 + *	...
  60.843 + *
  60.844 + * return value: (currently ignored)
  60.845 + *	0: success
  60.846 + *	!0: failure
  60.847 + */
  60.848 +
  60.849 +int __init
  60.850 +acpi_boot_table_init(void)
  60.851 +{
  60.852 +	int error;
  60.853 +
  60.854 +	/*
  60.855 +	 * If acpi_disabled, bail out
  60.856 +	 * One exception: acpi=ht continues far enough to enumerate LAPICs
  60.857 +	 */
  60.858 +	if (acpi_disabled && !acpi_ht)
  60.859 +		return 1;
  60.860 +
  60.861 +	/* 
  60.862 +	 * Initialize the ACPI boot-time table parser.
  60.863 +	 */
  60.864 +	error = acpi_table_init();
  60.865 +	if (error) {
  60.866 +		disable_acpi();
  60.867 +		return error;
  60.868 +	}
  60.869 +
  60.870 +	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  60.871 +
  60.872 +	/*
  60.873 +	 * blacklist may disable ACPI entirely
  60.874 +	 */
  60.875 +	error = acpi_blacklisted();
  60.876 +	if (error) {
  60.877 +		extern int acpi_force;
  60.878 +
  60.879 +		if (acpi_force) {
  60.880 +			printk(KERN_WARNING PREFIX "acpi=force override\n");
  60.881 +		} else {
  60.882 +			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
  60.883 +			disable_acpi();
  60.884 +			return error;
  60.885 +		}
  60.886 +	}
  60.887 +
  60.888 +	return 0;
  60.889 +}
  60.890 +
  60.891 +
  60.892 +int __init acpi_boot_init(void)
  60.893 +{
  60.894 +	/*
  60.895 +	 * If acpi_disabled, bail out
  60.896 +	 * One exception: acpi=ht continues far enough to enumerate LAPICs
  60.897 +	 */
  60.898 +	if (acpi_disabled && !acpi_ht)
  60.899 +		return 1;
  60.900 +
  60.901 +	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  60.902 +
  60.903 +	/*
  60.904 +	 * set sci_int and PM timer address
  60.905 +	 */
  60.906 +	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
  60.907 +
  60.908 +	/*
  60.909 +	 * Process the Multiple APIC Description Table (MADT), if present
  60.910 +	 */
  60.911 +	acpi_process_madt();
  60.912 +
  60.913 +	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
  60.914 +	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
  60.915 +
  60.916 +	return 0;
  60.917 +}
  60.918 +
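
Tying the two entry points together: per the block comment above, setup_arch() is expected to call acpi_boot_table_init() first (locate and checksum the tables, possibly disabling ACPI via the blacklist) and acpi_boot_init() afterwards. A minimal sketch of that call order; the actual call sites in setup.c are not part of this hunk:

    /* Hedged sketch of the expected call order from setup_arch(). */
    void __init setup_arch_acpi_sketch(void)
    {
        acpi_boot_table_init();  /* checksum tables; may disable ACPI */
        acpi_boot_init();        /* parse SBF, FADT, MADT, HPET, MCFG */
    }
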
    61.1 --- a/xen/arch/x86/apic.c	Fri May 06 17:04:27 2005 +0000
    61.2 +++ b/xen/arch/x86/apic.c	Wed May 11 13:43:50 2005 +0000
    61.3 @@ -34,14 +34,27 @@
    61.4  #include <asm/hardirq.h>
    61.5  #include <asm/apic.h>
    61.6  #include <asm/io_apic.h>
    61.7 -#include <asm/mach_apic.h>
    61.8 -#include <asm/io_ports.h>
    61.9 +#include <mach_apic.h>
   61.10 +#include <io_ports.h>
   61.11  
   61.12  /* Using APIC to generate smp_local_timer_interrupt? */
   61.13  int using_apic_timer = 0;
   61.14  
   61.15 +int apic_verbosity;
   61.16 +
   61.17  static int enabled_via_apicbase;
   61.18  
   61.19 +int get_physical_broadcast(void)
   61.20 +{
   61.21 +    unsigned int lvr, version;
   61.22 +    lvr = apic_read(APIC_LVR);
   61.23 +    version = GET_APIC_VERSION(lvr);
   61.24 +    if (!APIC_INTEGRATED(version) || version >= 0x14)
   61.25 +        return 0xff;
   61.26 +    else
   61.27 +        return 0xf;
   61.28 +}
   61.29 +
   61.30  int get_maxlvt(void)
   61.31  {
   61.32      unsigned int v, ver, maxlvt;
   61.33 @@ -907,7 +920,7 @@ int __init APIC_init_uniprocessor (void)
   61.34  #ifdef CONFIG_SMP
   61.35      cpu_online_map = 1;
   61.36  #endif
   61.37 -    phys_cpu_present_map = 1;
   61.38 +    phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
   61.39      apic_write_around(APIC_ID, boot_cpu_physical_apicid);
   61.40  
   61.41      setup_local_APIC();
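
Two notes on the apic.c hunk above. First, phys_cpu_present_map is now seeded from the boot CPU's real APIC ID via physid_mask_of_physid() instead of assuming APIC ID 0. Second, get_physical_broadcast() exists because older integrated local APICs (version below 0x14) decode only a 4-bit physical destination, so a physical-mode broadcast must use 0xf rather than 0xff. A hedged usage sketch; SET_APIC_DEST_FIELD is assumed from the apicdef headers:

    /* Hedged sketch: pick the physical-broadcast destination that the
     * local APIC can actually decode. */
    unsigned int dest = get_physical_broadcast();       /* 0xf or 0xff */
    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(dest));
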
    62.1 --- a/xen/arch/x86/audit.c	Fri May 06 17:04:27 2005 +0000
    62.2 +++ b/xen/arch/x86/audit.c	Wed May 11 13:43:50 2005 +0000
    62.3 @@ -49,7 +49,8 @@ static int l1, l2, oos_count, page_count
    62.4  int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
    62.5  {
    62.6      int errors = 0;
    62.7 -    int shadow_enabled = shadow_mode_enabled(d) ? 1 : 0;
    62.8 +    int shadow_refcounts = !!shadow_mode_refcounts(d);
    62.9 +    int shadow_enabled = !!shadow_mode_enabled(d);
   62.10      int l2limit;
   62.11  
   62.12      void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
   62.13 @@ -119,7 +120,7 @@ int audit_adjust_pgtables(struct domain 
   62.14              page->count_info += dir;
   62.15      }
   62.16  
   62.17 -    void adjust_l2_page(unsigned long mfn)
   62.18 +    void adjust_l2_page(unsigned long mfn, int shadow)
   62.19      {
   62.20          unsigned long *pt = map_domain_mem(mfn << PAGE_SHIFT);
   62.21          int i;
   62.22 @@ -133,7 +134,7 @@ int audit_adjust_pgtables(struct domain 
   62.23  
   62.24                  if ( noisy )
   62.25                  {
   62.26 -                    if ( shadow_enabled )
   62.27 +                    if ( shadow )
   62.28                      {
   62.29                          if ( page_get_owner(l1page) != NULL )
   62.30                          {
   62.31 @@ -145,6 +146,17 @@ int audit_adjust_pgtables(struct domain 
   62.32                              errors++;
   62.33                              continue;
   62.34                          }
   62.35 +
   62.36 +                        u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
   62.37 +
   62.38 +                        if ( page_type != PGT_l1_shadow )
   62.39 +                        {
   62.40 +                            printk("Audit %d: [Shadow L2 mfn=%lx i=%x] "
   62.41 +                                   "Expected Shadow L1 t=%x mfn=%lx\n",
   62.42 +                                   d->id, mfn, i,
   62.43 +                                   l1page->u.inuse.type_info, l1mfn);
   62.44 +                            errors++;
   62.45 +                        }
   62.46                      }
   62.47                      else
   62.48                      {
   62.49 @@ -154,7 +166,9 @@ int audit_adjust_pgtables(struct domain 
   62.50                                     "belonging to other dom %p (id=%d)\n",
   62.51                                     l1mfn,
   62.52                                     page_get_owner(l1page),
   62.53 -                                   page_get_owner(l1page)->id);
   62.54 +                                   (page_get_owner(l1page)
   62.55 +                                    ? page_get_owner(l1page)->id
   62.56 +                                    : -1));
   62.57                              errors++;
   62.58                              continue;
   62.59                          }
   62.60 @@ -179,7 +193,7 @@ int audit_adjust_pgtables(struct domain 
   62.61                      }
   62.62                  }
   62.63  
   62.64 -                adjust(l1page, !shadow_enabled);
   62.65 +                adjust(l1page, !shadow);
   62.66              }
   62.67          }
   62.68  
   62.69 @@ -280,7 +294,7 @@ int audit_adjust_pgtables(struct domain 
   62.70                              errors++;
   62.71                          }
   62.72  
   62.73 -                        if ( shadow_enabled &&
   62.74 +                        if ( shadow_refcounts &&
   62.75                               page_is_page_table(gpage) &&
   62.76                               ! page_out_of_sync(gpage) )
   62.77                          {
   62.78 @@ -336,19 +350,21 @@ int audit_adjust_pgtables(struct domain 
   62.79                      break;
   62.80                  case PGT_l1_shadow:
   62.81                      adjust(pfn_to_page(gmfn), 0);
   62.82 -                    adjust_l1_page(smfn);
   62.83 +                    if ( shadow_refcounts )
   62.84 +                        adjust_l1_page(smfn);
   62.85                      if ( page->u.inuse.type_info & PGT_pinned )
   62.86                          adjust(page, 0);
   62.87                      break;
   62.88                  case PGT_hl2_shadow:
   62.89                      adjust(pfn_to_page(gmfn), 0);
   62.90 -                    adjust_hl2_page(smfn);
   62.91 +                    if ( shadow_refcounts )
   62.92 +                        adjust_hl2_page(smfn);
   62.93                      if ( page->u.inuse.type_info & PGT_pinned )
   62.94                          adjust(page, 0);
   62.95                      break;
   62.96                  case PGT_l2_shadow:
   62.97                      adjust(pfn_to_page(gmfn), 0);
   62.98 -                    adjust_l2_page(smfn);
   62.99 +                    adjust_l2_page(smfn, 1);
  62.100                      if ( page->u.inuse.type_info & PGT_pinned )
  62.101                          adjust(page, 0);
  62.102                      break;
  62.103 @@ -391,45 +407,43 @@ int audit_adjust_pgtables(struct domain 
  62.104          struct exec_domain *ed;
  62.105  
  62.106          for_each_exec_domain(d, ed)
  62.107 -            {
  62.108 -                if ( !shadow_enabled )
  62.109 -                {
  62.110 -                    if ( pagetable_val(ed->arch.guest_table) )
  62.111 -                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
  62.112 -                                            >> PAGE_SHIFT], 1);
  62.113 -                }
  62.114 -                else
  62.115 -                {
  62.116 -                    if ( pagetable_val(ed->arch.guest_table) )
  62.117 -                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
  62.118 -                                            >> PAGE_SHIFT], 0);
  62.119 -                    if ( pagetable_val(ed->arch.shadow_table) )
  62.120 -                        adjust(&frame_table[pagetable_val(ed->arch.shadow_table)
  62.121 -                                            >> PAGE_SHIFT], 0);
  62.122 -                    if ( ed->arch.monitor_shadow_ref )
  62.123 -                        adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
  62.124 -                }
  62.125 -            }
  62.126 +        {
  62.127 +            if ( pagetable_val(ed->arch.guest_table) )
  62.128 +                adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1);
  62.129 +            if ( pagetable_val(ed->arch.shadow_table) )
  62.130 +                adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0);
  62.131 +            if ( ed->arch.monitor_shadow_ref )
  62.132 +                adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
  62.133 +        }
  62.134      }
  62.135  
  62.136      void adjust_guest_pages()
  62.137      {
  62.138          struct list_head *list_ent = d->page_list.next;
  62.139          struct pfn_info *page;
  62.140 -        unsigned long mfn;
  62.141 +        unsigned long mfn, snapshot_mfn;
  62.142  
  62.143          while ( list_ent != &d->page_list )
  62.144          {
  62.145              u32 page_type;
  62.146  
  62.147              page = list_entry(list_ent, struct pfn_info, list);
  62.148 -            mfn = page_to_pfn(page);
  62.149 +            snapshot_mfn = mfn = page_to_pfn(page);
  62.150              page_type = page->u.inuse.type_info & PGT_type_mask;
  62.151  
  62.152              BUG_ON(page_get_owner(page) != d);
  62.153  
  62.154              page_count++;
  62.155  
  62.156 +            if ( shadow_enabled && !shadow_refcounts &&
  62.157 +                 page_out_of_sync(page) )
  62.158 +            {
  62.159 +                unsigned long gpfn = __mfn_to_gpfn(d, mfn);
  62.160 +                ASSERT( VALID_M2P(gpfn) );
  62.161 +                snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
  62.162 +                ASSERT( snapshot_mfn );
  62.163 +            }
  62.164 +
  62.165              switch ( page_type )
  62.166              {
  62.167              case PGT_l2_page_table:
  62.168 @@ -437,7 +451,7 @@ int audit_adjust_pgtables(struct domain 
  62.169  
  62.170                  if ( noisy )
  62.171                  {
  62.172 -                    if ( shadow_enabled )
  62.173 +                    if ( shadow_refcounts )
  62.174                      {
  62.175                          printk("Audit %d: found an L2 guest page "
  62.176                                 "mfn=%lx t=%08x c=%08x while in shadow mode\n",
  62.177 @@ -446,19 +460,22 @@ int audit_adjust_pgtables(struct domain 
  62.178                          errors++;
  62.179                      }
  62.180  
  62.181 -                    if ( (page->u.inuse.type_info & PGT_validated) !=
  62.182 -                         PGT_validated )
  62.183 +                    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
  62.184                      {
  62.185 -                        printk("Audit %d: L2 mfn=%lx not validated %08x\n",
  62.186 -                               d->id, mfn, page->u.inuse.type_info);
  62.187 -                        errors++;
  62.188 -                    }
  62.189 +                        if ( (page->u.inuse.type_info & PGT_validated) !=
  62.190 +                             PGT_validated )
  62.191 +                        {
  62.192 +                            printk("Audit %d: L2 mfn=%lx not validated %08x\n",
  62.193 +                                   d->id, mfn, page->u.inuse.type_info);
  62.194 +                            errors++;
  62.195 +                        }
  62.196  
  62.197 -                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  62.198 -                    {
  62.199 -                        printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
  62.200 -                               d->id, mfn, page->u.inuse.type_info);
  62.201 -                        errors++;
  62.202 +                        if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  62.203 +                        {
  62.204 +                            printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
  62.205 +                                   d->id, mfn, page->u.inuse.type_info);
  62.206 +                            errors++;
  62.207 +                        }
  62.208                      }
  62.209                  }
  62.210  
  62.211 @@ -466,7 +483,7 @@ int audit_adjust_pgtables(struct domain 
  62.212                      adjust(page, 1);
  62.213  
  62.214                  if ( page->u.inuse.type_info & PGT_validated )
  62.215 -                    adjust_l2_page(mfn);
  62.216 +                    adjust_l2_page(snapshot_mfn, 0);
  62.217  
  62.218                  break;
  62.219  
  62.220 @@ -475,7 +492,7 @@ int audit_adjust_pgtables(struct domain 
  62.221  
  62.222                  if ( noisy )
  62.223                  {
  62.224 -                    if ( shadow_enabled )
  62.225 +                    if ( shadow_refcounts )
  62.226                      {
  62.227                          printk("found an L1 guest page mfn=%lx t=%08x c=%08x "
  62.228                                 "while in shadow mode\n",
  62.229 @@ -483,21 +500,24 @@ int audit_adjust_pgtables(struct domain 
  62.230                          errors++;
  62.231                      }
  62.232  
  62.233 -                    if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
  62.234 +                    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
  62.235                      {
  62.236 -                        printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
  62.237 -                               d->id, mfn, page->u.inuse.type_info);
  62.238 -                        errors++;
  62.239 -                    }
  62.240 -
  62.241 -                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  62.242 -                    {
  62.243 -                        if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
  62.244 +                        if ( (page->u.inuse.type_info & PGT_validated) !=
  62.245 +                             PGT_validated )
  62.246                          {
  62.247 -                            printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
  62.248 +                            printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
  62.249                                     d->id, mfn, page->u.inuse.type_info);
  62.250                              errors++;
  62.251                          }
  62.252 +
  62.253 +                        if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  62.254 +                        {
  62.255 +                            if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
  62.256 +                            {
  62.257 +                                printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
  62.258 +                                       d->id, mfn, page->u.inuse.type_info);
  62.259 +                            }
  62.260 +                        }
  62.261                      }
  62.262                  }
  62.263                  
  62.264 @@ -505,7 +525,7 @@ int audit_adjust_pgtables(struct domain 
  62.265                      adjust(page, 1);
  62.266  
  62.267                  if ( page->u.inuse.type_info & PGT_validated )
  62.268 -                    adjust_l1_page(mfn);
  62.269 +                    adjust_l1_page(snapshot_mfn);
  62.270  
  62.271                  break;
  62.272  
  62.273 @@ -520,7 +540,7 @@ int audit_adjust_pgtables(struct domain 
  62.274                  break;
  62.275  
  62.276              case PGT_writable_page:
  62.277 -                if ( shadow_enabled )
  62.278 +                if ( shadow_refcounts )
  62.279                  {
  62.280                      // In shadow mode, writable pages can get pinned by
  62.281                      // paravirtualized guests that think they are pinning
  62.282 @@ -589,6 +609,8 @@ void audit_pagelist(struct domain *d)
  62.283  
  62.284  void _audit_domain(struct domain *d, int flags)
  62.285  {
  62.286 +    int shadow_refcounts = !!shadow_mode_refcounts(d);
  62.287 +
  62.288      void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn,
  62.289                               unsigned long mfn)
  62.290      {
  62.291 @@ -608,8 +630,29 @@ void _audit_domain(struct domain *d, int
  62.292          unmap_domain_mem(pt);           
  62.293      }
  62.294  
  62.295 +    void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn)
  62.296 +    {
  62.297 +        int i;
  62.298 +        active_grant_entry_t *act = d->grant_table->active;
  62.299 +
  62.300 +        spin_lock(&d->grant_table->lock);
  62.301 +
  62.302 +        for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
  62.303 +        {
  62.304 +            if ( act[i].pin && (act[i].frame == xmfn) )
  62.305 +            {
  62.306 +                printk("     found active grant table entry i=%d dom=%d pin=%d\n",
  62.307 +                       i, act[i].domid, act[i].pin);
  62.308 +            }
  62.309 +        }
  62.310 +
  62.311 +        spin_unlock(&d->grant_table->lock);
  62.312 +    }
  62.313 +
  62.314      void scan_for_pfn(struct domain *d, unsigned long xmfn)
  62.315      {
  62.316 +        scan_for_pfn_in_grant_table(d, xmfn);
  62.317 +
  62.318          if ( !shadow_mode_enabled(d) )
  62.319          {
  62.320              struct list_head *list_ent = d->page_list.next;
  62.321 @@ -688,7 +731,7 @@ void _audit_domain(struct domain *d, int
  62.322  
  62.323      // Maybe we should just be using BIGLOCK?
  62.324      //
  62.325 -    if ( !(flags & AUDIT_ALREADY_LOCKED) )
  62.326 +    if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
  62.327          shadow_lock(d);
  62.328  
  62.329      spin_lock(&d->page_alloc_lock);
  62.330 @@ -716,7 +759,7 @@ void _audit_domain(struct domain *d, int
  62.331              errors++;
  62.332          }
  62.333  
  62.334 -        if ( shadow_mode_enabled(d) &&
  62.335 +        if ( shadow_mode_refcounts(d) &&
  62.336               (page_type == PGT_writable_page) &&
  62.337               !(page->u.inuse.type_info & PGT_validated) )
  62.338          {
  62.339 @@ -764,7 +807,9 @@ void _audit_domain(struct domain *d, int
  62.340                         mfn);
  62.341                  errors++;
  62.342              }
  62.343 -            if ( page_type != PGT_writable_page )
  62.344 +            if ( shadow_refcounts
  62.345 +                 ? (page_type != PGT_writable_page)
  62.346 +                 : !(page_type && (page_type <= PGT_l4_page_table)) )
  62.347              {
  62.348                  printk("out of sync page mfn=%lx has strange type "
  62.349                         "t=%08x c=%08x\n",
  62.350 @@ -821,7 +866,7 @@ void _audit_domain(struct domain *d, int
  62.351                         d->id, page->u.inuse.type_info, 
  62.352                         page->tlbflush_timestamp,
  62.353                         page->count_info, mfn);
  62.354 -                errors++;
  62.355 +                //errors++;
  62.356              }
  62.357              break;
  62.358          default:
  62.359 @@ -835,7 +880,7 @@ void _audit_domain(struct domain *d, int
  62.360                     page->count_info,
  62.361                     page->u.inuse.type_info, 
  62.362                     page->tlbflush_timestamp, mfn );
  62.363 -            errors++;
  62.364 +            //errors++;
  62.365              scan_for_pfn_remote(mfn);
  62.366          }
  62.367  
  62.368 @@ -870,6 +915,8 @@ void _audit_domain(struct domain *d, int
  62.369                                 d->id, page_to_pfn(page),
  62.370                                 page->u.inuse.type_info,
  62.371                                 page->count_info);
  62.372 +                        printk("a->gpfn_and_flags=%p\n",
  62.373 +                               (void *)a->gpfn_and_flags);
  62.374                          errors++;
  62.375                      }
  62.376                      break;
  62.377 @@ -905,7 +952,7 @@ void _audit_domain(struct domain *d, int
  62.378                 "pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n",
  62.379                 d->id, page_count, oos_count, l1, l2, ctot, ttot);
  62.380  
  62.381 -    if ( !(flags & AUDIT_ALREADY_LOCKED) )
  62.382 +    if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
  62.383          shadow_unlock(d);
  62.384  
  62.385      if ( d != current->domain )
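
The recurring substitution in audit.c, shadow_mode_enabled() to shadow_mode_refcounts(), separates "shadow mode is on at all" from "the shadow code also owns the page reference counts"; only the latter justifies the refcount-based checks the auditor performs. A hedged sketch of the two predicates, assuming the SHM_* bits live in d->arch.shadow_mode (the real definitions are in the shadow headers):

    /* Hedged sketch; SHM_refcounts matches the flag added in
     * domain.c and domain_build.c below. */
    #define shadow_mode_enabled_sketch(d)   ((d)->arch.shadow_mode != 0)
    #define shadow_mode_refcounts_sketch(d) ((d)->arch.shadow_mode & SHM_refcounts)
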
    63.1 --- a/xen/arch/x86/dom0_ops.c	Fri May 06 17:04:27 2005 +0000
    63.2 +++ b/xen/arch/x86/dom0_ops.c	Wed May 11 13:43:50 2005 +0000
    63.3 @@ -397,7 +397,7 @@ void arch_getdomaininfo_ctxt(
    63.4  #endif
    63.5  
    63.6      c->flags = 0;
    63.7 -    if ( test_bit(EDF_DONEFPUINIT, &ed->ed_flags) )
    63.8 +    if ( test_bit(EDF_DONEFPUINIT, &ed->flags) )
    63.9          c->flags |= VGCF_I387_VALID;
   63.10      if ( KERNEL_MODE(ed, &ed->arch.guest_context.user_regs) )
   63.11          c->flags |= VGCF_IN_KERNEL;
    64.1 --- a/xen/arch/x86/domain.c	Fri May 06 17:04:27 2005 +0000
    64.2 +++ b/xen/arch/x86/domain.c	Wed May 11 13:43:50 2005 +0000
    64.3 @@ -252,7 +252,7 @@ void arch_do_createdomain(struct exec_do
    64.4  
    64.5          d->shared_info = (void *)alloc_xenheap_page();
    64.6          memset(d->shared_info, 0, PAGE_SIZE);
    64.7 -        ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
    64.8 +        ed->vcpu_info = &d->shared_info->vcpu_data[ed->id];
    64.9          SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
   64.10          machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
   64.11                                 PAGE_SHIFT] = INVALID_M2P_ENTRY;
   64.12 @@ -294,7 +294,7 @@ void arch_do_boot_vcpu(struct exec_domai
   64.13      struct domain *d = ed->domain;
   64.14      ed->arch.schedule_tail = d->exec_domain[0]->arch.schedule_tail;
   64.15      ed->arch.perdomain_ptes = 
   64.16 -        d->arch.mm_perdomain_pt + (ed->eid << PDPT_VCPU_SHIFT);
   64.17 +        d->arch.mm_perdomain_pt + (ed->id << PDPT_VCPU_SHIFT);
   64.18      ed->arch.flags = TF_kernel_mode;
   64.19  }
   64.20  
   64.21 @@ -364,7 +364,8 @@ static int vmx_final_setup_guest(
   64.22  
   64.23          /* Put the domain in shadow mode even though we're going to be using
   64.24           * the shared 1:1 page table initially. It shouldn't hurt */
   64.25 -        shadow_mode_enable(ed->domain, SHM_enable|SHM_translate|SHM_external);
   64.26 +        shadow_mode_enable(ed->domain,
   64.27 +                           SHM_enable|SHM_refcounts|SHM_translate|SHM_external);
   64.28      }
   64.29  
   64.30      return 0;
   64.31 @@ -397,9 +398,9 @@ int arch_set_info_guest(
   64.32                  return -EINVAL;
   64.33      }
   64.34  
   64.35 -    clear_bit(EDF_DONEFPUINIT, &ed->ed_flags);
   64.36 +    clear_bit(EDF_DONEFPUINIT, &ed->flags);
   64.37      if ( c->flags & VGCF_I387_VALID )
   64.38 -        set_bit(EDF_DONEFPUINIT, &ed->ed_flags);
   64.39 +        set_bit(EDF_DONEFPUINIT, &ed->flags);
   64.40  
   64.41      ed->arch.flags &= ~TF_kernel_mode;
   64.42      if ( c->flags & VGCF_IN_KERNEL )
   64.43 @@ -415,7 +416,7 @@ int arch_set_info_guest(
   64.44      if ( !IS_PRIV(d) )
   64.45          ed->arch.guest_context.user_regs.eflags &= 0xffffcfff;
   64.46  
   64.47 -    if ( test_bit(EDF_DONEINIT, &ed->ed_flags) )
   64.48 +    if ( test_bit(EDF_DONEINIT, &ed->flags) )
   64.49          return 0;
   64.50  
   64.51      if ( (rc = (int)set_fast_trap(ed, c->fast_trap_idx)) != 0 )
   64.52 @@ -426,13 +427,13 @@ int arch_set_info_guest(
   64.53      for ( i = 0; i < 8; i++ )
   64.54          (void)set_debugreg(ed, i, c->debugreg[i]);
   64.55  
   64.56 -    if ( ed->eid == 0 )
   64.57 +    if ( ed->id == 0 )
   64.58          d->vm_assist = c->vm_assist;
   64.59  
   64.60      phys_basetab = c->pt_base;
   64.61      ed->arch.guest_table = mk_pagetable(phys_basetab);
   64.62  
   64.63 -    if ( shadow_mode_enabled(d) )
   64.64 +    if ( shadow_mode_refcounts(d) )
   64.65      {
   64.66          if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
   64.67              return -EINVAL;
   64.68 @@ -478,7 +479,7 @@ int arch_set_info_guest(
   64.69      update_pagetables(ed);
   64.70      
   64.71      /* Don't redo final setup */
   64.72 -    set_bit(EDF_DONEINIT, &ed->ed_flags);
   64.73 +    set_bit(EDF_DONEINIT, &ed->flags);
   64.74  
   64.75      return 0;
   64.76  }
   64.77 @@ -796,7 +797,7 @@ void context_switch(struct exec_domain *
   64.78       * 'prev' (after this point, a dying domain's info structure may be freed
   64.79       * without warning). 
   64.80       */
   64.81 -    clear_bit(EDF_RUNNING, &prev->ed_flags);
   64.82 +    clear_bit(EDF_RUNNING, &prev->flags);
   64.83  
   64.84      schedule_tail(next);
   64.85      BUG();
   64.86 @@ -981,17 +982,21 @@ void domain_relinquish_resources(struct 
   64.87      {
   64.88          if ( pagetable_val(ed->arch.guest_table) != 0 )
   64.89          {
   64.90 -            (shadow_mode_enabled(d) ? put_page : put_page_and_type)
   64.91 -                (&frame_table[pagetable_val(
   64.92 -                    ed->arch.guest_table) >> PAGE_SHIFT]);
   64.93 +            if ( shadow_mode_refcounts(d) )
   64.94 +                put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
   64.95 +            else
   64.96 +                put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
   64.97 +
   64.98              ed->arch.guest_table = mk_pagetable(0);
   64.99          }
  64.100  
  64.101          if ( pagetable_val(ed->arch.guest_table_user) != 0 )
  64.102          {
  64.103 -            (shadow_mode_enabled(d) ? put_page : put_page_and_type)
  64.104 -                (&frame_table[pagetable_val(
  64.105 -                    ed->arch.guest_table_user) >> PAGE_SHIFT]);
  64.106 +            if ( shadow_mode_refcounts(d) )
  64.107 +                put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
  64.108 +            else
  64.109 +                put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
  64.110 +
  64.111              ed->arch.guest_table_user = mk_pagetable(0);
  64.112          }
  64.113  
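
The mechanical change running through domain.c replaces open-coded "pagetable_val(pt) >> PAGE_SHIFT" with pagetable_get_pfn(pt). Judging by the expressions it replaces, the accessor is exactly that shift; a hedged sketch of the assumed definition:

    /* Hedged sketch of the accessor used above; assumed to be
     * equivalent to the open-coded form it replaces. */
    #define pagetable_get_pfn(pt) (pagetable_val(pt) >> PAGE_SHIFT)
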
    65.1 --- a/xen/arch/x86/domain_build.c	Fri May 06 17:04:27 2005 +0000
    65.2 +++ b/xen/arch/x86/domain_build.c	Wed May 11 13:43:50 2005 +0000
    65.3 @@ -114,7 +114,7 @@ int construct_dom0(struct domain *d,
    65.4      /* Sanity! */
    65.5      if ( d->id != 0 ) 
    65.6          BUG();
    65.7 -    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
    65.8 +    if ( test_bit(DF_CONSTRUCTED, &d->flags) ) 
    65.9          BUG();
   65.10  
   65.11      memset(&dsi, 0, sizeof(struct domain_setup_info));
   65.12 @@ -540,14 +540,14 @@ int construct_dom0(struct domain *d,
   65.13      /* DOM0 gets access to everything. */
   65.14      physdev_init_dom0(d);
   65.15  
   65.16 -    set_bit(DF_CONSTRUCTED, &d->d_flags);
   65.17 +    set_bit(DF_CONSTRUCTED, &d->flags);
   65.18  
   65.19      new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
   65.20  
   65.21      if ( opt_dom0_shadow || opt_dom0_translate )
   65.22      {
   65.23          shadow_mode_enable(d, (opt_dom0_translate
   65.24 -                               ? SHM_enable | SHM_translate
   65.25 +                               ? SHM_enable | SHM_refcounts | SHM_translate
   65.26                                 : SHM_enable));
   65.27          if ( opt_dom0_translate )
   65.28          {
   65.29 @@ -570,7 +570,7 @@ int construct_dom0(struct domain *d,
   65.30              idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table),
   65.31                                                  __PAGE_HYPERVISOR);
   65.32              translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
   65.33 -                                pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT);
   65.34 +                                pagetable_get_pfn(ed->arch.guest_table));
   65.35              idle_pg_table[1] = root_empty();
   65.36              local_flush_tlb();
   65.37          }
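
The mode masks handed to shadow_mode_enable() above gain SHM_refcounts whenever translation is requested, mirroring the VMX path in domain.c. A hedged summary of the bits as used in this changeset; exact values and full semantics are in the shadow headers:

    /* Hedged reading of the SHM_* bits, inferred from their use here:
     *   SHM_enable    - shadow page tables are active
     *   SHM_refcounts - shadow code, not the guest, manages refcounts
     *   SHM_translate - hypervisor maintains the phys-to-machine map
     *   SHM_external  - monitor table lives outside the guest (VMX)
     */
    shadow_mode_enable(d, SHM_enable | SHM_refcounts | SHM_translate);
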
    66.1 --- a/xen/arch/x86/i387.c	Fri May 06 17:04:27 2005 +0000
    66.2 +++ b/xen/arch/x86/i387.c	Wed May 11 13:43:50 2005 +0000
    66.3 @@ -18,7 +18,7 @@ void init_fpu(void)
    66.4      __asm__ __volatile__ ( "fninit" );
    66.5      if ( cpu_has_xmm )
    66.6          load_mxcsr(0x1f80);
    66.7 -    set_bit(EDF_DONEFPUINIT, &current->ed_flags);
    66.8 +    set_bit(EDF_DONEFPUINIT, &current->flags);
    66.9  }
   66.10  
   66.11  void save_init_fpu(struct exec_domain *tsk)
   66.12 @@ -28,7 +28,7 @@ void save_init_fpu(struct exec_domain *t
   66.13       * This causes us to set the real flag, so we'll need
   66.14       * to temporarily clear it while saving f-p state.
   66.15       */
   66.16 -    if ( test_bit(EDF_GUEST_STTS, &tsk->ed_flags) )
   66.17 +    if ( test_bit(EDF_GUEST_STTS, &tsk->flags) )
   66.18          clts();
   66.19  
   66.20      if ( cpu_has_fxsr )
   66.21 @@ -40,7 +40,7 @@ void save_init_fpu(struct exec_domain *t
   66.22              "fnsave %0 ; fwait"
   66.23              : "=m" (tsk->arch.guest_context.fpu_ctxt) );
   66.24  
   66.25 -    clear_bit(EDF_USEDFPU, &tsk->ed_flags);
   66.26 +    clear_bit(EDF_USEDFPU, &tsk->flags);
   66.27      stts();
   66.28  }
   66.29  
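
The save path above is half of x86 lazy-FPU switching: CR0.TS stays set between uses so the first FPU instruction raises #NM, and it must be cleared around the save itself. A hedged sketch of the fxsr-capable branch (only the fnsave branch is visible in the hunk; fnclex after fxsave follows the usual Linux pattern and is an assumption here):

    /* Hedged sketch: clear TS so the save cannot fault, write the
     * state out, then set TS again to re-arm the #NM trap. */
    clts();                                   /* CR0.TS := 0 */
    __asm__ __volatile__ (
        "fxsave %0 ; fnclex"
        : "=m" (tsk->arch.guest_context.fpu_ctxt) );
    stts();                                   /* CR0.TS := 1 */
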
    67.1 --- a/xen/arch/x86/i8259.c	Fri May 06 17:04:27 2005 +0000
    67.2 +++ b/xen/arch/x86/i8259.c	Wed May 11 13:43:50 2005 +0000
    67.3 @@ -45,7 +45,7 @@ BUILD_COMMON_IRQ()
    67.4  
    67.5  /*
    67.6   * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
    67.7 - * (these are usually mapped to vectors 0x30-0x3f)
    67.8 + * (these are usually mapped to vectors 0x20-0x2f)
    67.9   */
   67.10  BUILD_16_IRQS(0x0)
   67.11  
   67.12 @@ -58,7 +58,7 @@ BUILD_16_IRQS(0x0)
   67.13   * of these. Plus, more powerful systems might have more than 64
   67.14   * IO-APIC registers.
   67.15   *
   67.16 - * (these are usually mapped into the 0x30-0xff vector range)
   67.17 + * (these are usually mapped into the 0x20-0xff vector range)
   67.18   */
   67.19  BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
   67.20  BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
   67.21 @@ -101,7 +101,7 @@ BUILD_SMP_INTERRUPT(spurious_interrupt,S
   67.22  	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
   67.23  	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
   67.24  
   67.25 -    void *interrupt[NR_IRQS] = {
   67.26 +    void (*interrupt[NR_IRQS])(void) = {
   67.27  	IRQLIST_16(0x0),
   67.28  
   67.29  #ifdef CONFIG_X86_IO_APIC
   67.30 @@ -341,7 +341,7 @@ void __init init_8259A(int auto_eoi)
   67.31       * outb_p - this has to work on a wide range of PC hardware.
   67.32       */
   67.33      outb_p(0x11, 0x20);	/* ICW1: select 8259A-1 init */
   67.34 -    outb_p(0x30 + 0, 0x21);	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
   67.35 +    outb_p(0x20 + 0, 0x21);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
   67.36      outb_p(0x04, 0x21);	/* 8259A-1 (the master) has a slave on IR2 */
   67.37      if (auto_eoi)
   67.38          outb_p(0x03, 0x21);	/* master does Auto EOI */
   67.39 @@ -349,7 +349,7 @@ void __init init_8259A(int auto_eoi)
   67.40          outb_p(0x01, 0x21);	/* master expects normal EOI */
   67.41  
   67.42      outb_p(0x11, 0xA0);	/* ICW1: select 8259A-2 init */
   67.43 -    outb_p(0x30 + 8, 0xA1);	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
   67.44 +    outb_p(0x20 + 8, 0xA1);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
   67.45      outb_p(0x02, 0xA1);	/* 8259A-2 is a slave on master's IR2 */
   67.46      outb_p(0x01, 0xA1);	/* (slave's support for AEOI in flat mode
   67.47                             is to be investigated) */
    68.1 --- a/xen/arch/x86/idle0_task.c	Fri May 06 17:04:27 2005 +0000
    68.2 +++ b/xen/arch/x86/idle0_task.c	Wed May 11 13:43:50 2005 +0000
    68.3 @@ -5,7 +5,7 @@
    68.4  
    68.5  struct domain idle0_domain = {
    68.6      id:          IDLE_DOMAIN_ID,
    68.7 -    d_flags:     1<<DF_IDLETASK,
    68.8 +    flags:       1<<DF_IDLETASK,
    68.9      refcnt:      ATOMIC_INIT(1)
   68.10  };
   68.11  
    69.1 --- a/xen/arch/x86/io_apic.c	Fri May 06 17:04:27 2005 +0000
    69.2 +++ b/xen/arch/x86/io_apic.c	Wed May 11 13:43:50 2005 +0000
    69.3 @@ -25,26 +25,20 @@
    69.4  #include <xen/irq.h>
    69.5  #include <xen/delay.h>
    69.6  #include <xen/sched.h>
    69.7 -#include <xen/config.h>
    69.8 +#include <xen/acpi.h>
    69.9 +#include <asm/io.h>
   69.10  #include <asm/mc146818rtc.h>
   69.11 -#include <asm/io.h>
   69.12 -#include <asm/mpspec.h>
   69.13 -#include <asm/io_apic.h>
   69.14  #include <asm/smp.h>
   69.15  #include <asm/desc.h>
   69.16 -#include <asm/smpboot.h>
   69.17 -
   69.18 -#ifdef CONFIG_X86_IO_APIC
   69.19 -
   69.20 -#undef APIC_LOCKUP_DEBUG
   69.21 +#include <mach_apic.h>
   69.22 +#include <io_ports.h>
   69.23  
   69.24 -#define APIC_LOCKUP_DEBUG
   69.25 +int (*ioapic_renumber_irq)(int ioapic, int irq);
   69.26 +atomic_t irq_mis_count;
   69.27  
   69.28 -static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
   69.29 +static DEFINE_SPINLOCK(ioapic_lock);
   69.30  
   69.31 -unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
   69.32 -unsigned char int_delivery_mode = dest_LowestPrio;
   69.33 -
   69.34 +int skip_ioapic_setup;
   69.35  
   69.36  /*
   69.37   * # of IRQ routing registers
   69.38 @@ -69,12 +63,20 @@ static struct irq_pin_list {
   69.39  	int apic, pin, next;
   69.40  } irq_2_pin[PIN_MAP_SIZE];
   69.41  
   69.42 +int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
   69.43 +#ifdef CONFIG_PCI_MSI
   69.44 +#define vector_to_irq(vector) 	\
   69.45 +	(platform_legacy_irq(vector) ? vector : vector_irq[vector])
   69.46 +#else
   69.47 +#define vector_to_irq(vector)	(vector)
   69.48 +#endif
   69.49 +
   69.50  /*
   69.51   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
   69.52   * shared ISA-space IRQs, so we have to support them. We are super
   69.53   * fast in the common case, and fast for shared ISA-space IRQs.
   69.54   */
   69.55 -static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
   69.56 +static void add_pin_to_irq(unsigned int irq, int apic, int pin)
   69.57  {
   69.58  	static int first_free_entry = NR_IRQS;
   69.59  	struct irq_pin_list *entry = irq_2_pin + irq;
   69.60 @@ -112,36 +114,48 @@ static void __init replace_pin_at_irq(un
   69.61  	}
   69.62  }
   69.63  
   69.64 -#define __DO_ACTION(R, ACTION, FINAL)					\
   69.65 -									\
   69.66 -{									\
   69.67 -	int pin;							\
   69.68 -	struct irq_pin_list *entry = irq_2_pin + irq;			\
   69.69 -									\
   69.70 -	for (;;) {							\
   69.71 -		unsigned int reg;					\
   69.72 -		pin = entry->pin;					\
   69.73 -		if (pin == -1)						\
   69.74 -			break;						\
   69.75 -		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
   69.76 -		reg ACTION;						\
   69.77 -		io_apic_write(entry->apic, 0x10 + R + pin*2, reg);	\
   69.78 -		if (!entry->next)					\
   69.79 -			break;						\
   69.80 -		entry = irq_2_pin + entry->next;			\
   69.81 -	}								\
   69.82 -	FINAL;								\
   69.83 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
   69.84 +{
   69.85 +	struct irq_pin_list *entry = irq_2_pin + irq;
   69.86 +	unsigned int pin, reg;
   69.87 +
   69.88 +	for (;;) {
   69.89 +		pin = entry->pin;
   69.90 +		if (pin == -1)
   69.91 +			break;
   69.92 +		reg = io_apic_read(entry->apic, 0x10 + pin*2);
   69.93 +		reg &= ~disable;
   69.94 +		reg |= enable;
   69.95 +		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
   69.96 +		if (!entry->next)
   69.97 +			break;
   69.98 +		entry = irq_2_pin + entry->next;
   69.99 +	}
  69.100  }
  69.101  
  69.102 -#define DO_ACTION(name,R,ACTION, FINAL)					\
  69.103 -									\
  69.104 -	static void name##_IO_APIC_irq (unsigned int irq)		\
  69.105 -	__DO_ACTION(R, ACTION, FINAL)
  69.106 +/* mask = 1 */
  69.107 +static void __mask_IO_APIC_irq (unsigned int irq)
  69.108 +{
  69.109 +	__modify_IO_APIC_irq(irq, 0x00010000, 0);
  69.110 +}
  69.111  
  69.112 -DO_ACTION( __mask,    0, |= 0x00010000, io_apic_sync(entry->apic) )
  69.113 -DO_ACTION( __unmask,  0, &= 0xfffeffff, )
  69.114 -DO_ACTION( __edge,    0, &= 0xffff7fff, )
  69.115 -DO_ACTION( __level,   0, |= 0x00008000, )
  69.116 +/* mask = 0 */
  69.117 +static void __unmask_IO_APIC_irq (unsigned int irq)
  69.118 +{
  69.119 +	__modify_IO_APIC_irq(irq, 0, 0x00010000);
  69.120 +}
  69.121 +
  69.122 +/* trigger = 0 */
  69.123 +static void __edge_IO_APIC_irq (unsigned int irq)
  69.124 +{
  69.125 +	__modify_IO_APIC_irq(irq, 0, 0x00008000);
  69.126 +}
  69.127 +
  69.128 +/* trigger = 1 */
  69.129 +static void __level_IO_APIC_irq (unsigned int irq)
  69.130 +{
  69.131 +	__modify_IO_APIC_irq(irq, 0x00008000, 0);
  69.132 +}
  69.133  
  69.134  static void mask_IO_APIC_irq (unsigned int irq)
  69.135  {
  69.136 @@ -165,7 +179,7 @@ void clear_IO_APIC_pin(unsigned int apic
  69.137  {
  69.138  	struct IO_APIC_route_entry entry;
  69.139  	unsigned long flags;
  69.140 -
  69.141 +	
  69.142  	/* Check delivery_mode to be sure we're not clearing an SMI pin */
  69.143  	spin_lock_irqsave(&ioapic_lock, flags);
  69.144  	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
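
In the hunk above, the old __DO_ACTION/DO_ACTION macro family collapses into the single read-modify-write helper __modify_IO_APIC_irq(). Bit 16 of an IO-APIC redirection entry is the mask bit and bit 15 selects level trigger, so the four wrappers reduce to these calls (constants copied from the hunk):

    __modify_IO_APIC_irq(irq, 0x00010000, 0);  /* mask:   set bit 16 */
    __modify_IO_APIC_irq(irq, 0, 0x00010000);  /* unmask: clear bit 16 */
    __modify_IO_APIC_irq(irq, 0x00008000, 0);  /* level:  set bit 15 */
    __modify_IO_APIC_irq(irq, 0, 0x00008000);  /* edge:   clear bit 15 */
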
  69.145 @@ -194,85 +208,467 @@ static void clear_IO_APIC (void)
  69.146  			clear_IO_APIC_pin(apic, pin);
  69.147  }
  69.148  
  69.149 -static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
  69.150 +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
  69.151  {
  69.152  	unsigned long flags;
  69.153 -
  69.154 -	/*
  69.155 -	 * Only the first 8 bits are valid.
  69.156 -	 */
  69.157 -	mask = mask << 24;
  69.158 +	int pin;
  69.159 +	struct irq_pin_list *entry = irq_2_pin + irq;
  69.160 +	unsigned int apicid_value;
  69.161 +	
  69.162 +	apicid_value = cpu_mask_to_apicid(cpumask);
  69.163 +	/* Prepare to do the io_apic_write */
  69.164 +	apicid_value = apicid_value << 24;
  69.165  	spin_lock_irqsave(&ioapic_lock, flags);
  69.166 -	__DO_ACTION(1, = mask, )
  69.167 +	for (;;) {
  69.168 +		pin = entry->pin;
  69.169 +		if (pin == -1)
  69.170 +			break;
  69.171 +		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
  69.172 +		if (!entry->next)
  69.173 +			break;
  69.174 +		entry = irq_2_pin + entry->next;
  69.175 +	}
  69.176  	spin_unlock_irqrestore(&ioapic_lock, flags);
  69.177  }
  69.178  
  69.179 -#define balance_irq(_irq) ((void)0)
  69.180 -
  69.181 -/*
  69.182 - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  69.183 - * specific CPU-side IRQs.
  69.184 - */
  69.185 -
  69.186 -#define MAX_PIRQS 8
  69.187 -int pirq_entries [MAX_PIRQS];
  69.188 -int pirqs_enabled;
  69.189 +#if defined(CONFIG_IRQBALANCE)
  69.190 +# include <asm/processor.h>	/* kernel_thread() */
  69.191 +# include <xen/kernel_stat.h>	/* kstat */
  69.192 +# include <xen/slab.h>		/* kmalloc() */
  69.193 +# include <xen/timer.h>	/* time_after() */
  69.194 + 
  69.195 +# ifdef CONFIG_BALANCED_IRQ_DEBUG
  69.196 +#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
  69.197 +#  define Dprintk(x...) do { TDprintk(x); } while (0)
  69.198 +# else
  69.199 +#  define TDprintk(x...) 
  69.200 +#  define Dprintk(x...) 
  69.201 +# endif
  69.202  
  69.203 -int skip_ioapic_setup;
  69.204 -#if 0
  69.205 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
  69.206  
  69.207 -static int __init noioapic_setup(char *str)
  69.208 +#define IRQBALANCE_CHECK_ARCH -999
  69.209 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
  69.210 +static int physical_balance = 0;
  69.211 +
  69.212 +struct irq_cpu_info {
  69.213 +	unsigned long * last_irq;
  69.214 +	unsigned long * irq_delta;
  69.215 +	unsigned long irq;
  69.216 +} irq_cpu_data[NR_CPUS];
  69.217 +
  69.218 +#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
  69.219 +#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
  69.220 +#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
  69.221 +
  69.222 +#define IDLE_ENOUGH(cpu,now) \
  69.223 +		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
  69.224 +
  69.225 +#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
  69.226 +
  69.227 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
  69.228 +
  69.229 +#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
  69.230 +#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
  69.231 +#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
  69.232 +#define BALANCED_IRQ_LESS_DELTA		(HZ)
  69.233 +
  69.234 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
  69.235 +
  69.236 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
  69.237 +			unsigned long now, int direction)
  69.238  {
  69.239 -	skip_ioapic_setup = 1;
  69.240 -	return 1;
  69.241 -}
  69.242 +	int search_idle = 1;
  69.243 +	int cpu = curr_cpu;
  69.244  
  69.245 -__setup("noapic", noioapic_setup);
  69.246 +	goto inside;
  69.247  
  69.248 -static int __init ioapic_setup(char *str)
  69.249 -{
  69.250 -	skip_ioapic_setup = 0;
  69.251 -	return 1;
  69.252 +	do {
  69.253 +		if (unlikely(cpu == curr_cpu))
  69.254 +			search_idle = 0;
  69.255 +inside:
  69.256 +		if (direction == 1) {
  69.257 +			cpu++;
  69.258 +			if (cpu >= NR_CPUS)
  69.259 +				cpu = 0;
  69.260 +		} else {
  69.261 +			cpu--;
  69.262 +			if (cpu == -1)
  69.263 +				cpu = NR_CPUS-1;
  69.264 +		}
  69.265 +	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
  69.266 +			(search_idle && !IDLE_ENOUGH(cpu,now)));
  69.267 +
  69.268 +	return cpu;
  69.269  }
  69.270  
  69.271 -__setup("apic", ioapic_setup);
  69.272 -
  69.273 -
  69.274 -
  69.275 -static int __init ioapic_pirq_setup(char *str)
  69.276 +static inline void balance_irq(int cpu, int irq)
  69.277  {
  69.278 -	int i, max;
  69.279 -	int ints[MAX_PIRQS+1];
  69.280 -
  69.281 -	get_options(str, ARRAY_SIZE(ints), ints);
  69.282 -
  69.283 -	for (i = 0; i < MAX_PIRQS; i++)
  69.284 -		pirq_entries[i] = -1;
  69.285 +	unsigned long now = jiffies;
  69.286 +	cpumask_t allowed_mask;
  69.287 +	unsigned int new_cpu;
  69.288 +		
  69.289 +	if (irqbalance_disabled)
  69.290 +		return; 
  69.291  
  69.292 -	pirqs_enabled = 1;
  69.293 -	printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
  69.294 -	max = MAX_PIRQS;
  69.295 -	if (ints[0] < MAX_PIRQS)
  69.296 -		max = ints[0];
  69.297 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
  69.298 +	new_cpu = move(cpu, allowed_mask, now, 1);
  69.299 +	if (cpu != new_cpu) {
  69.300 +		irq_desc_t *desc = irq_desc + irq;
  69.301 +		unsigned long flags;
  69.302  
  69.303 -	for (i = 0; i < max; i++) {
  69.304 -		printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
  69.305 -		/*
  69.306 -		 * PIRQs are mapped upside down, usually.
  69.307 -		 */
  69.308 -		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
  69.309 +		spin_lock_irqsave(&desc->lock, flags);
  69.310 +		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
  69.311 +		spin_unlock_irqrestore(&desc->lock, flags);
  69.312  	}
  69.313 -	return 1;
  69.314  }
  69.315  
  69.316 -__setup("pirq=", ioapic_pirq_setup);
  69.317 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
  69.318 +{
  69.319 +	int i, j;
  69.320 +	Dprintk("Rotating IRQs among CPUs.\n");
  69.321 +	for (i = 0; i < NR_CPUS; i++) {
  69.322 +		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
  69.323 +			if (!irq_desc[j].action)
  69.324 +				continue;
  69.325 +			/* Is it a significant load ?  */
  69.326 +			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
  69.327 +						useful_load_threshold)
  69.328 +				continue;
  69.329 +			balance_irq(i, j);
  69.330 +		}
  69.331 +	}
  69.332 +	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  69.333 +		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  69.334 +	return;
  69.335 +}
  69.336  
  69.337 -#endif
  69.338 +static void do_irq_balance(void)
  69.339 +{
  69.340 +	int i, j;
  69.341 +	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
  69.342 +	unsigned long move_this_load = 0;
  69.343 +	int max_loaded = 0, min_loaded = 0;
  69.344 +	int load;
  69.345 +	unsigned long useful_load_threshold = balanced_irq_interval + 10;
  69.346 +	int selected_irq;
  69.347 +	int tmp_loaded, first_attempt = 1;
  69.348 +	unsigned long tmp_cpu_irq;
  69.349 +	unsigned long imbalance = 0;
  69.350 +	cpumask_t allowed_mask, target_cpu_mask, tmp;
  69.351 +
  69.352 +	for (i = 0; i < NR_CPUS; i++) {
  69.353 +		int package_index;
  69.354 +		CPU_IRQ(i) = 0;
  69.355 +		if (!cpu_online(i))
  69.356 +			continue;
  69.357 +		package_index = CPU_TO_PACKAGEINDEX(i);
  69.358 +		for (j = 0; j < NR_IRQS; j++) {
  69.359 +			unsigned long value_now, delta;
  69.360 +			/* Is this an active IRQ? */
  69.361 +			if (!irq_desc[j].action)
  69.362 +				continue;
  69.363 +			if ( package_index == i )
  69.364 +				IRQ_DELTA(package_index,j) = 0;
  69.365 +			/* Determine the total count per processor per IRQ */
  69.366 +			value_now = (unsigned long) kstat_cpu(i).irqs[j];
  69.367 +
  69.368 +			/* Determine the activity per processor per IRQ */
  69.369 +			delta = value_now - LAST_CPU_IRQ(i,j);
  69.370 +
  69.371 +			/* Update last_cpu_irq[][] for the next time */
  69.372 +			LAST_CPU_IRQ(i,j) = value_now;
  69.373 +
  69.374 +			/* Ignore IRQs whose rate is less than the clock */
  69.375 +			if (delta < useful_load_threshold)
  69.376 +				continue;
  69.377 +			/* update the load for the processor or package total */
  69.378 +			IRQ_DELTA(package_index,j) += delta;
  69.379 +
  69.380 +			/* Keep track of the higher numbered sibling as well */
  69.381 +			if (i != package_index)
  69.382 +				CPU_IRQ(i) += delta;
  69.383 +			/*
  69.384 +			 * We have sibling A and sibling B in the package
  69.385 +			 *
  69.386 +			 * cpu_irq[A] = load for cpu A + load for cpu B
  69.387 +			 * cpu_irq[B] = load for cpu B
  69.388 +			 */
  69.389 +			CPU_IRQ(package_index) += delta;
  69.390 +		}
  69.391 +	}
  69.392 +	/* Find the least loaded processor package */
  69.393 +	for (i = 0; i < NR_CPUS; i++) {
  69.394 +		if (!cpu_online(i))
  69.395 +			continue;
  69.396 +		if (i != CPU_TO_PACKAGEINDEX(i))
  69.397 +			continue;
  69.398 +		if (min_cpu_irq > CPU_IRQ(i)) {
  69.399 +			min_cpu_irq = CPU_IRQ(i);
  69.400 +			min_loaded = i;
  69.401 +		}
  69.402 +	}
  69.403 +	max_cpu_irq = ULONG_MAX;
  69.404 +
  69.405 +tryanothercpu:
  69.406 +	/* Look for heaviest loaded processor.
  69.407 +	 * We may come back to get the next heaviest loaded processor.
  69.408 +	 * Skip processors with trivial loads.
  69.409 +	 */
  69.410 +	tmp_cpu_irq = 0;
  69.411 +	tmp_loaded = -1;
  69.412 +	for (i = 0; i < NR_CPUS; i++) {
  69.413 +		if (!cpu_online(i))
  69.414 +			continue;
  69.415 +		if (i != CPU_TO_PACKAGEINDEX(i))
  69.416 +			continue;
  69.417 +		if (max_cpu_irq <= CPU_IRQ(i)) 
  69.418 +			continue;
  69.419 +		if (tmp_cpu_irq < CPU_IRQ(i)) {
  69.420 +			tmp_cpu_irq = CPU_IRQ(i);
  69.421 +			tmp_loaded = i;
  69.422 +		}
  69.423 +	}
  69.424 +
  69.425 +	if (tmp_loaded == -1) {
   69.426 +	 /* In the case of a small number of heavy interrupt sources,
   69.427 +	  * some of the cpus end up loaded too much. We use Ingo's original
  69.428 +	  * approach to rotate them around.
  69.429 +	  */
  69.430 +		if (!first_attempt && imbalance >= useful_load_threshold) {
  69.431 +			rotate_irqs_among_cpus(useful_load_threshold);
  69.432 +			return;
  69.433 +		}
  69.434 +		goto not_worth_the_effort;
  69.435 +	}
  69.436 +	
  69.437 +	first_attempt = 0;		/* heaviest search */
  69.438 +	max_cpu_irq = tmp_cpu_irq;	/* load */
  69.439 +	max_loaded = tmp_loaded;	/* processor */
  69.440 +	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
  69.441 +	
  69.442 +	Dprintk("max_loaded cpu = %d\n", max_loaded);
  69.443 +	Dprintk("min_loaded cpu = %d\n", min_loaded);
  69.444 +	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
  69.445 +	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
  69.446 +	Dprintk("load imbalance = %lu\n", imbalance);
  69.447 +
   69.448 +	/* If the imbalance is less than one eighth (~12.5%) of the max
   69.449 +	 * load, we are in diminishing-returns territory, so quit.
  69.450 +	 */
  69.451 +	if (imbalance < (max_cpu_irq >> 3)) {
  69.452 +		Dprintk("Imbalance too trivial\n");
  69.453 +		goto not_worth_the_effort;
  69.454 +	}
  69.455 +
  69.456 +tryanotherirq:
  69.457 +	/* if we select an IRQ to move that can't go where we want, then
  69.458 +	 * see if there is another one to try.
  69.459 +	 */
  69.460 +	move_this_load = 0;
  69.461 +	selected_irq = -1;
  69.462 +	for (j = 0; j < NR_IRQS; j++) {
  69.463 +		/* Is this an active IRQ? */
  69.464 +		if (!irq_desc[j].action)
  69.465 +			continue;
  69.466 +		if (imbalance <= IRQ_DELTA(max_loaded,j))
  69.467 +			continue;
  69.468 +		/* Try to find the IRQ that is closest to the imbalance
  69.469 +		 * without going over.
  69.470 +		 */
  69.471 +		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
  69.472 +			move_this_load = IRQ_DELTA(max_loaded,j);
  69.473 +			selected_irq = j;
  69.474 +		}
  69.475 +	}
  69.476 +	if (selected_irq == -1) {
  69.477 +		goto tryanothercpu;
  69.478 +	}
  69.479 +
  69.480 +	imbalance = move_this_load;
  69.481 +	
   69.482 +	/* For the physical_balance case, we accumulated both load
   69.483 +	 * values in one of the siblings' cpu_irq[],
   69.484 +	 * so that we can use the same code for physical and logical
   69.485 +	 * processors as much as possible.
  69.486 +	 *
  69.487 +	 * NOTE: the cpu_irq[] array holds the sum of the load for
  69.488 +	 * sibling A and sibling B in the slot for the lowest numbered
  69.489 +	 * sibling (A), _AND_ the load for sibling B in the slot for
  69.490 +	 * the higher numbered sibling.
  69.491 +	 *
  69.492 +	 * We seek the least loaded sibling by making the comparison
  69.493 +	 * (A+B)/2 vs B
  69.494 +	 */
  69.495 +	load = CPU_IRQ(min_loaded) >> 1;
  69.496 +	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
  69.497 +		if (load > CPU_IRQ(j)) {
  69.498 +			/* This won't change cpu_sibling_map[min_loaded] */
  69.499 +			load = CPU_IRQ(j);
  69.500 +			min_loaded = j;
  69.501 +		}
  69.502 +	}
  69.503 +
  69.504 +	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
  69.505 +	target_cpu_mask = cpumask_of_cpu(min_loaded);
  69.506 +	cpus_and(tmp, target_cpu_mask, allowed_mask);
  69.507 +
  69.508 +	if (!cpus_empty(tmp)) {
  69.509 +		irq_desc_t *desc = irq_desc + selected_irq;
  69.510 +		unsigned long flags;
  69.511 +
  69.512 +		Dprintk("irq = %d moved to cpu = %d\n",
  69.513 +				selected_irq, min_loaded);
  69.514 +		/* mark for change destination */
  69.515 +		spin_lock_irqsave(&desc->lock, flags);
  69.516 +		pending_irq_balance_cpumask[selected_irq] =
  69.517 +					cpumask_of_cpu(min_loaded);
  69.518 +		spin_unlock_irqrestore(&desc->lock, flags);
  69.519 +		/* Since we made a change, come back sooner to 
  69.520 +		 * check for more variation.
  69.521 +		 */
  69.522 +		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  69.523 +			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  69.524 +		return;
  69.525 +	}
  69.526 +	goto tryanotherirq;
  69.527 +
  69.528 +not_worth_the_effort:
  69.529 +	/*
  69.530 +	 * if we did not find an IRQ to move, then adjust the time interval
  69.531 +	 * upward
  69.532 +	 */
  69.533 +	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
  69.534 +		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
  69.535 +	Dprintk("IRQ worth rotating not found\n");
  69.536 +	return;
  69.537 +}
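
Taken together, the two adjustments give the balancer an adaptive period: every successful migration shortens the next pass, every fruitless one lengthens it, clamped to a fixed window. A user-space sketch of the clamping, using placeholder constants rather than the kernel's actual values:

#include <stdio.h>

/* Sketch of the adaptive interval: shrink after a successful move,
 * grow after a fruitless pass, clamped to [MIN, MAX].  The constants
 * here are placeholders, not the kernel's values. */
#define MIN_INTERVAL	50
#define MAX_INTERVAL	500
#define LESS_DELTA	25
#define MORE_DELTA	25

static long max_l(long a, long b) { return a > b ? a : b; }
static long min_l(long a, long b) { return a < b ? a : b; }

int main(void)
{
	long interval = 100;

	interval = max_l(MIN_INTERVAL, interval - LESS_DELTA); /* moved: 75 */
	interval = min_l(MAX_INTERVAL, interval + MORE_DELTA); /* idle: 100 */
	printf("interval = %ld\n", interval);
	return 0;
}
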
  69.538 +
  69.539 +static int balanced_irq(void *unused)
  69.540 +{
  69.541 +	int i;
  69.542 +	unsigned long prev_balance_time = jiffies;
  69.543 +	long time_remaining = balanced_irq_interval;
  69.544 +
  69.545 +	daemonize("kirqd");
  69.546 +	
  69.547 +	/* push everything to CPU 0 to give us a starting point.  */
  69.548 +	for (i = 0 ; i < NR_IRQS ; i++) {
  69.549 +		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
  69.550 +	}
  69.551 +
  69.552 +	for ( ; ; ) {
  69.553 +		set_current_state(TASK_INTERRUPTIBLE);
  69.554 +		time_remaining = schedule_timeout(time_remaining);
  69.555 +		try_to_freeze(PF_FREEZE);
  69.556 +		if (time_after(jiffies,
  69.557 +				prev_balance_time+balanced_irq_interval)) {
  69.558 +			do_irq_balance();
  69.559 +			prev_balance_time = jiffies;
  69.560 +			time_remaining = balanced_irq_interval;
  69.561 +		}
  69.562 +	}
  69.563 +	return 0;
  69.564 +}
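
The thread only rebalances once balanced_irq_interval jiffies have elapsed; time_after() compares via signed subtraction, which keeps the test correct when the jiffies counter wraps. A minimal sketch of that idiom, assuming the standard Linux macro definition (long in the kernel, int32_t here for a portable demo):

#include <stdio.h>
#include <stdint.h>

/* Sketch of the time_after() idiom: signed subtraction makes the
 * comparison wrap-safe for a free-running tick counter. */
#define time_after(a, b) ((int32_t)(b) - (int32_t)(a) < 0)

int main(void)
{
	uint32_t prev = 0xfffffff0u;		/* just before wraparound */
	uint32_t now  = prev + 0x20;		/* 0x00000010, after wrap */

	printf("%d\n", now > prev);		/* 0: naive compare fails  */
	printf("%d\n", time_after(now, prev));	/* 1: wrap-safe compare    */
	return 0;
}
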
  69.565 +
  69.566 +static int __init balanced_irq_init(void)
  69.567 +{
  69.568 +	int i;
  69.569 +	struct cpuinfo_x86 *c;
  69.570 +	cpumask_t tmp;
  69.571 +
  69.572 +	cpus_shift_right(tmp, cpu_online_map, 2);
  69.573 +	c = &boot_cpu_data;
  69.574 +	/* When not overridden on the command line, ask the subarchitecture. */
  69.575 +	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
  69.576 +		irqbalance_disabled = NO_BALANCE_IRQ;
  69.577 +	if (irqbalance_disabled)
  69.578 +		return 0;
  69.579 +	
  69.580 +	/* Disable irqbalance completely if there is only one processor online. */
  69.581 +	if (num_online_cpus() < 2) {
  69.582 +		irqbalance_disabled = 1;
  69.583 +		return 0;
  69.584 +	}
  69.585 +	/*
  69.586 +	 * Enable physical balance only if more than 1 physical processor
  69.587 +	 * is present
  69.588 +	 */
  69.589 +	if (smp_num_siblings > 1 && !cpus_empty(tmp))
  69.590 +		physical_balance = 1;
  69.591 +
  69.592 +	for (i = 0; i < NR_CPUS; i++) {
  69.593 +		if (!cpu_online(i))
  69.594 +			continue;
  69.595 +		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  69.596 +		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  69.597 +		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
  69.598 +			printk(KERN_ERR "balanced_irq_init: out of memory\n");
  69.599 +			goto failed;
  69.600 +		}
  69.601 +		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
  69.602 +		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
  69.603 +	}
  69.604 +	
  69.605 +	printk(KERN_INFO "Starting balanced_irq\n");
  69.606 +	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
  69.607 +		return 0;
  69.608 +	else 
  69.609 +		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq\n");
  69.610 +failed:
  69.611 +	for (i = 0; i < NR_CPUS; i++) {
  69.612 +		if (irq_cpu_data[i].irq_delta)
  69.613 +			kfree(irq_cpu_data[i].irq_delta);
  69.614 +		if (irq_cpu_data[i].last_irq)
  69.615 +			kfree(irq_cpu_data[i].last_irq);
  69.616 +	}
  69.617 +	return 0;
  69.618 +}
  69.619 +
  69.620 +int __init irqbalance_disable(char *str)
  69.621 +{
  69.622 +	irqbalance_disabled = 1;
  69.623 +	return 0;
  69.624 +}
  69.625 +
  69.626 +__setup("noirqbalance", irqbalance_disable);
  69.627 +
  69.628 +static inline void move_irq(int irq)
  69.629 +{
  69.630 +	/* note - we hold the desc->lock */
  69.631 +	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
  69.632 +		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
  69.633 +		cpus_clear(pending_irq_balance_cpumask[irq]);
  69.634 +	}
  69.635 +}
  69.636 +
  69.637 +late_initcall(balanced_irq_init);
  69.638 +
  69.639 +#else /* !CONFIG_IRQBALANCE */
  69.640 +static inline void move_irq(int irq) { }
  69.641 +#endif /* CONFIG_IRQBALANCE */
  69.642 +
  69.643 +#ifndef CONFIG_SMP
  69.644 +void fastcall send_IPI_self(int vector)
  69.645 +{
  69.646 +	unsigned int cfg;
  69.647 +
  69.648 +	/*
  69.649 +	 * Wait for idle.
  69.650 +	 */
  69.651 +	apic_wait_icr_idle();
  69.652 +	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
  69.653 +	/*
  69.654 +	 * Send the IPI. The write to APIC_ICR fires this off.
  69.655 +	 */
  69.656 +	apic_write_around(APIC_ICR, cfg);
  69.657 +}
  69.658 +#endif /* !CONFIG_SMP */
  69.659  
  69.660  /*
  69.661   * Find the IRQ entry number of a certain pin.
  69.662   */
  69.663 -static int __init find_irq_entry(int apic, int pin, int type)
  69.664 +static int find_irq_entry(int apic, int pin, int type)
  69.665  {
  69.666  	int i;
  69.667  
  69.668 @@ -289,7 +685,7 @@ static int __init find_irq_entry(int api
  69.669  /*
  69.670   * Find the pin to which IRQ[irq] (ISA) is connected
  69.671   */
  69.672 -static int __init find_isa_irq_pin(int irq, int type)
  69.673 +static int find_isa_irq_pin(int irq, int type)
  69.674  {
  69.675  	int i;
  69.676  
  69.677 @@ -298,7 +694,9 @@ static int __init find_isa_irq_pin(int i
  69.678  
  69.679  		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
  69.680  		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
  69.681 -		     mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
  69.682 +		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
  69.683 +		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
  69.684 +		    ) &&
  69.685  		    (mp_irqs[i].mpc_irqtype == type) &&
  69.686  		    (mp_irqs[i].mpc_srcbusirq == irq))
  69.687  
  69.688 @@ -313,16 +711,81 @@ static int __init find_isa_irq_pin(int i
  69.689   */
  69.690  static int pin_2_irq(int idx, int apic, int pin);
  69.691  
  69.692 +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
  69.693 +{
  69.694 +	int apic, i, best_guess = -1;
  69.695 +
  69.696 +	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
  69.697 +		"slot:%d, pin:%d.\n", bus, slot, pin);
  69.698 +	if (mp_bus_id_to_pci_bus[bus] == -1) {
  69.699 +		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
  69.700 +		return -1;
  69.701 +	}
  69.702 +	for (i = 0; i < mp_irq_entries; i++) {
  69.703 +		int lbus = mp_irqs[i].mpc_srcbus;
  69.704 +
  69.705 +		for (apic = 0; apic < nr_ioapics; apic++)
  69.706 +			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
  69.707 +			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
  69.708 +				break;
  69.709 +
  69.710 +		if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
  69.711 +		    !mp_irqs[i].mpc_irqtype &&
  69.712 +		    (bus == lbus) &&
  69.713 +		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
  69.714 +			int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
  69.715 +
  69.716 +			if (!(apic || IO_APIC_IRQ(irq)))
  69.717 +				continue;
  69.718 +
  69.719 +			if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
  69.720 +				return irq;
  69.721 +			/*
  69.722 +			 * Use the first all-but-pin matching entry as a
  69.723 +			 * best-guess fuzzy result for broken mptables.
  69.724 +			 */
  69.725 +			if (best_guess < 0)
  69.726 +				best_guess = irq;
  69.727 +		}
  69.728 +	}
  69.729 +	return best_guess;
  69.730 +}
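
The shifts above follow the MP-specification layout of the PCI source-bus IRQ byte: bits 6:2 hold the device (slot) number and bits 1:0 the INTA..INTD pin. A standalone sketch of the decode:

#include <stdio.h>

/* Sketch: unpack an MP-table PCI srcbusirq byte.
 * bits 6:2 = device/slot number, bits 1:0 = INTA..INTD pin. */
int main(void)
{
	unsigned int srcbusirq = (3 << 2) | 1;	/* slot 3, INTB */
	unsigned int slot = (srcbusirq >> 2) & 0x1f;
	unsigned int pin  = srcbusirq & 3;

	printf("slot %u, INT%c\n", slot, 'A' + pin);
	return 0;
}
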
  69.731 +
  69.732 +/*
  69.733 + * This function is currently only a helper for the i386 SMP boot process,
  69.734 + * where we need to reprogram the ioredtbls to cater for the CPUs which have
  69.735 + * come online, so the mask should in all cases simply be TARGET_CPUS.
  69.736 + */
  69.737 +void __init setup_ioapic_dest(void)
  69.738 +{
  69.739 +	int pin, ioapic, irq, irq_entry;
  69.740 +
  69.741 +	if (skip_ioapic_setup == 1)
  69.742 +		return;
  69.743 +
  69.744 +	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
  69.745 +		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
  69.746 +			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
  69.747 +			if (irq_entry == -1)
  69.748 +				continue;
  69.749 +			irq = pin_2_irq(irq_entry, ioapic, pin);
  69.750 +			set_ioapic_affinity_irq(irq, TARGET_CPUS);
  69.751 +		}
  69.752 +
  69.753 +	}
  69.754 +}
  69.755 +
  69.756  /*
  69.757   * EISA Edge/Level control register, ELCR
  69.758   */
  69.759 -static int __init EISA_ELCR(unsigned int irq)
  69.760 +static int EISA_ELCR(unsigned int irq)
  69.761  {
  69.762  	if (irq < 16) {
  69.763  		unsigned int port = 0x4d0 + (irq >> 3);
  69.764  		return (inb(port) >> (irq & 7)) & 1;
  69.765  	}
  69.766 -	printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
  69.767 +	apic_printk(APIC_VERBOSE, KERN_INFO
  69.768 +			"Broken MPtable reports ISA irq %d\n", irq);
  69.769  	return 0;
  69.770  }
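
The two ELCR registers at 0x4d0/0x4d1 hold one trigger-mode bit per ISA IRQ (1 = level, 0 = edge), so `irq >> 3` selects the port and `irq & 7` the bit. A sketch of the same indexing against an in-memory copy standing in for the inb() calls:

#include <stdio.h>

/* Sketch of the ELCR indexing: one bit per ISA IRQ across two
 * 8-bit registers.  elcr[] stands in for inb(0x4d0)/inb(0x4d1). */
int main(void)
{
	unsigned char elcr[2] = { 0x00, 0x20 };	/* bit 5 of 0x4d1 set */
	unsigned int irq = 13;			/* 13 >> 3 = 1, 13 & 7 = 5 */
	int level = (elcr[irq >> 3] >> (irq & 7)) & 1;

	printf("irq %u is %s triggered\n", irq, level ? "level" : "edge");
	return 0;
}
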
  69.771  
  69.772 @@ -352,6 +815,12 @@ static int __init EISA_ELCR(unsigned int
  69.773  #define default_MCA_trigger(idx)	(1)
  69.774  #define default_MCA_polarity(idx)	(0)
  69.775  
  69.776 +/* NEC98 interrupts are always polarity zero, edge triggered,
  69.777 + * when listed as conforming in the MP table. */
  69.778 +
  69.779 +#define default_NEC98_trigger(idx)     (0)
  69.780 +#define default_NEC98_polarity(idx)    (0)
  69.781 +
  69.782  static int __init MPBIOS_polarity(int idx)
  69.783  {
  69.784  	int bus = mp_irqs[idx].mpc_srcbus;
  69.785 @@ -386,6 +855,11 @@ static int __init MPBIOS_polarity(int id
  69.786  					polarity = default_MCA_polarity(idx);
  69.787  					break;
  69.788  				}
  69.789 +				case MP_BUS_NEC98: /* NEC 98 pin */
  69.790 +				{
  69.791 +					polarity = default_NEC98_polarity(idx);
  69.792 +					break;
  69.793 +				}
  69.794  				default:
  69.795  				{
  69.796  					printk(KERN_WARNING "broken BIOS!!\n");
  69.797 @@ -421,7 +895,7 @@ static int __init MPBIOS_polarity(int id
  69.798  	return polarity;
  69.799  }
  69.800  
  69.801 -static int __init MPBIOS_trigger(int idx)
  69.802 +static int MPBIOS_trigger(int idx)
  69.803  {
  69.804  	int bus = mp_irqs[idx].mpc_srcbus;
  69.805  	int trigger;
  69.806 @@ -455,6 +929,11 @@ static int __init MPBIOS_trigger(int idx
  69.807  					trigger = default_MCA_trigger(idx);
  69.808  					break;
  69.809  				}
  69.810 +				case MP_BUS_NEC98: /* NEC 98 pin */
  69.811 +				{
  69.812 +					trigger = default_NEC98_trigger(idx);
  69.813 +					break;
  69.814 +				}
  69.815  				default:
  69.816  				{
  69.817  					printk(KERN_WARNING "broken BIOS!!\n");
  69.818 @@ -516,6 +995,7 @@ static int pin_2_irq(int idx, int apic, 
  69.819  		case MP_BUS_ISA: /* ISA pin */
  69.820  		case MP_BUS_EISA:
  69.821  		case MP_BUS_MCA:
  69.822 +		case MP_BUS_NEC98:
  69.823  		{
  69.824  			irq = mp_irqs[idx].mpc_srcbusirq;
  69.825  			break;
  69.826 @@ -529,6 +1009,13 @@ static int pin_2_irq(int idx, int apic, 
  69.827  			while (i < apic)
  69.828  				irq += nr_ioapic_registers[i++];
  69.829  			irq += pin;
  69.830 +
  69.831 +			/*
  69.832 +			 * For MPS mode, so far only needed by ES7000 platform
  69.833 +			 */
  69.834 +			if (ioapic_renumber_irq)
  69.835 +				irq = ioapic_renumber_irq(apic, irq);
  69.836 +
  69.837  			break;
  69.838  		}
  69.839  		default:
  69.840 @@ -539,20 +1026,6 @@ static int pin_2_irq(int idx, int apic, 
  69.841  		}
  69.842  	}
  69.843  
  69.844 -	/*
  69.845 -	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
  69.846 -	 */
  69.847 -	if ((pin >= 16) && (pin <= 23)) {
  69.848 -		if (pirq_entries[pin-16] != -1) {
  69.849 -			if (!pirq_entries[pin-16]) {
  69.850 -				printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
  69.851 -			} else {
  69.852 -				irq = pirq_entries[pin-16];
  69.853 -				printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
  69.854 -						pin-16, irq);
  69.855 -			}
  69.856 -		}
  69.857 -	}
  69.858  	return irq;
  69.859  }
  69.860  
  69.861 @@ -573,90 +1046,74 @@ static inline int IO_APIC_irq_trigger(in
  69.862  	return 0;
  69.863  }
  69.864  
  69.865 -int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
  69.866 -
  69.867 -int vector_irq[256];
  69.868 +/* irq_vector[] is indexed by the sum of all RTEs in all I/O APICs. */
  69.869 +u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
  69.870  
  69.871  int assign_irq_vector(int irq)
  69.872  {
  69.873  	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
  69.874 -	if (IO_APIC_VECTOR(irq) > 0)
  69.875 +
  69.876 +	BUG_ON(irq >= NR_IRQ_VECTORS);
  69.877 +	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
  69.878  		return IO_APIC_VECTOR(irq);
  69.879  next:
  69.880  	current_vector += 8;
  69.881  
  69.882 -        /* Skip the hypercall vector. */
  69.883 -	if (current_vector == HYPERCALL_VECTOR) goto next;
  69.884 -        /* Skip the Linux/BSD fast-trap vector. */
  69.885 -        if (current_vector == 0x80) goto next;
  69.886 +	/* Skip the hypercall vector. */
  69.887 +	if (current_vector == HYPERCALL_VECTOR)
  69.888 +		goto next;
  69.889  
  69.890 -	if (current_vector > FIRST_SYSTEM_VECTOR) {
  69.891 +	/* Skip the Linux/BSD fast-trap vector. */
  69.892 +	if (current_vector == 0x80)
  69.893 +		goto next;
  69.894 +
  69.895 +	if (current_vector >= FIRST_SYSTEM_VECTOR) {
  69.896  		offset++;
  69.897 +		if (!(offset%8))
  69.898 +			return -ENOSPC;
  69.899  		current_vector = FIRST_DEVICE_VECTOR + offset;
  69.900  	}
  69.901  
  69.902 -	if (current_vector == FIRST_SYSTEM_VECTOR)
  69.903 -		panic("ran out of interrupt sources!");
  69.904 -
  69.905 -	IO_APIC_VECTOR(irq) = current_vector;
  69.906 -
  69.907 -        vector_irq[current_vector] = irq;
  69.908 +	vector_irq[current_vector] = irq;
  69.909 +	if (irq != AUTO_ASSIGN)
  69.910 +		IO_APIC_VECTOR(irq) = current_vector;
  69.911  
  69.912  	return current_vector;
  69.913  }
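
Stepping by 8 spreads consecutive IRQs across APIC priority classes (the class is vector >> 4, so two vectors per class at this stride); once the walk passes FIRST_SYSTEM_VECTOR it restarts from FIRST_DEVICE_VECTOR plus a growing offset to pick up the skipped slots, failing once eight offsets are exhausted. A user-space sketch of the walk; the vector constants are placeholders shaped like the x86 layout, not taken from this file:

#include <stdio.h>

/* Sketch of assign_irq_vector()'s stride-8 walk with placeholder
 * constants.  Returns -1 where the kernel returns -ENOSPC. */
#define FIRST_DEVICE_VECTOR	0x31
#define FIRST_SYSTEM_VECTOR	0xef
#define HYPERCALL_VECTOR	0x82

static int current_vector = FIRST_DEVICE_VECTOR, offset;

static int next_vector(void)
{
next:
	current_vector += 8;
	if (current_vector == HYPERCALL_VECTOR || current_vector == 0x80)
		goto next;
	if (current_vector >= FIRST_SYSTEM_VECTOR) {
		if (!(++offset % 8))
			return -1;	/* out of vectors */
		current_vector = FIRST_DEVICE_VECTOR + offset;
	}
	return current_vector;
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("vector 0x%02x\n", next_vector());
	return 0;
}
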
  69.914  
  69.915 -extern void (*interrupt[NR_IRQS])(void);
  69.916 -
  69.917 -/*
  69.918 - * Level and edge triggered IO-APIC interrupts need different handling,
  69.919 - * so we use two separate IRQ descriptors. Edge triggered IRQs can be
  69.920 - * handled with the level-triggered descriptor, but that one has slightly
  69.921 - * more overhead. Level-triggered interrupts cannot be handled with the
  69.922 - * edge-triggered handler, without risking IRQ storms and other ugly
  69.923 - * races.
  69.924 - */
  69.925 +static struct hw_interrupt_type ioapic_level_type;
  69.926 +static struct hw_interrupt_type ioapic_edge_type;
  69.927  
  69.928 -static unsigned int startup_edge_ioapic_irq(unsigned int irq);
  69.929 -#define shutdown_edge_ioapic_irq  disable_edge_ioapic_irq
  69.930 -#define enable_edge_ioapic_irq    unmask_IO_APIC_irq
  69.931 -static void disable_edge_ioapic_irq (unsigned int irq);
  69.932 -static void ack_edge_ioapic_irq(unsigned int irq);
  69.933 -static void end_edge_ioapic_irq (unsigned int i);
  69.934 -static struct hw_interrupt_type ioapic_edge_irq_type = {
  69.935 -	"IO-APIC-edge",
  69.936 -	startup_edge_ioapic_irq,
  69.937 -	shutdown_edge_ioapic_irq,
  69.938 -	enable_edge_ioapic_irq,
  69.939 -	disable_edge_ioapic_irq,
  69.940 -	ack_edge_ioapic_irq,
  69.941 -	end_edge_ioapic_irq,
  69.942 -	set_ioapic_affinity,
  69.943 -};
  69.944 +#define IOAPIC_AUTO	-1
  69.945 +#define IOAPIC_EDGE	0
  69.946 +#define IOAPIC_LEVEL	1
  69.947  
  69.948 -static unsigned int startup_level_ioapic_irq (unsigned int irq);
  69.949 -#define shutdown_level_ioapic_irq mask_IO_APIC_irq
  69.950 -#define enable_level_ioapic_irq   unmask_IO_APIC_irq
  69.951 -#define disable_level_ioapic_irq  mask_IO_APIC_irq
  69.952 -static void mask_and_ack_level_ioapic_irq (unsigned int irq);
  69.953 -static void end_level_ioapic_irq (unsigned int irq);
  69.954 -static struct hw_interrupt_type ioapic_level_irq_type = {
  69.955 -	"IO-APIC-level",
  69.956 -	startup_level_ioapic_irq,
  69.957 -	shutdown_level_ioapic_irq,
  69.958 -	enable_level_ioapic_irq,
  69.959 -	disable_level_ioapic_irq,
  69.960 -	mask_and_ack_level_ioapic_irq,
  69.961 -	end_level_ioapic_irq,
  69.962 -	set_ioapic_affinity,
  69.963 -};
  69.964 +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
  69.965 +{
  69.966 +	if (use_pci_vector() && !platform_legacy_irq(irq)) {
  69.967 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
  69.968 +				trigger == IOAPIC_LEVEL)
  69.969 +			irq_desc[vector].handler = &ioapic_level_type;
  69.970 +		else
  69.971 +			irq_desc[vector].handler = &ioapic_edge_type;
  69.972 +		set_intr_gate(vector, interrupt[vector]);
  69.973 +	} else	{
  69.974 +		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
  69.975 +				trigger == IOAPIC_LEVEL)
  69.976 +			irq_desc[irq].handler = &ioapic_level_type;
  69.977 +		else
  69.978 +			irq_desc[irq].handler = &ioapic_edge_type;
  69.979 +		set_intr_gate(vector, interrupt[irq]);
  69.980 +	}
  69.981 +}
  69.982  
  69.983  void __init setup_IO_APIC_irqs(void)
  69.984  {
  69.985  	struct IO_APIC_route_entry entry;
  69.986 -	int apic, pin, idx, irq, vector;
  69.987 +	int apic, pin, idx, irq, first_notcon = 1, vector;
  69.988  	unsigned long flags;
  69.989  
  69.990 -	printk(KERN_DEBUG "init IO_APIC IRQs\n");
  69.991 +	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
  69.992  
  69.993  	for (apic = 0; apic < nr_ioapics; apic++) {
  69.994  	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
  69.995 @@ -667,13 +1124,24 @@ void __init setup_IO_APIC_irqs(void)
  69.996  		memset(&entry,0,sizeof(entry));
  69.997  
  69.998  		entry.delivery_mode = INT_DELIVERY_MODE;
  69.999 -		entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
 69.1000 +		entry.dest_mode = INT_DEST_MODE;
 69.1001  		entry.mask = 0;				/* enable IRQ */
 69.1002 -		entry.dest.logical.logical_dest = target_cpus();
 69.1003 +		entry.dest.logical.logical_dest = 
 69.1004 +					cpu_mask_to_apicid(TARGET_CPUS);
 69.1005  
 69.1006  		idx = find_irq_entry(apic,pin,mp_INT);
 69.1007 -		if (idx == -1)
 69.1008 +		if (idx == -1) {
 69.1009 +			if (first_notcon) {
 69.1010 +				apic_printk(APIC_VERBOSE, KERN_DEBUG
 69.1011 +						" IO-APIC (apicid-pin) %d-%d",
 69.1012 +						mp_ioapics[apic].mpc_apicid,
 69.1013 +						pin);
 69.1014 +				first_notcon = 0;
 69.1015 +			} else
 69.1016 +				apic_printk(APIC_VERBOSE, ", %d-%d",
 69.1017 +					mp_ioapics[apic].mpc_apicid, pin);
 69.1018  			continue;
 69.1019 +		}
 69.1020  
 69.1021  		entry.trigger = irq_trigger(idx);
 69.1022  		entry.polarity = irq_polarity(idx);
 69.1023 @@ -688,8 +1156,7 @@ void __init setup_IO_APIC_irqs(void)
 69.1024  		 * skip adding the timer int on secondary nodes, which causes
 69.1025  		 * a small but painful rift in the time-space continuum
 69.1026  		 */
 69.1027 -		if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) 
 69.1028 -			&& (apic != 0) && (irq == 0))
 69.1029 +		if (multi_timer_check(apic, irq))
 69.1030  			continue;
 69.1031  		else
 69.1032  			add_pin_to_irq(irq, apic, pin);
 69.1033 @@ -700,13 +1167,7 @@ void __init setup_IO_APIC_irqs(void)
 69.1034  		if (IO_APIC_IRQ(irq)) {
 69.1035  			vector = assign_irq_vector(irq);
 69.1036  			entry.vector = vector;
 69.1037 -
 69.1038 -			if (IO_APIC_irq_trigger(irq))
 69.1039 -				irq_desc[irq].handler = &ioapic_level_irq_type;
 69.1040 -			else
 69.1041 -				irq_desc[irq].handler = &ioapic_edge_irq_type;
 69.1042 -
 69.1043 -			set_intr_gate(vector, interrupt[irq]);
 69.1044 +			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
 69.1045  		
 69.1046  			if (!apic && (irq < 16))
 69.1047  				disable_8259A_irq(irq);
 69.1048 @@ -717,11 +1178,13 @@ void __init setup_IO_APIC_irqs(void)
 69.1049  		spin_unlock_irqrestore(&ioapic_lock, flags);
 69.1050  	}
 69.1051  	}
 69.1052 +
 69.1053 +	if (!first_notcon)
 69.1054 +		apic_printk(APIC_VERBOSE, " not connected.\n");
 69.1055  }
 69.1056  
 69.1057  /*
 69.1058 - * Set up the 8259A-master output pin as broadcast to all
 69.1059 - * CPUs.
 69.1060 + * Set up the 8259A-master output pin:
 69.1061   */
 69.1062  void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
 69.1063  {
 69.1064 @@ -739,9 +1202,9 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 69.1065  	 * We use logical delivery to get the timer IRQ
 69.1066  	 * to the first CPU.
 69.1067  	 */
 69.1068 -	entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
 69.1069 +	entry.dest_mode = INT_DEST_MODE;
 69.1070  	entry.mask = 0;					/* unmask IRQ now */
 69.1071 -	entry.dest.logical.logical_dest = target_cpus();
 69.1072 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 69.1073  	entry.delivery_mode = INT_DELIVERY_MODE;
 69.1074  	entry.polarity = 0;
 69.1075  	entry.trigger = 0;
 69.1076 @@ -751,7 +1214,7 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 69.1077  	 * The timer IRQ doesn't have to know that behind the
 69.1078  	 * scene we have a 8259A-master in AEOI mode ...
 69.1079  	 */
 69.1080 -	irq_desc[0].handler = &ioapic_edge_irq_type;
 69.1081 +	irq_desc[0].handler = &ioapic_edge_type;
 69.1082  
 69.1083  	/*
 69.1084  	 * Add it to the IO-APIC irq-routing table:
 69.1085 @@ -764,23 +1227,22 @@ void __init setup_ExtINT_IRQ0_pin(unsign
 69.1086  	enable_8259A_irq(0);
 69.1087  }
 69.1088  
 69.1089 -void __init UNEXPECTED_IO_APIC(void)
 69.1090 +static inline void UNEXPECTED_IO_APIC(void)
 69.1091  {
 69.1092 -	printk(KERN_WARNING 
 69.1093 -		"An unexpected IO-APIC was found. If this kernel release is less than\n"
 69.1094 -		"three months old please report this to linux-smp@vger.kernel.org\n");
 69.1095  }
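
Most of the mechanical churn in this hunk swaps `struct IO_APIC_reg_*` for unions, so a register travels to and from io_apic_read()/io_apic_write() as a raw 32-bit word while still being decoded through named bitfields, retiring the old `*(int *)&reg` casts. A sketch of the shape, with the field layout abbreviated from the register #00 accesses below (bitfield allocation order is compiler/ABI-specific, as in the kernel headers):

#include <stdio.h>

/* Sketch of the union shape used below: .raw aliases the whole
 * 32-bit register, .bits decodes it through named fields. */
typedef unsigned int u32;

union IO_APIC_reg_00 {
	u32 raw;
	struct {
		u32 __reserved_2	: 14,
		    LTS			:  1,
		    delivery_type	:  1,
		    __reserved_1	:  8,
		    ID			:  8;
	} bits;
};

int main(void)
{
	union IO_APIC_reg_00 reg_00;

	reg_00.raw = 0x05000000;	/* as if from io_apic_read(apic, 0) */
	printf("physical APIC id: %02X\n", (unsigned)reg_00.bits.ID);
	return 0;
}
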
 69.1096  
 69.1097  void __init print_IO_APIC(void)
 69.1098  {
 69.1099 -#ifdef VERBOSE
 69.1100  	int apic, i;
 69.1101 -	struct IO_APIC_reg_00 reg_00;
 69.1102 -	struct IO_APIC_reg_01 reg_01;
 69.1103 -	struct IO_APIC_reg_02 reg_02;
 69.1104 -	struct IO_APIC_reg_03 reg_03;
 69.1105 +	union IO_APIC_reg_00 reg_00;
 69.1106 +	union IO_APIC_reg_01 reg_01;
 69.1107 +	union IO_APIC_reg_02 reg_02;
 69.1108 +	union IO_APIC_reg_03 reg_03;
 69.1109  	unsigned long flags;
 69.1110  
 69.1111 +	if (apic_verbosity == APIC_QUIET)
 69.1112 +		return;
 69.1113 +
 69.1114   	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 69.1115  	for (i = 0; i < nr_ioapics; i++)
 69.1116  		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
 69.1117 @@ -795,47 +1257,46 @@ void __init print_IO_APIC(void)
 69.1118  	for (apic = 0; apic < nr_ioapics; apic++) {
 69.1119  
 69.1120  	spin_lock_irqsave(&ioapic_lock, flags);
 69.1121 -	*(int *)&reg_00 = io_apic_read(apic, 0);
 69.1122 -	*(int *)&reg_01 = io_apic_read(apic, 1);
 69.1123 -	if (reg_01.version >= 0x10)
 69.1124 -		*(int *)&reg_02 = io_apic_read(apic, 2);
 69.1125 -	if (reg_01.version >= 0x20)
 69.1126 -		*(int *)&reg_03 = io_apic_read(apic, 3);
 69.1127 +	reg_00.raw = io_apic_read(apic, 0);
 69.1128 +	reg_01.raw = io_apic_read(apic, 1);
 69.1129 +	if (reg_01.bits.version >= 0x10)
 69.1130 +		reg_02.raw = io_apic_read(apic, 2);
 69.1131 +	if (reg_01.bits.version >= 0x20)
 69.1132 +		reg_03.raw = io_apic_read(apic, 3);
 69.1133  	spin_unlock_irqrestore(&ioapic_lock, flags);
 69.1134  
 69.1135 -	printk("\n");
 69.1136  	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
 69.1137 -	printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
 69.1138 -	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.ID);
 69.1139 -	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.delivery_type);
 69.1140 -	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.LTS);
 69.1141 -	if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2)
 69.1142 +	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 69.1143 +	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 69.1144 +	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 69.1145 +	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 69.1146 +	if (reg_00.bits.ID >= get_physical_broadcast())
 69.1147 +		UNEXPECTED_IO_APIC();
 69.1148 +	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
 69.1149  		UNEXPECTED_IO_APIC();
 69.1150  
 69.1151 -	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 69.1152 -	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.entries);
 69.1153 -	if (	(reg_01.entries != 0x0f) && /* older (Neptune) boards */
 69.1154 -		(reg_01.entries != 0x17) && /* typical ISA+PCI boards */
 69.1155 -		(reg_01.entries != 0x1b) && /* Compaq Proliant boards */
 69.1156 -		(reg_01.entries != 0x1f) && /* dual Xeon boards */
 69.1157 -		(reg_01.entries != 0x22) && /* bigger Xeon boards */
 69.1158 -		(reg_01.entries != 0x2E) &&
 69.1159 -		(reg_01.entries != 0x3F)
 69.1160 +	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
 69.1161 +	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
 69.1162 +	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
 69.1163 +		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
 69.1164 +		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
 69.1165 +		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
 69.1166 +		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
 69.1167 +		(reg_01.bits.entries != 0x2E) &&
 69.1168 +		(reg_01.bits.entries != 0x3F)
 69.1169  	)
 69.1170  		UNEXPECTED_IO_APIC();
 69.1171  
 69.1172 -	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.PRQ);
 69.1173 -	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.version);
 69.1174 -	if (	(reg_01.version != 0x01) && /* 82489DX IO-APICs */
 69.1175 -		(reg_01.version != 0x02) && /* VIA */
 69.1176 -		(reg_01.version != 0x03) && /* later VIA */
 69.1177 -		(reg_01.version != 0x10) && /* oldest IO-APICs */
 69.1178 -		(reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
 69.1179 -		(reg_01.version != 0x13) && /* Xeon IO-APICs */
 69.1180 -		(reg_01.version != 0x20)    /* Intel P64H (82806 AA) */
 69.1181 +	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 69.1182 +	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 69.1183 +	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
 69.1184 +		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
 69.1185 +		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
 69.1186 +		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
 69.1187 +		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
 69.1188  	)
 69.1189  		UNEXPECTED_IO_APIC();
 69.1190 -	if (reg_01.__reserved_1 || reg_01.__reserved_2)
 69.1191 +	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
 69.1192  		UNEXPECTED_IO_APIC();
 69.1193  
 69.1194  	/*
 69.1195 @@ -843,10 +1304,10 @@ void __init print_IO_APIC(void)
 69.1196  	 * but the value of reg_02 is read as the previous read register
 69.1197  	 * value, so ignore it if reg_02 == reg_01.
 69.1198  	 */
 69.1199 -	if (reg_01.version >= 0x10 && *(int *)&reg_02 != *(int *)&reg_01) {
 69.1200 -		printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
 69.1201 -		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.arbitration);
 69.1202 -		if (reg_02.__reserved_1 || reg_02.__reserved_2)
 69.1203 +	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 69.1204 +		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 69.1205 +		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 69.1206 +		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
 69.1207  			UNEXPECTED_IO_APIC();
 69.1208  	}
 69.1209  
 69.1210 @@ -855,11 +1316,11 @@ void __init print_IO_APIC(void)
 69.1211  	 * or reg_03, but the value of reg_0[23] is read as the previous read
 69.1212  	 * register value, so ignore it if reg_03 == reg_0[12].
 69.1213  	 */
 69.1214 -	if (reg_01.version >= 0x20 && *(int *)&reg_03 != *(int *)&reg_02 &&
 69.1215 -	    *(int *)&reg_03 != *(int *)&reg_01) {
 69.1216 -		printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)&reg_03);
 69.1217 -		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.boot_DT);
 69.1218 -		if (reg_03.__reserved_1)
 69.1219 +	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
 69.1220 +	    reg_03.raw != reg_01.raw) {
 69.1221 +		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
 69.1222 +		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
 69.1223 +		if (reg_03.bits.__reserved_1)
 69.1224  			UNEXPECTED_IO_APIC();
 69.1225  	}
 69.1226  
 69.1227 @@ -868,7 +1329,7 @@ void __init print_IO_APIC(void)
 69.1228  	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
 69.1229  			  " Stat Dest Deli Vect:   \n");
 69.1230  
 69.1231 -	for (i = 0; i <= reg_01.entries; i++) {
 69.1232 +	for (i = 0; i <= reg_01.bits.entries; i++) {
 69.1233  		struct IO_APIC_route_entry entry;
 69.1234  
 69.1235  		spin_lock_irqsave(&ioapic_lock, flags);
 69.1236 @@ -894,12 +1355,17 @@ void __init print_IO_APIC(void)
 69.1237  		);
 69.1238  	}
 69.1239  	}
 69.1240 +	if (use_pci_vector())
 69.1241 +		printk(KERN_INFO "Using vector-based indexing\n");
 69.1242  	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 69.1243  	for (i = 0; i < NR_IRQS; i++) {
 69.1244  		struct irq_pin_list *entry = irq_2_pin + i;
 69.1245  		if (entry->pin < 0)
 69.1246  			continue;
 69.1247 -		printk(KERN_DEBUG "IRQ%d ", i);
 69.1248 +		if (use_pci_vector() && !platform_legacy_irq(i))
 69.1249 +			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
 69.1250 +		else
 69.1251 +			printk(KERN_DEBUG "IRQ%d ", i);
 69.1252  		for (;;) {
 69.1253  			printk("-> %d:%d", entry->apic, entry->pin);
 69.1254  			if (!entry->next)
 69.1255 @@ -910,152 +1376,13 @@ void __init print_IO_APIC(void)
 69.1256  	}
 69.1257  
 69.1258  	printk(KERN_INFO ".................................... done.\n");
 69.1259 -#endif
 69.1260 -}
 69.1261 -
 69.1262 -
 69.1263 -#if 0 /* Maybe useful for debugging, but not currently used anywhere. */
 69.1264 -
 69.1265 -static void print_APIC_bitfield (int base)
 69.1266 -{
 69.1267 -	unsigned int v;
 69.1268 -	int i, j;
 69.1269 -
 69.1270 -	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
 69.1271 -	for (i = 0; i < 8; i++) {
 69.1272 -		v = apic_read(base + i*0x10);
 69.1273 -		for (j = 0; j < 32; j++) {
 69.1274 -			if (v & (1<<j))
 69.1275 -				printk("1");
 69.1276 -			else
 69.1277 -				printk("0");
 69.1278 -		}
 69.1279 -		printk("\n");
 69.1280 -	}
 69.1281 -}
 69.1282 -
 69.1283 -
 69.1284 -void /*__init*/ print_local_APIC(void * dummy)
 69.1285 -{
 69.1286 -	unsigned int v, ver, maxlvt;
 69.1287 -
 69.1288 -	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 69.1289 -		smp_processor_id(), hard_smp_processor_id());
 69.1290 -	v = apic_read(APIC_ID);
 69.1291 -	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
 69.1292 -	v = apic_read(APIC_LVR);
 69.1293 -	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 69.1294 -	ver = GET_APIC_VERSION(v);
 69.1295 -	maxlvt = get_maxlvt();
 69.1296 -
 69.1297 -	v = apic_read(APIC_TASKPRI);
 69.1298 -	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 69.1299 -
 69.1300 -	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
 69.1301 -		v = apic_read(APIC_ARBPRI);
 69.1302 -		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
 69.1303 -			v & APIC_ARBPRI_MASK);
 69.1304 -		v = apic_read(APIC_PROCPRI);
 69.1305 -		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 69.1306 -	}
 69.1307 -
 69.1308 -	v = apic_read(APIC_EOI);
 69.1309 -	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
 69.1310 -	v = apic_read(APIC_RRR);
 69.1311 -	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
 69.1312 -	v = apic_read(APIC_LDR);
 69.1313 -	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
 69.1314 -	v = apic_read(APIC_DFR);
 69.1315 -	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
 69.1316 -	v = apic_read(APIC_SPIV);
 69.1317 -	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 69.1318 -
 69.1319 -	printk(KERN_DEBUG "... APIC ISR field:\n");
 69.1320 -	print_APIC_bitfield(APIC_ISR);
 69.1321 -	printk(KERN_DEBUG "... APIC TMR field:\n");
 69.1322 -	print_APIC_bitfield(APIC_TMR);
 69.1323 -	printk(KERN_DEBUG "... APIC IRR field:\n");
 69.1324 -	print_APIC_bitfield(APIC_IRR);
 69.1325  
 69.1326 -	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
 69.1327 -		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 69.1328 -			apic_write(APIC_ESR, 0);
 69.1329 -		v = apic_read(APIC_ESR);
 69.1330 -		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 69.1331 -	}
 69.1332 -
 69.1333 -	v = apic_read(APIC_ICR);
 69.1334 -	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 69.1335 -	v = apic_read(APIC_ICR2);
 69.1336 -	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 69.1337 -
 69.1338 -	v = apic_read(APIC_LVTT);
 69.1339 -	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
 69.1340 -
 69.1341 -	if (maxlvt > 3) {                       /* PC is LVT#4. */
 69.1342 -		v = apic_read(APIC_LVTPC);
 69.1343 -		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
 69.1344 -	}
 69.1345 -	v = apic_read(APIC_LVT0);
 69.1346 -	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
 69.1347 -	v = apic_read(APIC_LVT1);
 69.1348 -	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
 69.1349 -
 69.1350 -	if (maxlvt > 2) {			/* ERR is LVT#3. */
 69.1351 -		v = apic_read(APIC_LVTERR);
 69.1352 -		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
 69.1353 -	}
 69.1354 -
 69.1355 -	v = apic_read(APIC_TMICT);
 69.1356 -	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
 69.1357 -	v = apic_read(APIC_TMCCT);
 69.1358 -	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 69.1359 -	v = apic_read(APIC_TDCR);
 69.1360 -	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
 69.1361 -	printk("\n");
 69.1362 +	return;
 69.1363  }
 69.1364  
 69.1365 -void print_all_local_APICs (void)
 69.1366 -{
 69.1367 -	smp_call_function(print_local_APIC, NULL, 1, 1);
 69.1368 -	print_local_APIC(NULL);
 69.1369 -}
 69.1370 -
 69.1371 -void /*__init*/ print_PIC(void)
 69.1372 -{
 69.1373 -	extern spinlock_t i8259A_lock;
 69.1374 -	unsigned int v, flags;
 69.1375 -
 69.1376 -	printk(KERN_DEBUG "\nprinting PIC contents\n");
 69.1377 -
 69.1378 -	spin_lock_irqsave(&i8259A_lock, flags);
 69.1379 -
 69.1380 -	v = inb(0xa1) << 8 | inb(0x21);
 69.1381 -	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
 69.1382 -
 69.1383 -	v = inb(0xa0) << 8 | inb(0x20);
 69.1384 -	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
 69.1385 -
 69.1386 -	outb(0x0b,0xa0);
 69.1387 -	outb(0x0b,0x20);
 69.1388 -	v = inb(0xa0) << 8 | inb(0x20);
 69.1389 -	outb(0x0a,0xa0);
 69.1390 -	outb(0x0a,0x20);
 69.1391 -
 69.1392 -	spin_unlock_irqrestore(&i8259A_lock, flags);
 69.1393 -
 69.1394 -	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
 69.1395 -
 69.1396 -	v = inb(0x4d1) << 8 | inb(0x4d0);
 69.1397 -	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 69.1398 -}
 69.1399 -
 69.1400 -#endif /* 0 */
 69.1401 -
 69.1402 -
 69.1403  static void __init enable_IO_APIC(void)
 69.1404  {
 69.1405 -	struct IO_APIC_reg_01 reg_01;
 69.1406 +	union IO_APIC_reg_01 reg_01;
 69.1407  	int i;
 69.1408  	unsigned long flags;
 69.1409  
 69.1410 @@ -1063,18 +1390,15 @@ static void __init enable_IO_APIC(void)
 69.1411  		irq_2_pin[i].pin = -1;
 69.1412  		irq_2_pin[i].next = 0;
 69.1413  	}
 69.1414 -	if (!pirqs_enabled)
 69.1415 -		for (i = 0; i < MAX_PIRQS; i++)
 69.1416 -			pirq_entries[i] = -1;
 69.1417  
 69.1418  	/*
 69.1419  	 * The number of IO-APIC IRQ registers (== #pins):
 69.1420  	 */
 69.1421  	for (i = 0; i < nr_ioapics; i++) {
 69.1422  		spin_lock_irqsave(&ioapic_lock, flags);
 69.1423 -		*(int *)&reg_01 = io_apic_read(i, 1);
 69.1424 +		reg_01.raw = io_apic_read(i, 1);
 69.1425  		spin_unlock_irqrestore(&ioapic_lock, flags);
 69.1426 -		nr_ioapic_registers[i] = reg_01.entries+1;
 69.1427 +		nr_ioapic_registers[i] = reg_01.bits.entries+1;
 69.1428  	}
 69.1429  
 69.1430  	/*
 69.1431 @@ -1103,18 +1427,22 @@ void disable_IO_APIC(void)
 69.1432   * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
 69.1433   */
 69.1434  
 69.1435 -static void __init setup_ioapic_ids_from_mpc (void)
 69.1436 +#ifndef CONFIG_X86_NUMAQ
 69.1437 +static void __init setup_ioapic_ids_from_mpc(void)
 69.1438  {
 69.1439 -	struct IO_APIC_reg_00 reg_00;
 69.1440 -	unsigned long phys_id_present_map = phys_cpu_present_map;
 69.1441 +	union IO_APIC_reg_00 reg_00;
 69.1442 +	physid_mask_t phys_id_present_map;
 69.1443  	int apic;
 69.1444  	int i;
 69.1445  	unsigned char old_id;
 69.1446  	unsigned long flags;
 69.1447  
 69.1448 -	if (clustered_apic_mode)
 69.1449 -		/* We don't have a good way to do this yet - hack */
 69.1450 -		phys_id_present_map = (u_long) 0xf;
 69.1451 +	/*
 69.1452 +	 * This is broken; anything with a real cpu count has to
 69.1453 +	 * circumvent this idiocy regardless.
 69.1454 +	 */
 69.1455 +	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
 69.1456 +
 69.1457  	/*
 69.1458  	 * Set the IOAPIC ID to the value stored in the MPC table.
 69.1459  	 */
 69.1460 @@ -1122,41 +1450,48 @@ static void __init setup_ioapic_ids_from
 69.1461  
 69.1462  		/* Read the register 0 value */
 69.1463  		spin_lock_irqsave(&ioapic_lock, flags);
 69.1464 -		*(int *)&reg_00 = io_apic_read(apic, 0);
 69.1465 +		reg_00.raw = io_apic_read(apic, 0);
 69.1466  		spin_unlock_irqrestore(&ioapic_lock, flags);
 69.1467  		
 69.1468  		old_id = mp_ioapics[apic].mpc_apicid;
 69.1469  
 69.1470 -		if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
 69.1471 +		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
 69.1472  			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
 69.1473  				apic, mp_ioapics[apic].mpc_apicid);
 69.1474  			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 69.1475 -				reg_00.ID);
 69.1476 -			mp_ioapics[apic].mpc_apicid = reg_00.ID;
 69.1477 +				reg_00.bits.ID);
 69.1478 +			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
 69.1479  		}
 69.1480  
 69.1481 +		/* Don't check I/O APIC IDs for some xAPIC systems.  They have
 69.1482 +		 * no meaning without the serial APIC bus. */
 69.1483 +		if (NO_IOAPIC_CHECK)
 69.1484 +			continue;
 69.1485  		/*
 69.1486  		 * Sanity check, is the ID really free? Every APIC in a
 69.1487  		 * system must have a unique ID or we get lots of nice
 69.1488  		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 69.1489 -		 * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
 69.1490  		 */
 69.1491 -		if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
 69.1492 -		    (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
 69.1493 +		if (check_apicid_used(phys_id_present_map,
 69.1494 +					mp_ioapics[apic].mpc_apicid)) {
 69.1495  			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
 69.1496  				apic, mp_ioapics[apic].mpc_apicid);
 69.1497 -			for (i = 0; i < 0xf; i++)
 69.1498 -				if (!(phys_id_present_map & (1 << i)))
 69.1499 +			for (i = 0; i < get_physical_broadcast(); i++)
 69.1500 +				if (!physid_isset(i, phys_id_present_map))
 69.1501  					break;
 69.1502 -			if (i >= apic_broadcast_id)
 69.1503 +			if (i >= get_physical_broadcast())
 69.1504  				panic("Max APIC ID exceeded!\n");
 69.1505  			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 69.1506  				i);
 69.1507 -			phys_id_present_map |= 1 << i;
 69.1508 +			physid_set(i, phys_id_present_map);
 69.1509  			mp_ioapics[apic].mpc_apicid = i;
 69.1510  		} else {
 69.1511 -			printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
 69.1512 -			phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
 69.1513 +			physid_mask_t tmp;
 69.1514 +			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
 69.1515 +			apic_printk(APIC_VERBOSE, "Setting %d in the "
 69.1516 +					"phys_id_present_map\n",
 69.1517 +					mp_ioapics[apic].mpc_apicid);
 69.1518 +			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 69.1519  		}
 69.1520  
 69.1521  
 69.1522 @@ -1174,26 +1509,30 @@ static void __init setup_ioapic_ids_from
 69.1523  		 * Read the right value from the MPC table and
 69.1524  		 * write it into the ID register.
 69.1525  	 	 */
 69.1526 -		printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
 69.1527 -					mp_ioapics[apic].mpc_apicid);
 69.1528 +		apic_printk(APIC_VERBOSE, KERN_INFO
 69.1529 +			"...changing IO-APIC physical APIC ID to %d ...",
 69.1530 +			mp_ioapics[apic].mpc_apicid);
 69.1531  
 69.1532 -		reg_00.ID = mp_ioapics[apic].mpc_apicid;
 69.1533 +		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
 69.1534  		spin_lock_irqsave(&ioapic_lock, flags);
 69.1535 -		io_apic_write(apic, 0, *(int *)&reg_00);
 69.1536 +		io_apic_write(apic, 0, reg_00.raw);
 69.1537  		spin_unlock_irqrestore(&ioapic_lock, flags);
 69.1538  
 69.1539  		/*
 69.1540  		 * Sanity check
 69.1541  		 */
 69.1542  		spin_lock_irqsave(&ioapic_lock, flags);
 69.1543 -		*(int *)&reg_00 = io_apic_read(apic, 0);
 69.1544 +		reg_00.raw = io_apic_read(apic, 0);
 69.1545  		spin_unlock_irqrestore(&ioapic_lock, flags);
 69.1546 -		if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
 69.1547 -			panic("could not set ID!\n");
 69.1548 +		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
 69.1549 +			printk("could not set ID!\n");
 69.1550  		else
 69.1551 -			printk(" ok.\n");
 69.1552 +			apic_printk(APIC_VERBOSE, " ok.\n");
 69.1553  	}
 69.1554  }
 69.1555 +#else
 69.1556 +static void __init setup_ioapic_ids_from_mpc(void) { }
 69.1557 +#endif
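
The rewrite also trades the old single-unsigned-long ID bitmap for physid_mask_t, so physical APIC IDs above 31 can be tracked. A rough user-space sketch of such a bitmap type; the helper signatures here are simplified and differ from the kernel's physid_* macros:

#include <stdio.h>

/* Rough sketch of a physid-style bitmap of physical APIC IDs. */
#define MAX_APICS	256
#define BITS_PER_LONG	(8 * sizeof(unsigned long))

typedef struct {
	unsigned long mask[MAX_APICS / (8 * sizeof(unsigned long))];
} physid_mask_t;

static void physid_set(int id, physid_mask_t *map)
{
	map->mask[id / BITS_PER_LONG] |= 1UL << (id % BITS_PER_LONG);
}

static int physid_isset(int id, const physid_mask_t *map)
{
	return (map->mask[id / BITS_PER_LONG] >> (id % BITS_PER_LONG)) & 1;
}

int main(void)
{
	physid_mask_t map = { { 0 } };

	physid_set(77, &map);	/* an ID the old 32-bit map could not hold */
	printf("77:%d 78:%d\n", physid_isset(77, &map), physid_isset(78, &map));
	return 0;
}
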
 69.1558  
 69.1559  /*
 69.1560   * There is a nasty bug in some older SMP boards, their mptable lies
 69.1561 @@ -1205,9 +1544,9 @@ static void __init setup_ioapic_ids_from
 69.1562   */
 69.1563  static int __init timer_irq_works(void)
 69.1564  {
 69.1565 -	unsigned int t1 = jiffies;
 69.1566 +	unsigned long t1 = jiffies;
 69.1567  
 69.1568 -	__sti();
 69.1569 +	local_irq_enable();
 69.1570  	/* Let ten ticks pass... */
 69.1571  	mdelay((10 * 1000) / HZ);
 69.1572  
 69.1573 @@ -1224,7 +1563,18 @@ static int __init timer_irq_works(void)
 69.1574  	return 0;
 69.1575  }
 69.1576  
 69.1577 -static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
 69.1578 +/*
 69.1579 + * In the SMP+IOAPIC case it might happen that there are an unspecified
 69.1580 + * number of pending IRQ events unhandled. These cases are very rare,
 69.1581 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
 69.1582 + * better to do it this way as thus we do not have to be aware of
 69.1583 + * 'pending' interrupts in the IRQ path, except at this point.
 69.1584 + */
 69.1585 +/*
 69.1586 + * Edge triggered needs to resend any interrupt
 69.1587 + * that was delayed but this is now handled in the device
 69.1588 + * independent code.
 69.1589 + */
 69.1590  
 69.1591  /*
 69.1592   * Starting up a edge-triggered IO-APIC interrupt is
 69.1593 @@ -1235,7 +1585,6 @@ static void disable_edge_ioapic_irq (uns
 69.1594   * This is not complete - we should be able to fake
 69.1595   * an edge even if it isn't on the 8259A...
 69.1596   */
 69.1597 -
 69.1598  static unsigned int startup_edge_ioapic_irq(unsigned int irq)
 69.1599  {
 69.1600  	int was_pending = 0;
 69.1601 @@ -1260,16 +1609,13 @@ static unsigned int startup_edge_ioapic_
 69.1602   */
 69.1603  static void ack_edge_ioapic_irq(unsigned int irq)
 69.1604  {
 69.1605 -	balance_irq(irq);
 69.1606 +	move_irq(irq);
 69.1607  	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
 69.1608  					== (IRQ_PENDING | IRQ_DISABLED))
 69.1609  		mask_IO_APIC_irq(irq);
 69.1610  	ack_APIC_irq();
 69.1611  }
 69.1612  
 69.1613 -static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
 69.1614 -
 69.1615 -
 69.1616  /*
 69.1617   * Level triggered interrupts can just be masked,
 69.1618   * and shutting down and starting up the interrupt
 69.1619 @@ -1291,15 +1637,14 @@ static unsigned int startup_level_ioapic
 69.1620  	return 0; /* don't check for pending */
 69.1621  }
 69.1622  
 69.1623 -static void mask_and_ack_level_ioapic_irq(unsigned int irq)
 69.1624 +static void mask_and_ack_level_ioapic_irq (unsigned int irq)
 69.1625  {
 69.1626  	unsigned long v;
 69.1627  	int i;
 69.1628  
 69.1629 -	balance_irq(irq);
 69.1630 +	move_irq(irq);
 69.1631  
 69.1632  	mask_IO_APIC_irq(irq);
 69.1633 -
 69.1634  /*
 69.1635   * It appears there is an erratum which affects at least version 0x11
 69.1636   * of I/O APIC (that's the 82093AA and cores integrated into various
 69.1637 @@ -1320,45 +1665,114 @@ static void mask_and_ack_level_ioapic_ir
 69.1638   * The idea is from Manfred Spraul.  --macro
 69.1639   */
 69.1640  	i = IO_APIC_VECTOR(irq);
 69.1641 +
 69.1642  	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 69.1643  
 69.1644  	ack_APIC_irq();
 69.1645  
 69.1646  	if (!(v & (1 << (i & 0x1f)))) {
 69.1647 -#ifdef APIC_LOCKUP_DEBUG
 69.1648 -		struct irq_pin_list *entry;
 69.1649 -#endif
 69.1650 -
 69.1651 -#ifdef APIC_MISMATCH_DEBUG
 69.1652  		atomic_inc(&irq_mis_count);
 69.1653 -#endif
 69.1654  		spin_lock(&ioapic_lock);
 69.1655  		__edge_IO_APIC_irq(irq);
 69.1656 -#ifdef APIC_LOCKUP_DEBUG
 69.1657 -		for (entry = irq_2_pin + irq;;) {
 69.1658 -			unsigned int reg;
 69.1659 -
 69.1660 -			if (entry->pin == -1)
 69.1661 -				break;
 69.1662 -			reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
 69.1663 -			if (reg & 0x00004000)
 69.1664 -				printk(KERN_CRIT "Aieee!!!  Remote IRR"
 69.1665 -					" still set after unlock!\n");
 69.1666 -			if (!entry->next)
 69.1667 -				break;
 69.1668 -			entry = irq_2_pin + entry->next;
 69.1669 -		}
 69.1670 -#endif
 69.1671  		__level_IO_APIC_irq(irq);
 69.1672  		spin_unlock(&ioapic_lock);
 69.1673  	}
 69.1674  }
 69.1675  
 69.1676 -static void end_level_ioapic_irq(unsigned int irq)
 69.1677 +static void end_level_ioapic_irq (unsigned int irq)
 69.1678  {
 69.1679  	unmask_IO_APIC_irq(irq);
 69.1680  }
 69.1681  
 69.1682 +#ifdef CONFIG_PCI_MSI
 69.1683 +static unsigned int startup_edge_ioapic_vector(unsigned int vector)
 69.1684 +{
 69.1685 +	int irq = vector_to_irq(vector);
 69.1686 +
 69.1687 +	return startup_edge_ioapic_irq(irq);
 69.1688 +}
 69.1689 +
 69.1690 +static void ack_edge_ioapic_vector(unsigned int vector)
 69.1691 +{
 69.1692 +	int irq = vector_to_irq(vector);
 69.1693 +
 69.1694 +	ack_edge_ioapic_irq(irq);
 69.1695 +}
 69.1696 +
 69.1697 +static unsigned int startup_level_ioapic_vector (unsigned int vector)
 69.1698 +{
 69.1699 +	int irq = vector_to_irq(vector);
 69.1700 +
 69.1701 +	return startup_level_ioapic_irq (irq);
 69.1702 +}
 69.1703 +
 69.1704 +static void mask_and_ack_level_ioapic_vector (unsigned int vector)
 69.1705 +{
 69.1706 +	int irq = vector_to_irq(vector);
 69.1707 +
 69.1708 +	mask_and_ack_level_ioapic_irq(irq);
 69.1709 +}
 69.1710 +
 69.1711 +static void end_level_ioapic_vector (unsigned int vector)
 69.1712 +{
 69.1713 +	int irq = vector_to_irq(vector);
 69.1714 +
 69.1715 +	end_level_ioapic_irq(irq);
 69.1716 +}
 69.1717 +
 69.1718 +static void mask_IO_APIC_vector (unsigned int vector)
 69.1719 +{
 69.1720 +	int irq = vector_to_irq(vector);
 69.1721 +
 69.1722 +	mask_IO_APIC_irq(irq);
 69.1723 +}
 69.1724 +
 69.1725 +static void unmask_IO_APIC_vector (unsigned int vector)
 69.1726 +{
 69.1727 +	int irq = vector_to_irq(vector);
 69.1728 +
 69.1729 +	unmask_IO_APIC_irq(irq);
 69.1730 +}
 69.1731 +
 69.1732 +static void set_ioapic_affinity_vector (unsigned int vector,
 69.1733 +					cpumask_t cpu_mask)
 69.1734 +{
 69.1735 +	int irq = vector_to_irq(vector);
 69.1736 +
 69.1737 +	set_ioapic_affinity_irq(irq, cpu_mask);
 69.1738 +}
 69.1739 +#endif
 69.1740 +
 69.1741 +/*
 69.1742 + * Level and edge triggered IO-APIC interrupts need different handling,
 69.1743 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be
 69.1744 + * handled with the level-triggered descriptor, but that one has slightly
 69.1745 + * more overhead. Level-triggered interrupts cannot be handled with the
 69.1746 + * edge-triggered handler, without risking IRQ storms and other ugly
 69.1747 + * races.
 69.1748 + */
 69.1749 +static struct hw_interrupt_type ioapic_edge_type = {
 69.1750 +	.typename 	= "IO-APIC-edge",
 69.1751 +	.startup 	= startup_edge_ioapic,
 69.1752 +	.shutdown 	= shutdown_edge_ioapic,
 69.1753 +	.enable 	= enable_edge_ioapic,
 69.1754 +	.disable 	= disable_edge_ioapic,
 69.1755 +	.ack 		= ack_edge_ioapic,
 69.1756 +	.end 		= end_edge_ioapic,
 69.1757 +	.set_affinity 	= set_ioapic_affinity,
 69.1758 +};
 69.1759 +
 69.1760 +static struct hw_interrupt_type ioapic_level_type = {
 69.1761 +	.typename 	= "IO-APIC-level",
 69.1762 +	.startup 	= startup_level_ioapic,
 69.1763 +	.shutdown 	= shutdown_level_ioapic,
 69.1764 +	.enable 	= enable_level_ioapic,
 69.1765 +	.disable 	= disable_level_ioapic,
 69.1766 +	.ack 		= mask_and_ack_level_ioapic,
 69.1767 +	.end 		= end_level_ioapic,
 69.1768 +	.set_affinity 	= set_ioapic_affinity,
 69.1769 +};
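
Both descriptor tables above now use C99 designated initializers, binding each handler to a named field instead of relying on member order; the same conversion is applied to lapic_irq_type further down. A toy sketch of the two forms:

#include <stdio.h>

/* Sketch: positional vs. C99 designated struct initializers. */
struct ops {
	const char *name;
	void (*start)(void);
	void (*stop)(void);
};

static void go(void)   { puts("go"); }
static void halt(void) { puts("halt"); }

/* positional: breaks silently if the struct's fields are reordered */
static struct ops a = { "positional", go, halt };

/* designated: order-independent; unnamed fields default to 0/NULL */
static struct ops b = { .name = "designated", .stop = halt };

int main(void)
{
	a.start();
	b.stop();
	printf("%s starts %s\n", b.name, b.start ? "set" : "unset");
	return 0;
}
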
 69.1770 +
 69.1771  static inline void init_IO_APIC_traps(void)
 69.1772  {
 69.1773  	int irq;
 69.1774 @@ -1375,7 +1789,13 @@ static inline void init_IO_APIC_traps(vo
 69.1775  	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 69.1776  	 */
 69.1777  	for (irq = 0; irq < NR_IRQS ; irq++) {
 69.1778 -		if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
 69.1779 +		int tmp = irq;
 69.1780 +		if (use_pci_vector()) {
 69.1781 +			if (!platform_legacy_irq(tmp))
 69.1782 +				if ((tmp = vector_to_irq(tmp)) == -1)
 69.1783 +					continue;
 69.1784 +		}
 69.1785 +		if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
 69.1786  			/*
 69.1787  			 * Hmm.. We don't have an entry for this,
 69.1788  			 * so default to an old-fashioned 8259
 69.1789 @@ -1414,13 +1834,13 @@ static void ack_lapic_irq (unsigned int 
 69.1790  static void end_lapic_irq (unsigned int i) { /* nothing */ }
 69.1791  
 69.1792  static struct hw_interrupt_type lapic_irq_type = {
 69.1793 -	"local-APIC-edge",
 69.1794 -	NULL, /* startup_irq() not used for IRQ0 */
 69.1795 -	NULL, /* shutdown_irq() not used for IRQ0 */
 69.1796 -	enable_lapic_irq,
 69.1797 -	disable_lapic_irq,
 69.1798 -	ack_lapic_irq,
 69.1799 -	end_lapic_irq
 69.1800 +	.typename 	= "local-APIC-edge",
 69.1801 +	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
 69.1802 +	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
 69.1803 +	.enable 	= enable_lapic_irq,
 69.1804 +	.disable 	= disable_lapic_irq,
 69.1805 +	.ack 		= ack_lapic_irq,
 69.1806 +	.end 		= end_lapic_irq
 69.1807  };
 69.1808  
 69.1809  /*
 69.1810 @@ -1493,7 +1913,6 @@ static inline void unlock_ExtINT_logic(v
 69.1811   */
 69.1812  static inline void check_timer(void)
 69.1813  {
 69.1814 -	extern int timer_ack;
 69.1815  	int pin1, pin2;
 69.1816  	int vector;
 69.1817  
 69.1818 @@ -1526,8 +1945,9 @@ static inline void check_timer(void)
 69.1819  		 * Ok, does IRQ0 through the IOAPIC work?
 69.1820  		 */
 69.1821  		unmask_IO_APIC_irq(0);
 69.1822 -		if (timer_irq_works())
 69.1823 +		if (timer_irq_works()) {
 69.1824  			return;
 69.1825 +		}
 69.1826  		clear_IO_APIC_pin(0, pin1);
 69.1827  		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
 69.1828  	}
 69.1829 @@ -1554,6 +1974,11 @@ static inline void check_timer(void)
 69.1830  	}
 69.1831  	printk(" failed.\n");
 69.1832  
 69.1833 +	if (nmi_watchdog == NMI_IO_APIC) {
 69.1834 +		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
 69.1835 +		nmi_watchdog = 0;
 69.1836 +	}
 69.1837 +
 69.1838  	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 69.1839  
 69.1840  	disable_8259A_irq(0);
 69.1841 @@ -1570,6 +1995,7 @@ static inline void check_timer(void)
 69.1842  
 69.1843  	printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
 69.1844  
 69.1845 +	timer_ack = 0;
 69.1846  	init_8259A(0);
 69.1847  	make_8259A_irq(0);
 69.1848  	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
 69.1849 @@ -1581,7 +2007,8 @@ static inline void check_timer(void)
 69.1850  		return;
 69.1851  	}
 69.1852  	printk(" failed :(.\n");
 69.1853 -	panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
 69.1854 +	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 69.1855 +		"report.  Then try booting with the 'noapic' option");
 69.1856  }
 69.1857  
 69.1858  #define NR_IOAPIC_BIOSIDS 256
 69.1859 @@ -1596,20 +2023,12 @@ static void store_ioapic_biosid_mapping(
 69.1860  
 69.1861  /*
 69.1862   *
 69.1863 - * IRQ's that are handled by the old PIC in all cases:
 69.1864 + * IRQs that are handled by the PIC in the MPS IOAPIC case.
 69.1865   * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
 69.1866   *   Linux doesn't really care, as it's not actually used
 69.1867   *   for any interrupt handling anyway.
 69.1868 - * - There used to be IRQ13 here as well, but all
 69.1869 - *   MPS-compliant must not use it for FPU coupling and we
 69.1870 - *   want to use exception 16 anyway.  And there are
 69.1871 - *   systems who connect it to an I/O APIC for other uses.
 69.1872 - *   Thus we don't mark it special any longer.
 69.1873 - *
 69.1874 - * Additionally, something is definitely wrong with irq9
 69.1875 - * on PIIX4 boards.
 69.1876   */
 69.1877 -#define PIC_IRQS	(1<<2)
 69.1878 +#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 69.1879  
 69.1880  void __init setup_IO_APIC(void)
 69.1881  {
 69.1882 @@ -1617,7 +2036,11 @@ void __init setup_IO_APIC(void)
 69.1883  
 69.1884  	enable_IO_APIC();
 69.1885  
 69.1886 -	io_apic_irqs = ~PIC_IRQS;
 69.1887 +	if (acpi_ioapic)
 69.1888 +		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
 69.1889 +	else
 69.1890 +		io_apic_irqs = ~PIC_IRQS;
 69.1891 +
 69.1892  	printk("ENABLING IO-APIC IRQs\n");
 69.1893  
 69.1894  	/*
 69.1895 @@ -1632,22 +2055,17 @@ void __init setup_IO_APIC(void)
 69.1896  	print_IO_APIC();
 69.1897  }
 69.1898  
 69.1899 -#endif /* CONFIG_X86_IO_APIC */
 69.1900 -
 69.1901 -
 69.1902 -
 69.1903  /* --------------------------------------------------------------------------
 69.1904                            ACPI-based IOAPIC Configuration
 69.1905     -------------------------------------------------------------------------- */
 69.1906  
 69.1907  #ifdef CONFIG_ACPI_BOOT
 69.1908  
 69.1909 -#define IO_APIC_MAX_ID		15
 69.1910 -
 69.1911  int __init io_apic_get_unique_id (int ioapic, int apic_id)
 69.1912  {
 69.1913 -	struct IO_APIC_reg_00 reg_00;
 69.1914 -	static unsigned long apic_id_map = 0;
 69.1915 +	union IO_APIC_reg_00 reg_00;
 69.1916 +	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
 69.1917 +	physid_mask_t tmp;
 69.1918  	unsigned long flags;
 69.1919  	int i = 0;
 69.1920  
 69.1921 @@ -1660,38 +2078,31 @@ int __init io_apic_get_unique_id (int io
 69.1922  	 *      advantage of new APIC bus architecture.
 69.1923  	 */
 69.1924  
 69.1925 -	if (!apic_id_map)
 69.1926 -		apic_id_map = phys_cpu_present_map;
 69.1927 +	if (physids_empty(apic_id_map))
 69.1928 +		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
 69.1929  
 69.1930  	spin_lock_irqsave(&ioapic_lock, flags);
 69.1931 -	*(int *)&reg_00 = io_apic_read(ioapic, 0);
 69.1932 +	reg_00.raw = io_apic_read(ioapic, 0);
 69.1933  	spin_unlock_irqrestore(&ioapic_lock, flags);
 69.1934  
 69.1935 -	if (apic_id >= IO_APIC_MAX_ID) {
 69.1936 +	if (apic_id >= get_physical_broadcast()) {
 69.1937  		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
 69.1938 -			"%d\n", ioapic, apic_id, reg_00.ID);
 69.1939 -		apic_id = reg_00.ID;
 69.1940 -	}
 69.1941 -
 69.1942 -	/* XAPICs do not need unique IDs */
 69.1943 -	if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){
 69.1944 -		printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", 
 69.1945 -			ioapic, apic_id);
 69.1946 -		return apic_id;
 69.1947 +			"%d\n", ioapic, apic_id, reg_00.bits.ID);
 69.1948 +		apic_id = reg_00.bits.ID;
 69.1949  	}
 69.1950  
 69.1951  	/*
 69.1952  	 * Every APIC in a system must have a unique ID or we get lots of nice 
 69.1953  	 * 'stuck on smp_invalidate_needed IPI wait' messages.
 69.1954  	 */
 69.1955 -	if (apic_id_map & (1 << apic_id)) {
 69.1956 +	if (check_apicid_used(apic_id_map, apic_id)) {
 69.1957  
 69.1958 -		for (i = 0; i < IO_APIC_MAX_ID; i++) {
 69.1959 -			if (!(apic_id_map & (1 << i)))
 69.1960 +		for (i = 0; i < get_physical_broadcast(); i++) {
 69.1961 +			if (!check_apicid_used(apic_id_map, i))
 69.1962  				break;
 69.1963  		}
 69.1964  
 69.1965 -		if (i == IO_APIC_MAX_ID)
 69.1966 +		if (i == get_physical_broadcast())
 69.1967  			panic("Max apic_id exceeded!\n");
 69.1968  
 69.1969  		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
 69.1970 @@ -1700,22 +2111,24 @@ int __init io_apic_get_unique_id (int io
 69.1971  		apic_id = i;
 69.1972  	} 
 69.1973  
 69.1974 -	apic_id_map |= (1 << apic_id);
 69.1975 +	tmp = apicid_to_cpu_present(apic_id);
 69.1976 +	physids_or(apic_id_map, apic_id_map, tmp);
 69.1977  
 69.1978 -	if (reg_00.ID != apic_id) {
 69.1979 -		reg_00.ID = apic_id;
 69.1980 +	if (reg_00.bits.ID != apic_id) {
 69.1981 +		reg_00.bits.ID = apic_id;
 69.1982  
 69.1983  		spin_lock_irqsave(&ioapic_lock, flags);
 69.1984 -		io_apic_write(ioapic, 0, *(int *)&reg_00);
 69.1985 -		*(int *)&reg_00 = io_apic_read(ioapic, 0);
 69.1986 +		io_apic_write(ioapic, 0, reg_00.raw);
 69.1987 +		reg_00.raw = io_apic_read(ioapic, 0);
 69.1988  		spin_unlock_irqrestore(&ioapic_lock, flags);
 69.1989  
 69.1990  		/* Sanity check */
 69.1991 -		if (reg_00.ID != apic_id)
 69.1992 +		if (reg_00.bits.ID != apic_id)
 69.1993  			panic("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
 69.1994  	}
 69.1995  
 69.1996 -	printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 69.1997 +	apic_printk(APIC_VERBOSE, KERN_INFO
 69.1998 +			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 69.1999  
 69.2000  	return apic_id;
 69.2001  }
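
The register access rewritten above recurs throughout this file: the old code punned a bitfield struct through *(int *)&reg_00, while the new code reads and writes a union whose raw member aliases the bits fields. A minimal standalone sketch of the idiom, with an illustrative field layout standing in for the real IO-APIC ID register:

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative layout only; the point is the raw/bits aliasing. */
    union io_apic_reg_00 {
        uint32_t raw;
        struct {
            uint32_t reserved_1 : 24;
            uint32_t ID         : 4;
            uint32_t reserved_2 : 4;
        } bits;
    };

    int main(void)
    {
        union io_apic_reg_00 reg_00;

        reg_00.raw = 0x0c000000;   /* as if returned by io_apic_read(ioapic, 0) */
        printf("current ID = %u\n", reg_00.bits.ID);

        reg_00.bits.ID = 5;        /* touch just one field... */
        printf("raw to write back = %#010x\n", reg_00.raw);
        return 0;
    }

Reading and writing through the union avoids the pointer-cast aliasing hazard of the old struct version, and the whole-register raw value is what goes back out through io_apic_write().
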
 69.2002 @@ -1723,27 +2136,27 @@ int __init io_apic_get_unique_id (int io
 69.2003  
 69.2004  int __init io_apic_get_version (int ioapic)
 69.2005  {
 69.2006 -	struct IO_APIC_reg_01	reg_01;
 69.2007 +	union IO_APIC_reg_01	reg_01;
 69.2008  	unsigned long flags;
 69.2009  
 69.2010  	spin_lock_irqsave(&ioapic_lock, flags);
 69.2011 -	*(int *)&reg_01 = io_apic_read(ioapic, 1);
 69.2012 +	reg_01.raw = io_apic_read(ioapic, 1);
 69.2013  	spin_unlock_irqrestore(&ioapic_lock, flags);
 69.2014  
 69.2015 -	return reg_01.version;
 69.2016 +	return reg_01.bits.version;
 69.2017  }
 69.2018  
 69.2019  
 69.2020  int __init io_apic_get_redir_entries (int ioapic)
 69.2021  {
 69.2022 -	struct IO_APIC_reg_01	reg_01;
 69.2023 +	union IO_APIC_reg_01	reg_01;
 69.2024  	unsigned long flags;
 69.2025  
 69.2026  	spin_lock_irqsave(&ioapic_lock, flags);
 69.2027 -	*(int *)&reg_01 = io_apic_read(ioapic, 1);
 69.2028 +	reg_01.raw = io_apic_read(ioapic, 1);
 69.2029  	spin_unlock_irqrestore(&ioapic_lock, flags);
 69.2030  
 69.2031 -	return reg_01.entries;
 69.2032 +	return reg_01.bits.entries;
 69.2033  }
 69.2034  
 69.2035  
 69.2036 @@ -1753,7 +2166,7 @@ int io_apic_set_pci_routing (int ioapic,
 69.2037  	unsigned long flags;
 69.2038  
 69.2039  	if (!IO_APIC_IRQ(irq)) {
 69.2040 -		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n", 
 69.2041 +		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 69.2042  			ioapic);
 69.2043  		return -EINVAL;
 69.2044  	}
 69.2045 @@ -1766,12 +2179,12 @@ int io_apic_set_pci_routing (int ioapic,
 69.2046  
 69.2047  	memset(&entry,0,sizeof(entry));
 69.2048  
 69.2049 -	entry.delivery_mode = dest_LowestPrio;
 69.2050 -	entry.dest_mode = INT_DELIVERY_MODE;
 69.2051 -	entry.dest.logical.logical_dest = target_cpus();
 69.2052 -	entry.mask = 1;					 /* Disabled (masked) */
 69.2053 +	entry.delivery_mode = INT_DELIVERY_MODE;
 69.2054 +	entry.dest_mode = INT_DEST_MODE;
 69.2055 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 69.2056  	entry.trigger = edge_level;
 69.2057  	entry.polarity = active_high_low;
 69.2058 +	entry.mask  = 1;
 69.2059  
 69.2060  	/*
 69.2061  	 * IRQs < 16 are already in the irq_2_pin[] map
 69.2062 @@ -1781,17 +2194,12 @@ int io_apic_set_pci_routing (int ioapic,
 69.2063  
 69.2064  	entry.vector = assign_irq_vector(irq);
 69.2065  
 69.2066 -	printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
 69.2067 -		"IRQ %d Mode:%i Active:%i)\n", ioapic,
 69.2068 -		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
 69.2069 +	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
 69.2070 +		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
 69.2071 +		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
 69.2072 +		edge_level, active_high_low);
 69.2073  
 69.2074 -	if (edge_level) {
 69.2075 -		irq_desc[irq].handler = &ioapic_level_irq_type;
 69.2076 -	} else {
 69.2077 -		irq_desc[irq].handler = &ioapic_edge_irq_type;
 69.2078 -	}
 69.2079 -
 69.2080 -	set_intr_gate(entry.vector, interrupt[irq]);
 69.2081 +	ioapic_register_intr(irq, entry.vector, edge_level);
 69.2082  
 69.2083  	if (!ioapic && (irq < 16))
 69.2084  		disable_8259A_irq(irq);
 69.2085 @@ -1806,123 +2214,12 @@ int io_apic_set_pci_routing (int ioapic,
 69.2086  
 69.2087  #endif /*CONFIG_ACPI_BOOT*/
 69.2088  
 69.2089 -/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
 69.2090 -/*                                    level- or edge-triggered.         */
 69.2091 -/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
 69.2092 -static char opt_leveltrigger[30] = "", opt_edgetrigger[30] = "";
 69.2093 -string_param("leveltrigger", opt_leveltrigger);
 69.2094 -string_param("edgetrigger", opt_edgetrigger);
 69.2095 -
 69.2096 -static int __init ioapic_trigger_setup(void)
 69.2097 -{
 69.2098 -    char       *p;
 69.2099 -    irq_desc_t *desc;
 69.2100 -    long        irq;
 69.2101 -
 69.2102 -    p = opt_leveltrigger;
 69.2103 -    while ( *p != '\0' )
 69.2104 -    {
 69.2105 -        irq = simple_strtol(p, &p, 10);
 69.2106 -        if ( (irq <= 0) || (irq >= NR_IRQS) )
 69.2107 -        {
 69.2108 -            printk("IRQ '%ld' out of range in level-trigger list '%s'\n",
 69.2109 -                   irq, opt_leveltrigger);
 69.2110 -            break;
 69.2111 -        }
 69.2112 -
 69.2113 -        printk("Forcing IRQ %ld to level-trigger: ", irq);
 69.2114 -
 69.2115 -        desc = &irq_desc[irq];
 69.2116 -        spin_lock_irq(&desc->lock);
 69.2117 -
 69.2118 -        if ( desc->handler == &ioapic_level_irq_type )
 69.2119 -        {
 69.2120 -            printk("already level-triggered (no force applied).\n");
 69.2121 -        }
 69.2122 -        else if ( desc->handler != &ioapic_edge_irq_type )
 69.2123 -        {
 69.2124 -            printk("cannot force (can only force IO-APIC-edge IRQs).\n");
 69.2125 -        }
 69.2126 -        else
 69.2127 -        {
 69.2128 -            desc->handler = &ioapic_level_irq_type;
 69.2129 -            __mask_IO_APIC_irq(irq);
 69.2130 -            __level_IO_APIC_irq(irq);        
 69.2131 -            printk("done.\n");
 69.2132 -        }
 69.2133 -
 69.2134 -        spin_unlock_irq(&desc->lock);
 69.2135 -
 69.2136 -        if ( *p == '\0' )
 69.2137 -            break;
 69.2138 -
 69.2139 -        if ( *p != ',' )
 69.2140 -        {
 69.2141 -            printk("Unexpected character '%c' in level-trigger list '%s'\n",
 69.2142 -                   *p, opt_leveltrigger);
 69.2143 -            break;
 69.2144 -        }
 69.2145 -
 69.2146 -        p++;
 69.2147 -    }
 69.2148 -
 69.2149 -    p = opt_edgetrigger;
 69.2150 -    while ( *p != '\0' )
 69.2151 -    {
 69.2152 -        irq = simple_strtol(p, &p, 10);
 69.2153 -        if ( (irq <= 0) || (irq >= NR_IRQS) )
 69.2154 -        {
 69.2155 -            printk("IRQ '%ld' out of range in edge-trigger list '%s'\n",
 69.2156 -                   irq, opt_edgetrigger);
 69.2157 -            break;
 69.2158 -        }
 69.2159 -
 69.2160 -        printk("Forcing IRQ %ld to edge-trigger: ", irq);
 69.2161 -
 69.2162 -        desc = &irq_desc[irq];
 69.2163 -        spin_lock_irq(&desc->lock);
 69.2164 -
 69.2165 -        if ( desc->handler == &ioapic_edge_irq_type )
 69.2166 -        {
 69.2167 -            printk("already edge-triggered (no force applied).\n");
 69.2168 -        }
 69.2169 -        else if ( desc->handler != &ioapic_level_irq_type )
 69.2170 -        {
 69.2171 -            printk("cannot force (can only force IO-APIC-level IRQs).\n");
 69.2172 -        }
 69.2173 -        else
 69.2174 -        {
 69.2175 -            desc->handler = &ioapic_edge_irq_type;
 69.2176 -            __edge_IO_APIC_irq(irq);        
 69.2177 -            desc->status |= IRQ_PENDING; /* may have lost a masked edge */
 69.2178 -            printk("done.\n");
 69.2179 -        }
 69.2180 -
 69.2181 -        spin_unlock_irq(&desc->lock);
 69.2182 -
 69.2183 -        if ( *p == '\0' )
 69.2184 -            break;
 69.2185 -
 69.2186 -        if ( *p != ',' )
 69.2187 -        {
 69.2188 -            printk("Unexpected character '%c' in edge-trigger list '%s'\n",
 69.2189 -                   *p, opt_edgetrigger);
 69.2190 -            break;
 69.2191 -        }
 69.2192 -
 69.2193 -        p++;
 69.2194 -    }
 69.2195 -
 69.2196 -    return 0;
 69.2197 -}
 69.2198 -
 69.2199 -__initcall(ioapic_trigger_setup);
 69.2200  
 69.2201  int ioapic_guest_read(int apicid, int address, u32 *pval)
 69.2202  {
 69.2203      u32 val;
 69.2204      int apicenum;
 69.2205 -    struct IO_APIC_reg_00 reg_00;
 69.2206 +    union IO_APIC_reg_00 reg_00;
 69.2207      unsigned long flags;
 69.2208  
 69.2209      if ( (apicid >= NR_IOAPIC_BIOSIDS) ||
 69.2210 @@ -1936,9 +2233,9 @@ int ioapic_guest_read(int apicid, int ad
 69.2211      /* Rewrite APIC ID to what the BIOS originally specified. */
 69.2212      if ( address == 0 )
 69.2213      {
 69.2214 -        *(int *)&reg_00 = val;
 69.2215 -        reg_00.ID = apicid;
 69.2216 -        val = *(u32 *)&reg_00;
 69.2217 +        reg_00.raw = val;
 69.2218 +        reg_00.bits.ID = apicid;
 69.2219 +        val = reg_00.raw;
 69.2220      }
 69.2221  
 69.2222      *pval = val;
 69.2223 @@ -1974,7 +2271,7 @@ int ioapic_guest_write(int apicid, int a
 69.2224  
 69.2225          /* Set the correct irq-handling type. */
 69.2226          irq_desc[irq].handler = rte.trigger ? 
 69.2227 -            &ioapic_level_irq_type: &ioapic_edge_irq_type;
 69.2228 +            &ioapic_level_type: &ioapic_edge_type;
 69.2229  
 69.2230          /* Record the pin<->irq mapping. */
 69.2231          for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] )
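
The ID rewrite in ioapic_guest_read() above is the virtualization half of the same idiom: whatever ID Xen may have assigned to the physical IO-APIC, a guest reading register 0 is shown the ID the BIOS originally reported. A sketch under the same illustrative union layout as before (the renumbering from 5 back to 2 is made up):

    #include <stdio.h>
    #include <stdint.h>

    union io_apic_reg_00 {
        uint32_t raw;
        struct { uint32_t rsvd_lo : 24; uint32_t ID : 4; uint32_t rsvd_hi : 4; } bits;
    };

    /* Stand-in for the hardware read; assume Xen renumbered this IO-APIC to 5. */
    static uint32_t io_apic_read_reg0(void) { return 5u << 24; }

    static uint32_t guest_read_reg0(int bios_apicid)
    {
        union io_apic_reg_00 reg_00;
        reg_00.raw = io_apic_read_reg0();
        reg_00.bits.ID = bios_apicid;   /* rewrite to the BIOS-specified ID */
        return reg_00.raw;
    }

    int main(void)
    {
        printf("guest sees reg0 = %#010x\n", guest_read_reg0(2));
        return 0;
    }
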
    70.1 --- a/xen/arch/x86/irq.c	Fri May 06 17:04:27 2005 +0000
    70.2 +++ b/xen/arch/x86/irq.c	Wed May 11 13:43:50 2005 +0000
    70.3 @@ -42,7 +42,6 @@ struct hw_interrupt_type no_irq_type = {
    70.4  };
    70.5  
    70.6  atomic_t irq_err_count;
    70.7 -atomic_t irq_mis_count;
    70.8  
    70.9  inline void disable_irq_nosync(unsigned int irq)
   70.10  {
    71.1 --- a/xen/arch/x86/microcode.c	Fri May 06 17:04:27 2005 +0000
    71.2 +++ b/xen/arch/x86/microcode.c	Wed May 11 13:43:50 2005 +0000
    71.3 @@ -87,13 +87,6 @@
    71.4  #define vmalloc(_s) xmalloc_bytes(_s)
    71.5  #define vfree(_p) xfree(_p)
    71.6  #define num_online_cpus() smp_num_cpus
    71.7 -static inline int on_each_cpu(
    71.8 -    void (*func) (void *info), void *info, int retry, int wait)
    71.9 -{
   71.10 -    int ret = smp_call_function(func, info, retry, wait);
   71.11 -    func(info);
   71.12 -    return ret;
   71.13 -}
   71.14  
   71.15  #if 0
   71.16  MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
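
The shim deleted from microcode.c duplicated what the generic code now provides, and its shape is worth keeping in mind when reading the rest of the changeset: on_each_cpu() fans a function out to every other CPU and then runs it locally, so the calling CPU is covered too. A standalone sketch with a stubbed cross-call (the stub is hypothetical; the real smp_call_function sends IPIs):

    #include <stdio.h>

    /* Hypothetical stub: pretend the other CPUs ran func(info) here. */
    static int smp_call_function(void (*func)(void *), void *info,
                                 int retry, int wait)
    {
        (void)retry; (void)wait;
        return 0;
    }

    /* The pattern the deleted shim implemented: every *other* CPU runs
     * func via a cross-call, then the calling CPU runs it directly. */
    static int on_each_cpu(void (*func)(void *), void *info, int retry, int wait)
    {
        int ret = smp_call_function(func, info, retry, wait);
        func(info);
        return ret;
    }

    static void bump(void *info) { ++*(int *)info; }

    int main(void)
    {
        int count = 0;
        on_each_cpu(bump, &count, 1, 1);
        printf("ran locally: count = %d\n", count);
        return 0;
    }
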
    72.1 --- a/xen/arch/x86/mm.c	Fri May 06 17:04:27 2005 +0000
    72.2 +++ b/xen/arch/x86/mm.c	Wed May 11 13:43:50 2005 +0000
    72.3 @@ -316,7 +316,7 @@ int map_ldt_shadow_page(unsigned int off
    72.4  
    72.5      res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
    72.6  
    72.7 -    if ( !res && unlikely(shadow_mode_enabled(d)) )
    72.8 +    if ( !res && unlikely(shadow_mode_refcounts(d)) )
    72.9      {
   72.10          shadow_lock(d);
   72.11          shadow_remove_all_write_access(d, gpfn, gmfn);
   72.12 @@ -392,7 +392,7 @@ get_linear_pagetable(
   72.13      struct pfn_info *page;
   72.14      unsigned long pfn;
   72.15  
   72.16 -    ASSERT( !shadow_mode_enabled(d) );
   72.17 +    ASSERT( !shadow_mode_refcounts(d) );
   72.18  
   72.19      if ( (root_get_flags(re) & _PAGE_RW) )
   72.20      {
   72.21 @@ -482,7 +482,7 @@ get_page_from_l2e(
   72.22  {
   72.23      int rc;
   72.24  
   72.25 -    ASSERT(!shadow_mode_enabled(d));
   72.26 +    ASSERT(!shadow_mode_refcounts(d));
   72.27  
   72.28      if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
   72.29          return 1;
   72.30 @@ -512,6 +512,8 @@ static int
   72.31  get_page_from_l3e(
   72.32      l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
   72.33  {
   72.34 +    ASSERT( !shadow_mode_refcounts(d) );
   72.35 +
   72.36      if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
   72.37          return 1;
   72.38  
   72.39 @@ -533,6 +535,8 @@ get_page_from_l4e(
   72.40  {
   72.41      int rc;
   72.42  
   72.43 +    ASSERT( !shadow_mode_refcounts(d) );
   72.44 +
   72.45      if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
   72.46          return 1;
   72.47  
   72.48 @@ -641,7 +645,7 @@ static int alloc_l1_table(struct pfn_inf
   72.49      l1_pgentry_t  *pl1e;
   72.50      int            i;
   72.51  
   72.52 -    ASSERT(!shadow_mode_enabled(d));
   72.53 +    ASSERT(!shadow_mode_refcounts(d));
   72.54  
   72.55      pl1e = map_domain_mem(pfn << PAGE_SHIFT);
   72.56  
   72.57 @@ -670,10 +674,12 @@ static int alloc_l2_table(struct pfn_inf
   72.58      l2_pgentry_t  *pl2e;
   72.59      int            i;
   72.60  
   72.61 +    // See the code in shadow_promote() to understand why this is here...
   72.62      if ( (PGT_base_page_table == PGT_l2_page_table) &&
   72.63 -         shadow_mode_enabled(d) )
   72.64 +         unlikely(shadow_mode_refcounts(d)) )
   72.65          return 1;
   72.66 -    ASSERT( !shadow_mode_enabled(d) );
   72.67 +
   72.68 +    ASSERT( !shadow_mode_refcounts(d) );
   72.69     
   72.70      pl2e = map_domain_mem(pfn << PAGE_SHIFT);
   72.71  
   72.72 @@ -716,7 +722,7 @@ static int alloc_l3_table(struct pfn_inf
   72.73      l3_pgentry_t  *pl3e = page_to_virt(page);
   72.74      int            i;
   72.75  
   72.76 -    ASSERT( !shadow_mode_enabled(d) );
   72.77 +    ASSERT( !shadow_mode_refcounts(d) );
   72.78  
   72.79      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
   72.80          if ( is_guest_l3_slot(i) &&
   72.81 @@ -741,10 +747,12 @@ static int alloc_l4_table(struct pfn_inf
   72.82      l4_pgentry_t  *pl4e = page_to_virt(page);
   72.83      int            i;
   72.84  
   72.85 +    // See the code in shadow_promote() to understand why this is here...
   72.86      if ( (PGT_base_page_table == PGT_l4_page_table) &&
   72.87 -         shadow_mode_enabled(d) )
   72.88 +         shadow_mode_refcounts(d) )
   72.89          return 1;
   72.90 -    ASSERT( !shadow_mode_enabled(d) );
   72.91 +
   72.92 +    ASSERT( !shadow_mode_refcounts(d) );
   72.93  
   72.94      for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
   72.95          if ( is_guest_l4_slot(i) &&
   72.96 @@ -861,11 +869,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
   72.97      l1_pgentry_t ol1e;
   72.98      struct domain *d = current->domain;
   72.99  
  72.100 -    ASSERT( !shadow_mode_enabled(d) );
  72.101 -
  72.102      if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
  72.103          return 0;
  72.104  
  72.105 +    if ( unlikely(shadow_mode_refcounts(d)) )
  72.106 +        return update_l1e(pl1e, ol1e, nl1e);
  72.107 +
  72.108      if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
  72.109      {
  72.110          if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
  72.111 @@ -893,7 +902,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
  72.112          if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
  72.113              return 0;
  72.114      }
  72.115 -    
  72.116 +
  72.117      put_page_from_l1e(ol1e, d);
  72.118      return 1;
  72.119  }
  72.120 @@ -1095,8 +1104,19 @@ int alloc_page_type(struct pfn_info *pag
  72.121  void free_page_type(struct pfn_info *page, unsigned int type)
  72.122  {
  72.123      struct domain *owner = page_get_owner(page);
  72.124 -    if ( likely(owner != NULL) && unlikely(shadow_mode_enabled(owner)) )
  72.125 -        return;
  72.126 +    unsigned long gpfn;
  72.127 +
  72.128 +    if ( owner != NULL )
  72.129 +    {
  72.130 +        if ( unlikely(shadow_mode_refcounts(owner)) )
  72.131 +            return;
  72.132 +        if ( unlikely(shadow_mode_enabled(owner)) )
  72.133 +        {
  72.134 +            gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
  72.135 +            ASSERT(VALID_M2P(gpfn));
  72.136 +            remove_shadow(owner, gpfn, type);
  72.137 +        }
  72.138 +    }
  72.139  
  72.140      switch ( type )
  72.141      {
  72.142 @@ -1142,7 +1162,7 @@ void put_page_type(struct pfn_info *page
  72.143           * See domain.c:relinquish_list().
  72.144           */
  72.145          ASSERT((x & PGT_validated) || 
  72.146 -               test_bit(DF_DYING, &page_get_owner(page)->d_flags));
  72.147 +               test_bit(DF_DYING, &page_get_owner(page)->flags));
  72.148  
  72.149          if ( unlikely((nx & PGT_count_mask) == 0) )
  72.150          {
  72.151 @@ -1287,7 +1307,7 @@ int new_guest_cr3(unsigned long mfn)
  72.152      int okay;
  72.153      unsigned long old_base_mfn;
  72.154  
  72.155 -    if ( shadow_mode_enabled(d) )
  72.156 +    if ( shadow_mode_refcounts(d) )
  72.157          okay = get_page_from_pagenr(mfn, d);
  72.158      else
  72.159          okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
  72.160 @@ -1296,24 +1316,24 @@ int new_guest_cr3(unsigned long mfn)
  72.161      {
  72.162          invalidate_shadow_ldt(ed);
  72.163  
  72.164 -        old_base_mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
  72.165 +        old_base_mfn = pagetable_get_pfn(ed->arch.guest_table);
  72.166          ed->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
  72.167          update_pagetables(ed); /* update shadow_table and monitor_table */
  72.168  
  72.169          write_ptbase(ed);
  72.170  
  72.171 -        if ( shadow_mode_enabled(d) )
  72.172 +        if ( shadow_mode_refcounts(d) )
  72.173              put_page(&frame_table[old_base_mfn]);
  72.174          else
  72.175              put_page_and_type(&frame_table[old_base_mfn]);
  72.176  
  72.177 -        /* CR3 holds its own ref to its shadow. */
  72.178 +        /* CR3 also holds a ref to its shadow... */
  72.179          if ( shadow_mode_enabled(d) )
  72.180          {
  72.181              if ( ed->arch.monitor_shadow_ref )
  72.182                  put_shadow_ref(ed->arch.monitor_shadow_ref);
  72.183              ed->arch.monitor_shadow_ref =
  72.184 -                pagetable_val(ed->arch.monitor_table) >> PAGE_SHIFT;
  72.185 +                pagetable_get_pfn(ed->arch.monitor_table);
  72.186              ASSERT(!page_get_owner(&frame_table[ed->arch.monitor_shadow_ref]));
  72.187              get_shadow_ref(ed->arch.monitor_shadow_ref);
  72.188          }
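
new_guest_cr3() above swaps the open-coded pagetable_val(...) >> PAGE_SHIFT for a pagetable_get_pfn() accessor, naming the shift once instead of repeating it at every call site. A toy version of the pair, with stand-in types (the real pagetable_t lives in Xen's headers):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    typedef struct { unsigned long pt; } pagetable_t;

    static unsigned long pagetable_val(pagetable_t p)     { return p.pt; }
    static unsigned long pagetable_get_pfn(pagetable_t p) { return p.pt >> PAGE_SHIFT; }

    int main(void)
    {
        pagetable_t gt = { 0x12345000UL };   /* made-up table address */
        printf("val=%#lx pfn=%#lx\n", pagetable_val(gt), pagetable_get_pfn(gt));
        return 0;
    }
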
  72.189 @@ -1486,7 +1506,7 @@ int do_mmuext_op(
  72.190              type = PGT_l1_page_table | PGT_va_mutable;
  72.191  
  72.192          pin_page:
  72.193 -            if ( shadow_mode_enabled(FOREIGNDOM) )
  72.194 +            if ( shadow_mode_refcounts(FOREIGNDOM) )
  72.195                  type = PGT_writable_page;
  72.196  
  72.197              okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
  72.198 @@ -1557,7 +1577,7 @@ int do_mmuext_op(
  72.199              else
  72.200              {
  72.201                  unsigned long old_mfn =
  72.202 -                    pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
  72.203 +                    pagetable_get_pfn(ed->arch.guest_table_user);
  72.204                  ed->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT);
  72.205                  if ( old_mfn != 0 )
  72.206                      put_page_and_type(&frame_table[old_mfn]);
  72.207 @@ -1691,13 +1711,13 @@ int do_mmuext_op(
  72.208               * it is dying. 
  72.209               */
  72.210              ASSERT(e->tot_pages <= e->max_pages);
  72.211 -            if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
  72.212 +            if ( unlikely(test_bit(DF_DYING, &e->flags)) ||
  72.213                   unlikely(e->tot_pages == e->max_pages) ||
  72.214                   unlikely(IS_XEN_HEAP_FRAME(page)) )
  72.215              {
  72.216                  MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
  72.217                          "page is in Xen heap (%lx), or dom is dying (%ld).\n",
  72.218 -                        e->tot_pages, e->max_pages, op.mfn, e->d_flags);
  72.219 +                        e->tot_pages, e->max_pages, op.mfn, e->flags);
  72.220                  okay = 0;
  72.221                  goto reassign_fail;
  72.222              }
  72.223 @@ -1785,13 +1805,15 @@ int do_mmu_update(
  72.224      unsigned int foreigndom)
  72.225  {
  72.226      mmu_update_t req;
  72.227 -    unsigned long va = 0, mfn, prev_mfn = 0, gpfn;
  72.228 +    void *va;
  72.229 +    unsigned long gpfn, mfn;
  72.230      struct pfn_info *page;
  72.231      int rc = 0, okay = 1, i = 0, cpu = smp_processor_id();
  72.232      unsigned int cmd, done = 0;
  72.233      struct exec_domain *ed = current;
  72.234      struct domain *d = ed->domain;
  72.235      u32 type_info;
  72.236 +    struct map_dom_mem_cache mapcache, sh_mapcache;
  72.237  
  72.238      LOCK_BIGLOCK(d);
  72.239  
  72.240 @@ -1807,6 +1829,9 @@ int do_mmu_update(
  72.241              (void)get_user(done, pdone);
  72.242      }
  72.243  
  72.244 +    init_map_domain_mem_cache(&mapcache);
  72.245 +    init_map_domain_mem_cache(&sh_mapcache);
  72.246 +
  72.247      if ( !set_foreigndom(cpu, foreigndom) )
  72.248      {
  72.249          rc = -EINVAL;
  72.250 @@ -1841,8 +1866,6 @@ int do_mmu_update(
  72.251          }
  72.252  
  72.253          cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
  72.254 -        mfn = req.ptr >> PAGE_SHIFT;
  72.255 -
  72.256          okay = 0;
  72.257  
  72.258          switch ( cmd )
  72.259 @@ -1851,73 +1874,75 @@ int do_mmu_update(
  72.260               * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
  72.261               */
  72.262          case MMU_NORMAL_PT_UPDATE:
  72.263 +
  72.264 +            gpfn = req.ptr >> PAGE_SHIFT;
  72.265 +            mfn = __gpfn_to_mfn(d, gpfn);
  72.266 +
  72.267              if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
  72.268              {
  72.269                  MEM_LOG("Could not get page for normal update");
  72.270                  break;
  72.271              }
  72.272  
  72.273 -            if ( likely(prev_mfn == mfn) )
  72.274 -            {
  72.275 -                va = (va & PAGE_MASK) | (req.ptr & ~PAGE_MASK);
  72.276 -            }
  72.277 -            else
  72.278 -            {
  72.279 -                if ( prev_mfn != 0 )
  72.280 -                    unmap_domain_mem((void *)va);
  72.281 -                va = (unsigned long)map_domain_mem(req.ptr);
  72.282 -                prev_mfn = mfn;
  72.283 -            }
  72.284 -
  72.285 +            va = map_domain_mem_with_cache(req.ptr, &mapcache);
  72.286              page = &frame_table[mfn];
  72.287 +
  72.288              switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
  72.289              {
  72.290              case PGT_l1_page_table: 
  72.291 -                ASSERT(!shadow_mode_enabled(d));
  72.292 +                ASSERT( !shadow_mode_refcounts(d) );
  72.293                  if ( likely(get_page_type(
  72.294                      page, type_info & (PGT_type_mask|PGT_va_mask))) )
  72.295                  {
  72.296 -                    l1_pgentry_t pte;
  72.297 +                    l1_pgentry_t l1e;
  72.298  
  72.299                      /* FIXME: doesn't work with PAE */
  72.300 -                    pte = l1e_create_phys(req.val, req.val);
  72.301 -                    okay = mod_l1_entry((l1_pgentry_t *)va, pte);
  72.302 +                    l1e = l1e_create_phys(req.val, req.val);
  72.303 +                    okay = mod_l1_entry(va, l1e);
  72.304 +                    if ( okay && unlikely(shadow_mode_enabled(d)) )
  72.305 +                        shadow_l1_normal_pt_update(d, req.ptr, l1e, &sh_mapcache);
  72.306                      put_page_type(page);
  72.307                  }
  72.308                  break;
  72.309              case PGT_l2_page_table:
  72.310 -                ASSERT(!shadow_mode_enabled(d));
  72.311 +                ASSERT( !shadow_mode_refcounts(d) );
  72.312                  if ( likely(get_page_type(page, PGT_l2_page_table)) )
  72.313                  {
  72.314                      l2_pgentry_t l2e;
  72.315  
  72.316                      /* FIXME: doesn't work with PAE */
  72.317                      l2e = l2e_create_phys(req.val, req.val);
  72.318 -                    okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn);
  72.319 +                    okay = mod_l2_entry(va, l2e, mfn);
  72.320 +                    if ( okay && unlikely(shadow_mode_enabled(d)) )
  72.321 +                        shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache);
  72.322                      put_page_type(page);
  72.323                  }
  72.324                  break;
  72.325  #ifdef __x86_64__
  72.326              case PGT_l3_page_table:
  72.327 -                ASSERT(!shadow_mode_enabled(d));
  72.328 +                ASSERT( !shadow_mode_refcounts(d) );
  72.329                  if ( likely(get_page_type(page, PGT_l3_page_table)) )
  72.330                  {
  72.331                      l3_pgentry_t l3e;
  72.332  
  72.333                      /* FIXME: doesn't work with PAE */
  72.334                      l3e = l3e_create_phys(req.val,req.val);
  72.335 -                    okay = mod_l3_entry((l3_pgentry_t *)va, l3e, mfn);
  72.336 +                    okay = mod_l3_entry(va, l3e, mfn);
  72.337 +                    if ( okay && unlikely(shadow_mode_enabled(d)) )
  72.338 +                        shadow_l3_normal_pt_update(d, req.ptr, l3e, &sh_mapcache);
  72.339                      put_page_type(page);
  72.340                  }
  72.341                  break;
  72.342              case PGT_l4_page_table:
  72.343 -                ASSERT(!shadow_mode_enabled(d));
  72.344 +                ASSERT( !shadow_mode_refcounts(d) );
  72.345                  if ( likely(get_page_type(page, PGT_l4_page_table)) )
  72.346                  {
  72.347                      l4_pgentry_t l4e;
  72.348  
  72.349                      l4e = l4e_create_phys(req.val,req.val);
  72.350 -                    okay = mod_l4_entry((l4_pgentry_t *)va, l4e, mfn);
  72.351 +                    okay = mod_l4_entry(va, l4e, mfn);
  72.352 +                    if ( okay && unlikely(shadow_mode_enabled(d)) )
  72.353 +                        shadow_l4_normal_pt_update(d, req.ptr, l4e, &sh_mapcache);
  72.354                      put_page_type(page);
  72.355                  }
  72.356                  break;
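
The MMU_NORMAL_PT_UPDATE path now treats req.ptr as naming a guest pfn and translates it with __gpfn_to_mfn() before touching the frame, where the old code assumed gpfn == mfn. A toy translation, with a made-up p2m array standing in for the real phys-to-machine table:

    #include <stdio.h>

    /* Illustrative p2m table: in translated shadow mode the guest names
     * pages by gpfn, and Xen looks up the real mfn before touching them. */
    static unsigned long p2m[8] = { 100, 101, 205, 42, 7, 9, 11, 13 };

    /* Stand-in for __gpfn_to_mfn(d, gpfn). */
    static unsigned long gpfn_to_mfn(unsigned long gpfn) { return p2m[gpfn]; }

    int main(void)
    {
        unsigned long req_ptr = (2UL << 12) | 0x18;  /* gpfn 2, offset 0x18 */
        unsigned long gpfn = req_ptr >> 12;          /* PAGE_SHIFT == 12 on x86 */
        printf("gpfn %lu -> mfn %lu\n", gpfn, gpfn_to_mfn(gpfn));
        return 0;
    }
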
  72.357 @@ -1932,9 +1957,6 @@ int do_mmu_update(
  72.358                          if ( shadow_mode_log_dirty(d) )
  72.359                              __mark_dirty(d, mfn);
  72.360  
  72.361 -                        gpfn = __mfn_to_gpfn(d, mfn);
  72.362 -                        ASSERT(VALID_M2P(gpfn));
  72.363 -
  72.364                          if ( page_is_page_table(page) &&
  72.365                               !page_out_of_sync(page) )
  72.366                          {
  72.367 @@ -1953,24 +1975,29 @@ int do_mmu_update(
  72.368                  break;
  72.369              }
  72.370  
  72.371 +            unmap_domain_mem_with_cache(va, &mapcache);
  72.372 +
  72.373              put_page(page);
  72.374              break;
  72.375  
  72.376          case MMU_MACHPHYS_UPDATE:
  72.377  
  72.378 +            mfn = req.ptr >> PAGE_SHIFT;
  72.379 +            gpfn = req.val;
  72.380 +
  72.381              /* HACK ALERT...  Need to think about this some more... */
  72.382              if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
  72.383              {
  72.384 -                rc = FOREIGNDOM->next_io_page++;
  72.385 -                printk("privileged guest dom%d requests mfn=%lx for dom%d, "
  72.386 -                       "gets pfn=%x\n",
  72.387 -                       d->id, mfn, FOREIGNDOM->id, rc);
  72.388 -                set_machinetophys(mfn, rc);
  72.389 -                set_p2m_entry(FOREIGNDOM, rc, mfn);
  72.390 +                shadow_lock(FOREIGNDOM);
  72.391 +                printk("privileged guest dom%d requests pfn=%lx to map mfn=%lx for dom%d\n",
  72.392 +                       d->id, gpfn, mfn, FOREIGNDOM->id);
  72.393 +                set_machinetophys(mfn, gpfn);
  72.394 +                set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache);
  72.395                  okay = 1;
  72.396 +                shadow_unlock(FOREIGNDOM);
  72.397                  break;
  72.398              }
  72.399 -            
  72.400 +
  72.401              if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
  72.402              {
  72.403                  MEM_LOG("Could not get page for mach->phys update");
  72.404 @@ -1983,7 +2010,7 @@ int do_mmu_update(
  72.405                  break;
  72.406              }
  72.407  
  72.408 -            set_machinetophys(mfn, req.val);
  72.409 +            set_machinetophys(mfn, gpfn);
  72.410              okay = 1;
  72.411  
  72.412              /*
  72.413 @@ -2012,8 +2039,8 @@ int do_mmu_update(
  72.414      }
  72.415  
  72.416   out:
  72.417 -    if ( prev_mfn != 0 )
  72.418 -        unmap_domain_mem((void *)va);
  72.419 +    destroy_map_domain_mem_cache(&mapcache);
  72.420 +    destroy_map_domain_mem_cache(&sh_mapcache);
  72.421  
  72.422      process_deferred_ops(cpu);
  72.423  
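
The mapcache/sh_mapcache pair introduced in do_mmu_update() follows a strict init/map/unmap/destroy discipline: the cache keeps the most recent mapping live, so batched updates that hit the same page repeatedly skip the map/unmap round trip the deleted prev_mfn logic used to hand-roll. A userspace sketch of the pattern; the function names match the diff but the bodies are illustrative stand-ins, not Xen's implementation:

    #include <stdio.h>
    #include <stdlib.h>

    struct map_dom_mem_cache { unsigned long pa; void *va; };

    static void init_map_domain_mem_cache(struct map_dom_mem_cache *c)
    { c->pa = 0; c->va = NULL; }

    static void *map_domain_mem_with_cache(unsigned long pa,
                                           struct map_dom_mem_cache *c)
    {
        if (c->va == NULL || c->pa != (pa & ~0xfffUL)) {
            free(c->va);              /* stand-in for unmapping the old page */
            c->pa = pa & ~0xfffUL;
            c->va = malloc(4096);     /* stand-in for a real mapping */
        }
        return (char *)c->va + (pa & 0xfffUL);
    }

    static void unmap_domain_mem_with_cache(void *va, struct map_dom_mem_cache *c)
    { (void)va; (void)c; /* mapping stays cached until destroy */ }

    static void destroy_map_domain_mem_cache(struct map_dom_mem_cache *c)
    { free(c->va); c->va = NULL; }

    int main(void)
    {
        struct map_dom_mem_cache cache;
        int i;

        init_map_domain_mem_cache(&cache);
        for (i = 0; i < 4; i++) {     /* same page: only iteration 0 maps */
            void *va = map_domain_mem_with_cache(0x1000 + 8 * i, &cache);
            printf("iteration %d -> va %p\n", i, va);
            unmap_domain_mem_with_cache(va, &cache);
        }
        destroy_map_domain_mem_cache(&cache);
        return 0;
    }
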
  72.424 @@ -2031,73 +2058,6 @@ int do_mmu_update(
  72.425  /* This function assumes the caller is holding the domain's BIGLOCK
  72.426   * and is running in a shadow mode
  72.427   */
  72.428 -int update_shadow_va_mapping(unsigned long va,
  72.429 -                             l1_pgentry_t val,
  72.430 -                             struct exec_domain *ed,
  72.431 -                             struct domain *d)
  72.432 -{
  72.433 -    unsigned long l1mfn;
  72.434 -    l1_pgentry_t spte;
  72.435 -    int rc = 0;
  72.436 -
  72.437 -    check_pagetable(ed, "pre-va"); /* debug */
  72.438 -    shadow_lock(d);
  72.439 -        
  72.440 -    // This is actually overkill - we don't need to sync the L1 itself,
  72.441 -    // just everything involved in getting to this L1 (i.e. we need
  72.442 -    // linear_pg_table[l1_linear_offset(va)] to be in sync)...
  72.443 -    //
  72.444 -    __shadow_sync_va(ed, va);
  72.445 -
  72.446 -#if 1 /* keep check_pagetables() happy */
  72.447 -    /*
  72.448 -     * However, the above doesn't guarantee that there's no snapshot of
  72.449 -     * the L1 table in question; it just says that the relevant L2 and L1
  72.450 -     * entries for VA are in-sync.  There might still be a snapshot.
  72.451 -     *
  72.452 -     * The checking code in _check_pagetables() assumes that no one will
  72.453 -     * mutate the shadow of a page that has a snapshot.  It's actually
  72.454 -     * OK to not sync this page, but it seems simpler to:
  72.455 -     * 1) keep all code paths the same, and
  72.456 -     * 2) maintain the invariant for _check_pagetables(), rather than try
  72.457 -     *    to teach it about this boundary case.
  72.458 -     * So we flush this L1 page, if it's out of sync.
  72.459 -     */
  72.460 -    l1mfn = l2e_get_pfn(linear_l2_table(ed)[l2_table_offset(va)]);
  72.461 -    if ( mfn_out_of_sync(l1mfn) )
  72.462 -    {
  72.463 -        perfc_incrc(extra_va_update_sync);
  72.464 -        __shadow_sync_mfn(d, l1mfn);
  72.465 -    }
  72.466 -#endif /* keep check_pagetables() happy */
  72.467 -
  72.468 -    if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
  72.469 -                                 &val, sizeof(val))))
  72.470 -    {
  72.471 -        rc = -EINVAL;
  72.472 -        goto out;
  72.473 -    }
  72.474 -
  72.475 -    // also need to update the shadow
  72.476 -
  72.477 -    l1pte_propagate_from_guest(d, val, &spte);
  72.478 -    shadow_set_l1e(va, spte, 0);
  72.479 -
  72.480 -    /*
  72.481 -     * If we're in log-dirty mode then we need to note that we've updated
  72.482 -     * the PTE in the PT-holding page. We need the machine frame number
  72.483 -     * for this.
  72.484 -     */
  72.485 -    if ( shadow_mode_log_dirty(d) )
  72.486 -        mark_dirty(d, va_to_l1mfn(ed, va));
  72.487 -
  72.488 - out:
  72.489 -    shadow_unlock(d);
  72.490 -    check_pagetable(ed, "post-va"); /* debug */
  72.491 -
  72.492 -    return rc;
  72.493 -}
  72.494 -
  72.495  int update_grant_va_mapping(unsigned long va,
  72.496                              l1_pgentry_t _nl1e, 
  72.497                              struct domain *d,
  72.498 @@ -2116,11 +2076,17 @@ int update_grant_va_mapping(unsigned lon
  72.499      
  72.500      cleanup_writable_pagetable(d);
  72.501  
  72.502 +    // This is actually overkill - we don't need to sync the L1 itself,
  72.503 +    // just everything involved in getting to this L1 (i.e. we need
  72.504 +    // linear_pg_table[l1_linear_offset(va)] to be in sync)...
  72.505 +    //
  72.506 +    __shadow_sync_va(ed, va);
  72.507 +
  72.508      pl1e = &linear_pg_table[l1_linear_offset(va)];
  72.509  
  72.510      if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
  72.511          rc = -EINVAL;
  72.512 -    else
  72.513 +    else if ( !shadow_mode_refcounts(d) )
  72.514      {
  72.515          if ( update_l1e(pl1e, ol1e, _nl1e) )
  72.516          {
  72.517 @@ -2133,9 +2099,14 @@ int update_grant_va_mapping(unsigned lon
  72.518          else
  72.519              rc = -EINVAL;
  72.520      }
  72.521 +    else
  72.522 +    {
  72.523 +        printk("grant tables and shadow mode currently don't work together\n");
  72.524 +        BUG();
  72.525 +    }
  72.526  
  72.527      if ( unlikely(shadow_mode_enabled(d)) )
  72.528 -        update_shadow_va_mapping(va, _nl1e, ed, d);
  72.529 +        shadow_do_update_va_mapping(va, _nl1e, ed);
  72.530  
  72.531      return rc;
  72.532  }
  72.533 @@ -2161,6 +2132,13 @@ int do_update_va_mapping(unsigned long v
  72.534      cleanup_writable_pagetable(d);
  72.535  
  72.536      if ( unlikely(shadow_mode_enabled(d)) )
  72.537 +        check_pagetable(ed, "pre-va"); /* debug */
  72.538 +
  72.539 +    if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
  72.540 +                                val)) )
  72.541 +        rc = -EINVAL;
  72.542 +
  72.543 +    if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
  72.544      {
  72.545          if ( unlikely(percpu_info[cpu].foreign &&
  72.546                        (shadow_mode_translate(d) ||
  72.547 @@ -2173,11 +2151,10 @@ int do_update_va_mapping(unsigned long v
  72.548              domain_crash();
  72.549          }
  72.550      
  72.551 -        rc = update_shadow_va_mapping(va, val, ed, d);
  72.552 +        rc = shadow_do_update_va_mapping(va, val, ed);
  72.553 +
  72.554 +        check_pagetable(ed, "post-va"); /* debug */
  72.555      }
  72.556 -    else if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
  72.557 -                                     val)) )
  72.558 -        rc = -EINVAL;
  72.559  
  72.560      switch ( flags & UVMF_FLUSHTYPE_MASK )
  72.561      {
  72.562 @@ -2468,14 +2445,68 @@ int ptwr_debug = 0x0;
  72.563  #define PTWR_PRINTK(_f, _a...) ((void)0)
  72.564  #endif
  72.565  
  72.566 +/* Re-validate a given p.t. page, given its prior snapshot */
  72.567 +int revalidate_l1(struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
  72.568 +{
  72.569 +    l1_pgentry_t ol1e, nl1e;
  72.570 +    int modified = 0, i;
  72.571 +
  72.572 +#if 0
  72.573 +    if ( d->id )
  72.574 +        printk("%s: l1page mfn=%lx snapshot mfn=%lx\n", __func__,
  72.575 +               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)l1page)]),
  72.576 +               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)snapshot)]));
  72.577 +#endif
  72.578 +
  72.579 +    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
  72.580 +    {
  72.581 +        ol1e = snapshot[i];
  72.582 +        nl1e = l1page[i];
  72.583 +
  72.584 +        if ( likely(l1e_get_value(ol1e) == l1e_get_value(nl1e)) )
  72.585 +            continue;
  72.586 +
  72.587 +        /* Update number of entries modified. */
  72.588 +        modified++;
  72.589 +
  72.590 +        /*
  72.591 +         * Fast path for PTEs that have merely been write-protected
  72.592 +         * (e.g., during a Unix fork()). A strict reduction in privilege.
  72.593 +         */
  72.594 +        if ( likely(l1e_get_value(ol1e) == (l1e_get_value(nl1e)|_PAGE_RW)) )
  72.595 +        {
  72.596 +            if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
  72.597 +                put_page_type(&frame_table[l1e_get_pfn(nl1e)]);
  72.598 +            continue;
  72.599 +        }
  72.600 +
  72.601 +        if ( unlikely(!get_page_from_l1e(nl1e, d)) )
  72.602 +        {
  72.603 +            MEM_LOG("ptwr: Could not re-validate l1 page\n");
  72.604 +            /*
  72.605 +             * Make the remaining p.t's consistent before crashing, so the
  72.606 +             * reference counts are correct.
  72.607 +             */
  72.608 +            memcpy(&l1page[i], &snapshot[i],
  72.609 +                   (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
  72.610 +            domain_crash();
  72.611 +            break;
  72.612 +        }
  72.613 +        
  72.614 +        put_page_from_l1e(ol1e, d);
  72.615 +    }
  72.616 +
  72.617 +    return modified;
  72.618 +}
  72.619 +
  72.620 +
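
revalidate_l1() above is the loop hoisted out of ptwr_flush() (the old inline copy is deleted further down), and its fast path deserves a note: if the only difference between the snapshot entry and the current one is a cleared _PAGE_RW bit, the change is a strict reduction in privilege, so no re-validation of the mapping is needed. The predicate in isolation, with made-up PTE values:

    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_RW 0x2UL   /* x86 PTE read/write bit */

    int main(void)
    {
        uint64_t ol1e = 0x1234567000ULL | 0x67;   /* made-up: present, RW, ... */
        uint64_t nl1e = ol1e & ~_PAGE_RW;         /* e.g. fork() write-protect */

        /* revalidate_l1()'s fast path: only the RW bit was dropped, so the
         * existing reference counts still cover the new entry. */
        if (ol1e == (nl1e | _PAGE_RW))
            printf("write-protect fast path taken\n");
        return 0;
    }
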
  72.621  /* Flush the given writable p.t. page and write-protect it again. */
  72.622  void ptwr_flush(struct domain *d, const int which)
  72.623  {
  72.624      unsigned long  pte, *ptep, l1va;
  72.625 -    l1_pgentry_t  *pl1e, ol1e, nl1e;
  72.626 +    l1_pgentry_t  *pl1e;
  72.627      l2_pgentry_t  *pl2e;
  72.628 -    int            i;
  72.629 -    unsigned int   modified = 0;
  72.630 +    unsigned int   modified;
  72.631  
  72.632      ASSERT(!shadow_mode_enabled(d));
  72.633  
  72.634 @@ -2524,45 +2555,8 @@ void ptwr_flush(struct domain *d, const 
  72.635       */
  72.636  
  72.637      pl1e = d->arch.ptwr[which].pl1e;
  72.638 -    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
  72.639 -    {
  72.640 -        ol1e = d->arch.ptwr[which].page[i];
  72.641 -        nl1e = pl1e[i];
  72.642 -
  72.643 -        if ( likely(l1e_get_value(ol1e) == l1e_get_value(nl1e)) )
  72.644 -            continue;
  72.645 -
  72.646 -        /* Update number of entries modified. */
  72.647 -        modified++;
  72.648 -
  72.649 -        /*
  72.650 -         * Fast path for PTEs that have merely been write-protected
  72.651 -         * (e.g., during a Unix fork()). A strict reduction in privilege.
  72.652 -         */
  72.653 -        if ( likely(l1e_get_value(ol1e) == (l1e_get_value(nl1e)|_PAGE_RW)) )
  72.654 -        {
  72.655 -            if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
  72.656 -                put_page_type(&frame_table[l1e_get_pfn(nl1e)]);
  72.657 -            continue;
  72.658 -        }
  72.659 -
  72.660 -        if ( unlikely(!get_page_from_l1e(nl1e, d)) )
  72.661 -        {
  72.662 -            MEM_LOG("ptwr: Could not re-validate l1 page\n");
  72.663 -            /*
  72.664 -             * Make the remaining p.t's consistent before crashing, so the
  72.665 -             * reference counts are correct.
  72.666 -             */
  72.667 -            memcpy(&pl1e[i], &d->arch.ptwr[which].page[i],
  72.668 -                   (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
  72.669 -            domain_crash();
  72.670 -            break;
  72.671 -        }
  72.672 -        
  72.673 -        put_page_from_l1e(ol1e, d);
  72.674 -    }
  72.675 +    modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
  72.676      unmap_domain_mem(pl1e);
  72.677 -    
  72.678      perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
  72.679      d->arch.ptwr[which].prev_nr_updates  = modified;
  72.680  
  72.681 @@ -2776,7 +2770,7 @@ int ptwr_do_page_fault(struct domain *d,
  72.682       * If this is a multi-processor guest then ensure that the page is hooked
  72.683       * into at most one L2 table, which must be the one running on this VCPU.
  72.684       */
  72.685 -    if ( (d->exec_domain[0]->ed_next_list != NULL) &&
  72.686 +    if ( (d->exec_domain[0]->next_in_list != NULL) &&
  72.687           ((page->u.inuse.type_info & PGT_count_mask) != 
  72.688            (!!(page->u.inuse.type_info & PGT_pinned) +
  72.689             (which == PTWR_PT_ACTIVE))) )
  72.690 @@ -2945,13 +2939,13 @@ void ptwr_destroy(struct domain *d)
  72.691           * Also, a domain mustn't have PGC_allocated pages when it is dying.
  72.692           */
  72.693          ASSERT(e->tot_pages <= e->max_pages);
  72.694 -        if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
  72.695 +        if ( unlikely(test_bit(DF_DYING, &e->flags)) ||
  72.696               unlikely(e->tot_pages == e->max_pages) ||
  72.697               unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) )
  72.698          {
  72.699              MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
  72.700                      "provided a bad grant ref, or is dying (%p).\n",
  72.701 -                    e->tot_pages, e->max_pages, e->d_flags);
  72.702 +                    e->tot_pages, e->max_pages, e->flags);
  72.703              spin_unlock(&e->page_alloc_lock);
  72.704              put_domain(e);
  72.705              okay = 0;
    73.1 --- a/xen/arch/x86/mpparse.c	Fri May 06 17:04:27 2005 +0000
    73.2 +++ b/xen/arch/x86/mpparse.c	Wed May 11 13:43:50 2005 +0000
    73.3 @@ -1,5 +1,5 @@
    73.4  /*
    73.5 - *	Intel Multiprocessor Specificiation 1.1 and 1.4
    73.6 + *	Intel Multiprocessor Specification 1.1 and 1.4
    73.7   *	compliant MP-table parsing routines.
    73.8   *
    73.9   *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
   73.10 @@ -14,44 +14,48 @@
   73.11   */
   73.12  
   73.13  #include <xen/config.h>
   73.14 +#include <xen/types.h>
   73.15 +#include <xen/irq.h>
   73.16  #include <xen/init.h>
   73.17 -#include <xen/lib.h>
   73.18 -#include <xen/kernel.h>
   73.19 -#include <xen/irq.h>
   73.20 -#include <xen/smp.h>
   73.21 -#include <xen/mm.h>
   73.22  #include <xen/acpi.h>
   73.23 +#include <xen/delay.h>
   73.24 +#include <xen/sched.h>
   73.25 +
   73.26 +#include <asm/mc146818rtc.h>
   73.27 +#include <asm/bitops.h>
   73.28 +#include <asm/smp.h>
   73.29  #include <asm/acpi.h>
   73.30 -#include <asm/io.h>
   73.31 -#include <asm/apic.h>
   73.32 +#include <asm/mtrr.h>
   73.33  #include <asm/mpspec.h>
   73.34 -#include <asm/flushtlb.h>
   73.35 -#include <asm/smpboot.h>
   73.36 +#include <asm/io_apic.h>
   73.37  
   73.38 -int numnodes = 1; /* XXX Xen */
   73.39 +#include <mach_apic.h>
   73.40 +#include <mach_mpparse.h>
   73.41 +#include <bios_ebda.h>
   73.42 +
   73.43 +#define es7000_plat 0 /* XXX XEN */
   73.44  
   73.45  /* Have we found an MP table */
   73.46  int smp_found_config;
   73.47 +unsigned int __initdata maxcpus = NR_CPUS;
   73.48  
   73.49  /*
   73.50   * Various Linux-internal data structures created from the
   73.51   * MP-table.
   73.52   */
   73.53  int apic_version [MAX_APICS];
   73.54 +int mp_bus_id_to_type [MAX_MP_BUSSES];
   73.55 +int mp_bus_id_to_node [MAX_MP_BUSSES];
   73.56 +int mp_bus_id_to_local [MAX_MP_BUSSES];
   73.57  int quad_local_to_mp_bus_id [NR_CPUS/4][4];
   73.58 +int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
   73.59  int mp_current_pci_id;
   73.60 -int *mp_bus_id_to_type;
   73.61 -int *mp_bus_id_to_node;
   73.62 -int *mp_bus_id_to_local;
   73.63 -int *mp_bus_id_to_pci_bus;
   73.64 -int max_mp_busses;
   73.65 -int max_irq_sources;
   73.66  
   73.67  /* I/O APIC entries */
   73.68  struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
   73.69  
   73.70  /* # of MP IRQ source entries */
   73.71 -struct mpc_config_intsrc *mp_irqs;
   73.72 +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
   73.73  
   73.74  /* MP IRQ source entries */
   73.75  int mp_irq_entries;
   73.76 @@ -65,24 +69,18 @@ unsigned long mp_lapic_addr;
   73.77  unsigned int boot_cpu_physical_apicid = -1U;
   73.78  unsigned int boot_cpu_logical_apicid = -1U;
   73.79  /* Internal processor count */
   73.80 -static unsigned int num_processors;
   73.81 +static unsigned int __initdata num_processors;
   73.82  
   73.83  /* Bitmask of physically existing CPUs */
   73.84 -unsigned long phys_cpu_present_map;
   73.85 -unsigned long logical_cpu_present_map;
   73.86 +physid_mask_t phys_cpu_present_map;
   73.87  
   73.88 -#ifdef CONFIG_X86_CLUSTERED_APIC
   73.89 -unsigned char esr_disable = 0;
   73.90 -unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE;
   73.91 -unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC;
   73.92 -#endif
   73.93 -unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
   73.94 +u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
   73.95  
   73.96  /*
   73.97   * Intel MP BIOS table parsing routines:
   73.98   */
   73.99  
  73.100 -#ifndef CONFIG_X86_VISWS_APIC
  73.101 +
  73.102  /*
  73.103   * Checksum an MP configuration block.
  73.104   */
  73.105 @@ -98,48 +96,6 @@ static int __init mpf_checksum(unsigned 
  73.106  }
  73.107  
  73.108  /*
  73.109 - * Processor encoding in an MP configuration block
  73.110 - */
  73.111 -
  73.112 -static char __init *mpc_family(int family,int model)
  73.113 -{
  73.114 -	static char n[32];
  73.115 -	static char *model_defs[]=
  73.116 -	{
  73.117 -		"80486DX","80486DX",
  73.118 -		"80486SX","80486DX/2 or 80487",
  73.119 -		"80486SL","80486SX/2",
  73.120 -		"Unknown","80486DX/2-WB",
  73.121 -		"80486DX/4","80486DX/4-WB"
  73.122 -	};
  73.123 -
  73.124 -	switch (family) {
  73.125 -		case 0x04:
  73.126 -			if (model < 10)
  73.127 -				return model_defs[model];
  73.128 -			break;
  73.129 -
  73.130 -		case 0x05:
  73.131 -			return("Pentium(tm)");
  73.132 -
  73.133 -		case 0x06:
  73.134 -			return("Pentium(tm) Pro");
  73.135 -
  73.136 -		case 0x0F:
  73.137 -			if (model == 0x00)
  73.138 -				return("Pentium 4(tm)");
  73.139 -			if (model == 0x01)
  73.140 -				return("Pentium 4(tm)");
  73.141 -			if (model == 0x02)
  73.142 -				return("Pentium 4(tm) XEON(tm)");
  73.143 -			if (model == 0x0F)
  73.144 -				return("Special controller");
  73.145 -	}
  73.146 -	sprintf(n,"Unknown CPU [%d:%d]",family, model);
  73.147 -	return n;
  73.148 -}
  73.149 -
  73.150 -/* 
  73.151   * Have to match translation table entries to main table entries by counter
  73.152   * hence the mpc_record variable .... can't see a less disgusting way of
  73.153   * doing this ....
  73.154 @@ -148,30 +104,30 @@ static char __init *mpc_family(int famil
  73.155  static int mpc_record; 
  73.156  static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
  73.157  
  73.158 +#ifdef CONFIG_X86_NUMAQ
  73.159 +static int MP_valid_apicid(int apicid, int version)
  73.160 +{
  73.161 +	return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
  73.162 +}
  73.163 +#else
  73.164 +static int MP_valid_apicid(int apicid, int version)
  73.165 +{
  73.166 +	if (version >= 0x14)
  73.167 +		return apicid < 0xff;
  73.168 +	else
  73.169 +		return apicid < 0xf;
  73.170 +}
  73.171 +#endif
  73.172 +
  73.173  void __init MP_processor_info (struct mpc_config_processor *m)
  73.174  {
  73.175 - 	int ver, quad, logical_apicid;
  73.176 + 	int ver, apicid;
  73.177 +	physid_mask_t tmp;
  73.178   	
  73.179  	if (!(m->mpc_cpuflag & CPU_ENABLED))
  73.180  		return;
  73.181  
  73.182 -	logical_apicid = m->mpc_apicid;
  73.183 -	if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
  73.184 -		quad = translation_table[mpc_record]->trans_quad;
  73.185 -		logical_apicid = (quad << 4) + 
  73.186 -			(m->mpc_apicid ? m->mpc_apicid << 1 : 1);
  73.187 -		printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n",
  73.188 -			m->mpc_apicid,
  73.189 -			mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
  73.190 -				   (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
  73.191 -			m->mpc_apicver, quad, logical_apicid);
  73.192 -	} else {
  73.193 -		printk("Processor #%d %s APIC version %d\n",
  73.194 -			m->mpc_apicid,
  73.195 -			mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
  73.196 -				   (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
  73.197 -			m->mpc_apicver);
  73.198 -	}
  73.199 +	apicid = mpc_apic_id(m, translation_table[mpc_record]);
  73.200  
  73.201  	if (m->mpc_featureflag&(1<<0))
  73.202  		Dprintk("    Floating point unit present.\n");
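
MP_valid_apicid() above replaces the flat m->mpc_apicid > MAX_APICS test with per-platform rules: NUMA-Q packs a quad number into the high nibble and a one-hot CPU bit into the low one, while the default rule keys off the APIC version. Both checks in standalone form (the kernel uses hweight_long; a local popcount stands in here):

    #include <stdio.h>

    static int hweight4(unsigned x)
    { int n = 0; for (; x; x >>= 1) n += x & 1; return n; }

    /* NUMA-Q: low nibble must be one-hot, high nibble must not be 0xf. */
    static int numaq_valid(int apicid)
    { return hweight4(apicid & 0xf) == 1 && (apicid >> 4) != 0xf; }

    /* Default: newer APICs allow 8-bit IDs, older ones only 4-bit. */
    static int default_valid(int apicid, int version)
    { return version >= 0x14 ? apicid < 0xff : apicid < 0xf; }

    int main(void)
    {
        printf("numaq 0x12 -> %d\n", numaq_valid(0x12));  /* quad 1, cpu bit 1 */
        printf("default 0x20 ver 0x11 -> %d\n", default_valid(0x20, 0x11));
        return 0;
    }
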
  73.203 @@ -224,68 +180,68 @@ void __init MP_processor_info (struct mp
  73.204  	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
  73.205  		Dprintk("    Bootup CPU\n");
  73.206  		boot_cpu_physical_apicid = m->mpc_apicid;
  73.207 -		boot_cpu_logical_apicid = logical_apicid;
  73.208 +		boot_cpu_logical_apicid = apicid;
  73.209  	}
  73.210  
  73.211 -	if (num_processors >= NR_CPUS){
  73.212 -		printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot "
  73.213 -			"boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid);
  73.214 +	if (num_processors >= NR_CPUS) {
  73.215 +		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
  73.216 +			"  Processor ignored.\n", NR_CPUS); 
  73.217 +		return;
  73.218 +	}
  73.219 +
  73.220 +	if (num_processors >= maxcpus) {
  73.221 +		printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
  73.222 +			" Processor ignored.\n", maxcpus); 
  73.223  		return;
  73.224  	}
  73.225  	num_processors++;
  73.226 +	ver = m->mpc_apicver;
  73.227  
  73.228 -	if (m->mpc_apicid > MAX_APICS) {
  73.229 -		printk("Processor #%d INVALID. (Max ID: %d).\n",
  73.230 +	if (!MP_valid_apicid(apicid, ver)) {
  73.231 +		printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
  73.232  			m->mpc_apicid, MAX_APICS);
  73.233  		--num_processors;
  73.234  		return;
  73.235  	}
  73.236 -	ver = m->mpc_apicver;
  73.237  
  73.238 -	logical_cpu_present_map |= 1 << (num_processors-1);
  73.239 - 	phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid);
  73.240 - 
  73.241 +	tmp = apicid_to_cpu_present(apicid);
  73.242 +	physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
  73.243 +	
  73.244  	/*
  73.245  	 * Validate version
  73.246  	 */
  73.247  	if (ver == 0x0) {
  73.248 -		printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
  73.249 +		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
  73.250  		ver = 0x10;
  73.251  	}
  73.252  	apic_version[m->mpc_apicid] = ver;
  73.253 -	raw_phys_apicid[num_processors - 1] = m->mpc_apicid;
  73.254 +	bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
  73.255  }
  73.256  
  73.257  static void __init MP_bus_info (struct mpc_config_bus *m)
  73.258  {
  73.259  	char str[7];
  73.260 -	int quad;
  73.261  
  73.262  	memcpy(str, m->mpc_bustype, 6);
  73.263  	str[6] = 0;
  73.264 -	
  73.265 -	if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
  73.266 -		quad = translation_table[mpc_record]->trans_quad;
  73.267 -		mp_bus_id_to_node[m->mpc_busid] = quad;
  73.268 -		mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local;
  73.269 -		quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid;
  73.270 -		printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad);
  73.271 -	} else {
  73.272 -		Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
  73.273 -	}
  73.274 +
  73.275 +	mpc_oem_bus_info(m, str, translation_table[mpc_record]);
  73.276  
  73.277  	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
  73.278  		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
  73.279  	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
  73.280  		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
  73.281  	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
  73.282 +		mpc_oem_pci_bus(m, translation_table[mpc_record]);
  73.283  		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
  73.284  		mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
  73.285  		mp_current_pci_id++;
  73.286  	} else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
  73.287  		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
  73.288 +	} else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
  73.289 +		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
  73.290  	} else {
  73.291 -		printk("Unknown bustype %s - ignoring\n", str);
  73.292 +		printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
  73.293  	}
  73.294  }
  73.295  
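
The phys_cpu_present_map conversion running through MP_processor_info() is about width: a single unsigned long caps the map at BITS_PER_LONG APIC IDs, while physid_mask_t is a fixed-size bitmap combined with physids_or(). A standalone sketch of the type; the signatures here are illustrative (the kernel's physid macros operate on values, not pointers):

    #include <stdio.h>

    #define MAX_APICS 256   /* illustrative sizing */
    #define BPL (8 * sizeof(unsigned long))
    typedef struct { unsigned long mask[MAX_APICS / BPL]; } physid_mask_t;
    #define PHYSID_MASK_NONE ((physid_mask_t){ { 0 } })

    static void physid_set(int id, physid_mask_t *map)
    { map->mask[id / BPL] |= 1UL << (id % BPL); }

    static int physid_isset(int id, const physid_mask_t *map)
    { return (map->mask[id / BPL] >> (id % BPL)) & 1; }

    static void physids_or(physid_mask_t *dst, const physid_mask_t *a,
                           const physid_mask_t *b)
    {
        unsigned i;
        for (i = 0; i < MAX_APICS / BPL; i++)
            dst->mask[i] = a->mask[i] | b->mask[i];
    }

    int main(void)
    {
        physid_mask_t present = PHYSID_MASK_NONE, tmp = PHYSID_MASK_NONE;

        physid_set(66, &tmp);                 /* apicid 66: past one 64-bit word */
        physids_or(&present, &present, &tmp); /* as in MP_processor_info() */
        printf("apicid 66 present: %d\n", physid_isset(66, &present));
        return 0;
    }
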
  73.296 @@ -294,10 +250,10 @@ static void __init MP_ioapic_info (struc
  73.297  	if (!(m->mpc_flags & MPC_APIC_USABLE))
  73.298  		return;
  73.299  
  73.300 -	printk("I/O APIC #%d Version %d at 0x%X.\n",
  73.301 +	printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
  73.302  		m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
  73.303  	if (nr_ioapics >= MAX_IO_APICS) {
  73.304 -		printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
  73.305 +		printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
  73.306  			MAX_IO_APICS, nr_ioapics);
 73.307  		panic("Recompile kernel with bigger MAX_IO_APICS!\n");
  73.308  	}
  73.309 @@ -318,7 +274,7 @@ static void __init MP_intsrc_info (struc
  73.310  			m->mpc_irqtype, m->mpc_irqflag & 3,
  73.311  			(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
  73.312  			m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
  73.313 -	if (++mp_irq_entries == max_irq_sources)
  73.314 +	if (++mp_irq_entries == MAX_IRQ_SOURCES)
  73.315  		panic("Max # of irq sources exceeded!!\n");
  73.316  }
  73.317  
  73.318 @@ -344,16 +300,17 @@ static void __init MP_lintsrc_info (stru
  73.319  			BUG();
  73.320  }
  73.321  
  73.322 +#ifdef CONFIG_X86_NUMAQ
  73.323  static void __init MP_translation_info (struct mpc_config_translation *m)
  73.324  {
  73.325 -	printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
  73.326 +	printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
  73.327  
  73.328  	if (mpc_record >= MAX_MPC_ENTRY) 
  73.329 -		printk("MAX_MPC_ENTRY exceeded!\n");
  73.330 +		printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
  73.331  	else
  73.332  		translation_table[mpc_record] = m; /* stash this for later */
  73.333 -	if (m->trans_quad+1 > numnodes)
  73.334 -		numnodes = m->trans_quad+1;
  73.335 +	if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
  73.336 +		node_set_online(m->trans_quad);
  73.337  }
  73.338  
  73.339  /*
  73.340 @@ -366,10 +323,11 @@ static void __init smp_read_mpc_oem(stru
  73.341  	int count = sizeof (*oemtable); /* the header size */
  73.342  	unsigned char *oemptr = ((unsigned char *)oemtable)+count;
  73.343  	
  73.344 -	printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
  73.345 +	mpc_record = 0;
  73.346 +	printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
  73.347  	if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
  73.348  	{
  73.349 -		printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
  73.350 +		printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
  73.351  			oemtable->oem_signature[0],
  73.352  			oemtable->oem_signature[1],
  73.353  			oemtable->oem_signature[2],
  73.354 @@ -378,7 +336,7 @@ static void __init smp_read_mpc_oem(stru
  73.355  	}
  73.356  	if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
  73.357  	{
  73.358 -		printk("SMP oem mptable: checksum error!\n");
  73.359 +		printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
  73.360  		return;
  73.361  	}
  73.362  	while (count < oemtable->oem_length) {
  73.363 @@ -395,36 +353,42 @@ static void __init smp_read_mpc_oem(stru
  73.364  			}
  73.365  			default:
  73.366  			{
  73.367 -				printk("Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
  73.368 +				printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
  73.369  				return;
  73.370  			}
  73.371  		}
  73.372         }
  73.373  }
  73.374  
  73.375 +static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
  73.376 +		char *productid)
  73.377 +{
  73.378 +	if (strncmp(oem, "IBM NUMA", 8))
  73.379 +	if (strncmp(oem, "IBM NUMA", 8))
  73.380 +		printk(KERN_WARNING "Warning! May not be a NUMA-Q system!\n");
  73.381 +	if (mpc->mpc_oemptr)
  73.381 +		smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
  73.382 +				mpc->mpc_oemsize);
  73.383 +}
  73.384 +#endif	/* CONFIG_X86_NUMAQ */
  73.385 +
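
Both the MPC and the OEM table are validated with the same byte-sum check: a
table is valid only if all of its bytes sum to zero modulo 256, the BIOS having
chosen the checksum byte to make that so. A sketch of the checker used
throughout this file (matching the long-standing Linux implementation):

    /* Returns 0 for a valid table: the BIOS picks the checksum byte so
     * that the bytes of the table sum to 0 (mod 256). */
    static int __init mpf_checksum(unsigned char *mp, int len)
    {
        int sum = 0;
        while (len--)
            sum += *mp++;
        return sum & 0xFF;
    }
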
  73.386  /*
  73.387   * Read/parse the MPC
  73.388   */
  73.389  
  73.390  static int __init smp_read_mpc(struct mp_config_table *mpc)
  73.391  {
  73.392 -	char oem[16], prod[14];
  73.393 +	char str[16];
  73.394 +	char oem[10];
  73.395  	int count=sizeof(*mpc);
  73.396  	unsigned char *mpt=((unsigned char *)mpc)+count;
  73.397 -	int num_bus = 0;
  73.398 -	int num_irq = 0;
  73.399 -	unsigned char *bus_data;
  73.400  
  73.401  	if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
  73.402 -		panic("SMP mptable: bad signature [%c%c%c%c]!\n",
  73.403 -			mpc->mpc_signature[0],
  73.404 -			mpc->mpc_signature[1],
  73.405 -			mpc->mpc_signature[2],
  73.406 -			mpc->mpc_signature[3]);
  73.407 +		printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
  73.408 +			*(u32 *)mpc->mpc_signature);
  73.409  		return 0;
  73.410  	}
  73.411  	if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
  73.412 -		panic("SMP mptable: checksum error!\n");
  73.413 +		printk(KERN_ERR "SMP mptable: checksum error!\n");
  73.414  		return 0;
  73.415  	}
  73.416  	if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
  73.417 @@ -438,14 +402,14 @@ static int __init smp_read_mpc(struct mp
  73.418  	}
  73.419  	memcpy(oem,mpc->mpc_oem,8);
  73.420  	oem[8]=0;
  73.421 -	printk("OEM ID: %s ",oem);
  73.422 +	printk(KERN_INFO "OEM ID: %s ",oem);
  73.423  
  73.424 -	memcpy(prod,mpc->mpc_productid,12);
  73.425 -	prod[12]=0;
  73.426 -	printk("Product ID: %s ",prod);
  73.427 +	memcpy(str,mpc->mpc_productid,12);
  73.428 +	str[12]=0;
  73.429 +	printk("Product ID: %s ",str);
  73.430  
  73.431 -	detect_clustered_apic(oem, prod);
  73.432 -	
  73.433 +	mps_oem_check(mpc, oem, str);
  73.434 +
  73.435  	printk("APIC at: 0x%X\n",mpc->mpc_lapic);
  73.436  
  73.437  	/* 
  73.438 @@ -455,77 +419,10 @@ static int __init smp_read_mpc(struct mp
  73.439  	if (!acpi_lapic)
  73.440  		mp_lapic_addr = mpc->mpc_lapic;
  73.441  
  73.442 -	if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) {
  73.443 -		/* We need to process the oem mpc tables to tell us which quad things are in ... */
  73.444 -		mpc_record = 0;
  73.445 -		smp_read_mpc_oem((struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr, mpc->mpc_oemsize);
  73.446 -		mpc_record = 0;
  73.447 -	}
  73.448 -
  73.449 -	/* Pre-scan to determine the number of bus and 
  73.450 -	 * interrupts records we have
  73.451 -	 */
  73.452 -	while (count < mpc->mpc_length) {
  73.453 -		switch (*mpt) {
  73.454 -			case MP_PROCESSOR:
  73.455 -				mpt += sizeof(struct mpc_config_processor);
  73.456 -				count += sizeof(struct mpc_config_processor);
  73.457 -				break;
  73.458 -			case MP_BUS:
  73.459 -				++num_bus;
  73.460 -				mpt += sizeof(struct mpc_config_bus);
  73.461 -				count += sizeof(struct mpc_config_bus);
  73.462 -				break;
  73.463 -			case MP_INTSRC:
  73.464 -				++num_irq;
  73.465 -				mpt += sizeof(struct mpc_config_intsrc);
  73.466 -				count += sizeof(struct mpc_config_intsrc);
  73.467 -				break;
  73.468 -			case MP_IOAPIC:
  73.469 -				mpt += sizeof(struct mpc_config_ioapic);
  73.470 -				count += sizeof(struct mpc_config_ioapic);
  73.471 -				break;
  73.472 -			case MP_LINTSRC:
  73.473 -				mpt += sizeof(struct mpc_config_lintsrc);
  73.474 -				count += sizeof(struct mpc_config_lintsrc);
  73.475 -				break;
  73.476 -			default:
  73.477 -				count = mpc->mpc_length;
  73.478 -				break;
  73.479 -		}
  73.480 -	}
  73.481 -	/* 
  73.482 -	 * Paranoia: Allocate one extra of both the number of busses and number
  73.483 -	 * of irqs, and make sure that we have at least 4 interrupts per PCI
  73.484 -	 * slot.  But some machines do not report very many busses, so we need
  73.485 -	 * to fall back on the older defaults.
  73.486 -	 */
  73.487 -	++num_bus;
  73.488 -	max_mp_busses = max(num_bus, MAX_MP_BUSSES);
  73.489 -	if (num_irq < (4 * max_mp_busses))
  73.490 -		num_irq = 4 * num_bus;	/* 4 intr/PCI slot */
  73.491 -	++num_irq;
  73.492 -	max_irq_sources = max(num_irq, MAX_IRQ_SOURCES);
  73.493 -	
  73.494 -	count = (max_mp_busses * sizeof(int)) * 4;
  73.495 -	count += (max_irq_sources * sizeof(struct mpc_config_intsrc));
  73.496 -	bus_data = (void *)alloc_xenheap_pages(get_order(count));
  73.497 -	if (!bus_data) {
  73.498 -		printk(KERN_ERR "SMP mptable: out of memory!\n");
  73.499 -		return 0;
  73.500 -	}
  73.501 -	mp_bus_id_to_type = (int *)&bus_data[0];
  73.502 -	mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))];
  73.503 -	mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2];
  73.504 -	mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3];
  73.505 -	mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4];
  73.506 -	memset(mp_bus_id_to_pci_bus, -1, max_mp_busses * sizeof(int));
  73.507 -
  73.508  	/*
  73.509  	 *	Now process the configuration blocks.
  73.510  	 */
  73.511 -	count = sizeof(*mpc);
  73.512 -	mpt = ((unsigned char *)mpc)+count;
  73.513 +	mpc_record = 0;
  73.514  	while (count < mpc->mpc_length) {
  73.515  		switch(*mpt) {
  73.516  			case MP_PROCESSOR:
  73.517 @@ -584,21 +481,7 @@ static int __init smp_read_mpc(struct mp
  73.518  		}
  73.519  		++mpc_record;
  73.520  	}
  73.521 -
  73.522 -	if (clustered_apic_mode){
  73.523 -		phys_cpu_present_map = logical_cpu_present_map;
  73.524 -	}
  73.525 -
  73.526 -
  73.527 -	printk("Enabling APIC mode: ");
  73.528 -	if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
  73.529 -		printk("Clustered Logical.	");
  73.530 -	else if(clustered_apic_mode == CLUSTERED_APIC_XAPIC)
  73.531 -		printk("Physical.	");
  73.532 -	else
  73.533 -		printk("Flat.	");
  73.534 -	printk("Using %d I/O APICs\n",nr_ioapics);
  73.535 -
  73.536 +	clustered_apic_check();
  73.537  	if (!num_processors)
  73.538  		printk(KERN_ERR "SMP mptable: no processors registered!\n");
  73.539  	return num_processors;
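
With the pre-scan and dynamic allocation gone, smp_read_mpc() makes a single
pass over the variable-length entry list: each record begins with a type byte
that determines its size, so the cursor advances record by record. A minimal
sketch of that walk (entry sizes per the MP 1.4 spec):

    /* Sketch: walking a type-tagged, variable-length record list. */
    unsigned char *mpt = (unsigned char *)mpc + sizeof(*mpc);
    int count = sizeof(*mpc);

    while (count < mpc->mpc_length) {
        switch (*mpt) {
        case MP_PROCESSOR:
            mpt   += sizeof(struct mpc_config_processor);
            count += sizeof(struct mpc_config_processor);
            break;
        case MP_BUS:
            mpt   += sizeof(struct mpc_config_bus);
            count += sizeof(struct mpc_config_bus);
            break;
        /* ... MP_IOAPIC, MP_INTSRC, MP_LINTSRC likewise ... */
        default:
            count = mpc->mpc_length;   /* unknown type: stop parsing */
            break;
        }
    }
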
  73.540 @@ -634,12 +517,12 @@ static void __init construct_default_ioi
  73.541  	 *  If it does, we assume it's valid.
  73.542  	 */
  73.543  	if (mpc_default_type == 5) {
  73.544 -		printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
  73.545 +		printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
  73.546  
  73.547  		if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
  73.548 -			printk("ELCR contains invalid data... not using ELCR\n");
  73.549 +			printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
  73.550  		else {
  73.551 -			printk("Using ELCR to identify PCI interrupts\n");
  73.552 +			printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
  73.553  			ELCR_fallback = 1;
  73.554  		}
  73.555  	}
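
The ELCR fallback probes the chipset's Edge/Level Control Registers at I/O
ports 0x4d0/0x4d1, where each of 16 bits records whether the corresponding ISA
IRQ is level-triggered (typically PCI-routed) or edge-triggered (ISA). IRQs 0,
1, 2 and 13 can never legitimately be level-triggered, which is the sanity
check above. A sketch of the probe, matching the classic macro in this file:

    /* Bit i of the 16-bit ELCR (ports 0x4d0/0x4d1) is 1 if IRQ i is
     * level-triggered; IRQs 0,1,2,13 must always read back as edge. */
    #define ELCR_trigger(idx) \
        ((inb(0x4d0 + ((idx) >> 3)) >> ((idx) & 7)) & 1)
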
  73.556 @@ -686,24 +569,6 @@ static inline void __init construct_defa
  73.557  	struct mpc_config_lintsrc lintsrc;
  73.558  	int linttypes[2] = { mp_ExtINT, mp_NMI };
  73.559  	int i;
  73.560 -	struct {
  73.561 -		int mp_bus_id_to_type[MAX_MP_BUSSES];
  73.562 -		int mp_bus_id_to_node[MAX_MP_BUSSES];
  73.563 -		int mp_bus_id_to_local[MAX_MP_BUSSES];
  73.564 -		int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
  73.565 -		struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
  73.566 -	} *bus_data;
  73.567 -
  73.568 -	bus_data = (void *)alloc_xenheap_pages(get_order(sizeof(*bus_data)));
  73.569 -	if (!bus_data)
  73.570 -		panic("SMP mptable: out of memory!\n");
  73.571 -	mp_bus_id_to_type = bus_data->mp_bus_id_to_type;
  73.572 -	mp_bus_id_to_node = bus_data->mp_bus_id_to_node;
  73.573 -	mp_bus_id_to_local = bus_data->mp_bus_id_to_local;
  73.574 -	mp_bus_id_to_pci_bus = bus_data->mp_bus_id_to_pci_bus;
  73.575 -	mp_irqs = bus_data->mp_irqs;
  73.576 -	for (i = 0; i < MAX_MP_BUSSES; ++i)
  73.577 -		mp_bus_id_to_pci_bus[i] = -1;
  73.578  
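
The removed block above carved the bus-id maps and mp_irqs out of one xenheap
allocation; after this patch the tables are expected to be plain static arrays,
as in contemporary Linux. A hedged sketch of the assumed replacement layout
(names as used elsewhere in this file):

    /* Assumed static layout replacing the removed xenheap carve-up. */
    int mp_bus_id_to_type[MAX_MP_BUSSES];
    int mp_bus_id_to_node[MAX_MP_BUSSES];
    int mp_bus_id_to_local[MAX_MP_BUSSES];
    int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] =
        { [0 ... MAX_MP_BUSSES-1] = -1 };   /* -1: not a PCI bus */
    struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
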
  73.579  	/*
  73.580  	 * local APIC has default address
  73.581 @@ -732,7 +597,8 @@ static inline void __init construct_defa
  73.582  	bus.mpc_busid = 0;
  73.583  	switch (mpc_default_type) {
  73.584  		default:
  73.585 -			printk("???\nUnknown standard configuration %d\n",
  73.586 +			printk("???\n");
  73.587 +			printk(KERN_ERR "Unknown standard configuration %d\n",
  73.588  				mpc_default_type);
  73.589  			/* fall through */
  73.590  		case 1:
  73.591 @@ -790,7 +656,7 @@ void __init get_smp_config (void)
  73.592  
  73.593  	/*
  73.594  	 * ACPI may be used to obtain the entire SMP configuration or just to 
  73.595 -	 * enumerate/configure processors (CONFIG_ACPI_HT_ONLY).  Note that 
  73.596 +	 * enumerate/configure processors (CONFIG_ACPI_BOOT).  Note that 
  73.597  	 * ACPI supports both logical (e.g. Hyper-Threading) and physical 
  73.598  	 * processors, where MPS only supports physical.
  73.599  	 */
  73.600 @@ -801,12 +667,12 @@ void __init get_smp_config (void)
  73.601  	else if (acpi_lapic)
  73.602  		printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
  73.603  
  73.604 -	printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
  73.605 +	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
  73.606  	if (mpf->mpf_feature2 & (1<<7)) {
  73.607 -		printk("    IMCR and PIC compatibility mode.\n");
  73.608 +		printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
  73.609  		pic_mode = 1;
  73.610  	} else {
  73.611 -		printk("    Virtual Wire compatibility mode.\n");
  73.612 +		printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
  73.613  		pic_mode = 0;
  73.614  	}
  73.615  
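
get_smp_config() works from the MP floating pointer structure found earlier by
smp_scan_config(): feature1, when non-zero, selects one of the spec's default
configurations, and bit 7 of feature2 distinguishes IMCR/PIC from Virtual Wire
compatibility mode. For reference, a sketch of the structure's layout as
declared in the Linux mpspec headers of this era:

    struct intel_mp_floating {
        char mpf_signature[4];           /* "_MP_" */
        unsigned long mpf_physptr;       /* MP configuration table address */
        unsigned char mpf_length;        /* structure length in paragraphs */
        unsigned char mpf_specification; /* spec version: 1 or 4 */
        unsigned char mpf_checksum;      /* makes the byte sum zero */
        unsigned char mpf_feature1;      /* nonzero: default config number */
        unsigned char mpf_feature2;      /* bit 7: IMCR|PIC mode */
        unsigned char mpf_feature3;      /* unused */
        unsigned char mpf_feature4;      /* unused */
        unsigned char mpf_feature5;      /* unused */
    };
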
  73.616 @@ -815,7 +681,7 @@ void __init get_smp_config (void)
  73.617  	 */
  73.618  	if (mpf->mpf_feature1 != 0) {
  73.619  
  73.620 -		printk("Default MP configuration #%d\n", mpf->mpf_feature1);
  73.621 +		printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
  73.622  		construct_default_ISA_mptable(mpf->mpf_feature1);
  73.623  
  73.624  	} else if (mpf->mpf_physptr) {
  73.625 @@ -838,7 +704,7 @@ void __init get_smp_config (void)
  73.626  		if (!mp_irq_entries) {
  73.627  			struct mpc_config_bus bus;
  73.628  
  73.629 -			printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
  73.630 +			printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
  73.631  
  73.632  			bus.mpc_type = MP_BUS;
  73.633  			bus.mpc_busid = 0;
  73.634 @@ -851,7 +717,7 @@ void __init get_smp_config (void)
  73.635  	} else
  73.636  		BUG();
  73.637  
  73.638 -	printk("Processors: %d\n", num_processors);
  73.639 +	printk(KERN_INFO "Processors: %d\n", num_processors);
  73.640  	/*
  73.641  	 * Only use the first configuration found.
  73.642  	 */
  73.643 @@ -875,11 +741,27 @@ static int __init smp_scan_config (unsig
  73.644  				|| (mpf->mpf_specification == 4)) ) {
  73.645  
  73.646  			smp_found_config = 1;
  73.647 -			printk("found SMP MP-table at %08lx\n",
  73.648 +			printk(KERN_INFO "found SMP MP-table at %08lx\n",
  73.649  						virt_to_phys(mpf));
  73.650 +#if 0
  73.651  			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
  73.652 -			if (mpf->mpf_physptr)
  73.653 -				reserve_bootmem((unsigned long)mpf->mpf_physptr, PAGE_SIZE);
  73.654 +			if (mpf->mpf_physptr) {
  73.655 +				/*
  73.656 +				 * We cannot access the MPC table to compute
  73.657 +				 * its size yet, as only a few megabytes from
  73.658 +				 * the bottom of memory are mapped now.
  73.659 +				 * The PC-9800 places its MPC table at the
  73.660 +				 * very end of physical memory, so blindly
  73.661 +				 * reserving PAGE_SIZE from mpf->mpf_physptr
  73.662 +				 * would hit the BUG() in reserve_bootmem.
  73.663 +				 */
  73.664 +				unsigned long size = PAGE_SIZE;
  73.665 +				unsigned long end = max_low_pfn * PAGE_SIZE;
  73.666 +				if (mpf->mpf_physptr + size > end)
  73.667 +					size = end - mpf->mpf_physptr;
  73.668 +				reserve_bootmem(mpf->mpf_physptr, size);
  73.669 +			}
  73.670 +#endif
  73.671  			mpf_found = mpf;
  73.672  			return 1;
  73.673  		}
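
Xen has no Linux-style bootmem allocator, so the reservation logic is compiled
out with #if 0 rather than ported. The surrounding scan itself looks for the
paragraph-aligned "_MP_" signature; a minimal sketch of that search loop
(magic constant per the MP spec):

    /* Sketch: scan 'length' bytes at physical 'base' for the
     * 16-byte-aligned "_MP_" floating pointer signature. */
    #define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')

    unsigned long *bp = phys_to_virt(base);
    while (length > 0) {
        if (*bp == SMP_MAGIC_IDENT)
            /* ... verify checksum and spec version, as above ... */;
        bp += 4;            /* advance one 16-byte paragraph */
        length -= 16;
    }
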
  73.674 @@ -889,7 +771,7 @@ static int __init smp_scan_config (unsig
  73.675  	return 0;
  73.676  }
  73.677  
  73.678 -void __init find_intel_smp (void)
  73.679 +void __init find_smp_config (void)
  73.680  {
  73.681  	unsigned int address;
  73.682  
  73.683 @@ -913,53 +795,20 @@ void __init find_intel_smp (void)
  73.684  	 * there is a real-mode segmented pointer pointing to the
  73.685  	 * 4K EBDA area at 0x40E, calculate and scan it here.
  73.686  	 *
  73.687 -	 * NOTE! There were Linux loaders that will corrupt the EBDA
  73.688 +	 * NOTE! There are Linux loaders that will corrupt the EBDA
  73.689  	 * area, and as such this kind of SMP config may be less
  73.690  	 * trustworthy, simply because the SMP table may have been
  73.691 -	 * stomped on during early boot.  Thankfully the bootloaders
  73.692 -	 * now honour the EBDA.
  73.693 +	 * stomped on during early boot. These loaders are buggy and
  73.694 +	 * should be fixed.
  73.695 +	 *
  73.696 +	 * The MP 1.4 spec says to scan only the first 1K of the 4K EBDA.
  73.697  	 */
  73.698  
  73.699 -	address = *(unsigned short *)phys_to_virt(0x40E);
  73.700 -	address <<= 4;
  73.701 -	smp_scan_config(address, 0x1000);
  73.702 +	address = get_bios_ebda();
  73.703 +	if (address)
  73.704 +		smp_scan_config(address, 0x400);
  73.705  }
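
get_bios_ebda() packages the old open-coded lookup: the BIOS data area word at
physical 0x40E holds the real-mode segment of the EBDA, so shifting it left by
4 yields the physical address (0 when no EBDA exists). A sketch of the helper
assumed here:

    /* Sketch: the BDA word at 0x40E is the EBDA segment; segment << 4
     * gives the physical address, or 0 when there is no EBDA. */
    static inline unsigned int get_bios_ebda(void)
    {
        unsigned int address = *(unsigned short *)phys_to_virt(0x40E);
        address <<= 4;
        return address;
    }
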
  73.706  
  73.707 -#else
  73.708 -
  73.709 -/*
  73.710 - * The Visual Workstation is Intel MP compliant in the hardware
  73.711 - * sense, but it doesn't have a BIOS(-configuration table).
  73.712 - * No problem for Linux.
  73.713 - */
  73.714 -void __init find_visws_smp(void)
  73.715 -{
  73.716 -	smp_found_config = 1;
  73.717 -
  73.718 -	phys_cpu_present_map |= 2; /* or in id 1 */
  73.719 -	apic_version[1] |= 0x10; /* integrated APIC */
  73.720 -	apic_version[0] |= 0x10;
  73.721 -
  73.722 -	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
  73.723 -}
  73.724 -
  73.725 -#endif
  73.726 -
  73.727 -/*
  73.728 - * - Intel MP Configuration Table
  73.729 - * - or SGI Visual Workstation configuration
  73.730 - */
  73.731 -void __init find_smp_config (void)
  73.732 -{
  73.733 -#ifdef CONFIG_X86_LOCAL_APIC
  73.734 -	find_intel_smp();
  73.735 -#endif
  73.736 -#ifdef CONFIG_VISWS
  73.737 -	find_visws_smp();
  73.738 -#endif
  73.739 -}
  73.740 -
  73.741 -
  73.742  /* --------------------------------------------------------------------------
  73.743                              ACPI-based MP Configuration
  73.744     -------------------------------------------------------------------------- */
  73.745 @@ -987,7 +836,7 @@ void __init mp_register_lapic (
  73.746  	struct mpc_config_processor processor;
  73.747  	int			boot_cpu = 0;
  73.748  	
  73.749 -	if (id >= MAX_APICS) {
  73.750 +	if (MAX_APICS - id <= 0) {
  73.751  		printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
  73.752  			id, MAX_APICS);
  73.753  		return;
  73.754 @@ -998,14 +847,7 @@ void __init mp_register_lapic (
  73.755  
  73.756  	processor.mpc_type = MP_PROCESSOR;
  73.757  	processor.mpc_apicid = id;
  73.758 -
  73.759 -	/*
  73.760 -	 * mp_register_lapic_address() which is called before the
  73.761 -	 * current function does the fixmap of FIX_APIC_BASE.
  73.762 -	 * Read in the correct APIC version from there
  73.763 -	 */
  73.764 -	processor.mpc_apicver = apic_read(APIC_LVR);
  73.765 -
  73.766 +	processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
  73.767  	processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
  73.768  	processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
  73.769  	processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
  73.770 @@ -1017,32 +859,32 @@ void __init mp_register_lapic (
  73.771  	MP_processor_info(&processor);
  73.772  }
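
The APIC version is now masked out of the raw APIC_LVR register value rather
than stored whole. A sketch of the accessor semantics, as in the Linux
apicdef.h of this era:

    /* The local APIC version register (offset 0x30) keeps the version
     * in its low byte; GET_APIC_VERSION extracts just that byte. */
    #define APIC_LVR            0x30
    #define GET_APIC_VERSION(x) ((x) & 0xFF)

    processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
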
  73.773  
  73.774 -#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/
  73.775 +#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
  73.776  
  73.777  #define MP_ISA_BUS		0
  73.778  #define MP_MAX_IOAPIC_PIN	127
  73.779  
  73.780  struct mp_ioapic_routing {
  73.781  	int			apic_id;
  73.782 -	int			irq_start;
  73.783 -	int			irq_end;
  73.784 +	int			gsi_base;
  73.785 +	int			gsi_end;
  73.786  	u32			pin_programmed[4];
  73.787  } mp_ioapic_routing[MAX_IO_APICS];
  73.788  
  73.789  
  73.790 -static int __init mp_find_ioapic (
  73.791 -	int			irq)
  73.792 +static int mp_find_ioapic (
  73.793 +	int			gsi)
  73.794  {
  73.795  	int			i = 0;
  73.796  
  73.797 -	/* Find the IOAPIC that manages this IRQ. */
  73.798 +	/* Find the IOAPIC that manages this GSI. */
  73.799  	for (i = 0; i < nr_ioapics; i++) {
  73.800 -		if ((irq >= mp_ioapic_routing[i].irq_start)
  73.801 -			&& (irq <= mp_ioapic_routing[i].irq_end))
  73.802 +		if ((gsi >= mp_ioapic_routing[i].gsi_base)
  73.803 +			&& (gsi <= mp_ioapic_routing[i].gsi_end))
  73.804  			return i;
  73.805  	}
  73.806  
  73.807 -	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for IRQ %d\n", irq);
  73.808 +	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
  73.809  
  73.810  	return -1;
  73.811  }
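
A GSI (ACPI's Global System Interrupt) is a flat number spanning all IOAPICs;
mp_find_ioapic() resolves which IOAPIC owns a GSI, and subtracting that
IOAPIC's gsi_base recovers the pin. A short usage sketch, as the override code
below does:

    /* Convert a flat GSI number into an (ioapic, pin) pair. */
    int ioapic = mp_find_ioapic(gsi);
    if (ioapic >= 0) {
        int pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
        /* ... program the redirection entry for (ioapic, pin) ... */
    }
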
  73.812 @@ -1051,7 +893,7 @@ static int __init mp_find_ioapic (
  73.813  void __init mp_register_ioapic (
  73.814  	u8			id, 
  73.815  	u32			address,
  73.816 -	u32			irq_base)
  73.817 +	u32			gsi_base)
  73.818  {
  73.819  	int			idx = 0;
  73.820  
  73.821 @@ -1077,19 +919,19 @@ void __init mp_register_ioapic (
  73.822  	mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
  73.823  	
  73.824  	/* 
  73.825 -	 * Build basic IRQ lookup table to facilitate irq->io_apic lookups
  73.826 -	 * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
  73.827 +	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
  73.828 +	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
  73.829  	 */
  73.830  	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
  73.831 -	mp_ioapic_routing[idx].irq_start = irq_base;
  73.832 -	mp_ioapic_routing[idx].irq_end = irq_base + 
  73.833 +	mp_ioapic_routing[idx].gsi_base = gsi_base;
  73.834 +	mp_ioapic_routing[idx].gsi_end = gsi_base + 
  73.835  		io_apic_get_redir_entries(idx);
  73.836  
  73.837  	printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
  73.838 -		"IRQ %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
  73.839 +		"GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
  73.840  		mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
  73.841 -		mp_ioapic_routing[idx].irq_start,
  73.842 -		mp_ioapic_routing[idx].irq_end);
  73.843 +		mp_ioapic_routing[idx].gsi_base,
  73.844 +		mp_ioapic_routing[idx].gsi_end);
  73.845  
  73.846  	return;
  73.847  }
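
Each registered IOAPIC claims the GSI range [gsi_base, gsi_base +
max_redirection_index]; io_apic_get_redir_entries() reads that maximum index
from the IOAPIC version register, whose bits 23:16 report the index of the
last redirection entry. A hedged sketch of that read (register layout per the
IOAPIC datasheet; io_apic_read() taking an indirect register index is assumed
from its use elsewhere in this tree):

    /* IOAPIC register 0x01 (version): bits 23:16 hold the index of the
     * last redirection table entry, i.e. pin count minus one. */
    static int io_apic_get_redir_entries(int ioapic)
    {
        unsigned int reg = io_apic_read(ioapic, 0x01);
        return (reg >> 16) & 0xFF;
    }
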
  73.848 @@ -1099,21 +941,19 @@ void __init mp_override_legacy_irq (
  73.849  	u8			bus_irq,
  73.850  	u8			polarity, 
  73.851  	u8			trigger, 
  73.852 -	u32			global_irq)
  73.853 +	u32			gsi)
  73.854  {
  73.855  	struct mpc_config_intsrc intsrc;
  73.856 -	int			i = 0;
  73.857 -	int			found = 0;
  73.858  	int			ioapic = -1;
  73.859  	int			pin = -1;
  73.860  
  73.861  	/* 
  73.862 -	 * Convert 'global_irq' to 'ioapic.pin'.
  73.863 +	 * Convert 'gsi' to 'ioapic.pin'.
  73.864  	 */
  73.865 -	ioapic = mp_find_ioapic(global_irq);
  73.866 +	ioapic = mp_find_ioapic(gsi);
  73.867  	if (ioapic < 0)
  73.868  		return;
  73.869 -	pin = global_irq - mp_ioapic_routing[ioapic].irq_start;
  73.870 +	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
  73.871  
  73.872  	/*
  73.873  	 * TBD: This check is for faulty timer entries, where the override
  73.874 @@ -1136,23 +976,9 @@ void __init mp_override_legacy_irq (
  73.875  		(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
  73.876  		intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
  73.877  
  73.878 -	/* 
  73.879 -	 * If an existing [IOAPIC.PIN -> IRQ] routing entry exists we override it.
  73.880 -	 * Otherwise create a new entry (e.g. global_irq == 2).
  73.881 -	 */
  73.882 -	for (i = 0; i < mp_irq_entries; i++) {
  73.883 -		if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus) 
  73.884 -			&& (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
  73.885 -			mp_irqs[i] = intsrc;
  73.886 -			found = 1;
  73.887 -			break;
  73.888 -		}
  73.889 -	}
  73.890 -	if (!found) {
  73.891 -		mp_irqs[mp_irq_entries] = intsrc;
  73.892 -		if (++mp_irq_entries == MAX_IRQ_SOURCES)
  73.893 -			panic("Max # of irq sources exceeded!\n");
  73.894 -	}
  73.895 +	mp_irqs[mp_irq_entries] = intsrc;
  73.896 +	if (++mp_irq_entries == MAX_IRQ_SOURCES)
  73.897 +		panic("Max # of irq sources exceeded!\n");
  73.898  
  73.899  	return;
  73.900  }
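
With ACPI now responsible for ordering, the override is appended
unconditionally instead of searching for and replacing an earlier entry. The
canonical use is the PC timer: the MADT commonly overrides ISA IRQ 0 to GSI 2,
since IOAPIC pin 0 is usually wired to ExtINT. An illustrative call (parameter
values hypothetical; 0 encodes "conforming" polarity/trigger):

    /* Example: ISA IRQ0 (PIT) rerouted to GSI2, flags "conforming". */
    mp_override_legacy_irq(/* bus_irq  */ 0,
                           /* polarity */ 0,
                           /* trigger  */ 0,
                           /* gsi      */ 2);
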
  73.901 @@ -1160,35 +986,22 @@ void __init mp_override_legacy_irq (
  73.902  
  73.903  void __init mp_config_acpi_legacy_irqs (void)
  73.904  {
  73.905 +	struct mpc_config_intsrc intsrc;
  73.906  	int			i = 0;
  73.907  	int			ioapic = -1;
  73.908  
  73.909 -	/*
  73.910 -	 * Initialize mp_irqs for IRQ configuration.
  73.911 -	 */
  73.912 -	unsigned char *bus_data;
  73.913 -	int count;
  73.914 -
  73.915 -	count = (MAX_MP_BUSSES * sizeof(int)) * 4;
  73.916 -	count += (MAX_IRQ_SOURCES * sizeof(int)) * 4;
  73.917 -	bus_data = (void *)alloc_xenheap_pages(get_order(count));
  73.918 -	if (!bus_data) {
  73.919 -		panic("Fatal: can't allocate bus memory for ACPI legacy IRQ!");
  73.920 -	}
  73.921 -	mp_bus_id_to_type = (int *)&bus_data[0];
  73.922 -	mp_bus_id_to_node = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int))];
  73.923 -	mp_bus_id_to_local = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 2];
  73.924 -	mp_bus_id_to_pci_bus = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 3];
  73.925 -	mp_irqs = (struct mpc_config_intsrc *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 4];
  73.926 -	for (i = 0; i < MAX_MP_BUSSES; ++i)
  73.927 -	  mp_bus_id_to_pci_bus[i] = -1;
  73.928 -
  73.929  	/* 
  73.930  	 * Fabricate the legacy ISA bus (bus #31).
  73.931  	 */
  73.932  	mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
  73.933  	Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
  73.934  
  73.935 +	/*
  73.936 +	 * ES7000 has no legacy identity mappings
  73.937 +	 */
  73.938 +	if (es7000_plat)
  73.939 +		return;
  73.940 +
  73.941  	/* 
  73.942  	 * Locate the IOAPIC that manages the ISA IRQs (0-15). 
  73.943  	 */
  73.944 @@ -1196,118 +1009,101 @@ void __init mp_config_acpi_legacy_irqs (
  73.945  	if (ioapic < 0)
  73.946  		return;
  73.947  
  73.948 +	intsrc.mpc_type = MP_INTSRC;
  73.949 +	intsrc.mpc_irqflag = 0;					/* Conforming */
  73.950 +	intsrc.mpc_srcbus = MP_ISA_BUS;
  73.951 +	intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
  73.952 +
  73.953  	/* 
  73.954 -	 * Use the default configuration for the IRQs 0-15.  These may be
  73.955 +	 * Use the default configuration for the IRQs 0-15, unless
  73.956  	 * overridden by (MADT) interrupt source override entries.
  73.957  	 */
  73.958  	for (i = 0; i < 16; i++) {
  73.959 -
  73.960 -		if (i == 2)
  73.961 -			continue;			/* Don't connect IRQ2 */
  73.962 +		int idx;
  73.963  
  73.964 -		mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC;
  73.965 -		mp_irqs[mp_irq_entries].mpc_irqflag = 0;	/* Conforming */
  73.966 -		mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS;
  73.967 -		mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
  73.968 -		mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT;
  73.969 -		mp_irqs[mp_irq_entries].mpc_srcbusirq = i;	   /* Identity mapped */
  73.970 -		mp_irqs[mp_irq_entries].mpc_dstirq = i;
  73.971 +		for (idx = 0; idx < mp_irq_entries; idx++) {
  73.972 +			struct mpc_config_intsrc *irq = mp_irqs + idx;
  73.973 +
  73.974 +			/* Do we already have a mapping for this ISA IRQ? */
  73.975 +			if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
  73.976 +				break;
  73.977 +
  73.978 +			/* Do we already have a mapping for this IOAPIC pin */
  73.979 +			if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
  73.980 +				(irq->mpc_dstirq == i))
  73.981 +				break;
  73.982 +		}
  73.983 +
  73.984 +		if (idx != mp_irq_entries) {
  73.985 +			printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
  73.986 +			continue;			/* IRQ already used */
  73.987 +		}
  73.988 +
  73.989 +		intsrc.mpc_irqtype = mp_INT;
  73.990 +		intsrc.mpc_srcbusirq = i;		   /* Identity mapped */
  73.991 +		intsrc.mpc_dstirq = i;
  73.992  
  73.993  		Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
  73.994 -			"%d-%d\n", 
  73.995 -			mp_irqs[mp_irq_entries].mpc_irqtype, 
  73.996 -			mp_irqs[mp_irq_entries].mpc_irqflag & 3, 
  73.997 -			(mp_irqs[mp_irq_entries].mpc_irqflag >> 2) & 3, 
  73.998 -			mp_irqs[mp_irq_entries].mpc_srcbus, 
  73.999 -			mp_irqs[mp_irq_entries].mpc_srcbusirq, 
 73.1000 -			mp_irqs[mp_irq_entries].mpc_dstapic, 
 73.1001 -			mp_irqs[mp_irq_entries].mpc_dstirq);
 73.1002 +			"%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
 73.1003 +			(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,