direct-io.hg
changeset 4808:487de0451d2b
bitkeeper revision 1.1389.1.36 (428075b7XUww-l4YTG96au0lbtOO_g)
Merge firebug.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
into firebug.cl.cam.ac.uk:/local/scratch/cl349/xen-unstable.bk
line diff
1.1 --- a/.rootkeys Tue May 03 14:42:00 2005 +0000 1.2 +++ b/.rootkeys Tue May 10 08:49:59 2005 +0000 1.3 @@ -21,6 +21,7 @@ 4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc 1.4 412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt 1.5 420b949cy9ZGzED74Fz_DaWlK7tT4g docs/misc/crashdb.txt 1.6 4251a1f82AexscYEiF4Iku8Gc_kWfQ docs/misc/grant-tables.txt 1.7 +424d462b5GuApQ_NyMsRFt9LbrsWow docs/misc/sedf_scheduler_mini-HOWTO.txt 1.8 40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html 1.9 410a4c2bAO_m_l4RsiiPHnZ4ixHWbQ docs/misc/xend.tex 1.10 3f9e7d564bWFB-Czjv1qdmE6o0GqNg docs/src/interface.tex 1.11 @@ -353,7 +354,6 @@ 41979925z1MsKU1SfuuheM1IFDQ_bA linux-2.6 1.12 4118b6a418gnL6AZsTdglC92YGqYTg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h 1.13 42539fb5A9hsS3NFQ-2VY4y1TONZZQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 1.14 40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io.h 1.15 -42778a69MXZVxch4pQqYsMPS0WnNSg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io_apic.h 1.16 40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h 1.17 40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h 1.18 40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h 1.19 @@ -745,6 +745,7 @@ 3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/lib 1.20 3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxc/xc_private.h 1.21 42337174PxyzzPk62raDiYCIsfStDg tools/libxc/xc_ptrace.c 1.22 4273458duzL--nsTfT6e_q6Kfij48g tools/libxc/xc_ptrace_core.c 1.23 +41ebbfe9U0b0kI-HgjK7VEY4EvW7_w tools/libxc/xc_sedf.c 1.24 41dde8b0pLfAKMs_L9Uri2hnzHiCRQ tools/libxc/xc_vmx_build.c 1.25 40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/libxutil/Makefile 1.26 40e033325Sjqs-_4TuzeUEprP_gYFg tools/libxutil/allocate.c 1.27 @@ -1188,7 +1189,7 @@ 421098b6lY2JzrV1oFDbrt7XQhtElg xen/arch/ 1.28 427664f5eygrc3nEhI3RKf0Y37PzyA xen/arch/ia64/xentime.c 1.29 3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/x86/Makefile 1.30 3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk 1.31 -3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c 1.32 +3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi/boot.c 1.33 3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c 1.34 42360b3244-Q6BpEKhR_A1YtG1wPNQ xen/arch/x86/audit.c 1.35 3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen/arch/x86/boot/mkelf32.c 1.36 @@ -1218,10 +1219,7 @@ 41aaf567tqrKGSTDK8OVeAbpeoccPw xen/arch/ 1.37 41aaf567a36esU-rUK7twPiv-yTFyw xen/arch/x86/mtrr/mtrr.h 1.38 41aaf567DcTL6pqVtLZJI5cSryyA1A xen/arch/x86/mtrr/state.c 1.39 3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/x86/nmi.c 1.40 -3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/x86/pci-irq.c 1.41 -3ddb79bcZ_2FxINljqNSkqa17ISyJw xen/arch/x86/pci-pc.c 1.42 -3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/x86/pci-x86.c 1.43 -3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/x86/pci-x86.h 1.44 +4051bcecFeq4DE70p4zGO5setf47CA xen/arch/x86/physdev.c 1.45 3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/x86/rwlock.c 1.46 3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/x86/setup.c 1.47 405b8599xI_PoEr3zZoJ2on-jdn7iw xen/arch/x86/shadow.c 1.48 @@ -1253,6 +1251,7 @@ 40e96d3akN3Hu_J5Bk-WXD8OGscrYQ xen/arch/ 1.49 422f27c8J9DQfCpegccMid59XhSmGA xen/arch/x86/x86_emulate.c 1.50 3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile 1.51 3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c 1.52 +427fa2d0J0LU2s5oKbsM0nTZ2iyd2Q xen/common/bitmap.c 1.53 3ddb79bdLX_P6iB7ILiblRLWvebapg xen/common/dom0_ops.c 1.54 3e6377e4i0c9GtKN65e99OtRbw3AZw 
xen/common/dom_mem_ops.c 1.55 3ddb79bdYO5D8Av12NHqPeSviav7cg xen/common/domain.c 1.56 @@ -1265,9 +1264,9 @@ 3ddb79bduhSEZI8xa7IbGQCpap5y2A xen/commo 1.57 41a61536SZbR6cj1ukWTb0DYU-vz9w xen/common/multicall.c 1.58 3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c 1.59 3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c 1.60 -4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c 1.61 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c 1.62 40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c 1.63 +41ebbfe9oF1BF3cH5v7yE3eOL9uPbA xen/common/sched_sedf.c 1.64 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c 1.65 3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c 1.66 3e7f358awXBC3Vw-wFRwPw18qL1khg xen/common/string.c 1.67 @@ -1276,50 +1275,29 @@ 3ddb79bd3zgV33PHdt-cgh3sxcb1hw xen/commo 1.68 4203fb92Qcy7mGpauBdq09J-WAqfoA xen/common/xmalloc.c 1.69 3ddb79c0ppNeJtjC4va8j41ADCnchA xen/drivers/Makefile 1.70 40715b2bi9gU43-cYzlmPDgreYQchw xen/drivers/acpi/Makefile 1.71 -40715b2bDxNCz5LFV8FAXihmYJZFUQ xen/drivers/acpi/acpi_ksyms.c 1.72 40715b2cNVOegtvyft_AHFKJYRprfA xen/drivers/acpi/tables.c 1.73 3e4a8cb7alzQCDKS7MlioPoHBKYkdQ xen/drivers/char/Makefile 1.74 4049e6bfNSIq7s7OV-Bd69QD0RpR2Q xen/drivers/char/console.c 1.75 3e4a8cb7nMChlro4wvOBo76n__iCFA xen/drivers/char/serial.c 1.76 -3ddb79beUWngyIhMHgyPtuTem4o4JA xen/drivers/pci/Makefile 1.77 -3ddb79beU9td0Mnm0VUMklerBa37qQ xen/drivers/pci/compat.c 1.78 -3ddb79beHkGQE58z5t5gyUCYiwOxvw xen/drivers/pci/gen-devlist.c 1.79 -3ddb79bfoQcFKLf5P6wZlDl36alWdQ xen/drivers/pci/names.c 1.80 -3ddb79bfyX7-pD6XdxY_mdNrJR20iw xen/drivers/pci/pci.c 1.81 -3ddb79bf2AS7YBGwooE_Kbv7XgUqNQ xen/drivers/pci/pci.ids 1.82 -3ddb79bf7sTn85WtP_8Nc2YEmmVExQ xen/drivers/pci/quirks.c 1.83 -3ddb79bfJaf0bkE1Y67bnll8-kjEPg xen/drivers/pci/setup-res.c 1.84 40715b2cFpte_UNWnBZW0Du7z9AhTQ xen/include/acpi/acconfig.h 1.85 -40715b2cEQWX-PaxEH30qI48K1krnQ xen/include/acpi/acdebug.h 1.86 -40715b2c_7j-oy3ZNAuqE3IFNPzArg xen/include/acpi/acdisasm.h 1.87 -40715b2cFdcPx-2FHGM1Q-M-592xYQ xen/include/acpi/acdispat.h 1.88 -40715b2cKqD_Lihnlmvnyl4AAl3EFw xen/include/acpi/acevents.h 1.89 40715b2ctNvVZ058w8eM8DR9hOat_A xen/include/acpi/acexcep.h 1.90 40715b2com8I01qcHcAw47e93XsCqQ xen/include/acpi/acglobal.h 1.91 40715b2cS1t4uI3sMsu-c0M4qqAIrw xen/include/acpi/achware.h 1.92 -40715b2chSz545A4Tq-y3WAtanzPMQ xen/include/acpi/acinterp.h 1.93 40715b2cGf23lRI58NphiaDQl698-w xen/include/acpi/aclocal.h 1.94 40715b2cdG7tCF2NMk0j1RCQQPzPXg xen/include/acpi/acmacros.h 1.95 -40715b2ckP2XZZDkSTehu6riuMogDA xen/include/acpi/acnamesp.h 1.96 40715b2c4AvHYn2-2YIyt3mx-Mm5tw xen/include/acpi/acobject.h 1.97 40715b2cPUXsHzmchvXx7QHAfW0nMw xen/include/acpi/acoutput.h 1.98 -40715b2cNbYpt9CHmCHg3RG7fBdACA xen/include/acpi/acparser.h 1.99 40715b2cWM_6zR14U9Tp0s_q8D002A xen/include/acpi/acpi.h 1.100 40715b2dcJDTiROgyMk9swD_veWktA xen/include/acpi/acpi_bus.h 1.101 40715b2dRFlZK6apnH7WkUCBdyFXWA xen/include/acpi/acpi_drivers.h 1.102 40715b2dtgZhNGAzlyBMe3kqve3mqw xen/include/acpi/acpiosxf.h 1.103 40715b2dpW5TY7n5rzCufsDhJVWuMQ xen/include/acpi/acpixf.h 1.104 -40715b2dp9-_D9LTjcvtf-Yopih5mQ xen/include/acpi/acresrc.h 1.105 40715b2djvd97KbIpt4wyJgxwqCqmg xen/include/acpi/acstruct.h 1.106 -40715b2dRIU9cQgmTjtCRNleIJDAYg xen/include/acpi/actables.h 1.107 40715b2dy8ECRkSo9x0tRRueAjPx1g xen/include/acpi/actbl.h 1.108 40715b2ds4J-XWn9Ix-lgBiJffNgxw xen/include/acpi/actbl1.h 1.109 40715b2d_aMKMjKKNImJR4km52KRHA xen/include/acpi/actbl2.h 1.110 -40715b2dxC03aemijgL-iDH_-BkKRw xen/include/acpi/actbl71.h 1.111 
40715b2d0oQUijKwEw6SDJ4LhD8c4g xen/include/acpi/actypes.h 1.112 40715b2dBByvcAtRpN5mafyEYLcBWA xen/include/acpi/acutils.h 1.113 -40715b2d_iPcZ0uUVGblPvjsHOwE5Q xen/include/acpi/amlcode.h 1.114 -40715b2d3CdS6dIpZDTiCJRlDG3LCA xen/include/acpi/amlresrc.h 1.115 40715b2dKRW7A71SNaeV6zfrEzYxPw xen/include/acpi/platform/acenv.h 1.116 40715b2d8fYydJMcODFrV1ocLklGDg xen/include/acpi/platform/acgcc.h 1.117 40715b2d1yZkqyAt0kgx2xEwsatuuA xen/include/acpi/platform/aclinux.h 1.118 @@ -1369,18 +1347,39 @@ 3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/inclu 1.119 4204e7acwXDo-5iAAiO2eQbtDeYZXA xen/include/asm-x86/init.h 1.120 3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-x86/io.h 1.121 3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-x86/io_apic.h 1.122 -42605109qxLSrHVE2SRhGXmgk907iw xen/include/asm-x86/io_ports.h 1.123 3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-x86/irq.h 1.124 404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-x86/ldt.h 1.125 -4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach_apic.h 1.126 +427fa2d0m8MOSSXT13zgb-q0fGA_Dw xen/include/asm-x86/mach-default/apm.h 1.127 +427fa2d0suK9Av7vsAXhsQxZjqpc_Q xen/include/asm-x86/mach-default/bios_ebda.h 1.128 +427fa2d0yC3KzLozoeK3Xa3uGVfIdw xen/include/asm-x86/mach-default/do_timer.h 1.129 +427fa2d0bWQkR1mW5OBYxn07AN-bDw xen/include/asm-x86/mach-default/entry_arch.h 1.130 +427fa2d0-SWcuwbdSypo4953bc2JdQ xen/include/asm-x86/mach-default/io_ports.h 1.131 +427fa2d0eyAl7LAeO-SVV4IW7lZPGQ xen/include/asm-x86/mach-default/irq_vectors.h 1.132 +427fa2d0df7VWG4KKpnKbKR2Cbd1_w xen/include/asm-x86/mach-default/irq_vectors_limits.h 1.133 +4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach-default/mach_apic.h 1.134 +427fa2d0I3FWjE2tWdOhlEOJn7stcg xen/include/asm-x86/mach-default/mach_apicdef.h 1.135 +427fa2d093fDS2gOBLcl7Yndzl7HmA xen/include/asm-x86/mach-default/mach_ipi.h 1.136 +427fa2d0Y7bD35d-FvDAeiJDIdRw2A xen/include/asm-x86/mach-default/mach_mpparse.h 1.137 +427fa2d0aLQgE9e1GY9ZP5jrMOC8pQ xen/include/asm-x86/mach-default/mach_mpspec.h 1.138 +427fa2d0fJ5nNn5ydJuOaZIL6F2fjQ xen/include/asm-x86/mach-default/mach_reboot.h 1.139 +427fa2d0VlN555TE68TjKMsrOoFXNA xen/include/asm-x86/mach-default/mach_time.h 1.140 +427fa2d0C0jWTKYjy7WJjGKeujSpSg xen/include/asm-x86/mach-default/mach_timer.h 1.141 +427fa2d0UXLiS1scpNrK26ZT6Oes3g xen/include/asm-x86/mach-default/mach_traps.h 1.142 +427fa2d0OfglYyfpDTD5DII4M0uZRw xen/include/asm-x86/mach-default/mach_wakecpu.h 1.143 +427fa2d0_OBPxdi5Qo04JWgZhz7BFA xen/include/asm-x86/mach-default/pci-functions.h 1.144 +427fa2d0mrTtXrliqDfLuJc5LLVXaA xen/include/asm-x86/mach-default/setup_arch_post.h 1.145 +427fa2d0Uoo7gC61Kep6Yy7Os367Hg xen/include/asm-x86/mach-default/setup_arch_pre.h 1.146 +427fa2d1EKnA8zCq2QLHiGOUqOgszg xen/include/asm-x86/mach-default/smpboot_hooks.h 1.147 3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-x86/mc146818rtc.h 1.148 40ec25fd7cSvbP7Biw91zaU_g0xsEQ xen/include/asm-x86/mm.h 1.149 3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h 1.150 +427fa2d1eJRenftJJnRyLsHKl1ghtA xen/include/asm-x86/mpspec_def.h 1.151 3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h 1.152 41aaf567Mi3OishhvrCtET1y-mxQBg xen/include/asm-x86/mtrr.h 1.153 41a61536MFhNalgbVmYGXAhQsPTZNw xen/include/asm-x86/multicall.h 1.154 3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h 1.155 3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-x86/pci.h 1.156 +42422fb0FVX-TJkSvAXnbfwMf19XFA xen/include/asm-x86/physdev.h 1.157 3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-x86/processor.h 1.158 40cf1596bim9F9DNdV75klgRSZ6Y2A 
xen/include/asm-x86/regs.h 1.159 3ddb79c2plf7ciNgoNjU-RsbUzawsw xen/include/asm-x86/rwlock.h 1.160 @@ -1436,11 +1435,15 @@ 4266bd01Ul-pC01ZVvBkhBnv5eqzvw xen/inclu 1.161 3ddb79c25UE59iu4JJcbRalx95mvcg xen/include/public/xen.h 1.162 3e397e66m2tO3s-J8Jnr7Ws_tGoPTg xen/include/xen/ac_timer.h 1.163 40715b2epYl2jBbxzz9CI2rgIca7Zg xen/include/xen/acpi.h 1.164 +422f0995xCgnbsVhTjSncnqIABs64g xen/include/xen/adv_sched_hist.h 1.165 +427fa2d1wyoVbvCyZRLposYjA_D_4g xen/include/xen/bitmap.h 1.166 +427fa2d1ItcC_yWuBUkhc7adedP5ow xen/include/xen/bitops.h 1.167 3ddb79c0c0cX_DZE209-Bb-Rx1v-Aw xen/include/xen/cache.h 1.168 41f2cea7Yna7xc0X9fyavIjoSFFeVg xen/include/xen/compile.h.in 1.169 3f840f12CkbYSlwMrY2S11Mpyxg7Nw xen/include/xen/compiler.h 1.170 3ddb79c259jh8hE7vre_8NuE7nwNSA xen/include/xen/config.h 1.171 3eb165e0eawr3R-p2ZQtSdLWtLRN_A xen/include/xen/console.h 1.172 +427fa2d1bQCWgEQqTTh5MjG4MPEH9g xen/include/xen/cpumask.h 1.173 3ddb79c1V44RD26YqCUm-kqIupM37A xen/include/xen/ctype.h 1.174 3ddb79c05DdHQ0UxX_jKsXdR4QlMCA xen/include/xen/delay.h 1.175 40f2b4a2hC3HtChu-ArD8LyojxWMjg xen/include/xen/domain.h 1.176 @@ -1463,7 +1466,6 @@ 3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen/inclu 1.177 3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen/include/xen/pci_ids.h 1.178 3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xen/perfc.h 1.179 3e54c38de9SUSYSAwxDf_DwkpAnQFA xen/include/xen/perfc_defn.h 1.180 -42422fb0FVX-TJkSvAXnbfwMf19XFA xen/include/xen/physdev.h 1.181 3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen/include/xen/prefetch.h 1.182 3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xen/reboot.h 1.183 40589969nPq3DMzv24RDb5LXE9brHw xen/include/xen/sched-if.h
--- a/BitKeeper/etc/logging_ok	Tue May 03 14:42:00 2005 +0000
+++ b/BitKeeper/etc/logging_ok	Tue May 10 08:49:59 2005 +0000
@@ -60,6 +60,7 @@ kmacy@shemp.lab.netapp.com
 kraxel@bytesex.org
 laudney@eclipse.(none)
 leendert@watson.ibm.com
+lists-xen@pimb.org
 lynx@idefix.cl.cam.ac.uk
 maf46@burn.cl.cam.ac.uk
 mafetter@fleming.research
@@ -99,3 +100,4 @@ vh249@airwolf.cl.cam.ac.uk
 vh249@arcadians.cl.cam.ac.uk
 xen-ia64.adm@bkbits.net
 xenbk@gandalf.hpl.hp.com
+ydroneaud@mandriva.com
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/misc/sedf_scheduler_mini-HOWTO.txt	Tue May 10 08:49:59 2005 +0000
@@ -0,0 +1,44 @@
+sEDF scheduler
+--------------
+Author:
+   Stephan.Diestelhorst@{cl.cam.ac.uk, inf.tu-dresden.de}
+
+Overview:
+  This scheduler provides weighted CPU sharing in an intuitive way and
+  uses realtime algorithms to ensure time guarantees.
+
+Usage:
+   -add "sched=sedf" on Xen's boot command-line
+   -create domains as usual
+   -use "xm sedf <dom-id> <period> <slice> <latency-hint> <extra> <weight>"
+    Where:
+      -period/slice are the normal EDF scheduling parameters in nanosecs
+      -latency-hint is the scaled period in case the domain is doing heavy I/O
+         (unused by the currently compiled version)
+      -extra is a flag (0/1), which controls whether the domain can run in
+       extra-time
+      -weight is mutually exclusive with period/slice and specifies another
+       way of setting a domain's CPU slice
+
+Examples:
+ normal EDF (20ms/5ms):
+  xm sedf <dom-id> 20000000 5000000 0 0 0
+
+ best-effort domains (i.e. non-realtime):
+  xm sedf <dom-id> 20000000 0 0 1 0
+
+ normal EDF (20ms/5ms) + share of extra-time:
+  xm sedf <dom-id> 20000000 5000000 0 1 0
+
+ 4 domains with weights 2:3:4:2
+  xm sedf <d1> 0 0 0 0 2
+  xm sedf <d2> 0 0 0 0 3
+  xm sedf <d3> 0 0 0 0 4
+  xm sedf <d4> 0 0 0 0 2
+
+ 1 fully-specified (10ms/3ms) domain, 3 other domains share
+ available rest in 2:7:3 ratio:
+  xm sedf <d1> 10000000 3000000 0 0 0
+  xm sedf <d2> 0 0 0 0 2
+  xm sedf <d3> 0 0 0 0 7
+  xm sedf <d4> 0 0 0 0 3
\ No newline at end of file
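As a quick cross-check of the parameters above: an EDF reservation guarantees a domain slice/period of one CPU. A minimal standalone sketch (not part of this changeset) for the 20ms/5ms example:

/* Minimal sketch, not from this changeset: the CPU share guaranteed by
 * an EDF reservation is slice/period; values are in nanoseconds, as in
 * the "xm sedf" examples above. */
#include <stdio.h>

int main(void)
{
    unsigned long long period = 20000000ULL;  /* 20 ms */
    unsigned long long slice  =  5000000ULL;  /*  5 ms */

    printf("guaranteed CPU share: %.0f%%\n",
           100.0 * (double)slice / (double)period);  /* prints 25% */
    return 0;
}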
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c	Tue May 03 14:42:00 2005 +0000
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c	Tue May 10 08:49:59 2005 +0000
@@ -87,6 +87,12 @@
 
 /* XEN specific defines */
 #include <machine/xen_intr.h>
+#include <vm/vm.h>          /* needed by machine/pmap.h */
+#include <vm/pmap.h>        /* needed by machine/pmap.h */
+#include <machine/pmap.h>   /* needed by xen-os.h */
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen-os.h> /* needed by xenfunc.h */
+#include <machine/xenfunc.h>
 
 /*
  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -129,7 +135,15 @@ static uint64_t shadow_system_time;
 static uint32_t shadow_time_version;
 static struct timeval shadow_tv;
 
+#define DEFINE_PER_CPU(type, name) \
+    __typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu)  (*((void)cpu, &per_cpu__##var))
+
+
 static uint64_t processed_system_time;/* System time (ns) at last processing. */
+static DEFINE_PER_CPU(uint64_t, processed_system_time);
+
 
 #define NS_PER_TICK (1000000000ULL/hz)
 
@@ -202,18 +216,19 @@ static struct timecounter xen_timecounte
 static void
 clkintr(struct clockframe *frame)
 {
-	int64_t delta;
+	int64_t cpu_delta, delta;
+	int cpu = smp_processor_id();
 	long ticks = 0;
 
-
 	do {
 		__get_time_values_from_xen();
-		delta = (int64_t)(shadow_system_time +
-				  xen_get_offset() * 1000 -
-				  processed_system_time);
+		delta = cpu_delta = (int64_t)shadow_system_time +
+			(int64_t)xen_get_offset() * 1000;
+		delta -= processed_system_time;
+		cpu_delta -= per_cpu(processed_system_time, cpu);
 	} while (!TIME_VALUES_UP_TO_DATE);
 
-	if (unlikely(delta < 0)) {
+	if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) {
 		printk("Timer ISR: Time went backwards: %lld\n", delta);
 		return;
 	}
@@ -225,15 +240,28 @@ clkintr(struct clockframe *frame)
 		delta -= NS_PER_TICK;
 		processed_system_time += NS_PER_TICK;
 	}
-
-	if (ticks > 0) {
-		if (frame)
-			timer_func(frame);
-#ifdef SMP
-		if (timer_func == hardclock && frame)
-			forward_hardclock();
+	/* Local CPU jiffy work. */
+	while (cpu_delta >= NS_PER_TICK) {
+		cpu_delta -= NS_PER_TICK;
+		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
+#if 0
+		update_process_times(user_mode(regs));
+		profile_tick(CPU_PROFILING, regs);
 #endif
 	}
+	if (ticks > 0) {
+		if (frame) timer_func(frame);
+	}
+
+	if (cpu != 0)
+		return;
+	/*
+	 * Take synchronised time from Xen once a minute if we're not
+	 * synchronised ourselves, and we haven't chosen to keep an independent
+	 * time base.
+	 */
+
+	/* XXX TODO */
 }
 
 #include "opt_ddb.h"
@@ -429,7 +457,7 @@ resettodr()
  * Start clocks running.
  */
 void
-cpu_initclocks()
+cpu_initclocks(void)
 {
 	int diag;
 	int time_irq = bind_virq_to_irq(VIRQ_TIMER);
@@ -445,8 +473,26 @@ cpu_initclocks()
 	/* initialize xen values */
 	__get_time_values_from_xen();
 	processed_system_time = shadow_system_time;
+	per_cpu(processed_system_time, 0) = processed_system_time;
+
 }
 
+#ifdef SMP
+void
+ap_cpu_initclocks(void)
+{
+	int irq;
+	int cpu = smp_processor_id();
+
+	per_cpu(processed_system_time, cpu) = shadow_system_time;
+
+	irq = bind_virq_to_irq(VIRQ_TIMER);
+	PCPU_SET(time_irq, irq);
+	PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr,
+				  NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
+}
+#endif
+
 void
 cpu_startprofclock(void)
 {
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c	Tue May 03 14:42:00 2005 +0000
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c	Tue May 10 08:49:59 2005 +0000
@@ -79,9 +79,14 @@ evtchn_do_upcall(struct intrframe *frame
             l2 &= ~(1 << l2i);
 
             port = (l1i << 5) + l2i;
+	    irq = evtchn_to_irq[port];
+#ifdef SMP
+	    if (irq == PCPU_GET(cpuast))
+		    continue;
+#endif
 	    if ( (owned = mtx_owned(&sched_lock)) != 0 )
 		    mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
-            if ( (irq = evtchn_to_irq[port]) != -1 ) {
+            if ( irq != -1 ) {
 		    struct intsrc *isrc = intr_lookup_source(irq);
 		    intr_execute_handlers(isrc, frame);
 	    } else {
@@ -584,6 +589,7 @@ ap_evtchn_init(int cpu)
         PCPU_GET(virq_to_irq)[i] = -1;
 }
 
+
 static void
 evtchn_init(void *dummy __unused)
 {
@@ -591,13 +597,6 @@ evtchn_init(void *dummy __unused)
     struct xenpic *xp;
     struct xenpic_intsrc *pin;
 
-    /*
-     * xenpic_lock: in order to allow an interrupt to occur in a critical
-     *              section, to set pcpu->ipending (etc...) properly, we
-     *              must be able to get the icu lock, so it can't be
-     *              under witness.
-     */
-    mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF);
 
     /* XXX -- expedience hack */
     PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]);
@@ -657,3 +656,11 @@ evtchn_init(void *dummy __unused)
 }
 
 SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
+/*
+ * xenpic_lock: in order to allow an interrupt to occur in a critical
+ *              section, to set pcpu->ipending (etc...) properly, we
+ *              must be able to get the icu lock, so it can't be
+ *              under witness.
+ */
+
+MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_DEF|MTX_NOWITNESS);
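The evtchn.c hunk above replaces the explicit mtx_init() inside evtchn_init() with a MTX_SYSINIT declaration carrying MTX_NOWITNESS, for the reason given in the moved comment. A minimal sketch of that FreeBSD pattern, with a hypothetical lock name:

/* Hypothetical example of the MTX_SYSINIT pattern used above: the
 * mutex is initialized automatically during kernel startup, so no
 * init function needs to call mtx_init() by hand.  MTX_NOWITNESS
 * keeps the lock out of witness, as the comment above explains. */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static struct mtx example_lock;

MTX_SYSINIT(example_lock, &example_lock, "example", MTX_DEF | MTX_NOWITNESS);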
7.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c Tue May 03 14:42:00 2005 +0000 7.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c Tue May 10 08:49:59 2005 +0000 7.3 @@ -78,6 +78,7 @@ 7.4 #include <sys/sched.h> 7.5 #include <sys/sysent.h> 7.6 #include <sys/sysctl.h> 7.7 +#include <sys/smp.h> 7.8 #include <sys/ucontext.h> 7.9 #include <sys/vmmeter.h> 7.10 #include <sys/bus.h> 7.11 @@ -883,14 +884,6 @@ SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_ 7.12 static void 7.13 cpu_idle_default(void) 7.14 { 7.15 -#if 0 7.16 - /* 7.17 - * we must absolutely guarentee that hlt is the 7.18 - * absolute next instruction after sti or we 7.19 - * introduce a timing window. 7.20 - */ 7.21 - __asm __volatile("sti; hlt"); 7.22 -#endif 7.23 idle_block(); 7.24 enable_intr(); 7.25 } 7.26 @@ -1376,6 +1369,7 @@ pteinfo_t *pteinfo_list; 7.27 unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0)); 7.28 int preemptable; 7.29 int gdt_set; 7.30 +static int ncpus; 7.31 7.32 /* Linux infection */ 7.33 #define PAGE_OFFSET KERNBASE 7.34 @@ -1387,6 +1381,10 @@ initvalues(start_info_t *startinfo) 7.35 int i; 7.36 vm_paddr_t pdir_shadow_ma, KPTphys; 7.37 vm_offset_t *pdir_shadow; 7.38 +#ifdef SMP 7.39 + int j; 7.40 +#endif 7.41 + 7.42 #ifdef WRITABLE_PAGETABLES 7.43 printk("using writable pagetables\n"); 7.44 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 7.45 @@ -1447,18 +1445,19 @@ initvalues(start_info_t *startinfo) 7.46 7.47 7.48 #ifdef SMP 7.49 +#if 0 7.50 /* allocate cpu0 private page */ 7.51 cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT)); 7.52 tmpindex++; 7.53 - 7.54 +#endif 7.55 /* allocate SMP page table */ 7.56 SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT)); 7.57 - 7.58 +#if 0 7.59 /* Map the private page into the SMP page table */ 7.60 SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A; 7.61 - 7.62 +#endif 7.63 /* map SMP page table RO */ 7.64 - PT_SET_MA(SMPpt, vtomach(SMPpt) & ~PG_RW); 7.65 + PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW); 7.66 7.67 /* put the page table into the page directory */ 7.68 xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), 7.69 @@ -1496,44 +1495,61 @@ initvalues(start_info_t *startinfo) 7.70 tmpindex++; 7.71 7.72 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine; 7.73 + ncpus = HYPERVISOR_shared_info->n_vcpu; 7.74 +#ifdef SMP 7.75 + for (i = 0; i < ncpus; i++) { 7.76 + int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE; 7.77 + for (j = 0; j < npages; j++) { 7.78 + vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT); 7.79 + tmpindex++; 7.80 + PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW | PG_M, FALSE); 7.81 + } 7.82 + } 7.83 + xen_flush_queue(); 7.84 +#endif 7.85 7.86 init_first = tmpindex; 7.87 7.88 } 7.89 7.90 + 7.91 +trap_info_t trap_table[] = { 7.92 + { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, 7.93 + { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, 7.94 + { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, 7.95 + { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, 7.96 + /* This is UPL on Linux and KPL on BSD */ 7.97 + { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, 7.98 + { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, 7.99 + { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, 7.100 + /* 7.101 + * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, 7.102 + * no handler for double fault 7.103 + */ 7.104 + { 9, 
0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, 7.105 + {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, 7.106 + {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, 7.107 + {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, 7.108 + {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, 7.109 + {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, 7.110 + {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, 7.111 + {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, 7.112 + {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, 7.113 + {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, 7.114 + {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, 7.115 + {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, 7.116 + { 0, 0, 0, 0 } 7.117 +}; 7.118 + 7.119 void 7.120 init386(void) 7.121 { 7.122 int gsel_tss, metadata_missing, off, x, error; 7.123 struct pcpu *pc; 7.124 unsigned long gdtmachpfn; 7.125 - trap_info_t trap_table[] = { 7.126 - { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, 7.127 - { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, 7.128 - { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, 7.129 - { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, 7.130 - /* This is UPL on Linux and KPL on BSD */ 7.131 - { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, 7.132 - { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, 7.133 - { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, 7.134 - /* 7.135 - * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, 7.136 - * no handler for double fault 7.137 - */ 7.138 - { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, 7.139 - {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, 7.140 - {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, 7.141 - {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, 7.142 - {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, 7.143 - {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, 7.144 - {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, 7.145 - {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, 7.146 - {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, 7.147 - {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, 7.148 - {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, 7.149 - {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, 7.150 - { 0, 0, 0, 0 } 7.151 - }; 7.152 +#ifdef SMP 7.153 + int i; 7.154 +#endif 7.155 proc0.p_uarea = proc0uarea; 7.156 thread0.td_kstack = proc0kstack; 7.157 thread0.td_pcb = (struct pcb *) 7.158 @@ -1583,26 +1599,42 @@ init386(void) 7.159 gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); 7.160 #endif 7.161 #ifdef SMP 7.162 - /* this correspond to the cpu private page as mapped into the SMP page 7.163 - * table in initvalues 7.164 + /* XXX this will blow up if there are more than 512/NGDT vcpus - will never 7.165 + * be an issue in the real world but should add an assert on general principles 7.166 + * we'll likely blow up when we hit LAST_RESERVED_GDT_ENTRY, at which point we 7.167 + * would need to start allocating more pages for the GDT 7.168 */ 7.169 pc = &SMP_prvspace[0].pcpu; 7.170 - 
gdt_segs[GPRIV_SEL].ssd_limit = 7.171 - atop(sizeof(struct privatespace) - 1); 7.172 + for (i = 0; i < ncpus; i++) { 7.173 + cpu_add(i, (i == 0)); 7.174 + 7.175 + gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i]; 7.176 + gdt_segs[GPRIV_SEL].ssd_limit = 7.177 + atop(sizeof(struct privatespace) - 1); 7.178 + gdt_segs[GPROC0_SEL].ssd_base = 7.179 + (int) &SMP_prvspace[i].pcpu.pc_common_tss; 7.180 + SMP_prvspace[i].pcpu.pc_prvspace = 7.181 + &SMP_prvspace[i].pcpu; 7.182 + 7.183 + for (x = 0; x < NGDT; x++) { 7.184 + ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd); 7.185 + } 7.186 + } 7.187 #else 7.188 pc = &__pcpu; 7.189 gdt_segs[GPRIV_SEL].ssd_limit = 7.190 atop(sizeof(struct pcpu) - 1); 7.191 -#endif 7.192 gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 7.193 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 7.194 for (x = 0; x < NGDT; x++) 7.195 ssdtosd(&gdt_segs[x], &gdt[x].sd); 7.196 +#endif 7.197 + 7.198 7.199 PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW); 7.200 gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; 7.201 - if ((error = HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1))) 7.202 - panic("set_gdt failed"); 7.203 + PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0); 7.204 + 7.205 7.206 lgdt_finish(); 7.207 gdt_set = 1;
8.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c Tue May 03 14:42:00 2005 +0000 8.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c Tue May 10 08:49:59 2005 +0000 8.3 @@ -83,7 +83,16 @@ 8.4 #include <machine/specialreg.h> 8.5 #include <machine/privatespace.h> 8.6 8.7 + 8.8 +/* XEN includes */ 8.9 #include <machine/xenfunc.h> 8.10 +#include <machine/xen_intr.h> 8.11 + 8.12 +void Xhypervisor_callback(void); 8.13 +void failsafe_callback(void); 8.14 + 8.15 +/***************/ 8.16 + 8.17 8.18 #define WARMBOOT_TARGET 0 8.19 #define WARMBOOT_OFF (KERNBASE + 0x0467) 8.20 @@ -94,6 +103,10 @@ 8.21 #define BIOS_RESET (0x0f) 8.22 #define BIOS_WARM (0x0a) 8.23 8.24 + 8.25 +#undef POSTCODE 8.26 +#define POSTCODE(x) 8.27 + 8.28 /* 8.29 * this code MUST be enabled here and in mpboot.s. 8.30 * it follows the very early stages of AP boot by placing values in CMOS ram. 8.31 @@ -175,6 +188,8 @@ extern pt_entry_t *KPTphys; 8.32 /* SMP page table page */ 8.33 extern pt_entry_t *SMPpt; 8.34 8.35 +extern trap_info_t trap_table[]; 8.36 + 8.37 struct pcb stoppcbs[MAXCPU]; 8.38 8.39 /* Variables needed for SMP tlb shootdown. */ 8.40 @@ -208,7 +223,9 @@ static u_int boot_address; 8.41 8.42 static void set_logical_apic_ids(void); 8.43 static int start_all_aps(void); 8.44 +#if 0 8.45 static void install_ap_tramp(void); 8.46 +#endif 8.47 static int start_ap(int apic_id); 8.48 static void release_aps(void *dummy); 8.49 8.50 @@ -314,6 +331,7 @@ int 8.51 cpu_mp_probe(void) 8.52 { 8.53 8.54 + mp_ncpus = HYPERVISOR_shared_info->n_vcpu; 8.55 /* 8.56 * Always record BSP in CPU map so that the mbuf init code works 8.57 * correctly. 8.58 @@ -342,20 +360,24 @@ cpu_mp_probe(void) 8.59 return (1); 8.60 } 8.61 8.62 -/* 8.63 - * Initialize the IPI handlers and start up the AP's. 8.64 - */ 8.65 -void 8.66 -cpu_mp_start(void) 8.67 +static void 8.68 +cpu_mp_ipi_init(void) 8.69 { 8.70 - int i; 8.71 - 8.72 - POSTCODE(MP_START_POST); 8.73 - 8.74 - /* Initialize the logical ID to APIC ID table. */ 8.75 - for (i = 0; i < MAXCPU; i++) 8.76 - cpu_apic_ids[i] = -1; 8.77 - 8.78 + int irq; 8.79 + int cpu = smp_processor_id(); 8.80 + /* 8.81 + * these are not needed by XenFreeBSD - from Keir: 8.82 + * For TLB-flush related IPIs, Xen has hypercalls 8.83 + * you should use instead. You can pass a pointer 8.84 + * to a vcpu bitmap to update_va_mapping(), and to 8.85 + * MMUEXT_flush_tlb_multi and MMEXT_invlpg_multi. 8.86 + * Xen will then make sure that those vcpus get 8.87 + * flushed appropriately before returning to the 8.88 + * caller. 8.89 + * There is also no indication that we need to forward 8.90 + * clock interrupts. 8.91 + */ 8.92 +#if 0 8.93 /* Install an inter-CPU IPI for TLB invalidation */ 8.94 setidt(IPI_INVLTLB, IDTVEC(invltlb), 8.95 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 8.96 @@ -371,22 +393,69 @@ cpu_mp_start(void) 8.97 /* Install an inter-CPU IPI for forwarding statclock() */ 8.98 setidt(IPI_STATCLOCK, IDTVEC(statclock), 8.99 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 8.100 - 8.101 +#endif 8.102 + 8.103 + /* 8.104 + * These can all be consolidated. For now leaving 8.105 + * as individual IPIs. 
8.106 + * 8.107 + */ 8.108 +#if 0 8.109 /* Install an inter-CPU IPI for lazy pmap release */ 8.110 setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), 8.111 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 8.112 +#else 8.113 + irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP); 8.114 + PCPU_SET(lazypmap, irq); 8.115 + PANIC_IF(intr_add_handler("pmap_lazyfix", irq, 8.116 + (driver_intr_t *)pmap_lazyfix_action, 8.117 + NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); 8.118 +#endif 8.119 8.120 +#if 0 8.121 /* Install an inter-CPU IPI for all-CPU rendezvous */ 8.122 setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), 8.123 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 8.124 +#else 8.125 + irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS); 8.126 + PCPU_SET(rendezvous, irq); 8.127 + PANIC_IF(intr_add_handler("smp_rendezvous", irq, 8.128 + (driver_intr_t *)smp_rendezvous_action, 8.129 + NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); 8.130 +#endif 8.131 8.132 +#if 0 8.133 /* Install an inter-CPU IPI for forcing an additional software trap */ 8.134 setidt(IPI_AST, IDTVEC(cpuast), 8.135 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 8.136 - 8.137 +#else 8.138 + irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST); 8.139 + PCPU_SET(cpuast, irq); 8.140 +#endif 8.141 + /* XXX ignore for now */ 8.142 +#if 0 8.143 /* Install an inter-CPU IPI for CPU stop/restart */ 8.144 setidt(IPI_STOP, IDTVEC(cpustop), 8.145 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 8.146 +#endif 8.147 + 8.148 +} 8.149 + 8.150 +SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL); 8.151 + 8.152 +/* 8.153 + * Initialize the IPI handlers and start up the AP's. 8.154 + */ 8.155 +void 8.156 +cpu_mp_start(void) /* --- Start here --- */ 8.157 +{ 8.158 + int i; 8.159 + 8.160 + POSTCODE(MP_START_POST); 8.161 + 8.162 + /* Initialize the logical ID to APIC ID table. */ 8.163 + for (i = 0; i < MAXCPU; i++) 8.164 + cpu_apic_ids[i] = -1; 8.165 8.166 8.167 /* Set boot_cpu_id if needed. */ 8.168 @@ -437,35 +506,44 @@ cpu_mp_announce(void) 8.169 void 8.170 init_secondary(void) 8.171 { 8.172 - int gsel_tss; 8.173 - int x, myid; 8.174 + int myid; 8.175 + unsigned long gdtmachpfn; 8.176 + printk("MADE IT!!"); 8.177 + 8.178 #if 0 8.179 u_int cr0; 8.180 #endif 8.181 + /* Steps to booting SMP on xen as gleaned from XenLinux: 8.182 + * - cpu_init() - processor specific initialization 8.183 + * - smp_callin() 8.184 + * - wait 2s for BP to finish its startup sequence 8.185 + * - map_cpu_to_logical_apicid() 8.186 + * - save cpuid info 8.187 + * - set bit in callin map to let master (BP?) continue 8.188 + * - local setup timer() - per cpu timer initialization 8.189 + * - ldebug_setup() - bind debug IRQ to local CPU. 8.190 + * - smp_intr_init() - IPI setup that we do in cpu_mp_start 8.191 + * - local_irq_enable() - enable interrupts locally 8.192 + * - cpu_set(id, map) - announce that we're up 8.193 + * - cpu_idle() - make us schedulable 8.194 + */ 8.195 + 8.196 + 8.197 /* bootAP is set in start_ap() to our ID. 
*/ 8.198 myid = bootAP; 8.199 - gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid]; 8.200 - gdt_segs[GPROC0_SEL].ssd_base = 8.201 - (int) &SMP_prvspace[myid].pcpu.pc_common_tss; 8.202 - SMP_prvspace[myid].pcpu.pc_prvspace = 8.203 - &SMP_prvspace[myid].pcpu; 8.204 + 8.205 + gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; 8.206 + PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0); 8.207 8.208 - for (x = 0; x < NGDT; x++) { 8.209 - ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 8.210 - } 8.211 + 8.212 + lgdt_finish(); 8.213 8.214 -#if 0 8.215 - r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 8.216 - r_gdt.rd_base = (int) &gdt[myid * NGDT]; 8.217 - lgdt(&r_gdt); /* does magic intra-segment return */ 8.218 + PCPU_SET(cpuid, myid); 8.219 8.220 - lidt(&r_idt); 8.221 - lldt(_default_ldt); 8.222 -#endif 8.223 + 8.224 + set_user_ldt((struct mdproc *)_default_ldt); 8.225 PCPU_SET(currentldt, _default_ldt); 8.226 8.227 - gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 8.228 - gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 8.229 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 8.230 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 8.231 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 8.232 @@ -557,6 +635,13 @@ init_secondary(void) 8.233 while (smp_started == 0) 8.234 ia32_pause(); 8.235 8.236 + /* need to wait until now to setup the IPIs as SI_SUB_CPU is 8.237 + * much earlier than SI_SUB_INTR 8.238 + */ 8.239 + ap_evtchn_init(myid); 8.240 + ap_cpu_initclocks(); 8.241 + cpu_mp_ipi_init(); 8.242 + 8.243 /* ok, now grab sched_lock and enter the scheduler */ 8.244 mtx_lock_spin(&sched_lock); 8.245 8.246 @@ -610,28 +695,35 @@ set_logical_apic_ids(void) 8.247 static int 8.248 start_all_aps(void) 8.249 { 8.250 -#ifndef PC98 8.251 - u_char mpbiosreason; 8.252 -#endif 8.253 - u_long mpbioswarmvec; 8.254 struct pcpu *pc; 8.255 char *stack; 8.256 - uintptr_t kptbase; 8.257 - int i, pg, apic_id, cpu; 8.258 + int i, apic_id, cpu; 8.259 + 8.260 + /* 8.261 + * This function corresponds most closely to 8.262 + * smp_boot_cpus in XenLinux - the sequence there 8.263 + * is: 8.264 + * - check if SMP config is found - if not: 8.265 + * - clear the I/O APIC IRQs 8.266 + * - map cpu to logical apicid 8.267 + * - exit 8.268 + * - smp_intr_init - IPI initialization 8.269 + * - map cpu to logical apicid 8.270 + * - boot each of the vcpus 8.271 + * - clear and then construct the cpu sibling [logical CPUs] map. 
8.272 + * 8.273 + */ 8.274 8.275 POSTCODE(START_ALL_APS_POST); 8.276 8.277 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 8.278 - 8.279 +#if 0 8.280 /* install the AP 1st level boot code */ 8.281 install_ap_tramp(); 8.282 8.283 /* save the current value of the warm-start vector */ 8.284 mpbioswarmvec = *((u_long *) WARMBOOT_OFF); 8.285 -#ifndef PC98 8.286 - outb(CMOS_REG, BIOS_RESET); 8.287 - mpbiosreason = inb(CMOS_DATA); 8.288 -#endif 8.289 + 8.290 8.291 /* set up temporary P==V mapping for AP boot */ 8.292 /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 8.293 @@ -640,7 +732,7 @@ start_all_aps(void) 8.294 PTD[i] = (pd_entry_t)(PG_V | PG_RW | 8.295 ((kptbase + i * PAGE_SIZE) & PG_FRAME)); 8.296 invltlb(); 8.297 - 8.298 +#endif 8.299 /* start each AP */ 8.300 for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { 8.301 if (!cpu_info[apic_id].cpu_present || 8.302 @@ -650,7 +742,7 @@ start_all_aps(void) 8.303 8.304 /* save APIC ID for this logical ID */ 8.305 cpu_apic_ids[cpu] = apic_id; 8.306 - 8.307 +#if 0 8.308 /* first page of AP's private space */ 8.309 pg = cpu * i386_btop(sizeof(struct privatespace)); 8.310 8.311 @@ -665,11 +757,14 @@ start_all_aps(void) 8.312 for (i = 0; i < KSTACK_PAGES; i++) 8.313 SMPpt[pg + 1 + i] = (pt_entry_t) 8.314 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); 8.315 +#endif 8.316 + pc = &SMP_prvspace[cpu].pcpu; 8.317 8.318 /* prime data page for it to use */ 8.319 pcpu_init(pc, cpu, sizeof(struct pcpu)); 8.320 pc->pc_apic_id = apic_id; 8.321 8.322 +#if 0 8.323 /* setup a vector to our boot code */ 8.324 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 8.325 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 8.326 @@ -677,7 +772,7 @@ start_all_aps(void) 8.327 outb(CMOS_REG, BIOS_RESET); 8.328 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 8.329 #endif 8.330 - 8.331 +#endif 8.332 bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES * 8.333 PAGE_SIZE]; 8.334 bootAP = cpu; 8.335 @@ -700,13 +795,10 @@ start_all_aps(void) 8.336 /* build our map of 'other' CPUs */ 8.337 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 8.338 8.339 +#if 0 8.340 /* restore the warmstart vector */ 8.341 *(u_long *) WARMBOOT_OFF = mpbioswarmvec; 8.342 -#ifndef PC98 8.343 - outb(CMOS_REG, BIOS_RESET); 8.344 - outb(CMOS_DATA, mpbiosreason); 8.345 #endif 8.346 - 8.347 /* 8.348 * Set up the idle context for the BSP. 
Similar to above except 8.349 * that some was done by locore, some by pmap.c and some is implicit 8.350 @@ -739,7 +831,7 @@ extern void bootDataSeg(void); 8.351 extern void MPentry(void); 8.352 extern u_int MP_GDT; 8.353 extern u_int mp_gdtbase; 8.354 - 8.355 +#if 0 8.356 static void 8.357 install_ap_tramp(void) 8.358 { 8.359 @@ -791,6 +883,21 @@ install_ap_tramp(void) 8.360 *dst16 = (u_int) boot_address & 0xffff; 8.361 *dst8 = ((u_int) boot_address >> 16) & 0xff; 8.362 } 8.363 +#endif 8.364 + 8.365 +static int 8.366 +cpu_mp_trap_init(trap_info_t *trap_ctxt) 8.367 +{ 8.368 + 8.369 + trap_info_t *t = trap_table; 8.370 + 8.371 + for (t = trap_table; t->address; t++) { 8.372 + trap_ctxt[t->vector].flags = t->flags; 8.373 + trap_ctxt[t->vector].cs = t->cs; 8.374 + trap_ctxt[t->vector].address = t->address; 8.375 + } 8.376 + return 0x80 /*SYSCALL_VECTOR*/; 8.377 +} 8.378 8.379 /* 8.380 * This function starts the AP (application processor) identified 8.381 @@ -802,8 +909,25 @@ install_ap_tramp(void) 8.382 static int 8.383 start_ap(int apic_id) 8.384 { 8.385 - int vector, ms; 8.386 - int cpus; 8.387 + int vector, ms, i; 8.388 + int cpus, boot_error; 8.389 + vcpu_guest_context_t ctxt; 8.390 + 8.391 + /* 8.392 + * This is the FreeBSD equivalent to do_boot_cpu(apicid) in 8.393 + * smpboot.c. 8.394 + * its initialization sequence consists of: 8.395 + * - fork_idle(cpu) to create separate idle context 8.396 + * - initialization of idle's context to start_secondary 8.397 + * - initialization of cpu ctxt to start in startup_32_smp 8.398 + * - then we call HYPERVISOR_boot_vcpu with the cpu index and 8.399 + * a pointer to the context. 8.400 + * - on boot success we: 8.401 + * - set ourselves in the callout_map 8.402 + * - wait up to 5 seconds for us to be set in the callin map 8.403 + * - set x86_cpu_to_apicid[cpu] = apicid; 8.404 + * 8.405 + */ 8.406 8.407 POSTCODE(START_AP_POST); 8.408 8.409 @@ -813,6 +937,55 @@ start_ap(int apic_id) 8.410 /* used as a watchpoint to signal AP startup */ 8.411 cpus = mp_naps; 8.412 8.413 + memset(&ctxt, 0, sizeof(ctxt)); 8.414 + 8.415 + ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 8.416 + ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 8.417 + ctxt.user_regs.fs = 0; 8.418 + ctxt.user_regs.gs = 0; 8.419 + ctxt.user_regs.ss = __KERNEL_DS; 8.420 + ctxt.user_regs.cs = __KERNEL_CS; 8.421 + ctxt.user_regs.eip = (unsigned long)init_secondary; 8.422 + ctxt.user_regs.esp = (unsigned long)bootSTK; 8.423 +#ifdef notyet 8.424 + ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12); 8.425 +#else 8.426 + ctxt.user_regs.eflags = (1<<9) | (1<<2); 8.427 +#endif 8.428 + /* FPU is set up to default initial state. */ 8.429 + memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); 8.430 + 8.431 + /* Virtual IDT is empty at start-of-day. */ 8.432 + for ( i = 0; i < 256; i++ ) 8.433 + { 8.434 + ctxt.trap_ctxt[i].vector = i; 8.435 + ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS; 8.436 + } 8.437 + ctxt.fast_trap_idx = cpu_mp_trap_init(ctxt.trap_ctxt); 8.438 + 8.439 + /* No LDT. */ 8.440 + ctxt.ldt_ents = 0; 8.441 + 8.442 + /* Ring 1 stack is the initial stack. */ 8.443 + ctxt.kernel_ss = __KERNEL_DS; 8.444 + ctxt.kernel_sp = (unsigned long)bootSTK; 8.445 + 8.446 + /* Callback handlers. 
*/ 8.447 + ctxt.event_callback_cs = __KERNEL_CS; 8.448 + ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; 8.449 + ctxt.failsafe_callback_cs = __KERNEL_CS; 8.450 + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 8.451 + 8.452 + ctxt.pt_base = (vm_paddr_t)IdlePTD; 8.453 + 8.454 + boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt); 8.455 + 8.456 + 8.457 + if (boot_error) 8.458 + printk("Houston we have a problem\n"); 8.459 + else 8.460 + printk("boot_vcpu succeeded\n"); 8.461 +#if 0 8.462 /* 8.463 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting 8.464 * and running the target CPU. OR this INIT IPI might be latched (P5 8.465 @@ -862,6 +1035,7 @@ start_ap(int apic_id) 8.466 APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | 8.467 vector, apic_id); 8.468 lapic_ipi_wait(-1); 8.469 +#endif 8.470 DELAY(200); /* wait ~200uS */ 8.471 8.472 /* Wait up to 5 seconds for it to start. */
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c	Tue May 03 14:42:00 2005 +0000
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c	Tue May 10 08:49:59 2005 +0000
@@ -1374,7 +1374,6 @@ static u_int *lazymask;
 static u_int lazyptd;
 static volatile u_int lazywait;
 
-void pmap_lazyfix_action(void);
 
 void
 pmap_lazyfix_action(void)
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Tue May 03 14:42:00 2005 +0000
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Tue May 10 08:49:59 2005 +0000
@@ -380,8 +380,6 @@ printk(const char *fmt, ...)
 	(void)HYPERVISOR_console_write(buf, ret);
 }
 
-#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);}
-
 
 #define XPQUEUE_SIZE 128
 #ifdef SMP
--- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h	Tue May 03 14:42:00 2005 +0000
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h	Tue May 10 08:49:59 2005 +0000
@@ -441,4 +441,20 @@ HYPERVISOR_vm_assist(unsigned int cmd, u
     return ret;
 }
 
+static inline int
+HYPERVISOR_boot_vcpu(
+    unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+        : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
+        : "memory");
+
+    return ret;
+}
+
 #endif /* __HYPERVISOR_H__ */
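A usage sketch for the new hypercall wrapper, condensed from start_ap() in mp_machdep.c above (hypothetical helper; trap table and callback setup elided):

/* Condensed, hypothetical caller of HYPERVISOR_boot_vcpu(): hand Xen a
 * fully initialized context for one application processor.  Field
 * values follow start_ap() above. */
static int
boot_one_vcpu(int cpu)
{
    vcpu_guest_context_t ctxt;

    memset(&ctxt, 0, sizeof(ctxt));
    ctxt.user_regs.cs  = __KERNEL_CS;
    ctxt.user_regs.eip = (unsigned long)init_secondary; /* AP entry point */
    ctxt.user_regs.esp = (unsigned long)bootSTK;        /* AP boot stack */
    ctxt.kernel_ss     = __KERNEL_DS;
    ctxt.kernel_sp     = (unsigned long)bootSTK;
    ctxt.pt_base       = (vm_paddr_t)IdlePTD;  /* share the boot page tables */
    /* ... ctxt.trap_ctxt[], event/failsafe callbacks as in start_ap() ... */

    return HYPERVISOR_boot_vcpu(cpu, &ctxt);   /* 0 on success */
}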
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h	Tue May 03 14:42:00 2005 +0000
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h	Tue May 10 08:49:59 2005 +0000
@@ -53,7 +53,12 @@
 	int	*pc_ipi_to_evtchn;					\
 	int	*pc_virq_to_irq;					\
 	u_int	pc_cr2;							\
-	u_int	pc_pdir
+	u_int	pc_pdir;						\
+	u_int	pc_lazypmap;						\
+	u_int	pc_rendezvous;						\
+	u_int	pc_cpuast;						\
+	u_int	pc_time_irq;						\
+	uint64_t pc_processed_system_time;
 
 #if defined(lint)
 
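pcpu.h now carries a per-CPU pc_processed_system_time; clkintr() in clock.c above advances both the global and the per-CPU counter in whole NS_PER_TICK steps. A standalone sketch of that accounting idiom (hypothetical, assuming hz = 100):

/* Hypothetical standalone sketch of the tick accounting in clkintr()
 * above: fold elapsed nanoseconds into whole ticks, keeping the
 * sub-tick remainder in the processed-time counter for the next call. */
#include <stdio.h>

#define HZ          100ULL                 /* assumed clock rate */
#define NS_PER_TICK (1000000000ULL / HZ)

static unsigned long long processed_ns;    /* system time at last processing */

static long
account_ticks(unsigned long long now_ns)
{
    long long delta = (long long)(now_ns - processed_ns);
    long ticks = 0;

    while (delta >= (long long)NS_PER_TICK) {
        delta -= NS_PER_TICK;
        processed_ns += NS_PER_TICK;
        ticks++;                           /* one hardclock() call due */
    }
    return ticks;
}

int
main(void)
{
    printf("%ld\n", account_ticks(25000000ULL)); /* 25 ms -> 2 ticks */
    printf("%ld\n", account_ticks(25000000ULL)); /* no new time -> 0 */
    return 0;
}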
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h	Tue May 03 14:42:00 2005 +0000
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h	Tue May 10 08:49:59 2005 +0000
@@ -343,6 +343,7 @@ void	pmap_set_pg(void);
 void	pmap_invalidate_page(pmap_t, vm_offset_t);
 void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
 void	pmap_invalidate_all(pmap_t);
+void	pmap_lazyfix_action(void);
 
 void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
 void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h	Tue May 03 14:42:00 2005 +0000
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h	Tue May 10 08:49:59 2005 +0000
@@ -61,6 +61,9 @@ void load_cr3(uint32_t val);
 void xen_machphys_update(unsigned long, unsigned long);
 void xen_update_descriptor(union descriptor *, union descriptor *);
 void lldt(u_short sel);
+void ap_cpu_initclocks(void);
+
+
 /*
  * Invalidate a patricular VA on all cpus
  *
@@ -79,5 +82,6 @@ invltlb(void)
 
 }
 
+#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);}
 
 #endif /* _XEN_XENFUNC_H_ */
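With PANIC_IF() now exported from xenfunc.h rather than private to xen_machdep.c, callers such as clock.c and machdep.c above can assert in one line that a hypercall or handler registration succeeded; the pattern, repeating uses from init386() and ap_cpu_initclocks() above:

/* PANIC_IF() usage pattern from this changeset: panic with the failing
 * expression plus file/line whenever exp evaluates non-zero. */
PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0);
PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr,
                          NULL, INTR_TYPE_CLK | INTR_FAST, NULL));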
--- a/linux-2.4.29-xen-sparse/mkbuildtree	Tue May 03 14:42:00 2005 +0000
+++ b/linux-2.4.29-xen-sparse/mkbuildtree	Tue May 10 08:49:59 2005 +0000
@@ -66,7 +66,7 @@ relative_lndir ()
 	(
 	cd $i
 	pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
-	for j in `find . -type f -o -type l -maxdepth 1`; do
+	for j in `find . -maxdepth 1 -type f -o -type l`; do
 	  ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
 	done
 	)
@@ -232,7 +232,6 @@ ln -sf ../../../${LINUX_26}/arch/xen/ker
 ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c
 ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c
 ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c
-ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/pci-dma.c
 
 cd ${AD}/arch/xen/lib
 ln -sf ../../i386/lib/checksum.S
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers	Tue May 03 14:42:00 2005 +0000
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers	Tue May 10 08:49:59 2005 +0000
@@ -49,9 +49,46 @@ source "drivers/infiniband/Kconfig"
 endif
 
 if !XEN_PHYSDEV_ACCESS
+
+menu "Character devices"
+
 config UNIX98_PTYS
 	bool
 	default y
+
+config LEGACY_PTYS
+	bool "Legacy (BSD) PTY support"
+	default y
+	---help---
+	  A pseudo terminal (PTY) is a software device consisting of two
+	  halves: a master and a slave. The slave device behaves identical to
+	  a physical terminal; the master device is used by a process to
+	  read data from and write data to the slave, thereby emulating a
+	  terminal. Typical programs for the master side are telnet servers
+	  and xterms.
+
+	  Linux has traditionally used the BSD-like names /dev/ptyxx
+	  for masters and /dev/ttyxx for slaves of pseudo
+	  terminals. This scheme has a number of problems, including
+	  security. This option enables these legacy devices; on most
+	  systems, it is safe to say N.
+
+
+config LEGACY_PTY_COUNT
+	int "Maximum number of legacy PTY in use"
+	depends on LEGACY_PTYS
+	range 1 256
+	default "256"
+	---help---
+	  The maximum number of legacy PTYs that can be used at any one time.
+	  The default is 256, and should be more than enough.  Embedded
+	  systems may want to reduce this to save memory.
+
+	  When not in use, each legacy PTY occupies 12 bytes on 32-bit
+	  architectures and 24 bytes on 64-bit architectures.
+
+endmenu
+
 endif
 
 endmenu
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue May 03 14:42:00 2005 +0000
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue May 10 08:49:59 2005 +0000
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.11-xen0
-# Tue May 3 13:25:48 2005
+# Wed May 4 17:11:56 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -146,7 +146,7 @@ CONFIG_PCI=y
 # CONFIG_PCI_GOMMCONFIG is not set
 # CONFIG_PCI_GODIRECT is not set
 CONFIG_PCI_GOANY=y
-CONFIG_PCI_BIOS=y
+# CONFIG_PCI_BIOS is not set
 CONFIG_PCI_DIRECT=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_PCI_MSI is not set
@@ -176,16 +176,22 @@ CONFIG_PCMCIA_PROBE=y
 # Kernel hacking
 #
 CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
 CONFIG_EARLY_PRINTK=y
 # CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_SLAB is not set
-CONFIG_MAGIC_SYSRQ=y
-# CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_FRAME_POINTER is not set
 # CONFIG_4KSTACKS is not set
 CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Tue May 03 14:42:00 2005 +0000
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Tue May 10 08:49:59 2005 +0000
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.11-xenU
-# Wed Apr 6 09:20:09 2005
+# Wed May 4 17:14:10 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -132,16 +132,22 @@ CONFIG_HAVE_DEC_LOCK=y
 # Kernel hacking
 #
 CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
 CONFIG_EARLY_PRINTK=y
 # CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_SLAB is not set
-CONFIG_MAGIC_SYSRQ=y
-# CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_FRAME_POINTER is not set
 # CONFIG_4KSTACKS is not set
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
@@ -334,6 +340,12 @@ CONFIG_NETDEVICES=y
 # CONFIG_SHAPER is not set
 # CONFIG_NETCONSOLE is not set
 CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# Character devices
+#
 
 #
 # File systems
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Tue May 03 14:42:00 2005 +0000
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Tue May 10 08:49:59 2005 +0000
@@ -316,6 +316,12 @@ CONFIG_NETDEVICES=y
 # CONFIG_SHAPER is not set
 # CONFIG_NETCONSOLE is not set
 CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# Character devices
+#
 
 #
 # File systems
20.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig Tue May 03 14:42:00 2005 +0000 20.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig Tue May 10 08:49:59 2005 +0000 20.3 @@ -12,6 +12,11 @@ config XENARCH 20.4 config X86 20.5 bool 20.6 default y 20.7 + help 20.8 + This is Linux's home port. Linux was originally native to the Intel 20.9 + 386, and runs on all the later x86 processors including the Intel 20.10 + 486, 586, Pentiums, and various instruction-set-compatible chips by 20.11 + AMD, Cyrix, and others. 20.12 20.13 config MMU 20.14 bool 20.15 @@ -782,13 +787,19 @@ endchoice 20.16 config PCI_BIOS 20.17 bool 20.18 depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) 20.19 - default n 20.20 + default y 20.21 20.22 config PCI_DIRECT 20.23 bool 20.24 depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS) 20.25 default y 20.26 20.27 +config PCI_MMCONFIG 20.28 + bool 20.29 + depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI)) 20.30 + select ACPI_BOOT 20.31 + default y 20.32 + 20.33 source "drivers/pci/pcie/Kconfig" 20.34 20.35 source "drivers/pci/Kconfig" 20.36 @@ -856,129 +867,7 @@ endmenu 20.37 20.38 endif 20.39 20.40 -menu "Kernel hacking" 20.41 - 20.42 -config DEBUG_KERNEL 20.43 - bool "Kernel debugging" 20.44 - help 20.45 - Say Y here if you are developing drivers or trying to debug and 20.46 - identify kernel problems. 20.47 - 20.48 -config EARLY_PRINTK 20.49 - bool "Early printk" if EMBEDDED 20.50 - default y 20.51 - help 20.52 - Write kernel log output directly into the VGA buffer or to a serial 20.53 - port. 20.54 - 20.55 - This is useful for kernel debugging when your machine crashes very 20.56 - early before the console code is initialized. For normal operation 20.57 - it is not recommended because it looks ugly and doesn't cooperate 20.58 - with klogd/syslogd or the X server. You should normally N here, 20.59 - unless you want to debug such a crash. 20.60 - 20.61 -config DEBUG_STACKOVERFLOW 20.62 - bool "Check for stack overflows" 20.63 - depends on DEBUG_KERNEL 20.64 - 20.65 -config DEBUG_STACK_USAGE 20.66 - bool "Stack utilization instrumentation" 20.67 - depends on DEBUG_KERNEL 20.68 - help 20.69 - Enables the display of the minimum amount of free stack which each 20.70 - task has ever had available in the sysrq-T and sysrq-P debug output. 20.71 - 20.72 - This option will slow down process creation somewhat. 20.73 - 20.74 -config DEBUG_SLAB 20.75 - bool "Debug memory allocations" 20.76 - depends on DEBUG_KERNEL 20.77 - help 20.78 - Say Y here to have the kernel do limited verification on memory 20.79 - allocation as well as poisoning memory on free to catch use of freed 20.80 - memory. 20.81 - 20.82 -config MAGIC_SYSRQ 20.83 - bool "Magic SysRq key" 20.84 - depends on DEBUG_KERNEL 20.85 - help 20.86 - If you say Y here, you will have some control over the system even 20.87 - if the system crashes for example during kernel debugging (e.g., you 20.88 - will be able to flush the buffer cache to disk, reboot the system 20.89 - immediately or dump some status information). This is accomplished 20.90 - by pressing various keys while holding SysRq (Alt+PrintScreen). It 20.91 - also works on a serial console (on PC hardware at least), if you 20.92 - send a BREAK and then within 5 seconds a command keypress. The 20.93 - keys are documented in <file:Documentation/sysrq.txt>. Don't say Y 20.94 - unless you really know what this hack does. 
20.95 - 20.96 -config DEBUG_SPINLOCK 20.97 - bool "Spinlock debugging" 20.98 - depends on DEBUG_KERNEL 20.99 - help 20.100 - Say Y here and build SMP to catch missing spinlock initialization 20.101 - and certain other kinds of spinlock errors commonly made. This is 20.102 - best used in conjunction with the NMI watchdog so that spinlock 20.103 - deadlocks are also debuggable. 20.104 - 20.105 -config DEBUG_PAGEALLOC 20.106 - bool "Page alloc debugging" 20.107 - depends on DEBUG_KERNEL 20.108 - help 20.109 - Unmap pages from the kernel linear mapping after free_pages(). 20.110 - This results in a large slowdown, but helps to find certain types 20.111 - of memory corruptions. 20.112 - 20.113 -config DEBUG_HIGHMEM 20.114 - bool "Highmem debugging" 20.115 - depends on DEBUG_KERNEL && HIGHMEM 20.116 - help 20.117 - This options enables addition error checking for high memory systems. 20.118 - Disable for production systems. 20.119 - 20.120 -config DEBUG_INFO 20.121 - bool "Compile the kernel with debug info" 20.122 - depends on DEBUG_KERNEL 20.123 - help 20.124 - If you say Y here the resulting kernel image will include 20.125 - debugging info resulting in a larger kernel image. 20.126 - Say Y here only if you plan to use gdb to debug the kernel. 20.127 - If you don't debug the kernel, you can say N. 20.128 - 20.129 -config DEBUG_SPINLOCK_SLEEP 20.130 - bool "Sleep-inside-spinlock checking" 20.131 - help 20.132 - If you say Y here, various routines which may sleep will become very 20.133 - noisy if they are called with a spinlock held. 20.134 - 20.135 -config FRAME_POINTER 20.136 - bool "Compile the kernel with frame pointers" 20.137 - help 20.138 - If you say Y here the resulting kernel image will be slightly larger 20.139 - and slower, but it will give very useful debugging information. 20.140 - If you don't debug the kernel, you can say N, but we may not be able 20.141 - to solve problems without frame pointers. 20.142 - 20.143 -config 4KSTACKS 20.144 - bool "Use 4Kb for kernel stacks instead of 8Kb" 20.145 - help 20.146 - If you say Y here the kernel will use a 4Kb stacksize for the 20.147 - kernel stack attached to each process/thread. This facilitates 20.148 - running more threads on a system and also reduces the pressure 20.149 - on the VM subsystem for higher order allocations. This option 20.150 - will also use IRQ stacks to compensate for the reduced stackspace. 20.151 - 20.152 -config X86_FIND_SMP_CONFIG 20.153 - bool 20.154 - depends on X86_LOCAL_APIC || X86_VOYAGER 20.155 - default n 20.156 - 20.157 -config X86_MPPARSE 20.158 - bool 20.159 - depends on X86_LOCAL_APIC && !X86_VISWS 20.160 - default y 20.161 - 20.162 -endmenu 20.163 +source "arch/i386/Kconfig.debug" 20.164 20.165 # 20.166 # Use the generic interrupt handling code in kernel/irq/:
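Two notable changes in the Kconfig hunk above: PCI_BIOS flips its default to y, and the new PCI_MMCONFIG option (selecting ACPI_BOOT, since the MMCONFIG window is discovered through the ACPI MCFG table) enables memory-mapped PCI configuration access. The duplicated "Kernel hacking" menu is dropped in favour of sourcing the native arch/i386/Kconfig.debug. For reference, MMCONFIG/ECAM turns config space into flat memory with a 4K window per function; a sketch of the address arithmetic, with mmcfg_base standing in for the MCFG-reported base (hypothetical helper, not from this patch):

    #include <stdint.h>

    /* ECAM layout: bus << 20 | device << 15 | function << 12, plus the
     * dword-aligned register offset within the function's 4K window. */
    static inline volatile uint32_t *
    mmcfg_addr(uintptr_t mmcfg_base, unsigned int bus, unsigned int dev,
               unsigned int fn, unsigned int reg)
    {
        return (volatile uint32_t *)(mmcfg_base +
                ((uintptr_t)bus << 20) + (dev << 15) + (fn << 12) +
                (reg & ~3u));
    }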
21.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile Tue May 03 14:42:00 2005 +0000 21.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile Tue May 10 08:49:59 2005 +0000 21.3 @@ -20,7 +20,7 @@ s-obj-y := 21.4 21.5 obj-y += cpu/ 21.6 obj-y += timers/ 21.7 -c-obj-$(CONFIG_ACPI_BOOT) += acpi/ 21.8 +obj-$(CONFIG_ACPI_BOOT) += acpi/ 21.9 #c-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 21.10 c-obj-$(CONFIG_MCA) += mca.o 21.11 c-obj-$(CONFIG_X86_MSR) += msr.o 21.12 @@ -29,10 +29,10 @@ obj-$(CONFIG_MICROCODE) += microcode.o 21.13 c-obj-$(CONFIG_APM) += apm.o 21.14 obj-$(CONFIG_X86_SMP) += smp.o smpboot.o 21.15 #obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 21.16 -c-obj-$(CONFIG_X86_MPPARSE) += mpparse.o 21.17 +obj-$(CONFIG_X86_MPPARSE) += mpparse.o 21.18 obj-$(CONFIG_X86_LOCAL_APIC) += apic.o 21.19 c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o 21.20 -c-obj-$(CONFIG_X86_IO_APIC) += io_apic.o 21.21 +obj-$(CONFIG_X86_IO_APIC) += io_apic.o 21.22 c-obj-$(CONFIG_X86_NUMAQ) += numaq.o 21.23 c-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o 21.24 c-obj-$(CONFIG_MODULES) += module.o
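In the xen-sparse build, c-obj-$(CONFIG_FOO) entries are compiled from symlinks into the pristine arch/i386 tree, while obj-$(CONFIG_FOO) entries are built from Xen-specific sources kept in the sparse tree itself. Moving acpi/, mpparse.o and io_apic.o from c-obj to obj therefore means these three now carry Xen-modified copies rather than borrowing the native files; the acpi/Makefile hunk below finishes the same job for the ACPI subdirectory.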
22.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile Tue May 03 14:42:00 2005 +0000 22.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile Tue May 10 08:49:59 2005 +0000 22.3 @@ -7,3 +7,7 @@ c-link 22.4 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)): 22.5 @ln -fsn $(srctree)/arch/i386/kernel/acpi/$(notdir $@) $@ 22.6 22.7 +obj-y += $(c-obj-y) $(s-obj-y) 22.8 + 22.9 +clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link)) 22.10 +clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
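Here obj-y += $(c-obj-y) $(s-obj-y) folds the still-symlinked native objects into this subdirectory's built-in link, which is needed now that the parent Makefile enters the directory via obj-y; the two clean-files lines make "make clean" remove the generated symlinks.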
23.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Tue May 03 14:42:00 2005 +0000 23.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Tue May 10 08:49:59 2005 +0000 23.3 @@ -109,17 +109,17 @@ enum acpi_irq_model_id acpi_irq_model = 23.4 23.5 char *__acpi_map_table(unsigned long phys_addr, unsigned long size) 23.6 { 23.7 - unsigned int i,j; 23.8 + unsigned int i,j; 23.9 23.10 - j = PAGE_ALIGN(size) >> PAGE_SHIFT; 23.11 - for (i = 0; (i < FIX_ACPI_PAGES) && j ; i++, j--) { 23.12 - __set_fixmap_ma(FIX_ACPI_END - i, 23.13 - (phys_addr & PAGE_MASK) + (i << PAGE_SHIFT), 23.14 - PAGE_KERNEL); 23.15 - } 23.16 + j = PAGE_ALIGN(size) >> PAGE_SHIFT; 23.17 + for (i = 0; (i < FIX_ACPI_PAGES) && j ; i++, j--) { 23.18 + set_fixmap(FIX_ACPI_END - i, 23.19 + (phys_addr & PAGE_MASK) + (i << PAGE_SHIFT)); 23.20 + } 23.21 23.22 - return (char *) __fix_to_virt(FIX_ACPI_END) + (phys_addr & ~PAGE_MASK); 23.23 + return (char *) __fix_to_virt(FIX_ACPI_END) + (phys_addr & ~PAGE_MASK); 23.24 } 23.25 + 23.26 #else 23.27 #ifdef CONFIG_X86_64 23.28 23.29 @@ -523,7 +523,7 @@ acpi_scan_rsdp ( 23.30 { 23.31 unsigned long offset = 0; 23.32 unsigned long sig_len = sizeof("RSD PTR ") - 1; 23.33 - unsigned long vstart = isa_bus_to_virt(start); 23.34 + unsigned long vstart = (unsigned long)isa_bus_to_virt(start); 23.35 23.36 /* 23.37 * Scan all 16-byte boundaries of the physical memory region for the 23.38 @@ -649,16 +649,15 @@ acpi_find_rsdp (void) 23.39 else if (efi.acpi) 23.40 return __pa(efi.acpi); 23.41 } 23.42 - 23.43 /* 23.44 * Scan memory looking for the RSDP signature. First search EBDA (low 23.45 * memory) paragraphs and then search upper memory (E0000-FFFFF). 23.46 */ 23.47 rsdp_phys = acpi_scan_rsdp (0, 0x400); 23.48 if (!rsdp_phys) 23.49 - rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF); 23.50 + rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000); 23.51 23.52 - __set_fixmap_ma(FIX_ACPI_RSDP_PAGE, rsdp_phys, PAGE_KERNEL); 23.53 + set_fixmap(FIX_ACPI_RSDP_PAGE, rsdp_phys); 23.54 23.55 return rsdp_phys; 23.56 } 23.57 @@ -673,10 +672,6 @@ acpi_parse_madt_lapic_entries(void) 23.58 { 23.59 int count; 23.60 23.61 -#ifdef CONFIG_XEN 23.62 - return 0; 23.63 -#endif 23.64 - 23.65 /* 23.66 * Note that the LAPIC address is obtained from the MADT (32-bit value) 23.67 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). 23.68 @@ -872,7 +867,7 @@ acpi_boot_table_init(void) 23.69 } 23.70 23.71 #ifdef __i386__ 23.72 - //check_acpi_pci(); 23.73 + check_acpi_pci(); 23.74 #endif 23.75 23.76 acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
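Besides switching the ACPI fixmap mappings from the Xen-private __set_fixmap_ma() to the stock set_fixmap(), the hunk above fixes a latent bug in the upper-memory RSDP scan: acpi_scan_rsdp() takes a (start, length) pair, so the BIOS area 0xE0000-0xFFFFF is length 0x20000; passing 0xFFFFF as the length scanned almost a megabyte past the region. It also re-enables MADT local-APIC parsing and check_acpi_pci() under Xen. A simplified sketch of the bounded scan, mirroring acpi_scan_rsdp() (hypothetical helper, not from the patch):

    #include <string.h>

    /* Look for the 8-byte "RSD PTR " signature on each 16-byte
     * boundary of an already-mapped window; returns the physical
     * address of the RSDP, or 0 if the signature is absent. */
    static unsigned long scan_rsdp(const char *virt, unsigned long start,
                                   unsigned long length)
    {
        unsigned long off;

        for (off = 0; off < length; off += 16)
            if (memcmp(virt + off, "RSD PTR ", 8) == 0)
                return start + off;
        return 0;
    }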
24.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c Tue May 03 14:42:00 2005 +0000 24.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c Tue May 10 08:49:59 2005 +0000 24.3 @@ -16,8 +16,32 @@ 24.4 24.5 #include <linux/config.h> 24.6 #include <linux/init.h> 24.7 -#include <asm/apic.h> 24.8 + 24.9 +#include <linux/mm.h> 24.10 +#include <linux/irq.h> 24.11 +#include <linux/delay.h> 24.12 +#include <linux/bootmem.h> 24.13 +#include <linux/smp_lock.h> 24.14 +#include <linux/interrupt.h> 24.15 +#include <linux/mc146818rtc.h> 24.16 +#include <linux/kernel_stat.h> 24.17 +#include <linux/sysdev.h> 24.18 24.19 +#include <asm/atomic.h> 24.20 +#include <asm/smp.h> 24.21 +#include <asm/mtrr.h> 24.22 +#include <asm/mpspec.h> 24.23 +#include <asm/desc.h> 24.24 +#include <asm/arch_hooks.h> 24.25 +#include <asm/hpet.h> 24.26 + 24.27 +#include <mach_apic.h> 24.28 + 24.29 +#include "io_ports.h" 24.30 + 24.31 +/* 24.32 + * Debug level 24.33 + */ 24.34 int apic_verbosity; 24.35 24.36 int get_physical_broadcast(void) 24.37 @@ -49,5 +73,11 @@ void ack_bad_irq(unsigned int irq) 24.38 */ 24.39 int __init APIC_init_uniprocessor (void) 24.40 { 24.41 +#ifdef CONFIG_X86_IO_APIC 24.42 + if (smp_found_config) 24.43 + if (!skip_ioapic_setup && nr_ioapics) 24.44 + setup_IO_APIC(); 24.45 +#endif 24.46 + 24.47 return 0; 24.48 }
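The apic.c hunk restores the full header set and makes APIC_init_uniprocessor() program the IO-APIC on UP kernels whenever the MP tables describe one. The io_apic.c diff that follows then routes every IO-APIC register access through PHYSDEVOP_APIC_READ/WRITE hypercalls instead of mapping the chip directly; for reference, the indirect register-select/window sequence that such a read replaces looks like this (illustrative sketch, not from the patch):

    #include <stdint.h>

    /* Classic IO-APIC access: a 4K MMIO window with the index register
     * IOREGSEL at offset 0x00 and the data window IOWIN at offset 0x10.
     * Under this changeset, Xen guests no longer map this window
     * themselves; the hypervisor performs the access on their behalf. */
    static uint32_t ioapic_mmio_read(volatile uint32_t *base, uint8_t reg)
    {
        base[0] = reg;     /* select register via IOREGSEL */
        return base[4];    /* read it back through IOWIN (0x10 / 4) */
    }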
25.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c Tue May 03 14:42:00 2005 +0000 25.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c Tue May 10 08:49:59 2005 +0000 25.3 @@ -37,19 +37,56 @@ 25.4 #include <asm/smp.h> 25.5 #include <asm/desc.h> 25.6 #include <asm/timer.h> 25.7 -#include <asm/io_apic.h> 25.8 -#include <asm/apic.h> 25.9 25.10 #include <mach_apic.h> 25.11 25.12 #include "io_ports.h" 25.13 25.14 +#ifdef CONFIG_XEN 25.15 + 25.16 +#include <asm-xen/xen-public/xen.h> 25.17 +#include <asm-xen/xen-public/physdev.h> 25.18 + 25.19 +/* Fake i8259 */ 25.20 +#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) 25.21 +#define disable_8259A_irq(_irq) ((void)0) 25.22 +#define i8259A_irq_pending(_irq) (0) 25.23 + 25.24 +unsigned long io_apic_irqs; 25.25 + 25.26 +static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) 25.27 +{ 25.28 + physdev_op_t op; 25.29 + int ret; 25.30 + 25.31 + op.cmd = PHYSDEVOP_APIC_READ; 25.32 + op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid; 25.33 + op.u.apic_op.offset = reg; 25.34 + ret = HYPERVISOR_physdev_op(&op); 25.35 + if (ret) 25.36 + return ret; 25.37 + return op.u.apic_op.value; 25.38 +} 25.39 + 25.40 +static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) 25.41 +{ 25.42 + physdev_op_t op; 25.43 + 25.44 + op.cmd = PHYSDEVOP_APIC_WRITE; 25.45 + op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid; 25.46 + op.u.apic_op.offset = reg; 25.47 + op.u.apic_op.value = value; 25.48 + HYPERVISOR_physdev_op(&op); 25.49 +} 25.50 + 25.51 +#define io_apic_read(a,r) xen_io_apic_read(a,r) 25.52 +#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) 25.53 + 25.54 +#endif /* CONFIG_XEN */ 25.55 + 25.56 int (*ioapic_renumber_irq)(int ioapic, int irq); 25.57 atomic_t irq_mis_count; 25.58 25.59 -unsigned long io_apic_irqs; 25.60 -int skip_ioapic_setup; 25.61 - 25.62 static DEFINE_SPINLOCK(ioapic_lock); 25.63 25.64 /* 25.65 @@ -89,24 +126,6 @@ int vector_irq[NR_VECTORS] = { [0 ... NR 25.66 #define vector_to_irq(vector) (vector) 25.67 #endif 25.68 25.69 - 25.70 -#ifndef CONFIG_SMP 25.71 -void fastcall send_IPI_self(int vector) 25.72 -{ 25.73 - return; 25.74 -} 25.75 -#endif 25.76 - 25.77 -int irqbalance_disable(char *str) 25.78 -{ 25.79 - return 0; 25.80 -} 25.81 - 25.82 -void print_IO_APIC(void) 25.83 -{ 25.84 - return; 25.85 -} 25.86 - 25.87 /* 25.88 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 25.89 * shared ISA-space IRQs, so we have to support them. We are super 25.90 @@ -130,6 +149,583 @@ static void add_pin_to_irq(unsigned int 25.91 entry->pin = pin; 25.92 } 25.93 25.94 +#ifndef CONFIG_XEN 25.95 +/* 25.96 + * Reroute an IRQ to a different pin. 
25.97 + */ 25.98 +static void __init replace_pin_at_irq(unsigned int irq, 25.99 + int oldapic, int oldpin, 25.100 + int newapic, int newpin) 25.101 +{ 25.102 + struct irq_pin_list *entry = irq_2_pin + irq; 25.103 + 25.104 + while (1) { 25.105 + if (entry->apic == oldapic && entry->pin == oldpin) { 25.106 + entry->apic = newapic; 25.107 + entry->pin = newpin; 25.108 + } 25.109 + if (!entry->next) 25.110 + break; 25.111 + entry = irq_2_pin + entry->next; 25.112 + } 25.113 +} 25.114 + 25.115 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) 25.116 +{ 25.117 + struct irq_pin_list *entry = irq_2_pin + irq; 25.118 + unsigned int pin, reg; 25.119 + 25.120 + for (;;) { 25.121 + pin = entry->pin; 25.122 + if (pin == -1) 25.123 + break; 25.124 + reg = io_apic_read(entry->apic, 0x10 + pin*2); 25.125 + reg &= ~disable; 25.126 + reg |= enable; 25.127 + io_apic_modify(entry->apic, 0x10 + pin*2, reg); 25.128 + if (!entry->next) 25.129 + break; 25.130 + entry = irq_2_pin + entry->next; 25.131 + } 25.132 +} 25.133 + 25.134 +/* mask = 1 */ 25.135 +static void __mask_IO_APIC_irq (unsigned int irq) 25.136 +{ 25.137 + __modify_IO_APIC_irq(irq, 0x00010000, 0); 25.138 +} 25.139 + 25.140 +/* mask = 0 */ 25.141 +static void __unmask_IO_APIC_irq (unsigned int irq) 25.142 +{ 25.143 + __modify_IO_APIC_irq(irq, 0, 0x00010000); 25.144 +} 25.145 + 25.146 +/* mask = 1, trigger = 0 */ 25.147 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq) 25.148 +{ 25.149 + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); 25.150 +} 25.151 + 25.152 +/* mask = 0, trigger = 1 */ 25.153 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq) 25.154 +{ 25.155 + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); 25.156 +} 25.157 + 25.158 +static void mask_IO_APIC_irq (unsigned int irq) 25.159 +{ 25.160 + unsigned long flags; 25.161 + 25.162 + spin_lock_irqsave(&ioapic_lock, flags); 25.163 + __mask_IO_APIC_irq(irq); 25.164 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.165 +} 25.166 + 25.167 +static void unmask_IO_APIC_irq (unsigned int irq) 25.168 +{ 25.169 + unsigned long flags; 25.170 + 25.171 + spin_lock_irqsave(&ioapic_lock, flags); 25.172 + __unmask_IO_APIC_irq(irq); 25.173 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.174 +} 25.175 + 25.176 +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 25.177 +{ 25.178 + struct IO_APIC_route_entry entry; 25.179 + unsigned long flags; 25.180 + 25.181 + /* Check delivery_mode to be sure we're not clearing an SMI pin */ 25.182 + spin_lock_irqsave(&ioapic_lock, flags); 25.183 + *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); 25.184 + *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); 25.185 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.186 + if (entry.delivery_mode == dest_SMI) 25.187 + return; 25.188 + 25.189 + /* 25.190 + * Disable it in the IO-APIC irq-routing table: 25.191 + */ 25.192 + memset(&entry, 0, sizeof(entry)); 25.193 + entry.mask = 1; 25.194 + spin_lock_irqsave(&ioapic_lock, flags); 25.195 + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); 25.196 + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); 25.197 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.198 +} 25.199 + 25.200 +static void clear_IO_APIC (void) 25.201 +{ 25.202 + int apic, pin; 25.203 + 25.204 + for (apic = 0; apic < nr_ioapics; apic++) 25.205 + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 25.206 + clear_IO_APIC_pin(apic, pin); 25.207 +} 25.208 + 25.209 +static void 
set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) 25.210 +{ 25.211 + unsigned long flags; 25.212 + int pin; 25.213 + struct irq_pin_list *entry = irq_2_pin + irq; 25.214 + unsigned int apicid_value; 25.215 + 25.216 + apicid_value = cpu_mask_to_apicid(cpumask); 25.217 + /* Prepare to do the io_apic_write */ 25.218 + apicid_value = apicid_value << 24; 25.219 + spin_lock_irqsave(&ioapic_lock, flags); 25.220 + for (;;) { 25.221 + pin = entry->pin; 25.222 + if (pin == -1) 25.223 + break; 25.224 + io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); 25.225 + if (!entry->next) 25.226 + break; 25.227 + entry = irq_2_pin + entry->next; 25.228 + } 25.229 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.230 +} 25.231 +#else 25.232 +#define clear_IO_APIC() ((void)0) 25.233 +#endif 25.234 + 25.235 +#if defined(CONFIG_IRQBALANCE) 25.236 +# include <asm/processor.h> /* kernel_thread() */ 25.237 +# include <linux/kernel_stat.h> /* kstat */ 25.238 +# include <linux/slab.h> /* kmalloc() */ 25.239 +# include <linux/timer.h> /* time_after() */ 25.240 + 25.241 +# ifdef CONFIG_BALANCED_IRQ_DEBUG 25.242 +# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) 25.243 +# define Dprintk(x...) do { TDprintk(x); } while (0) 25.244 +# else 25.245 +# define TDprintk(x...) 25.246 +# define Dprintk(x...) 25.247 +# endif 25.248 + 25.249 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; 25.250 + 25.251 +#define IRQBALANCE_CHECK_ARCH -999 25.252 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; 25.253 +static int physical_balance = 0; 25.254 + 25.255 +struct irq_cpu_info { 25.256 + unsigned long * last_irq; 25.257 + unsigned long * irq_delta; 25.258 + unsigned long irq; 25.259 +} irq_cpu_data[NR_CPUS]; 25.260 + 25.261 +#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) 25.262 +#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) 25.263 +#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) 25.264 + 25.265 +#define IDLE_ENOUGH(cpu,now) \ 25.266 + (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) 25.267 + 25.268 +#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) 25.269 + 25.270 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) 25.271 + 25.272 +#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) 25.273 +#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) 25.274 +#define BALANCED_IRQ_MORE_DELTA (HZ/10) 25.275 +#define BALANCED_IRQ_LESS_DELTA (HZ) 25.276 + 25.277 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL; 25.278 + 25.279 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask, 25.280 + unsigned long now, int direction) 25.281 +{ 25.282 + int search_idle = 1; 25.283 + int cpu = curr_cpu; 25.284 + 25.285 + goto inside; 25.286 + 25.287 + do { 25.288 + if (unlikely(cpu == curr_cpu)) 25.289 + search_idle = 0; 25.290 +inside: 25.291 + if (direction == 1) { 25.292 + cpu++; 25.293 + if (cpu >= NR_CPUS) 25.294 + cpu = 0; 25.295 + } else { 25.296 + cpu--; 25.297 + if (cpu == -1) 25.298 + cpu = NR_CPUS-1; 25.299 + } 25.300 + } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || 25.301 + (search_idle && !IDLE_ENOUGH(cpu,now))); 25.302 + 25.303 + return cpu; 25.304 +} 25.305 + 25.306 +static inline void balance_irq(int cpu, int irq) 25.307 +{ 25.308 + unsigned long now = jiffies; 25.309 + cpumask_t allowed_mask; 25.310 + unsigned int new_cpu; 25.311 + 25.312 + if (irqbalance_disabled) 25.313 + return; 25.314 + 25.315 + cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); 
25.316 + new_cpu = move(cpu, allowed_mask, now, 1); 25.317 + if (cpu != new_cpu) { 25.318 + irq_desc_t *desc = irq_desc + irq; 25.319 + unsigned long flags; 25.320 + 25.321 + spin_lock_irqsave(&desc->lock, flags); 25.322 + pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); 25.323 + spin_unlock_irqrestore(&desc->lock, flags); 25.324 + } 25.325 +} 25.326 + 25.327 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) 25.328 +{ 25.329 + int i, j; 25.330 + Dprintk("Rotating IRQs among CPUs.\n"); 25.331 + for (i = 0; i < NR_CPUS; i++) { 25.332 + for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) { 25.333 + if (!irq_desc[j].action) 25.334 + continue; 25.335 + /* Is it a significant load ? */ 25.336 + if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < 25.337 + useful_load_threshold) 25.338 + continue; 25.339 + balance_irq(i, j); 25.340 + } 25.341 + } 25.342 + balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, 25.343 + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); 25.344 + return; 25.345 +} 25.346 + 25.347 +static void do_irq_balance(void) 25.348 +{ 25.349 + int i, j; 25.350 + unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); 25.351 + unsigned long move_this_load = 0; 25.352 + int max_loaded = 0, min_loaded = 0; 25.353 + int load; 25.354 + unsigned long useful_load_threshold = balanced_irq_interval + 10; 25.355 + int selected_irq; 25.356 + int tmp_loaded, first_attempt = 1; 25.357 + unsigned long tmp_cpu_irq; 25.358 + unsigned long imbalance = 0; 25.359 + cpumask_t allowed_mask, target_cpu_mask, tmp; 25.360 + 25.361 + for (i = 0; i < NR_CPUS; i++) { 25.362 + int package_index; 25.363 + CPU_IRQ(i) = 0; 25.364 + if (!cpu_online(i)) 25.365 + continue; 25.366 + package_index = CPU_TO_PACKAGEINDEX(i); 25.367 + for (j = 0; j < NR_IRQS; j++) { 25.368 + unsigned long value_now, delta; 25.369 + /* Is this an active IRQ? */ 25.370 + if (!irq_desc[j].action) 25.371 + continue; 25.372 + if ( package_index == i ) 25.373 + IRQ_DELTA(package_index,j) = 0; 25.374 + /* Determine the total count per processor per IRQ */ 25.375 + value_now = (unsigned long) kstat_cpu(i).irqs[j]; 25.376 + 25.377 + /* Determine the activity per processor per IRQ */ 25.378 + delta = value_now - LAST_CPU_IRQ(i,j); 25.379 + 25.380 + /* Update last_cpu_irq[][] for the next time */ 25.381 + LAST_CPU_IRQ(i,j) = value_now; 25.382 + 25.383 + /* Ignore IRQs whose rate is less than the clock */ 25.384 + if (delta < useful_load_threshold) 25.385 + continue; 25.386 + /* update the load for the processor or package total */ 25.387 + IRQ_DELTA(package_index,j) += delta; 25.388 + 25.389 + /* Keep track of the higher numbered sibling as well */ 25.390 + if (i != package_index) 25.391 + CPU_IRQ(i) += delta; 25.392 + /* 25.393 + * We have sibling A and sibling B in the package 25.394 + * 25.395 + * cpu_irq[A] = load for cpu A + load for cpu B 25.396 + * cpu_irq[B] = load for cpu B 25.397 + */ 25.398 + CPU_IRQ(package_index) += delta; 25.399 + } 25.400 + } 25.401 + /* Find the least loaded processor package */ 25.402 + for (i = 0; i < NR_CPUS; i++) { 25.403 + if (!cpu_online(i)) 25.404 + continue; 25.405 + if (i != CPU_TO_PACKAGEINDEX(i)) 25.406 + continue; 25.407 + if (min_cpu_irq > CPU_IRQ(i)) { 25.408 + min_cpu_irq = CPU_IRQ(i); 25.409 + min_loaded = i; 25.410 + } 25.411 + } 25.412 + max_cpu_irq = ULONG_MAX; 25.413 + 25.414 +tryanothercpu: 25.415 + /* Look for heaviest loaded processor. 25.416 + * We may come back to get the next heaviest loaded processor. 25.417 + * Skip processors with trivial loads. 
25.418 + */ 25.419 + tmp_cpu_irq = 0; 25.420 + tmp_loaded = -1; 25.421 + for (i = 0; i < NR_CPUS; i++) { 25.422 + if (!cpu_online(i)) 25.423 + continue; 25.424 + if (i != CPU_TO_PACKAGEINDEX(i)) 25.425 + continue; 25.426 + if (max_cpu_irq <= CPU_IRQ(i)) 25.427 + continue; 25.428 + if (tmp_cpu_irq < CPU_IRQ(i)) { 25.429 + tmp_cpu_irq = CPU_IRQ(i); 25.430 + tmp_loaded = i; 25.431 + } 25.432 + } 25.433 + 25.434 + if (tmp_loaded == -1) { 25.435 + /* In the case of small number of heavy interrupt sources, 25.436 + * loading some of the cpus too much. We use Ingo's original 25.437 + * approach to rotate them around. 25.438 + */ 25.439 + if (!first_attempt && imbalance >= useful_load_threshold) { 25.440 + rotate_irqs_among_cpus(useful_load_threshold); 25.441 + return; 25.442 + } 25.443 + goto not_worth_the_effort; 25.444 + } 25.445 + 25.446 + first_attempt = 0; /* heaviest search */ 25.447 + max_cpu_irq = tmp_cpu_irq; /* load */ 25.448 + max_loaded = tmp_loaded; /* processor */ 25.449 + imbalance = (max_cpu_irq - min_cpu_irq) / 2; 25.450 + 25.451 + Dprintk("max_loaded cpu = %d\n", max_loaded); 25.452 + Dprintk("min_loaded cpu = %d\n", min_loaded); 25.453 + Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); 25.454 + Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); 25.455 + Dprintk("load imbalance = %lu\n", imbalance); 25.456 + 25.457 + /* if imbalance is less than approx 10% of max load, then 25.458 + * observe diminishing returns action. - quit 25.459 + */ 25.460 + if (imbalance < (max_cpu_irq >> 3)) { 25.461 + Dprintk("Imbalance too trivial\n"); 25.462 + goto not_worth_the_effort; 25.463 + } 25.464 + 25.465 +tryanotherirq: 25.466 + /* if we select an IRQ to move that can't go where we want, then 25.467 + * see if there is another one to try. 25.468 + */ 25.469 + move_this_load = 0; 25.470 + selected_irq = -1; 25.471 + for (j = 0; j < NR_IRQS; j++) { 25.472 + /* Is this an active IRQ? */ 25.473 + if (!irq_desc[j].action) 25.474 + continue; 25.475 + if (imbalance <= IRQ_DELTA(max_loaded,j)) 25.476 + continue; 25.477 + /* Try to find the IRQ that is closest to the imbalance 25.478 + * without going over. 25.479 + */ 25.480 + if (move_this_load < IRQ_DELTA(max_loaded,j)) { 25.481 + move_this_load = IRQ_DELTA(max_loaded,j); 25.482 + selected_irq = j; 25.483 + } 25.484 + } 25.485 + if (selected_irq == -1) { 25.486 + goto tryanothercpu; 25.487 + } 25.488 + 25.489 + imbalance = move_this_load; 25.490 + 25.491 + /* For physical_balance case, we accumlated both load 25.492 + * values in the one of the siblings cpu_irq[], 25.493 + * to use the same code for physical and logical processors 25.494 + * as much as possible. 25.495 + * 25.496 + * NOTE: the cpu_irq[] array holds the sum of the load for 25.497 + * sibling A and sibling B in the slot for the lowest numbered 25.498 + * sibling (A), _AND_ the load for sibling B in the slot for 25.499 + * the higher numbered sibling. 
25.500 + * 25.501 + * We seek the least loaded sibling by making the comparison 25.502 + * (A+B)/2 vs B 25.503 + */ 25.504 + load = CPU_IRQ(min_loaded) >> 1; 25.505 + for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { 25.506 + if (load > CPU_IRQ(j)) { 25.507 + /* This won't change cpu_sibling_map[min_loaded] */ 25.508 + load = CPU_IRQ(j); 25.509 + min_loaded = j; 25.510 + } 25.511 + } 25.512 + 25.513 + cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); 25.514 + target_cpu_mask = cpumask_of_cpu(min_loaded); 25.515 + cpus_and(tmp, target_cpu_mask, allowed_mask); 25.516 + 25.517 + if (!cpus_empty(tmp)) { 25.518 + irq_desc_t *desc = irq_desc + selected_irq; 25.519 + unsigned long flags; 25.520 + 25.521 + Dprintk("irq = %d moved to cpu = %d\n", 25.522 + selected_irq, min_loaded); 25.523 + /* mark for change destination */ 25.524 + spin_lock_irqsave(&desc->lock, flags); 25.525 + pending_irq_balance_cpumask[selected_irq] = 25.526 + cpumask_of_cpu(min_loaded); 25.527 + spin_unlock_irqrestore(&desc->lock, flags); 25.528 + /* Since we made a change, come back sooner to 25.529 + * check for more variation. 25.530 + */ 25.531 + balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, 25.532 + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); 25.533 + return; 25.534 + } 25.535 + goto tryanotherirq; 25.536 + 25.537 +not_worth_the_effort: 25.538 + /* 25.539 + * if we did not find an IRQ to move, then adjust the time interval 25.540 + * upward 25.541 + */ 25.542 + balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, 25.543 + balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); 25.544 + Dprintk("IRQ worth rotating not found\n"); 25.545 + return; 25.546 +} 25.547 + 25.548 +static int balanced_irq(void *unused) 25.549 +{ 25.550 + int i; 25.551 + unsigned long prev_balance_time = jiffies; 25.552 + long time_remaining = balanced_irq_interval; 25.553 + 25.554 + daemonize("kirqd"); 25.555 + 25.556 + /* push everything to CPU 0 to give us a starting point. */ 25.557 + for (i = 0 ; i < NR_IRQS ; i++) { 25.558 + pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); 25.559 + } 25.560 + 25.561 + for ( ; ; ) { 25.562 + set_current_state(TASK_INTERRUPTIBLE); 25.563 + time_remaining = schedule_timeout(time_remaining); 25.564 + try_to_freeze(PF_FREEZE); 25.565 + if (time_after(jiffies, 25.566 + prev_balance_time+balanced_irq_interval)) { 25.567 + do_irq_balance(); 25.568 + prev_balance_time = jiffies; 25.569 + time_remaining = balanced_irq_interval; 25.570 + } 25.571 + } 25.572 + return 0; 25.573 +} 25.574 + 25.575 +static int __init balanced_irq_init(void) 25.576 +{ 25.577 + int i; 25.578 + struct cpuinfo_x86 *c; 25.579 + cpumask_t tmp; 25.580 + 25.581 + cpus_shift_right(tmp, cpu_online_map, 2); 25.582 + c = &boot_cpu_data; 25.583 + /* When not overwritten by the command line ask subarchitecture. 
*/ 25.584 + if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) 25.585 + irqbalance_disabled = NO_BALANCE_IRQ; 25.586 + if (irqbalance_disabled) 25.587 + return 0; 25.588 + 25.589 + /* disable irqbalance completely if there is only one processor online */ 25.590 + if (num_online_cpus() < 2) { 25.591 + irqbalance_disabled = 1; 25.592 + return 0; 25.593 + } 25.594 + /* 25.595 + * Enable physical balance only if more than 1 physical processor 25.596 + * is present 25.597 + */ 25.598 + if (smp_num_siblings > 1 && !cpus_empty(tmp)) 25.599 + physical_balance = 1; 25.600 + 25.601 + for (i = 0; i < NR_CPUS; i++) { 25.602 + if (!cpu_online(i)) 25.603 + continue; 25.604 + irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); 25.605 + irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); 25.606 + if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { 25.607 + printk(KERN_ERR "balanced_irq_init: out of memory"); 25.608 + goto failed; 25.609 + } 25.610 + memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); 25.611 + memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); 25.612 + } 25.613 + 25.614 + printk(KERN_INFO "Starting balanced_irq\n"); 25.615 + if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 25.616 + return 0; 25.617 + else 25.618 + printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); 25.619 +failed: 25.620 + for (i = 0; i < NR_CPUS; i++) { 25.621 + if(irq_cpu_data[i].irq_delta) 25.622 + kfree(irq_cpu_data[i].irq_delta); 25.623 + if(irq_cpu_data[i].last_irq) 25.624 + kfree(irq_cpu_data[i].last_irq); 25.625 + } 25.626 + return 0; 25.627 +} 25.628 + 25.629 +int __init irqbalance_disable(char *str) 25.630 +{ 25.631 + irqbalance_disabled = 1; 25.632 + return 0; 25.633 +} 25.634 + 25.635 +__setup("noirqbalance", irqbalance_disable); 25.636 + 25.637 +static inline void move_irq(int irq) 25.638 +{ 25.639 + /* note - we hold the desc->lock */ 25.640 + if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { 25.641 + set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); 25.642 + cpus_clear(pending_irq_balance_cpumask[irq]); 25.643 + } 25.644 +} 25.645 + 25.646 +late_initcall(balanced_irq_init); 25.647 + 25.648 +#else /* !CONFIG_IRQBALANCE */ 25.649 +static inline void move_irq(int irq) { } 25.650 +#endif /* CONFIG_IRQBALANCE */ 25.651 + 25.652 +#ifndef CONFIG_SMP 25.653 +void fastcall send_IPI_self(int vector) 25.654 +{ 25.655 +#ifndef CONFIG_XEN 25.656 + unsigned int cfg; 25.657 + 25.658 + /* 25.659 + * Wait for idle. 25.660 + */ 25.661 + apic_wait_icr_idle(); 25.662 + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; 25.663 + /* 25.664 + * Send the IPI. The write to APIC_ICR fires this off. 25.665 + */ 25.666 + apic_write_around(APIC_ICR, cfg); 25.667 +#endif 25.668 +} 25.669 +#endif /* !CONFIG_SMP */ 25.670 + 25.671 /* 25.672 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to 25.673 * specific CPU-side IRQs. 
25.674 @@ -138,6 +734,88 @@ static void add_pin_to_irq(unsigned int 25.675 #define MAX_PIRQS 8 25.676 int pirq_entries [MAX_PIRQS]; 25.677 int pirqs_enabled; 25.678 +int skip_ioapic_setup; 25.679 + 25.680 +static int __init ioapic_setup(char *str) 25.681 +{ 25.682 + skip_ioapic_setup = 1; 25.683 + return 1; 25.684 +} 25.685 + 25.686 +__setup("noapic", ioapic_setup); 25.687 + 25.688 +static int __init ioapic_pirq_setup(char *str) 25.689 +{ 25.690 + int i, max; 25.691 + int ints[MAX_PIRQS+1]; 25.692 + 25.693 + get_options(str, ARRAY_SIZE(ints), ints); 25.694 + 25.695 + for (i = 0; i < MAX_PIRQS; i++) 25.696 + pirq_entries[i] = -1; 25.697 + 25.698 + pirqs_enabled = 1; 25.699 + apic_printk(APIC_VERBOSE, KERN_INFO 25.700 + "PIRQ redirection, working around broken MP-BIOS.\n"); 25.701 + max = MAX_PIRQS; 25.702 + if (ints[0] < MAX_PIRQS) 25.703 + max = ints[0]; 25.704 + 25.705 + for (i = 0; i < max; i++) { 25.706 + apic_printk(APIC_VERBOSE, KERN_DEBUG 25.707 + "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); 25.708 + /* 25.709 + * PIRQs are mapped upside down, usually. 25.710 + */ 25.711 + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; 25.712 + } 25.713 + return 1; 25.714 +} 25.715 + 25.716 +__setup("pirq=", ioapic_pirq_setup); 25.717 + 25.718 +/* 25.719 + * Find the IRQ entry number of a certain pin. 25.720 + */ 25.721 +static int find_irq_entry(int apic, int pin, int type) 25.722 +{ 25.723 + int i; 25.724 + 25.725 + for (i = 0; i < mp_irq_entries; i++) 25.726 + if (mp_irqs[i].mpc_irqtype == type && 25.727 + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || 25.728 + mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && 25.729 + mp_irqs[i].mpc_dstirq == pin) 25.730 + return i; 25.731 + 25.732 + return -1; 25.733 +} 25.734 + 25.735 +#ifndef CONFIG_XEN 25.736 +/* 25.737 + * Find the pin to which IRQ[irq] (ISA) is connected 25.738 + */ 25.739 +static int find_isa_irq_pin(int irq, int type) 25.740 +{ 25.741 + int i; 25.742 + 25.743 + for (i = 0; i < mp_irq_entries; i++) { 25.744 + int lbus = mp_irqs[i].mpc_srcbus; 25.745 + 25.746 + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || 25.747 + mp_bus_id_to_type[lbus] == MP_BUS_EISA || 25.748 + mp_bus_id_to_type[lbus] == MP_BUS_MCA || 25.749 + mp_bus_id_to_type[lbus] == MP_BUS_NEC98 25.750 + ) && 25.751 + (mp_irqs[i].mpc_irqtype == type) && 25.752 + (mp_irqs[i].mpc_srcbusirq == irq)) 25.753 + 25.754 + return mp_irqs[i].mpc_dstirq; 25.755 + } 25.756 + return -1; 25.757 +} 25.758 +#endif 25.759 + 25.760 /* 25.761 * Find a specific PCI IRQ entry. 
25.762 * Not an __init, possibly needed by modules 25.763 @@ -184,6 +862,236 @@ int IO_APIC_get_PCI_irq_vector(int bus, 25.764 return best_guess; 25.765 } 25.766 25.767 +#ifndef CONFIG_XEN 25.768 +/* 25.769 + * This function currently is only a helper for the i386 smp boot process where 25.770 + * we need to reprogram the ioredtbls to cater for the cpus which have come online 25.771 + * so mask in all cases should simply be TARGET_CPUS 25.772 + */ 25.773 +void __init setup_ioapic_dest(void) 25.774 +{ 25.775 + int pin, ioapic, irq, irq_entry; 25.776 + 25.777 + if (skip_ioapic_setup == 1) 25.778 + return; 25.779 + 25.780 + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { 25.781 + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { 25.782 + irq_entry = find_irq_entry(ioapic, pin, mp_INT); 25.783 + if (irq_entry == -1) 25.784 + continue; 25.785 + irq = pin_2_irq(irq_entry, ioapic, pin); 25.786 + set_ioapic_affinity_irq(irq, TARGET_CPUS); 25.787 + } 25.788 + 25.789 + } 25.790 +} 25.791 +#endif /* !CONFIG_XEN */ 25.792 + 25.793 +/* 25.794 + * EISA Edge/Level control register, ELCR 25.795 + */ 25.796 +static int EISA_ELCR(unsigned int irq) 25.797 +{ 25.798 + if (irq < 16) { 25.799 + unsigned int port = 0x4d0 + (irq >> 3); 25.800 + return (inb(port) >> (irq & 7)) & 1; 25.801 + } 25.802 + apic_printk(APIC_VERBOSE, KERN_INFO 25.803 + "Broken MPtable reports ISA irq %d\n", irq); 25.804 + return 0; 25.805 +} 25.806 + 25.807 +/* EISA interrupts are always polarity zero and can be edge or level 25.808 + * trigger depending on the ELCR value. If an interrupt is listed as 25.809 + * EISA conforming in the MP table, that means its trigger type must 25.810 + * be read in from the ELCR */ 25.811 + 25.812 +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) 25.813 +#define default_EISA_polarity(idx) (0) 25.814 + 25.815 +/* ISA interrupts are always polarity zero edge triggered, 25.816 + * when listed as conforming in the MP table. */ 25.817 + 25.818 +#define default_ISA_trigger(idx) (0) 25.819 +#define default_ISA_polarity(idx) (0) 25.820 + 25.821 +/* PCI interrupts are always polarity one level triggered, 25.822 + * when listed as conforming in the MP table. */ 25.823 + 25.824 +#define default_PCI_trigger(idx) (1) 25.825 +#define default_PCI_polarity(idx) (1) 25.826 + 25.827 +/* MCA interrupts are always polarity zero level triggered, 25.828 + * when listed as conforming in the MP table. */ 25.829 + 25.830 +#define default_MCA_trigger(idx) (1) 25.831 +#define default_MCA_polarity(idx) (0) 25.832 + 25.833 +/* NEC98 interrupts are always polarity zero edge triggered, 25.834 + * when listed as conforming in the MP table. */ 25.835 + 25.836 +#define default_NEC98_trigger(idx) (0) 25.837 +#define default_NEC98_polarity(idx) (0) 25.838 + 25.839 +static int __init MPBIOS_polarity(int idx) 25.840 +{ 25.841 + int bus = mp_irqs[idx].mpc_srcbus; 25.842 + int polarity; 25.843 + 25.844 + /* 25.845 + * Determine IRQ line polarity (high active or low active): 25.846 + */ 25.847 + switch (mp_irqs[idx].mpc_irqflag & 3) 25.848 + { 25.849 + case 0: /* conforms, ie. 
bus-type dependent polarity */ 25.850 + { 25.851 + switch (mp_bus_id_to_type[bus]) 25.852 + { 25.853 + case MP_BUS_ISA: /* ISA pin */ 25.854 + { 25.855 + polarity = default_ISA_polarity(idx); 25.856 + break; 25.857 + } 25.858 + case MP_BUS_EISA: /* EISA pin */ 25.859 + { 25.860 + polarity = default_EISA_polarity(idx); 25.861 + break; 25.862 + } 25.863 + case MP_BUS_PCI: /* PCI pin */ 25.864 + { 25.865 + polarity = default_PCI_polarity(idx); 25.866 + break; 25.867 + } 25.868 + case MP_BUS_MCA: /* MCA pin */ 25.869 + { 25.870 + polarity = default_MCA_polarity(idx); 25.871 + break; 25.872 + } 25.873 + case MP_BUS_NEC98: /* NEC 98 pin */ 25.874 + { 25.875 + polarity = default_NEC98_polarity(idx); 25.876 + break; 25.877 + } 25.878 + default: 25.879 + { 25.880 + printk(KERN_WARNING "broken BIOS!!\n"); 25.881 + polarity = 1; 25.882 + break; 25.883 + } 25.884 + } 25.885 + break; 25.886 + } 25.887 + case 1: /* high active */ 25.888 + { 25.889 + polarity = 0; 25.890 + break; 25.891 + } 25.892 + case 2: /* reserved */ 25.893 + { 25.894 + printk(KERN_WARNING "broken BIOS!!\n"); 25.895 + polarity = 1; 25.896 + break; 25.897 + } 25.898 + case 3: /* low active */ 25.899 + { 25.900 + polarity = 1; 25.901 + break; 25.902 + } 25.903 + default: /* invalid */ 25.904 + { 25.905 + printk(KERN_WARNING "broken BIOS!!\n"); 25.906 + polarity = 1; 25.907 + break; 25.908 + } 25.909 + } 25.910 + return polarity; 25.911 +} 25.912 + 25.913 +static int MPBIOS_trigger(int idx) 25.914 +{ 25.915 + int bus = mp_irqs[idx].mpc_srcbus; 25.916 + int trigger; 25.917 + 25.918 + /* 25.919 + * Determine IRQ trigger mode (edge or level sensitive): 25.920 + */ 25.921 + switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) 25.922 + { 25.923 + case 0: /* conforms, ie. bus-type dependent */ 25.924 + { 25.925 + switch (mp_bus_id_to_type[bus]) 25.926 + { 25.927 + case MP_BUS_ISA: /* ISA pin */ 25.928 + { 25.929 + trigger = default_ISA_trigger(idx); 25.930 + break; 25.931 + } 25.932 + case MP_BUS_EISA: /* EISA pin */ 25.933 + { 25.934 + trigger = default_EISA_trigger(idx); 25.935 + break; 25.936 + } 25.937 + case MP_BUS_PCI: /* PCI pin */ 25.938 + { 25.939 + trigger = default_PCI_trigger(idx); 25.940 + break; 25.941 + } 25.942 + case MP_BUS_MCA: /* MCA pin */ 25.943 + { 25.944 + trigger = default_MCA_trigger(idx); 25.945 + break; 25.946 + } 25.947 + case MP_BUS_NEC98: /* NEC 98 pin */ 25.948 + { 25.949 + trigger = default_NEC98_trigger(idx); 25.950 + break; 25.951 + } 25.952 + default: 25.953 + { 25.954 + printk(KERN_WARNING "broken BIOS!!\n"); 25.955 + trigger = 1; 25.956 + break; 25.957 + } 25.958 + } 25.959 + break; 25.960 + } 25.961 + case 1: /* edge */ 25.962 + { 25.963 + trigger = 0; 25.964 + break; 25.965 + } 25.966 + case 2: /* reserved */ 25.967 + { 25.968 + printk(KERN_WARNING "broken BIOS!!\n"); 25.969 + trigger = 1; 25.970 + break; 25.971 + } 25.972 + case 3: /* level */ 25.973 + { 25.974 + trigger = 1; 25.975 + break; 25.976 + } 25.977 + default: /* invalid */ 25.978 + { 25.979 + printk(KERN_WARNING "broken BIOS!!\n"); 25.980 + trigger = 0; 25.981 + break; 25.982 + } 25.983 + } 25.984 + return trigger; 25.985 +} 25.986 + 25.987 +static inline int irq_polarity(int idx) 25.988 +{ 25.989 + return MPBIOS_polarity(idx); 25.990 +} 25.991 + 25.992 +static inline int irq_trigger(int idx) 25.993 +{ 25.994 + return MPBIOS_trigger(idx); 25.995 +} 25.996 + 25.997 static int pin_2_irq(int idx, int apic, int pin) 25.998 { 25.999 int irq, i; 25.1000 @@ -250,26 +1158,41 @@ static int pin_2_irq(int idx, int apic, 25.1001 return irq; 25.1002 } 25.1003 
25.1004 +static inline int IO_APIC_irq_trigger(int irq) 25.1005 +{ 25.1006 + int apic, idx, pin; 25.1007 + 25.1008 + for (apic = 0; apic < nr_ioapics; apic++) { 25.1009 + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 25.1010 + idx = find_irq_entry(apic,pin,mp_INT); 25.1011 + if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) 25.1012 + return irq_trigger(idx); 25.1013 + } 25.1014 + } 25.1015 + /* 25.1016 + * nonexistent IRQs are edge default 25.1017 + */ 25.1018 + return 0; 25.1019 +} 25.1020 + 25.1021 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ 25.1022 -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; 25.1023 +u8 irq_vector[NR_IRQ_VECTORS]; /* = { FIRST_DEVICE_VECTOR , 0 }; */ 25.1024 25.1025 int assign_irq_vector(int irq) 25.1026 { 25.1027 static int current_vector = FIRST_DEVICE_VECTOR; 25.1028 - physdev_op_t op; 25.1029 - int ret; 25.1030 + physdev_op_t op; 25.1031 25.1032 BUG_ON(irq >= NR_IRQ_VECTORS); 25.1033 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 25.1034 return IO_APIC_VECTOR(irq); 25.1035 25.1036 - op.cmd = PHYSDEVOP_ASSIGN_VECTOR; 25.1037 - op.u.irq_op.irq = irq; 25.1038 - ret = HYPERVISOR_physdev_op(&op); 25.1039 - if (ret) 25.1040 - return -ENOSPC; 25.1041 + op.cmd = PHYSDEVOP_ASSIGN_VECTOR; 25.1042 + op.u.irq_op.irq = irq; 25.1043 + if (HYPERVISOR_physdev_op(&op)) 25.1044 + return -ENOSPC; 25.1045 + current_vector = op.u.irq_op.vector; 25.1046 25.1047 - current_vector = op.u.irq_op.vector; 25.1048 vector_irq[current_vector] = irq; 25.1049 if (irq != AUTO_ASSIGN) 25.1050 IO_APIC_VECTOR(irq) = current_vector; 25.1051 @@ -277,12 +1200,1268 @@ int assign_irq_vector(int irq) 25.1052 return current_vector; 25.1053 } 25.1054 25.1055 +#ifndef CONFIG_XEN 25.1056 +static struct hw_interrupt_type ioapic_level_type; 25.1057 +static struct hw_interrupt_type ioapic_edge_type; 25.1058 + 25.1059 +#define IOAPIC_AUTO -1 25.1060 +#define IOAPIC_EDGE 0 25.1061 +#define IOAPIC_LEVEL 1 25.1062 + 25.1063 +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) 25.1064 +{ 25.1065 + if (use_pci_vector() && !platform_legacy_irq(irq)) { 25.1066 + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 25.1067 + trigger == IOAPIC_LEVEL) 25.1068 + irq_desc[vector].handler = &ioapic_level_type; 25.1069 + else 25.1070 + irq_desc[vector].handler = &ioapic_edge_type; 25.1071 + set_intr_gate(vector, interrupt[vector]); 25.1072 + } else { 25.1073 + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 25.1074 + trigger == IOAPIC_LEVEL) 25.1075 + irq_desc[irq].handler = &ioapic_level_type; 25.1076 + else 25.1077 + irq_desc[irq].handler = &ioapic_edge_type; 25.1078 + set_intr_gate(vector, interrupt[irq]); 25.1079 + } 25.1080 +} 25.1081 +#else 25.1082 +#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) 25.1083 +#endif 25.1084 + 25.1085 +void __init setup_IO_APIC_irqs(void) 25.1086 +{ 25.1087 + struct IO_APIC_route_entry entry; 25.1088 + int apic, pin, idx, irq, first_notcon = 1, vector; 25.1089 + unsigned long flags; 25.1090 + 25.1091 + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 25.1092 + 25.1093 + for (apic = 0; apic < nr_ioapics; apic++) { 25.1094 + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 25.1095 + 25.1096 + /* 25.1097 + * add it to the IO-APIC irq-routing table: 25.1098 + */ 25.1099 + memset(&entry,0,sizeof(entry)); 25.1100 + 25.1101 + entry.delivery_mode = INT_DELIVERY_MODE; 25.1102 + entry.dest_mode = INT_DEST_MODE; 25.1103 + entry.mask = 0; /* enable IRQ */ 25.1104 + 
entry.dest.logical.logical_dest = 25.1105 + cpu_mask_to_apicid(TARGET_CPUS); 25.1106 + 25.1107 + idx = find_irq_entry(apic,pin,mp_INT); 25.1108 + if (idx == -1) { 25.1109 + if (first_notcon) { 25.1110 + apic_printk(APIC_VERBOSE, KERN_DEBUG 25.1111 + " IO-APIC (apicid-pin) %d-%d", 25.1112 + mp_ioapics[apic].mpc_apicid, 25.1113 + pin); 25.1114 + first_notcon = 0; 25.1115 + } else 25.1116 + apic_printk(APIC_VERBOSE, ", %d-%d", 25.1117 + mp_ioapics[apic].mpc_apicid, pin); 25.1118 + continue; 25.1119 + } 25.1120 + 25.1121 + entry.trigger = irq_trigger(idx); 25.1122 + entry.polarity = irq_polarity(idx); 25.1123 + 25.1124 + if (irq_trigger(idx)) { 25.1125 + entry.trigger = 1; 25.1126 + entry.mask = 1; 25.1127 + } 25.1128 + 25.1129 + irq = pin_2_irq(idx, apic, pin); 25.1130 + /* 25.1131 + * skip adding the timer int on secondary nodes, which causes 25.1132 + * a small but painful rift in the time-space continuum 25.1133 + */ 25.1134 + if (multi_timer_check(apic, irq)) 25.1135 + continue; 25.1136 + else 25.1137 + add_pin_to_irq(irq, apic, pin); 25.1138 + 25.1139 + if (/*!apic &&*/ !IO_APIC_IRQ(irq)) 25.1140 + continue; 25.1141 + 25.1142 + if (IO_APIC_IRQ(irq)) { 25.1143 + vector = assign_irq_vector(irq); 25.1144 + entry.vector = vector; 25.1145 + ioapic_register_intr(irq, vector, IOAPIC_AUTO); 25.1146 + 25.1147 + if (!apic && (irq < 16)) 25.1148 + disable_8259A_irq(irq); 25.1149 + } 25.1150 + spin_lock_irqsave(&ioapic_lock, flags); 25.1151 + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); 25.1152 + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); 25.1153 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1154 + } 25.1155 + } 25.1156 + 25.1157 + if (!first_notcon) 25.1158 + apic_printk(APIC_VERBOSE, " not connected.\n"); 25.1159 +} 25.1160 + 25.1161 +/* 25.1162 + * Set up the 8259A-master output pin: 25.1163 + */ 25.1164 +#ifndef CONFIG_XEN 25.1165 +void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) 25.1166 +{ 25.1167 + struct IO_APIC_route_entry entry; 25.1168 + unsigned long flags; 25.1169 + 25.1170 + memset(&entry,0,sizeof(entry)); 25.1171 + 25.1172 + disable_8259A_irq(0); 25.1173 + 25.1174 + /* mask LVT0 */ 25.1175 + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 25.1176 + 25.1177 + /* 25.1178 + * We use logical delivery to get the timer IRQ 25.1179 + * to the first CPU. 25.1180 + */ 25.1181 + entry.dest_mode = INT_DEST_MODE; 25.1182 + entry.mask = 0; /* unmask IRQ now */ 25.1183 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); 25.1184 + entry.delivery_mode = INT_DELIVERY_MODE; 25.1185 + entry.polarity = 0; 25.1186 + entry.trigger = 0; 25.1187 + entry.vector = vector; 25.1188 + 25.1189 + /* 25.1190 + * The timer IRQ doesn't have to know that behind the 25.1191 + * scene we have a 8259A-master in AEOI mode ... 
25.1192 + */ 25.1193 + irq_desc[0].handler = &ioapic_edge_type; 25.1194 + 25.1195 + /* 25.1196 + * Add it to the IO-APIC irq-routing table: 25.1197 + */ 25.1198 + spin_lock_irqsave(&ioapic_lock, flags); 25.1199 + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); 25.1200 + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); 25.1201 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1202 + 25.1203 + enable_8259A_irq(0); 25.1204 +} 25.1205 + 25.1206 +static inline void UNEXPECTED_IO_APIC(void) 25.1207 +{ 25.1208 +} 25.1209 + 25.1210 +void __init print_IO_APIC(void) 25.1211 +{ 25.1212 + int apic, i; 25.1213 + union IO_APIC_reg_00 reg_00; 25.1214 + union IO_APIC_reg_01 reg_01; 25.1215 + union IO_APIC_reg_02 reg_02; 25.1216 + union IO_APIC_reg_03 reg_03; 25.1217 + unsigned long flags; 25.1218 + 25.1219 + if (apic_verbosity == APIC_QUIET) 25.1220 + return; 25.1221 + 25.1222 + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 25.1223 + for (i = 0; i < nr_ioapics; i++) 25.1224 + printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 25.1225 + mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); 25.1226 + 25.1227 + /* 25.1228 + * We are a bit conservative about what we expect. We have to 25.1229 + * know about every hardware change ASAP. 25.1230 + */ 25.1231 + printk(KERN_INFO "testing the IO APIC.......................\n"); 25.1232 + 25.1233 + for (apic = 0; apic < nr_ioapics; apic++) { 25.1234 + 25.1235 + spin_lock_irqsave(&ioapic_lock, flags); 25.1236 + reg_00.raw = io_apic_read(apic, 0); 25.1237 + reg_01.raw = io_apic_read(apic, 1); 25.1238 + if (reg_01.bits.version >= 0x10) 25.1239 + reg_02.raw = io_apic_read(apic, 2); 25.1240 + if (reg_01.bits.version >= 0x20) 25.1241 + reg_03.raw = io_apic_read(apic, 3); 25.1242 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1243 + 25.1244 + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 25.1245 + printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 25.1246 + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 25.1247 + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 25.1248 + printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); 25.1249 + if (reg_00.bits.ID >= get_physical_broadcast()) 25.1250 + UNEXPECTED_IO_APIC(); 25.1251 + if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) 25.1252 + UNEXPECTED_IO_APIC(); 25.1253 + 25.1254 + printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); 25.1255 + printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 25.1256 + if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ 25.1257 + (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ 25.1258 + (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ 25.1259 + (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ 25.1260 + (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ 25.1261 + (reg_01.bits.entries != 0x2E) && 25.1262 + (reg_01.bits.entries != 0x3F) 25.1263 + ) 25.1264 + UNEXPECTED_IO_APIC(); 25.1265 + 25.1266 + printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 25.1267 + printk(KERN_DEBUG "....... 
: IO APIC version: %04X\n", reg_01.bits.version); 25.1268 + if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ 25.1269 + (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ 25.1270 + (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ 25.1271 + (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ 25.1272 + (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ 25.1273 + ) 25.1274 + UNEXPECTED_IO_APIC(); 25.1275 + if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) 25.1276 + UNEXPECTED_IO_APIC(); 25.1277 + 25.1278 + /* 25.1279 + * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, 25.1280 + * but the value of reg_02 is read as the previous read register 25.1281 + * value, so ignore it if reg_02 == reg_01. 25.1282 + */ 25.1283 + if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { 25.1284 + printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 25.1285 + printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 25.1286 + if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) 25.1287 + UNEXPECTED_IO_APIC(); 25.1288 + } 25.1289 + 25.1290 + /* 25.1291 + * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 25.1292 + * or reg_03, but the value of reg_0[23] is read as the previous read 25.1293 + * register value, so ignore it if reg_03 == reg_0[12]. 25.1294 + */ 25.1295 + if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && 25.1296 + reg_03.raw != reg_01.raw) { 25.1297 + printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); 25.1298 + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); 25.1299 + if (reg_03.bits.__reserved_1) 25.1300 + UNEXPECTED_IO_APIC(); 25.1301 + } 25.1302 + 25.1303 + printk(KERN_DEBUG ".... IRQ redirection table:\n"); 25.1304 + 25.1305 + printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" 25.1306 + " Stat Dest Deli Vect: \n"); 25.1307 + 25.1308 + for (i = 0; i <= reg_01.bits.entries; i++) { 25.1309 + struct IO_APIC_route_entry entry; 25.1310 + 25.1311 + spin_lock_irqsave(&ioapic_lock, flags); 25.1312 + *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); 25.1313 + *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); 25.1314 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1315 + 25.1316 + printk(KERN_DEBUG " %02x %03X %02X ", 25.1317 + i, 25.1318 + entry.dest.logical.logical_dest, 25.1319 + entry.dest.physical.physical_dest 25.1320 + ); 25.1321 + 25.1322 + printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", 25.1323 + entry.mask, 25.1324 + entry.trigger, 25.1325 + entry.irr, 25.1326 + entry.polarity, 25.1327 + entry.delivery_status, 25.1328 + entry.dest_mode, 25.1329 + entry.delivery_mode, 25.1330 + entry.vector 25.1331 + ); 25.1332 + } 25.1333 + } 25.1334 + if (use_pci_vector()) 25.1335 + printk(KERN_INFO "Using vector-based indexing\n"); 25.1336 + printk(KERN_DEBUG "IRQ to pin mappings:\n"); 25.1337 + for (i = 0; i < NR_IRQS; i++) { 25.1338 + struct irq_pin_list *entry = irq_2_pin + i; 25.1339 + if (entry->pin < 0) 25.1340 + continue; 25.1341 + if (use_pci_vector() && !platform_legacy_irq(i)) 25.1342 + printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); 25.1343 + else 25.1344 + printk(KERN_DEBUG "IRQ%d ", i); 25.1345 + for (;;) { 25.1346 + printk("-> %d:%d", entry->apic, entry->pin); 25.1347 + if (!entry->next) 25.1348 + break; 25.1349 + entry = irq_2_pin + entry->next; 25.1350 + } 25.1351 + printk("\n"); 25.1352 + } 25.1353 + 25.1354 + printk(KERN_INFO ".................................... 
done.\n"); 25.1355 + 25.1356 + return; 25.1357 +} 25.1358 + 25.1359 +static void print_APIC_bitfield (int base) 25.1360 +{ 25.1361 + unsigned int v; 25.1362 + int i, j; 25.1363 + 25.1364 + if (apic_verbosity == APIC_QUIET) 25.1365 + return; 25.1366 + 25.1367 + printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); 25.1368 + for (i = 0; i < 8; i++) { 25.1369 + v = apic_read(base + i*0x10); 25.1370 + for (j = 0; j < 32; j++) { 25.1371 + if (v & (1<<j)) 25.1372 + printk("1"); 25.1373 + else 25.1374 + printk("0"); 25.1375 + } 25.1376 + printk("\n"); 25.1377 + } 25.1378 +} 25.1379 + 25.1380 +void /*__init*/ print_local_APIC(void * dummy) 25.1381 +{ 25.1382 + unsigned int v, ver, maxlvt; 25.1383 + 25.1384 + if (apic_verbosity == APIC_QUIET) 25.1385 + return; 25.1386 + 25.1387 + printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 25.1388 + smp_processor_id(), hard_smp_processor_id()); 25.1389 + v = apic_read(APIC_ID); 25.1390 + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v)); 25.1391 + v = apic_read(APIC_LVR); 25.1392 + printk(KERN_INFO "... APIC VERSION: %08x\n", v); 25.1393 + ver = GET_APIC_VERSION(v); 25.1394 + maxlvt = get_maxlvt(); 25.1395 + 25.1396 + v = apic_read(APIC_TASKPRI); 25.1397 + printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); 25.1398 + 25.1399 + if (APIC_INTEGRATED(ver)) { /* !82489DX */ 25.1400 + v = apic_read(APIC_ARBPRI); 25.1401 + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, 25.1402 + v & APIC_ARBPRI_MASK); 25.1403 + v = apic_read(APIC_PROCPRI); 25.1404 + printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); 25.1405 + } 25.1406 + 25.1407 + v = apic_read(APIC_EOI); 25.1408 + printk(KERN_DEBUG "... APIC EOI: %08x\n", v); 25.1409 + v = apic_read(APIC_RRR); 25.1410 + printk(KERN_DEBUG "... APIC RRR: %08x\n", v); 25.1411 + v = apic_read(APIC_LDR); 25.1412 + printk(KERN_DEBUG "... APIC LDR: %08x\n", v); 25.1413 + v = apic_read(APIC_DFR); 25.1414 + printk(KERN_DEBUG "... APIC DFR: %08x\n", v); 25.1415 + v = apic_read(APIC_SPIV); 25.1416 + printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); 25.1417 + 25.1418 + printk(KERN_DEBUG "... APIC ISR field:\n"); 25.1419 + print_APIC_bitfield(APIC_ISR); 25.1420 + printk(KERN_DEBUG "... APIC TMR field:\n"); 25.1421 + print_APIC_bitfield(APIC_TMR); 25.1422 + printk(KERN_DEBUG "... APIC IRR field:\n"); 25.1423 + print_APIC_bitfield(APIC_IRR); 25.1424 + 25.1425 + if (APIC_INTEGRATED(ver)) { /* !82489DX */ 25.1426 + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 25.1427 + apic_write(APIC_ESR, 0); 25.1428 + v = apic_read(APIC_ESR); 25.1429 + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 25.1430 + } 25.1431 + 25.1432 + v = apic_read(APIC_ICR); 25.1433 + printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 25.1434 + v = apic_read(APIC_ICR2); 25.1435 + printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); 25.1436 + 25.1437 + v = apic_read(APIC_LVTT); 25.1438 + printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 25.1439 + 25.1440 + if (maxlvt > 3) { /* PC is LVT#4. */ 25.1441 + v = apic_read(APIC_LVTPC); 25.1442 + printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); 25.1443 + } 25.1444 + v = apic_read(APIC_LVT0); 25.1445 + printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); 25.1446 + v = apic_read(APIC_LVT1); 25.1447 + printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); 25.1448 + 25.1449 + if (maxlvt > 2) { /* ERR is LVT#3. */ 25.1450 + v = apic_read(APIC_LVTERR); 25.1451 + printk(KERN_DEBUG "... 
APIC LVTERR: %08x\n", v); 25.1452 + } 25.1453 + 25.1454 + v = apic_read(APIC_TMICT); 25.1455 + printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); 25.1456 + v = apic_read(APIC_TMCCT); 25.1457 + printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); 25.1458 + v = apic_read(APIC_TDCR); 25.1459 + printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); 25.1460 + printk("\n"); 25.1461 +} 25.1462 + 25.1463 +void print_all_local_APICs (void) 25.1464 +{ 25.1465 + on_each_cpu(print_local_APIC, NULL, 1, 1); 25.1466 +} 25.1467 + 25.1468 +void /*__init*/ print_PIC(void) 25.1469 +{ 25.1470 + extern spinlock_t i8259A_lock; 25.1471 + unsigned int v; 25.1472 + unsigned long flags; 25.1473 + 25.1474 + if (apic_verbosity == APIC_QUIET) 25.1475 + return; 25.1476 + 25.1477 + printk(KERN_DEBUG "\nprinting PIC contents\n"); 25.1478 + 25.1479 + spin_lock_irqsave(&i8259A_lock, flags); 25.1480 + 25.1481 + v = inb(0xa1) << 8 | inb(0x21); 25.1482 + printk(KERN_DEBUG "... PIC IMR: %04x\n", v); 25.1483 + 25.1484 + v = inb(0xa0) << 8 | inb(0x20); 25.1485 + printk(KERN_DEBUG "... PIC IRR: %04x\n", v); 25.1486 + 25.1487 + outb(0x0b,0xa0); 25.1488 + outb(0x0b,0x20); 25.1489 + v = inb(0xa0) << 8 | inb(0x20); 25.1490 + outb(0x0a,0xa0); 25.1491 + outb(0x0a,0x20); 25.1492 + 25.1493 + spin_unlock_irqrestore(&i8259A_lock, flags); 25.1494 + 25.1495 + printk(KERN_DEBUG "... PIC ISR: %04x\n", v); 25.1496 + 25.1497 + v = inb(0x4d1) << 8 | inb(0x4d0); 25.1498 + printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 25.1499 +} 25.1500 +#else 25.1501 +void __init print_IO_APIC(void) { } 25.1502 +#endif /* !CONFIG_XEN */ 25.1503 + 25.1504 +static void __init enable_IO_APIC(void) 25.1505 +{ 25.1506 + union IO_APIC_reg_01 reg_01; 25.1507 + int i; 25.1508 + unsigned long flags; 25.1509 + 25.1510 + for (i = 0; i < PIN_MAP_SIZE; i++) { 25.1511 + irq_2_pin[i].pin = -1; 25.1512 + irq_2_pin[i].next = 0; 25.1513 + } 25.1514 + if (!pirqs_enabled) 25.1515 + for (i = 0; i < MAX_PIRQS; i++) 25.1516 + pirq_entries[i] = -1; 25.1517 + 25.1518 + /* 25.1519 + * The number of IO-APIC IRQ registers (== #pins): 25.1520 + */ 25.1521 + for (i = 0; i < nr_ioapics; i++) { 25.1522 + spin_lock_irqsave(&ioapic_lock, flags); 25.1523 + reg_01.raw = io_apic_read(i, 1); 25.1524 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1525 + nr_ioapic_registers[i] = reg_01.bits.entries+1; 25.1526 + } 25.1527 + 25.1528 + /* 25.1529 + * Do not trust the IO-APIC being empty at bootup 25.1530 + */ 25.1531 + clear_IO_APIC(); 25.1532 +} 25.1533 + 25.1534 +/* 25.1535 + * Not an __init, needed by the reboot code 25.1536 + */ 25.1537 +void disable_IO_APIC(void) 25.1538 +{ 25.1539 + /* 25.1540 + * Clear the IO-APIC before rebooting: 25.1541 + */ 25.1542 + clear_IO_APIC(); 25.1543 + 25.1544 +#ifndef CONFIG_XEN 25.1545 + disconnect_bsp_APIC(); 25.1546 +#endif 25.1547 +} 25.1548 + 25.1549 +/* 25.1550 + * function to set the IO-APIC physical IDs based on the 25.1551 + * values stored in the MPC table. 25.1552 + * 25.1553 + * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 25.1554 + */ 25.1555 + 25.1556 +#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ) 25.1557 +static void __init setup_ioapic_ids_from_mpc(void) 25.1558 +{ 25.1559 + union IO_APIC_reg_00 reg_00; 25.1560 + physid_mask_t phys_id_present_map; 25.1561 + int apic; 25.1562 + int i; 25.1563 + unsigned char old_id; 25.1564 + unsigned long flags; 25.1565 + 25.1566 + /* 25.1567 + * This is broken; anything with a real cpu count has to 25.1568 + * circumvent this idiocy regardless. 
25.1569 + */ 25.1570 + phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); 25.1571 + 25.1572 + /* 25.1573 + * Set the IOAPIC ID to the value stored in the MPC table. 25.1574 + */ 25.1575 + for (apic = 0; apic < nr_ioapics; apic++) { 25.1576 + 25.1577 + /* Read the register 0 value */ 25.1578 + spin_lock_irqsave(&ioapic_lock, flags); 25.1579 + reg_00.raw = io_apic_read(apic, 0); 25.1580 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1581 + 25.1582 + old_id = mp_ioapics[apic].mpc_apicid; 25.1583 + 25.1584 + if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { 25.1585 + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 25.1586 + apic, mp_ioapics[apic].mpc_apicid); 25.1587 + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 25.1588 + reg_00.bits.ID); 25.1589 + mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; 25.1590 + } 25.1591 + 25.1592 + /* Don't check I/O APIC IDs for some xAPIC systems. They have 25.1593 + * no meaning without the serial APIC bus. */ 25.1594 + if (NO_IOAPIC_CHECK) 25.1595 + continue; 25.1596 + /* 25.1597 + * Sanity check, is the ID really free? Every APIC in a 25.1598 + * system must have a unique ID or we get lots of nice 25.1599 + * 'stuck on smp_invalidate_needed IPI wait' messages. 25.1600 + */ 25.1601 + if (check_apicid_used(phys_id_present_map, 25.1602 + mp_ioapics[apic].mpc_apicid)) { 25.1603 + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 25.1604 + apic, mp_ioapics[apic].mpc_apicid); 25.1605 + for (i = 0; i < get_physical_broadcast(); i++) 25.1606 + if (!physid_isset(i, phys_id_present_map)) 25.1607 + break; 25.1608 + if (i >= get_physical_broadcast()) 25.1609 + panic("Max APIC ID exceeded!\n"); 25.1610 + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 25.1611 + i); 25.1612 + physid_set(i, phys_id_present_map); 25.1613 + mp_ioapics[apic].mpc_apicid = i; 25.1614 + } else { 25.1615 + physid_mask_t tmp; 25.1616 + tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); 25.1617 + apic_printk(APIC_VERBOSE, "Setting %d in the " 25.1618 + "phys_id_present_map\n", 25.1619 + mp_ioapics[apic].mpc_apicid); 25.1620 + physids_or(phys_id_present_map, phys_id_present_map, tmp); 25.1621 + } 25.1622 + 25.1623 + 25.1624 + /* 25.1625 + * We need to adjust the IRQ routing table 25.1626 + * if the ID changed. 25.1627 + */ 25.1628 + if (old_id != mp_ioapics[apic].mpc_apicid) 25.1629 + for (i = 0; i < mp_irq_entries; i++) 25.1630 + if (mp_irqs[i].mpc_dstapic == old_id) 25.1631 + mp_irqs[i].mpc_dstapic 25.1632 + = mp_ioapics[apic].mpc_apicid; 25.1633 + 25.1634 + /* 25.1635 + * Read the right value from the MPC table and 25.1636 + * write it into the ID register. 
25.1637 + */ 25.1638 + apic_printk(APIC_VERBOSE, KERN_INFO 25.1639 + "...changing IO-APIC physical APIC ID to %d ...", 25.1640 + mp_ioapics[apic].mpc_apicid); 25.1641 + 25.1642 + reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; 25.1643 + spin_lock_irqsave(&ioapic_lock, flags); 25.1644 + io_apic_write(apic, 0, reg_00.raw); 25.1645 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1646 + 25.1647 + /* 25.1648 + * Sanity check 25.1649 + */ 25.1650 + spin_lock_irqsave(&ioapic_lock, flags); 25.1651 + reg_00.raw = io_apic_read(apic, 0); 25.1652 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1653 + if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) 25.1654 + printk("could not set ID!\n"); 25.1655 + else 25.1656 + apic_printk(APIC_VERBOSE, " ok.\n"); 25.1657 + } 25.1658 +} 25.1659 +#else 25.1660 +static void __init setup_ioapic_ids_from_mpc(void) { } 25.1661 +#endif 25.1662 + 25.1663 +#ifndef CONFIG_XEN 25.1664 +/* 25.1665 + * There is a nasty bug in some older SMP boards, their mptable lies 25.1666 + * about the timer IRQ. We do the following to work around the situation: 25.1667 + * 25.1668 + * - timer IRQ defaults to IO-APIC IRQ 25.1669 + * - if this function detects that timer IRQs are defunct, then we fall 25.1670 + * back to ISA timer IRQs 25.1671 + */ 25.1672 +static int __init timer_irq_works(void) 25.1673 +{ 25.1674 + unsigned long t1 = jiffies; 25.1675 + 25.1676 + local_irq_enable(); 25.1677 + /* Let ten ticks pass... */ 25.1678 + mdelay((10 * 1000) / HZ); 25.1679 + 25.1680 + /* 25.1681 + * Expect a few ticks at least, to be sure some possible 25.1682 + * glue logic does not lock up after one or two first 25.1683 + * ticks in a non-ExtINT mode. Also the local APIC 25.1684 + * might have cached one ExtINT interrupt. Finally, at 25.1685 + * least one tick may be lost due to delays. 25.1686 + */ 25.1687 + if (jiffies - t1 > 4) 25.1688 + return 1; 25.1689 + 25.1690 + return 0; 25.1691 +} 25.1692 + 25.1693 +/* 25.1694 + * In the SMP+IOAPIC case it might happen that there are an unspecified 25.1695 + * number of pending IRQ events unhandled. These cases are very rare, 25.1696 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much 25.1697 + * better to do it this way as thus we do not have to be aware of 25.1698 + * 'pending' interrupts in the IRQ path, except at this point. 25.1699 + */ 25.1700 +/* 25.1701 + * Edge triggered needs to resend any interrupt 25.1702 + * that was delayed but this is now handled in the device 25.1703 + * independent code. 25.1704 + */ 25.1705 + 25.1706 +/* 25.1707 + * Starting up a edge-triggered IO-APIC interrupt is 25.1708 + * nasty - we need to make sure that we get the edge. 25.1709 + * If it is already asserted for some reason, we need 25.1710 + * return 1 to indicate that is was pending. 25.1711 + * 25.1712 + * This is not complete - we should be able to fake 25.1713 + * an edge even if it isn't on the 8259A... 25.1714 + */ 25.1715 +static unsigned int startup_edge_ioapic_irq(unsigned int irq) 25.1716 +{ 25.1717 + int was_pending = 0; 25.1718 + unsigned long flags; 25.1719 + 25.1720 + spin_lock_irqsave(&ioapic_lock, flags); 25.1721 + if (irq < 16) { 25.1722 + disable_8259A_irq(irq); 25.1723 + if (i8259A_irq_pending(irq)) 25.1724 + was_pending = 1; 25.1725 + } 25.1726 + __unmask_IO_APIC_irq(irq); 25.1727 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.1728 + 25.1729 + return was_pending; 25.1730 +} 25.1731 + 25.1732 +/* 25.1733 + * Once we have recorded IRQ_PENDING already, we can mask the 25.1734 + * interrupt for real. 
This prevents IRQ storms from unhandled 25.1735 + * devices. 25.1736 + */ 25.1737 +static void ack_edge_ioapic_irq(unsigned int irq) 25.1738 +{ 25.1739 + move_irq(irq); 25.1740 + if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) 25.1741 + == (IRQ_PENDING | IRQ_DISABLED)) 25.1742 + mask_IO_APIC_irq(irq); 25.1743 + ack_APIC_irq(); 25.1744 +} 25.1745 + 25.1746 +/* 25.1747 + * Level triggered interrupts can just be masked, 25.1748 + * and shutting down and starting up the interrupt 25.1749 + * is the same as enabling and disabling them -- except 25.1750 + * with a startup need to return a "was pending" value. 25.1751 + * 25.1752 + * Level triggered interrupts are special because we 25.1753 + * do not touch any IO-APIC register while handling 25.1754 + * them. We ack the APIC in the end-IRQ handler, not 25.1755 + * in the start-IRQ-handler. Protection against reentrance 25.1756 + * from the same interrupt is still provided, both by the 25.1757 + * generic IRQ layer and by the fact that an unacked local 25.1758 + * APIC does not accept IRQs. 25.1759 + */ 25.1760 +static unsigned int startup_level_ioapic_irq (unsigned int irq) 25.1761 +{ 25.1762 + unmask_IO_APIC_irq(irq); 25.1763 + 25.1764 + return 0; /* don't check for pending */ 25.1765 +} 25.1766 + 25.1767 +static void end_level_ioapic_irq (unsigned int irq) 25.1768 +{ 25.1769 + unsigned long v; 25.1770 + int i; 25.1771 + 25.1772 + move_irq(irq); 25.1773 +/* 25.1774 + * It appears there is an erratum which affects at least version 0x11 25.1775 + * of I/O APIC (that's the 82093AA and cores integrated into various 25.1776 + * chipsets). Under certain conditions a level-triggered interrupt is 25.1777 + * erroneously delivered as edge-triggered one but the respective IRR 25.1778 + * bit gets set nevertheless. As a result the I/O unit expects an EOI 25.1779 + * message but it will never arrive and further interrupts are blocked 25.1780 + * from the source. The exact reason is so far unknown, but the 25.1781 + * phenomenon was observed when two consecutive interrupt requests 25.1782 + * from a given source get delivered to the same CPU and the source is 25.1783 + * temporarily disabled in between. 25.1784 + * 25.1785 + * A workaround is to simulate an EOI message manually. We achieve it 25.1786 + * by setting the trigger mode to edge and then to level when the edge 25.1787 + * trigger mode gets detected in the TMR of a local APIC for a 25.1788 + * level-triggered interrupt. We mask the source for the time of the 25.1789 + * operation to prevent an edge-triggered interrupt escaping meanwhile. 25.1790 + * The idea is from Manfred Spraul. 
--macro 25.1791 + */ 25.1792 + i = IO_APIC_VECTOR(irq); 25.1793 + 25.1794 + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 25.1795 + 25.1796 + ack_APIC_irq(); 25.1797 + 25.1798 + if (!(v & (1 << (i & 0x1f)))) { 25.1799 + atomic_inc(&irq_mis_count); 25.1800 + spin_lock(&ioapic_lock); 25.1801 + __mask_and_edge_IO_APIC_irq(irq); 25.1802 + __unmask_and_level_IO_APIC_irq(irq); 25.1803 + spin_unlock(&ioapic_lock); 25.1804 + } 25.1805 +} 25.1806 + 25.1807 +#ifdef CONFIG_PCI_MSI 25.1808 +static unsigned int startup_edge_ioapic_vector(unsigned int vector) 25.1809 +{ 25.1810 + int irq = vector_to_irq(vector); 25.1811 + 25.1812 + return startup_edge_ioapic_irq(irq); 25.1813 +} 25.1814 + 25.1815 +static void ack_edge_ioapic_vector(unsigned int vector) 25.1816 +{ 25.1817 + int irq = vector_to_irq(vector); 25.1818 + 25.1819 + ack_edge_ioapic_irq(irq); 25.1820 +} 25.1821 + 25.1822 +static unsigned int startup_level_ioapic_vector (unsigned int vector) 25.1823 +{ 25.1824 + int irq = vector_to_irq(vector); 25.1825 + 25.1826 + return startup_level_ioapic_irq (irq); 25.1827 +} 25.1828 + 25.1829 +static void end_level_ioapic_vector (unsigned int vector) 25.1830 +{ 25.1831 + int irq = vector_to_irq(vector); 25.1832 + 25.1833 + end_level_ioapic_irq(irq); 25.1834 +} 25.1835 + 25.1836 +static void mask_IO_APIC_vector (unsigned int vector) 25.1837 +{ 25.1838 + int irq = vector_to_irq(vector); 25.1839 + 25.1840 + mask_IO_APIC_irq(irq); 25.1841 +} 25.1842 + 25.1843 +static void unmask_IO_APIC_vector (unsigned int vector) 25.1844 +{ 25.1845 + int irq = vector_to_irq(vector); 25.1846 + 25.1847 + unmask_IO_APIC_irq(irq); 25.1848 +} 25.1849 + 25.1850 +static void set_ioapic_affinity_vector (unsigned int vector, 25.1851 + cpumask_t cpu_mask) 25.1852 +{ 25.1853 + int irq = vector_to_irq(vector); 25.1854 + 25.1855 + set_ioapic_affinity_irq(irq, cpu_mask); 25.1856 +} 25.1857 +#endif 25.1858 + 25.1859 +/* 25.1860 + * Level and edge triggered IO-APIC interrupts need different handling, 25.1861 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be 25.1862 + * handled with the level-triggered descriptor, but that one has slightly 25.1863 + * more overhead. Level-triggered interrupts cannot be handled with the 25.1864 + * edge-triggered handler, without risking IRQ storms and other ugly 25.1865 + * races. 25.1866 + */ 25.1867 +static struct hw_interrupt_type ioapic_edge_type = { 25.1868 + .typename = "IO-APIC-edge", 25.1869 + .startup = startup_edge_ioapic, 25.1870 + .shutdown = shutdown_edge_ioapic, 25.1871 + .enable = enable_edge_ioapic, 25.1872 + .disable = disable_edge_ioapic, 25.1873 + .ack = ack_edge_ioapic, 25.1874 + .end = end_edge_ioapic, 25.1875 + .set_affinity = set_ioapic_affinity, 25.1876 +}; 25.1877 + 25.1878 +static struct hw_interrupt_type ioapic_level_type = { 25.1879 + .typename = "IO-APIC-level", 25.1880 + .startup = startup_level_ioapic, 25.1881 + .shutdown = shutdown_level_ioapic, 25.1882 + .enable = enable_level_ioapic, 25.1883 + .disable = disable_level_ioapic, 25.1884 + .ack = mask_and_ack_level_ioapic, 25.1885 + .end = end_level_ioapic, 25.1886 + .set_affinity = set_ioapic_affinity, 25.1887 +}; 25.1888 +#endif /* !CONFIG_XEN */ 25.1889 + 25.1890 +static inline void init_IO_APIC_traps(void) 25.1891 +{ 25.1892 + int irq; 25.1893 + 25.1894 + /* 25.1895 + * NOTE! The local APIC isn't very good at handling 25.1896 + * multiple interrupts at the same interrupt level. 
25.1897 + * As the interrupt level is determined by taking the 25.1898 + * vector number and shifting that right by 4, we 25.1899 + * want to spread these out a bit so that they don't 25.1900 + * all fall in the same interrupt level. 25.1901 + * 25.1902 + * Also, we've got to be careful not to trash gate 25.1903 + * 0x80, because int 0x80 is hm, kind of importantish. ;) 25.1904 + */ 25.1905 + for (irq = 0; irq < NR_IRQS ; irq++) { 25.1906 + int tmp = irq; 25.1907 + if (use_pci_vector()) { 25.1908 + if (!platform_legacy_irq(tmp)) 25.1909 + if ((tmp = vector_to_irq(tmp)) == -1) 25.1910 + continue; 25.1911 + } 25.1912 + if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { 25.1913 + /* 25.1914 + * Hmm.. We don't have an entry for this, 25.1915 + * so default to an old-fashioned 8259 25.1916 + * interrupt if we can.. 25.1917 + */ 25.1918 + if (irq < 16) 25.1919 + make_8259A_irq(irq); 25.1920 +#ifndef CONFIG_XEN 25.1921 + else 25.1922 + /* Strange. Oh, well.. */ 25.1923 + irq_desc[irq].handler = &no_irq_type; 25.1924 +#endif 25.1925 + } 25.1926 + } 25.1927 +} 25.1928 + 25.1929 +#ifndef CONFIG_XEN 25.1930 +static void enable_lapic_irq (unsigned int irq) 25.1931 +{ 25.1932 + unsigned long v; 25.1933 + 25.1934 + v = apic_read(APIC_LVT0); 25.1935 + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); 25.1936 +} 25.1937 + 25.1938 +static void disable_lapic_irq (unsigned int irq) 25.1939 +{ 25.1940 + unsigned long v; 25.1941 + 25.1942 + v = apic_read(APIC_LVT0); 25.1943 + apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); 25.1944 +} 25.1945 + 25.1946 +static void ack_lapic_irq (unsigned int irq) 25.1947 +{ 25.1948 + ack_APIC_irq(); 25.1949 +} 25.1950 + 25.1951 +static void end_lapic_irq (unsigned int i) { /* nothing */ } 25.1952 + 25.1953 +static struct hw_interrupt_type lapic_irq_type = { 25.1954 + .typename = "local-APIC-edge", 25.1955 + .startup = NULL, /* startup_irq() not used for IRQ0 */ 25.1956 + .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ 25.1957 + .enable = enable_lapic_irq, 25.1958 + .disable = disable_lapic_irq, 25.1959 + .ack = ack_lapic_irq, 25.1960 + .end = end_lapic_irq 25.1961 +}; 25.1962 + 25.1963 +static void setup_nmi (void) 25.1964 +{ 25.1965 + /* 25.1966 + * Dirty trick to enable the NMI watchdog ... 25.1967 + * We put the 8259A master into AEOI mode and 25.1968 + * unmask on all local APICs LVT0 as NMI. 25.1969 + * 25.1970 + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') 25.1971 + * is from Maciej W. Rozycki - so we do not have to EOI from 25.1972 + * the NMI handler or the timer interrupt. 25.1973 + */ 25.1974 + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); 25.1975 + 25.1976 + on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); 25.1977 + 25.1978 + apic_printk(APIC_VERBOSE, " done.\n"); 25.1979 +} 25.1980 + 25.1981 +/* 25.1982 + * This looks a bit hackish but it's about the only one way of sending 25.1983 + * a few INTA cycles to 8259As and any associated glue logic. ICR does 25.1984 + * not support the ExtINT mode, unfortunately. We need to send these 25.1985 + * cycles as some i82489DX-based boards have glue logic that keeps the 25.1986 + * 8259A interrupt line asserted until INTA. 
--macro 25.1987 + */ 25.1988 +static inline void unlock_ExtINT_logic(void) 25.1989 +{ 25.1990 + int pin, i; 25.1991 + struct IO_APIC_route_entry entry0, entry1; 25.1992 + unsigned char save_control, save_freq_select; 25.1993 + unsigned long flags; 25.1994 + 25.1995 + pin = find_isa_irq_pin(8, mp_INT); 25.1996 + if (pin == -1) 25.1997 + return; 25.1998 + 25.1999 + spin_lock_irqsave(&ioapic_lock, flags); 25.2000 + *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); 25.2001 + *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); 25.2002 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.2003 + clear_IO_APIC_pin(0, pin); 25.2004 + 25.2005 + memset(&entry1, 0, sizeof(entry1)); 25.2006 + 25.2007 + entry1.dest_mode = 0; /* physical delivery */ 25.2008 + entry1.mask = 0; /* unmask IRQ now */ 25.2009 + entry1.dest.physical.physical_dest = hard_smp_processor_id(); 25.2010 + entry1.delivery_mode = dest_ExtINT; 25.2011 + entry1.polarity = entry0.polarity; 25.2012 + entry1.trigger = 0; 25.2013 + entry1.vector = 0; 25.2014 + 25.2015 + spin_lock_irqsave(&ioapic_lock, flags); 25.2016 + io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); 25.2017 + io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); 25.2018 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.2019 + 25.2020 + save_control = CMOS_READ(RTC_CONTROL); 25.2021 + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); 25.2022 + CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, 25.2023 + RTC_FREQ_SELECT); 25.2024 + CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); 25.2025 + 25.2026 + i = 100; 25.2027 + while (i-- > 0) { 25.2028 + mdelay(10); 25.2029 + if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) 25.2030 + i -= 10; 25.2031 + } 25.2032 + 25.2033 + CMOS_WRITE(save_control, RTC_CONTROL); 25.2034 + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); 25.2035 + clear_IO_APIC_pin(0, pin); 25.2036 + 25.2037 + spin_lock_irqsave(&ioapic_lock, flags); 25.2038 + io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); 25.2039 + io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); 25.2040 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.2041 +} 25.2042 + 25.2043 +/* 25.2044 + * This code may look a bit paranoid, but it's supposed to cooperate with 25.2045 + * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 25.2046 + * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 25.2047 + * fanatically on his truly buggy board. 25.2048 + */ 25.2049 +static inline void check_timer(void) 25.2050 +{ 25.2051 + int pin1, pin2; 25.2052 + int vector; 25.2053 + 25.2054 + /* 25.2055 + * get/set the timer IRQ vector: 25.2056 + */ 25.2057 + disable_8259A_irq(0); 25.2058 + vector = assign_irq_vector(0); 25.2059 + set_intr_gate(vector, interrupt[0]); 25.2060 + 25.2061 + /* 25.2062 + * Subtle, code in do_timer_interrupt() expects an AEOI 25.2063 + * mode for the 8259A whenever interrupts are routed 25.2064 + * through I/O APICs. Also IRQ0 has to be enabled in 25.2065 + * the 8259A which implies the virtual wire has to be 25.2066 + * disabled in the local APIC. 
25.2067 + */ 25.2068 + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 25.2069 + init_8259A(1); 25.2070 + timer_ack = 1; 25.2071 + enable_8259A_irq(0); 25.2072 + 25.2073 + pin1 = find_isa_irq_pin(0, mp_INT); 25.2074 + pin2 = find_isa_irq_pin(0, mp_ExtINT); 25.2075 + 25.2076 + printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); 25.2077 + 25.2078 + if (pin1 != -1) { 25.2079 + /* 25.2080 + * Ok, does IRQ0 through the IOAPIC work? 25.2081 + */ 25.2082 + unmask_IO_APIC_irq(0); 25.2083 + if (timer_irq_works()) { 25.2084 + if (nmi_watchdog == NMI_IO_APIC) { 25.2085 + disable_8259A_irq(0); 25.2086 + setup_nmi(); 25.2087 + enable_8259A_irq(0); 25.2088 + check_nmi_watchdog(); 25.2089 + } 25.2090 + return; 25.2091 + } 25.2092 + clear_IO_APIC_pin(0, pin1); 25.2093 + printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); 25.2094 + } 25.2095 + 25.2096 + printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); 25.2097 + if (pin2 != -1) { 25.2098 + printk("\n..... (found pin %d) ...", pin2); 25.2099 + /* 25.2100 + * legacy devices should be connected to IO APIC #0 25.2101 + */ 25.2102 + setup_ExtINT_IRQ0_pin(pin2, vector); 25.2103 + if (timer_irq_works()) { 25.2104 + printk("works.\n"); 25.2105 + if (pin1 != -1) 25.2106 + replace_pin_at_irq(0, 0, pin1, 0, pin2); 25.2107 + else 25.2108 + add_pin_to_irq(0, 0, pin2); 25.2109 + if (nmi_watchdog == NMI_IO_APIC) { 25.2110 + setup_nmi(); 25.2111 + check_nmi_watchdog(); 25.2112 + } 25.2113 + return; 25.2114 + } 25.2115 + /* 25.2116 + * Cleanup, just in case ... 25.2117 + */ 25.2118 + clear_IO_APIC_pin(0, pin2); 25.2119 + } 25.2120 + printk(" failed.\n"); 25.2121 + 25.2122 + if (nmi_watchdog == NMI_IO_APIC) { 25.2123 + printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 25.2124 + nmi_watchdog = 0; 25.2125 + } 25.2126 + 25.2127 + printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); 25.2128 + 25.2129 + disable_8259A_irq(0); 25.2130 + irq_desc[0].handler = &lapic_irq_type; 25.2131 + apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ 25.2132 + enable_8259A_irq(0); 25.2133 + 25.2134 + if (timer_irq_works()) { 25.2135 + printk(" works.\n"); 25.2136 + return; 25.2137 + } 25.2138 + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); 25.2139 + printk(" failed.\n"); 25.2140 + 25.2141 + printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); 25.2142 + 25.2143 + timer_ack = 0; 25.2144 + init_8259A(0); 25.2145 + make_8259A_irq(0); 25.2146 + apic_write_around(APIC_LVT0, APIC_DM_EXTINT); 25.2147 + 25.2148 + unlock_ExtINT_logic(); 25.2149 + 25.2150 + if (timer_irq_works()) { 25.2151 + printk(" works.\n"); 25.2152 + return; 25.2153 + } 25.2154 + printk(" failed :(.\n"); 25.2155 + panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 25.2156 + "report. Then try booting with the 'noapic' option"); 25.2157 +} 25.2158 +#else 25.2159 +#define check_timer() ((void)0) 25.2160 +#endif 25.2161 + 25.2162 +/* 25.2163 + * 25.2164 + * IRQ's that are handled by the PIC in the MPS IOAPIC case. 25.2165 + * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. 25.2166 + * Linux doesn't really care, as it's not actually used 25.2167 + * for any interrupt handling anyway. 
25.2168 + */ 25.2169 +#define PIC_IRQS (1 << PIC_CASCADE_IR) 25.2170 + 25.2171 +void __init setup_IO_APIC(void) 25.2172 +{ 25.2173 + enable_IO_APIC(); 25.2174 + 25.2175 + if (acpi_ioapic) 25.2176 + io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ 25.2177 + else 25.2178 + io_apic_irqs = ~PIC_IRQS; 25.2179 + 25.2180 + printk("ENABLING IO-APIC IRQs\n"); 25.2181 + 25.2182 + /* 25.2183 + * Set up IO-APIC IRQ routing. 25.2184 + */ 25.2185 + if (!acpi_ioapic) 25.2186 + setup_ioapic_ids_from_mpc(); 25.2187 +#ifndef CONFIG_XEN 25.2188 + sync_Arb_IDs(); 25.2189 +#endif 25.2190 + setup_IO_APIC_irqs(); 25.2191 + init_IO_APIC_traps(); 25.2192 + check_timer(); 25.2193 + if (!acpi_ioapic) 25.2194 + print_IO_APIC(); 25.2195 +} 25.2196 + 25.2197 +/* 25.2198 + * Called after all the initialization is done. If we didnt find any 25.2199 + * APIC bugs then we can allow the modify fast path 25.2200 + */ 25.2201 + 25.2202 +static int __init io_apic_bug_finalize(void) 25.2203 +{ 25.2204 + if(sis_apic_bug == -1) 25.2205 + sis_apic_bug = 0; 25.2206 + return 0; 25.2207 +} 25.2208 + 25.2209 +late_initcall(io_apic_bug_finalize); 25.2210 + 25.2211 +struct sysfs_ioapic_data { 25.2212 + struct sys_device dev; 25.2213 + struct IO_APIC_route_entry entry[0]; 25.2214 +}; 25.2215 +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; 25.2216 + 25.2217 +static int ioapic_suspend(struct sys_device *dev, u32 state) 25.2218 +{ 25.2219 + struct IO_APIC_route_entry *entry; 25.2220 + struct sysfs_ioapic_data *data; 25.2221 + unsigned long flags; 25.2222 + int i; 25.2223 + 25.2224 + data = container_of(dev, struct sysfs_ioapic_data, dev); 25.2225 + entry = data->entry; 25.2226 + spin_lock_irqsave(&ioapic_lock, flags); 25.2227 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { 25.2228 + *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); 25.2229 + *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); 25.2230 + } 25.2231 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.2232 + 25.2233 + return 0; 25.2234 +} 25.2235 + 25.2236 +static int ioapic_resume(struct sys_device *dev) 25.2237 +{ 25.2238 + struct IO_APIC_route_entry *entry; 25.2239 + struct sysfs_ioapic_data *data; 25.2240 + unsigned long flags; 25.2241 + union IO_APIC_reg_00 reg_00; 25.2242 + int i; 25.2243 + 25.2244 + data = container_of(dev, struct sysfs_ioapic_data, dev); 25.2245 + entry = data->entry; 25.2246 + 25.2247 + spin_lock_irqsave(&ioapic_lock, flags); 25.2248 + reg_00.raw = io_apic_read(dev->id, 0); 25.2249 + if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { 25.2250 + reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; 25.2251 + io_apic_write(dev->id, 0, reg_00.raw); 25.2252 + } 25.2253 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { 25.2254 + io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); 25.2255 + io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); 25.2256 + } 25.2257 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.2258 + 25.2259 + return 0; 25.2260 +} 25.2261 + 25.2262 +static struct sysdev_class ioapic_sysdev_class = { 25.2263 + set_kset_name("ioapic"), 25.2264 + .suspend = ioapic_suspend, 25.2265 + .resume = ioapic_resume, 25.2266 +}; 25.2267 + 25.2268 +static int __init ioapic_init_sysfs(void) 25.2269 +{ 25.2270 + struct sys_device * dev; 25.2271 + int i, size, error = 0; 25.2272 + 25.2273 + error = sysdev_class_register(&ioapic_sysdev_class); 25.2274 + if (error) 25.2275 + return error; 25.2276 + 25.2277 + for (i = 0; i < nr_ioapics; i++ ) { 25.2278 + size = sizeof(struct 
sys_device) + nr_ioapic_registers[i] 25.2279 + * sizeof(struct IO_APIC_route_entry); 25.2280 + mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); 25.2281 + if (!mp_ioapic_data[i]) { 25.2282 + printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); 25.2283 + continue; 25.2284 + } 25.2285 + memset(mp_ioapic_data[i], 0, size); 25.2286 + dev = &mp_ioapic_data[i]->dev; 25.2287 + dev->id = i; 25.2288 + dev->cls = &ioapic_sysdev_class; 25.2289 + error = sysdev_register(dev); 25.2290 + if (error) { 25.2291 + kfree(mp_ioapic_data[i]); 25.2292 + mp_ioapic_data[i] = NULL; 25.2293 + printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); 25.2294 + continue; 25.2295 + } 25.2296 + } 25.2297 + 25.2298 + return 0; 25.2299 +} 25.2300 + 25.2301 +device_initcall(ioapic_init_sysfs); 25.2302 + 25.2303 +/* -------------------------------------------------------------------------- 25.2304 + ACPI-based IOAPIC Configuration 25.2305 + -------------------------------------------------------------------------- */ 25.2306 + 25.2307 #ifdef CONFIG_ACPI_BOOT 25.2308 + 25.2309 int __init io_apic_get_unique_id (int ioapic, int apic_id) 25.2310 { 25.2311 +#ifndef CONFIG_XEN 25.2312 union IO_APIC_reg_00 reg_00; 25.2313 static physid_mask_t apic_id_map = PHYSID_MASK_NONE; 25.2314 + physid_mask_t tmp; 25.2315 unsigned long flags; 25.2316 + int i = 0; 25.2317 25.2318 /* 25.2319 * The P4 platform supports up to 256 APIC IDs on two separate APIC 25.2320 @@ -306,8 +2485,45 @@ int __init io_apic_get_unique_id (int io 25.2321 apic_id = reg_00.bits.ID; 25.2322 } 25.2323 25.2324 + /* 25.2325 + * Every APIC in a system must have a unique ID or we get lots of nice 25.2326 + * 'stuck on smp_invalidate_needed IPI wait' messages. 25.2327 + */ 25.2328 + if (check_apicid_used(apic_id_map, apic_id)) { 25.2329 + 25.2330 + for (i = 0; i < get_physical_broadcast(); i++) { 25.2331 + if (!check_apicid_used(apic_id_map, i)) 25.2332 + break; 25.2333 + } 25.2334 + 25.2335 + if (i == get_physical_broadcast()) 25.2336 + panic("Max apic_id exceeded!\n"); 25.2337 + 25.2338 + printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " 25.2339 + "trying %d\n", ioapic, apic_id, i); 25.2340 + 25.2341 + apic_id = i; 25.2342 + } 25.2343 + 25.2344 + tmp = apicid_to_cpu_present(apic_id); 25.2345 + physids_or(apic_id_map, apic_id_map, tmp); 25.2346 + 25.2347 + if (reg_00.bits.ID != apic_id) { 25.2348 + reg_00.bits.ID = apic_id; 25.2349 + 25.2350 + spin_lock_irqsave(&ioapic_lock, flags); 25.2351 + io_apic_write(ioapic, 0, reg_00.raw); 25.2352 + reg_00.raw = io_apic_read(ioapic, 0); 25.2353 + spin_unlock_irqrestore(&ioapic_lock, flags); 25.2354 + 25.2355 + /* Sanity check */ 25.2356 + if (reg_00.bits.ID != apic_id) 25.2357 + panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); 25.2358 + } 25.2359 + 25.2360 apic_printk(APIC_VERBOSE, KERN_INFO 25.2361 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); 25.2362 +#endif /* !CONFIG_XEN */ 25.2363 25.2364 return apic_id; 25.2365 } 25.2366 @@ -338,6 +2554,7 @@ int __init io_apic_get_redir_entries (in 25.2367 return reg_01.bits.entries; 25.2368 } 25.2369 25.2370 + 25.2371 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) 25.2372 { 25.2373 struct IO_APIC_route_entry entry; 25.2374 @@ -377,12 +2594,10 @@ int io_apic_set_pci_routing (int ioapic, 25.2375 mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, 25.2376 edge_level, active_high_low); 25.2377 25.2378 -#ifndef CONFIG_XEN 25.2379 ioapic_register_intr(irq, entry.vector, edge_level); 25.2380 25.2381 if (!ioapic && (irq < 
16)) 25.2382 disable_8259A_irq(irq); 25.2383 -#endif 25.2384 25.2385 spin_lock_irqsave(&ioapic_lock, flags); 25.2386 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); 25.2387 @@ -391,4 +2606,5 @@ int io_apic_set_pci_routing (int ioapic, 25.2388 25.2389 return 0; 25.2390 } 25.2391 + 25.2392 #endif /*CONFIG_ACPI_BOOT*/
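Throughout the io_apic.c hunks above (unlock_ExtINT_logic, ioapic_suspend/resume, io_apic_set_pci_routing), a 64-bit IO_APIC_route_entry is moved as two 32-bit registers at offsets 0x10 + 2*pin and 0x11 + 2*pin under ioapic_lock, with the high dword written first. A minimal sketch of that recurring pattern as helpers — read_ioapic_entry/write_ioapic_entry are illustrative names, not part of this changeset:

static struct IO_APIC_route_entry read_ioapic_entry(int apic, int pin)
{
	struct IO_APIC_route_entry entry;
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	*(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
	*(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
	spin_unlock_irqrestore(&ioapic_lock, flags);
	return entry;
}

static void write_ioapic_entry(int apic, int pin,
			       struct IO_APIC_route_entry entry)
{
	unsigned long flags;

	/* High dword first: the mask bit lives in the low dword, so the
	 * destination fields are in place before the entry can fire. */
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
	spin_unlock_irqrestore(&ioapic_lock, flags);
}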
26.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c Tue May 03 14:42:00 2005 +0000 26.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c Tue May 10 08:49:59 2005 +0000 26.3 @@ -109,7 +109,7 @@ static int MP_valid_apicid(int apicid, i 26.4 { 26.5 return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf; 26.6 } 26.7 -#else 26.8 +#elif !defined(CONFIG_XEN) 26.9 static int MP_valid_apicid(int apicid, int version) 26.10 { 26.11 if (version >= 0x14) 26.12 @@ -119,6 +119,7 @@ static int MP_valid_apicid(int apicid, i 26.13 } 26.14 #endif 26.15 26.16 +#ifndef CONFIG_XEN 26.17 void __init MP_processor_info (struct mpc_config_processor *m) 26.18 { 26.19 int ver, apicid; 26.20 @@ -217,6 +218,12 @@ void __init MP_processor_info (struct mp 26.21 apic_version[m->mpc_apicid] = ver; 26.22 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; 26.23 } 26.24 +#else 26.25 +void __init MP_processor_info (struct mpc_config_processor *m) 26.26 +{ 26.27 + num_processors++; 26.28 +} 26.29 +#endif /* CONFIG_XEN */ 26.30 26.31 static void __init MP_bus_info (struct mpc_config_bus *m) 26.32 { 26.33 @@ -690,7 +697,7 @@ void __init get_smp_config (void) 26.34 * Read the physical hardware table. Anything here will 26.35 * override the defaults. 26.36 */ 26.37 - if (!smp_read_mpc((void *)mpf->mpf_physptr)) { 26.38 + if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) { 26.39 smp_found_config = 0; 26.40 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); 26.41 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); 26.42 @@ -743,7 +750,6 @@ static int __init smp_scan_config (unsig 26.43 smp_found_config = 1; 26.44 printk(KERN_INFO "found SMP MP-table at %08lx\n", 26.45 virt_to_phys(mpf)); 26.46 - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); 26.47 if (mpf->mpf_physptr) { 26.48 /* 26.49 * We cannot access to MPC table to compute 26.50 @@ -817,12 +823,14 @@ void __init find_smp_config (void) 26.51 void __init mp_register_lapic_address ( 26.52 u64 address) 26.53 { 26.54 +#ifndef CONFIG_XEN 26.55 mp_lapic_addr = (unsigned long) address; 26.56 26.57 if (boot_cpu_physical_apicid == -1U) 26.58 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); 26.59 26.60 Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); 26.61 +#endif 26.62 } 26.63 26.64 26.65 @@ -842,6 +850,7 @@ void __init mp_register_lapic ( 26.66 if (id == boot_cpu_physical_apicid) 26.67 boot_cpu = 1; 26.68 26.69 +#ifndef CONFIG_XEN 26.70 processor.mpc_type = MP_PROCESSOR; 26.71 processor.mpc_apicid = id; 26.72 processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); 26.73 @@ -852,6 +861,7 @@ void __init mp_register_lapic ( 26.74 processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; 26.75 processor.mpc_reserved[0] = 0; 26.76 processor.mpc_reserved[1] = 0; 26.77 +#endif 26.78 26.79 MP_processor_info(&processor); 26.80 }
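Two details in the mpparse.c hunk deserve unpacking. Under Xen the guest has no authoritative view of the physical CPUs, so MP_processor_info() degenerates to counting entries; and the MP config table pointer is now translated with isa_bus_to_virt() because the low megabyte is no longer identity-mapped — it is reached through the fixmapped ISA window set up in the mm/init.c hunk below. A sketch of the corrected lookup (type names as in 2.6.11's mpspec headers):

/* mpf_physptr is a physical address below 1MB; the old raw cast to a
 * virtual pointer only worked with an identity mapping, which a
 * paravirtualized guest does not have. */
struct mp_config_table *mpc = isa_bus_to_virt(mpf->mpf_physptr);

if (!smp_read_mpc(mpc)) {
	smp_found_config = 0;
	printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
}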
27.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c Tue May 03 14:42:00 2005 +0000 27.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c Tue May 10 08:49:59 2005 +0000 27.3 @@ -779,7 +779,7 @@ static void __init parse_cmdline_early ( 27.4 noexec_setup(from + 7); 27.5 27.6 27.7 -#ifdef CONFIG_X86_SMP 27.8 +#ifdef CONFIG_X86_MPPARSE 27.9 /* 27.10 * If the BIOS enumerates physical processors before logical, 27.11 * maxcpus=N at enumeration-time can be used to disable HT. 27.12 @@ -1133,12 +1133,6 @@ static unsigned long __init setup_memory 27.13 */ 27.14 acpi_reserve_bootmem(); 27.15 #endif 27.16 -#ifdef CONFIG_X86_FIND_SMP_CONFIG 27.17 - /* 27.18 - * Find and reserve possible boot-time SMP configuration: 27.19 - */ 27.20 - find_smp_config(); 27.21 -#endif 27.22 27.23 #ifdef CONFIG_BLK_DEV_INITRD 27.24 if (xen_start_info.mod_start) { 27.25 @@ -1503,6 +1497,13 @@ void __init setup_arch(char **cmdline_p) 27.26 #endif 27.27 paging_init(); 27.28 27.29 +#ifdef CONFIG_X86_FIND_SMP_CONFIG 27.30 + /* 27.31 + * Find and reserve possible boot-time SMP configuration: 27.32 + */ 27.33 + find_smp_config(); 27.34 +#endif 27.35 + 27.36 /* Make sure we have a correctly sized P->M table. */ 27.37 if (max_pfn != xen_start_info.nr_pages) { 27.38 phys_to_machine_mapping = alloc_bootmem_low_pages( 27.39 @@ -1566,6 +1567,10 @@ void __init setup_arch(char **cmdline_p) 27.40 if (efi_enabled) 27.41 efi_map_memmap(); 27.42 27.43 + op.cmd = PHYSDEVOP_SET_IOPL; 27.44 + op.u.set_iopl.iopl = current->thread.io_pl = 1; 27.45 + HYPERVISOR_physdev_op(&op); 27.46 + 27.47 /* 27.48 * Parse the ACPI tables for possible boot-time SMP configuration. 27.49 */ 27.50 @@ -1583,10 +1588,6 @@ void __init setup_arch(char **cmdline_p) 27.51 27.52 register_memory(); 27.53 27.54 - op.cmd = PHYSDEVOP_SET_IOPL; 27.55 - op.u.set_iopl.iopl = current->thread.io_pl = 1; 27.56 - HYPERVISOR_physdev_op(&op); 27.57 - 27.58 if (xen_start_info.flags & SIF_INITDOMAIN) { 27.59 if (!(xen_start_info.flags & SIF_PRIVILEGED)) 27.60 panic("Xen granted us console access "
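The interesting motion in the setup.c hunk is ordering: find_smp_config() now runs after paging_init(), once the mappings it needs exist, and the PHYSDEVOP_SET_IOPL hypercall is issued before the ACPI tables are parsed, so any port I/O done while probing the boot-time SMP configuration already runs at the raised I/O privilege level. The hypercall pattern, exactly as the hunk uses it:

physdev_op_t op;

/* Ask Xen to raise our I/O privilege level, and record it in the
 * boot thread so it is preserved across context switches. */
op.cmd = PHYSDEVOP_SET_IOPL;
op.u.set_iopl.iopl = current->thread.io_pl = 1;
HYPERVISOR_physdev_op(&op);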
28.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c Tue May 03 14:42:00 2005 +0000 28.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c Tue May 10 08:49:59 2005 +0000 28.3 @@ -361,6 +361,7 @@ static void __init pagetable_init (void) 28.4 make_page_writable(old_pgd); 28.5 __flush_tlb_all(); 28.6 free_bootmem(__pa(old_pgd), PAGE_SIZE); 28.7 + init_mm.context.pinned = 1; 28.8 28.9 kernel_physical_mapping_init(pgd_base); 28.10 remap_numa_kva(); 28.11 @@ -562,7 +563,7 @@ void __init paging_init(void) 28.12 zone_sizes_init(); 28.13 28.14 /* Switch to the real shared_info page, and clear the dummy page. */ 28.15 - set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info); 28.16 + set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info); 28.17 HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); 28.18 memset(empty_zero_page, 0, sizeof(empty_zero_page)); 28.19 28.20 @@ -570,10 +571,11 @@ void __init paging_init(void) 28.21 /* Setup mapping of lower 1st MB */ 28.22 for (i = 0; i < NR_FIX_ISAMAPS; i++) 28.23 if (xen_start_info.flags & SIF_PRIVILEGED) 28.24 - set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); 28.25 + set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); 28.26 else 28.27 - set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i, 28.28 - virt_to_machine(empty_zero_page)); 28.29 + __set_fixmap(FIX_ISAMAP_BEGIN - i, 28.30 + virt_to_machine(empty_zero_page), 28.31 + PAGE_KERNEL_RO); 28.32 #endif 28.33 } 28.34
29.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c Tue May 03 14:42:00 2005 +0000 29.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c Tue May 10 08:49:59 2005 +0000 29.3 @@ -256,7 +256,7 @@ void __init *bt_ioremap(unsigned long ph 29.4 */ 29.5 idx = FIX_BTMAP_BEGIN; 29.6 while (nrpages > 0) { 29.7 - set_fixmap_ma(idx, phys_addr); 29.8 + set_fixmap(idx, phys_addr); 29.9 phys_addr += PAGE_SIZE; 29.10 --idx; 29.11 --nrpages;
30.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c Tue May 03 14:42:00 2005 +0000 30.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c Tue May 10 08:49:59 2005 +0000 30.3 @@ -22,6 +22,7 @@ 30.4 #include <asm/tlb.h> 30.5 #include <asm/tlbflush.h> 30.6 #include <asm/io.h> 30.7 +#include <asm/mmu_context.h> 30.8 30.9 #include <asm-xen/foreign_page.h> 30.10 30.11 @@ -176,18 +177,18 @@ void __set_fixmap (enum fixed_addresses 30.12 BUG(); 30.13 return; 30.14 } 30.15 - set_pte_pfn(address, phys >> PAGE_SHIFT, flags); 30.16 -} 30.17 - 30.18 -void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) 30.19 -{ 30.20 - unsigned long address = __fix_to_virt(idx); 30.21 - 30.22 - if (idx >= __end_of_fixed_addresses) { 30.23 - BUG(); 30.24 - return; 30.25 + switch (idx) { 30.26 + case FIX_WP_TEST: 30.27 + case FIX_VSYSCALL: 30.28 +#ifdef CONFIG_X86_F00F_BUG 30.29 + case FIX_F00F_IDT: 30.30 +#endif 30.31 + set_pte_pfn(address, phys >> PAGE_SHIFT, flags); 30.32 + break; 30.33 + default: 30.34 + set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags); 30.35 + break; 30.36 } 30.37 - set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags); 30.38 } 30.39 30.40 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 30.41 @@ -480,7 +481,6 @@ void mm_unpin(struct mm_struct *mm) 30.42 30.43 void _arch_exit_mmap(struct mm_struct *mm) 30.44 { 30.45 - unsigned int cpu = smp_processor_id(); 30.46 struct task_struct *tsk = current; 30.47 30.48 task_lock(tsk); 30.49 @@ -494,9 +494,7 @@ void _arch_exit_mmap(struct mm_struct *m 30.50 tsk->active_mm = &init_mm; 30.51 atomic_inc(&init_mm.mm_count); 30.52 30.53 - cpu_set(cpu, init_mm.cpu_vm_mask); 30.54 - load_cr3(swapper_pg_dir); 30.55 - cpu_clear(cpu, mm->cpu_vm_mask); 30.56 + switch_mm(mm, &init_mm, tsk); 30.57 30.58 atomic_dec(&mm->mm_count); 30.59 BUG_ON(atomic_read(&mm->mm_count) == 0);
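The pgtable.c hunk folds the old __set_fixmap_ma() into __set_fixmap(): a small whitelist of slots (FIX_WP_TEST, FIX_VSYSCALL, and FIX_F00F_IDT where configured) holds guest pseudo-physical pages and goes through set_pte_pfn(), while everything else — grant-table frames, shared_info, the ISA window — is a machine address and goes through set_pte_pfn_ma(). Callers no longer choose a variant; the mm/init.c hunk above and the gnttab.c and reboot.c hunks below all collapse to the plain macro, as in:

/* shared_info is a machine frame handed to us by Xen; the default
 * branch of __set_fixmap() now maps it via set_pte_pfn_ma(). */
set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);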
31.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile Tue May 03 14:42:00 2005 +0000 31.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile Tue May 10 08:49:59 2005 +0000 31.3 @@ -11,7 +11,8 @@ c-obj-$(CONFIG_PCI_DIRECT) += direct.o 31.4 c-pci-y := fixup.o 31.5 c-pci-$(CONFIG_ACPI_PCI) += acpi.o 31.6 c-pci-y += legacy.o 31.7 -c-pci-y += irq.o 31.8 +# Make sure irq.o gets linked in after legacy.o 31.9 +l-pci-y += irq.o 31.10 31.11 c-pci-$(CONFIG_X86_VISWS) := visws.o fixup.o 31.12 pci-$(CONFIG_X86_VISWS) := 31.13 @@ -26,6 +27,6 @@ c-link := 31.14 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)): 31.15 @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@ 31.16 31.17 -obj-y += $(c-obj-y) 31.18 +obj-y += $(c-obj-y) $(l-pci-y) 31.19 31.20 clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
32.1 --- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c Tue May 03 14:42:00 2005 +0000 32.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c Tue May 10 08:49:59 2005 +0000 32.3 @@ -20,10 +20,6 @@ 32.4 #include <asm-xen/linux-public/privcmd.h> 32.5 #include <asm-xen/gnttab.h> 32.6 32.7 -#ifndef set_fixmap_ma 32.8 -#define set_fixmap_ma set_fixmap 32.9 -#endif 32.10 - 32.11 #if 1 32.12 #define ASSERT(_p) \ 32.13 if ( !(_p) ) { printk(KERN_ALERT"Assertion '%s': line %d, file %s\n", \ 32.14 @@ -339,7 +335,7 @@ int gnttab_resume(void) 32.15 BUG_ON(setup.status != 0); 32.16 32.17 for ( i = 0; i < NR_GRANT_FRAMES; i++ ) 32.18 - set_fixmap_ma(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT); 32.19 + set_fixmap(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT); 32.20 32.21 return 0; 32.22 }
33.1 --- a/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c Tue May 03 14:42:00 2005 +0000 33.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c Tue May 10 08:49:59 2005 +0000 33.3 @@ -129,11 +129,7 @@ static void __do_suspend(void) 33.4 memcpy(&xen_start_info, &suspend_record->resume_info, 33.5 sizeof(xen_start_info)); 33.6 33.7 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 33.8 - set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info); 33.9 -#else 33.10 set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info); 33.11 -#endif 33.12 33.13 HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); 33.14
34.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c Tue May 03 14:42:00 2005 +0000 34.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c Tue May 10 08:49:59 2005 +0000 34.3 @@ -361,7 +361,14 @@ void __set_fixmap (enum fixed_addresses 34.4 printk("Invalid __set_fixmap\n"); 34.5 return; 34.6 } 34.7 - set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL); 34.8 + switch (idx) { 34.9 + case VSYSCALL_FIRST_PAGE: 34.10 + set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL); 34.11 + break; 34.12 + default: 34.13 + set_pte_phys_ma(address, phys, prot); 34.14 + break; 34.15 + } 34.16 } 34.17 34.18 34.19 @@ -414,18 +421,6 @@ void __set_fixmap_user (enum fixed_addre 34.20 34.21 } 34.22 34.23 - 34.24 -void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) 34.25 -{ 34.26 - unsigned long address = __fix_to_virt(idx); 34.27 - 34.28 - if (idx >= __end_of_fixed_addresses) { 34.29 - printk("Invalid __set_fixmap\n"); 34.30 - return; 34.31 - } 34.32 - set_pte_phys_ma(address, phys, prot); 34.33 -} 34.34 - 34.35 unsigned long __initdata table_start, table_end, tables_reserved; 34.36 34.37 #if 0 34.38 @@ -632,8 +627,7 @@ void __init paging_init(void) 34.39 free_area_init(zones_size); 34.40 } 34.41 34.42 - __set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info, 34.43 - __pgprot(_KERNPG_TABLE)); 34.44 + set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info); 34.45 HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); 34.46 34.47 memset(empty_zero_page, 0, sizeof(empty_zero_page)); 34.48 @@ -642,11 +636,11 @@ void __init paging_init(void) 34.49 /* Setup mapping of lower 1st MB */ 34.50 for (i = 0; i < NR_FIX_ISAMAPS; i++) 34.51 if (xen_start_info.flags & SIF_PRIVILEGED) 34.52 - __set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE, 34.53 - __pgprot(_KERNPG_TABLE)); 34.54 + set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); 34.55 else 34.56 - set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i, 34.57 - virt_to_machine(empty_zero_page)); 34.58 + __set_fixmap(FIX_ISAMAP_BEGIN - i, 34.59 + virt_to_machine(empty_zero_page), 34.60 + PAGE_KERNEL_RO); 34.61 #endif 34.62 34.63 }
35.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c Tue May 03 14:42:00 2005 +0000 35.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c Tue May 10 08:49:59 2005 +0000 35.3 @@ -273,7 +273,7 @@ void __init *bt_ioremap(unsigned long ph 35.4 */ 35.5 idx = FIX_BTMAP_BEGIN; 35.6 while (nrpages > 0) { 35.7 - set_fixmap_ma(idx, phys_addr); 35.8 + set_fixmap(idx, phys_addr); 35.9 phys_addr += PAGE_SIZE; 35.10 --idx; 35.11 --nrpages;
36.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile Tue May 03 14:42:00 2005 +0000 36.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile Tue May 10 08:49:59 2005 +0000 36.3 @@ -12,7 +12,7 @@ c-obj-y := i386.o 36.4 c-obj-y += fixup.o 36.5 c-obj-$(CONFIG_ACPI_PCI) += acpi.o 36.6 c-obj-y += legacy.o common.o 36.7 -c-xen-obj-$(CONFIG_PCI_DIRECT)+= direct.o 36.8 +c-obj-$(CONFIG_PCI_DIRECT)+= direct.o 36.9 c-xen-obj-y += irq.o 36.10 # mmconfig has a 64bit special 36.11 c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
37.1 --- a/linux-2.6.11-xen-sparse/drivers/acpi/tables.c Tue May 03 14:42:00 2005 +0000 37.2 +++ b/linux-2.6.11-xen-sparse/drivers/acpi/tables.c Tue May 10 08:49:59 2005 +0000 37.3 @@ -581,8 +581,8 @@ acpi_table_init (void) 37.4 return -ENODEV; 37.5 } 37.6 37.7 - rsdp = (struct acpi_table_rsdp *) (__fix_to_virt(FIX_ACPI_RSDP_PAGE) 37.8 - + (rsdp_phys & ~PAGE_MASK)); 37.9 + rsdp = (struct acpi_table_rsdp *) (__fix_to_virt(FIX_ACPI_RSDP_PAGE) + 37.10 + (rsdp_phys & ~PAGE_MASK)); 37.11 if (!rsdp) { 37.12 printk(KERN_WARNING PREFIX "Unable to map RSDP\n"); 37.13 return -ENODEV;
38.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/Makefile Tue May 03 14:42:00 2005 +0000 38.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/Makefile Tue May 10 08:49:59 2005 +0000 38.3 @@ -3,8 +3,8 @@ 38.4 obj-y += console/ 38.5 obj-y += evtchn/ 38.6 obj-y += balloon/ 38.7 +obj-y += privcmd/ 38.8 38.9 -obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += privcmd/ 38.10 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ 38.11 obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ 38.12 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/
39.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c Tue May 03 14:42:00 2005 +0000 39.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c Tue May 10 08:49:59 2005 +0000 39.3 @@ -50,6 +50,7 @@ 39.4 #include <asm-xen/xen-public/io/netif.h> 39.5 #include <asm-xen/balloon.h> 39.6 #include <asm/page.h> 39.7 +#include <asm/uaccess.h> 39.8 39.9 #ifndef __GFP_NOWARN 39.10 #define __GFP_NOWARN 0
40.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c Tue May 03 14:42:00 2005 +0000 40.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c Tue May 10 08:49:59 2005 +0000 40.3 @@ -219,9 +219,6 @@ static struct file_operations privcmd_fi 40.4 40.5 static int __init privcmd_init(void) 40.6 { 40.7 - if ( !(xen_start_info.flags & SIF_PRIVILEGED) ) 40.8 - return 0; 40.9 - 40.10 privcmd_intf = create_xen_proc_entry("privcmd", 0400); 40.11 if ( privcmd_intf != NULL ) 40.12 privcmd_intf->proc_fops = &privcmd_file_ops;
41.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h Tue May 03 14:42:00 2005 +0000 41.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h Tue May 10 08:49:59 2005 +0000 41.3 @@ -80,7 +80,7 @@ enum fixed_addresses { 41.4 #ifdef CONFIG_ACPI_BOOT 41.5 FIX_ACPI_BEGIN, 41.6 FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, 41.7 - FIX_ACPI_RSDP_PAGE, 41.8 + FIX_ACPI_RSDP_PAGE, 41.9 #endif 41.10 #ifdef CONFIG_PCI_MMCONFIG 41.11 FIX_PCIE_MCFG, 41.12 @@ -104,15 +104,9 @@ enum fixed_addresses { 41.13 41.14 extern void __set_fixmap (enum fixed_addresses idx, 41.15 unsigned long phys, pgprot_t flags); 41.16 -extern void __set_fixmap_ma (enum fixed_addresses idx, 41.17 - unsigned long mach, pgprot_t flags); 41.18 41.19 #define set_fixmap(idx, phys) \ 41.20 __set_fixmap(idx, phys, PAGE_KERNEL) 41.21 -#define set_fixmap_ma(idx, phys) \ 41.22 - __set_fixmap_ma(idx, phys, PAGE_KERNEL) 41.23 -#define set_fixmap_ma_ro(idx, phys) \ 41.24 - __set_fixmap_ma(idx, phys, PAGE_KERNEL_RO) 41.25 /* 41.26 * Some hardware wants to get fixmapped without caching. 41.27 */
42.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io_apic.h Tue May 03 14:42:00 2005 +0000 42.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 42.3 @@ -1,230 +0,0 @@ 42.4 -#ifndef __ASM_IO_APIC_H 42.5 -#define __ASM_IO_APIC_H 42.6 - 42.7 -#include <linux/config.h> 42.8 -#include <asm/types.h> 42.9 -#include <asm/mpspec.h> 42.10 - 42.11 -#include <asm-xen/xen-public/xen.h> 42.12 -#include <asm-xen/xen-public/physdev.h> 42.13 - 42.14 -/* 42.15 - * Intel IO-APIC support for SMP and UP systems. 42.16 - * 42.17 - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar 42.18 - */ 42.19 - 42.20 -#ifdef CONFIG_X86_IO_APIC 42.21 - 42.22 -#ifdef CONFIG_PCI_MSI 42.23 -static inline int use_pci_vector(void) {return 1;} 42.24 -static inline void disable_edge_ioapic_vector(unsigned int vector) { } 42.25 -static inline void mask_and_ack_level_ioapic_vector(unsigned int vector) { } 42.26 -static inline void end_edge_ioapic_vector (unsigned int vector) { } 42.27 -#define startup_level_ioapic startup_level_ioapic_vector 42.28 -#define shutdown_level_ioapic mask_IO_APIC_vector 42.29 -#define enable_level_ioapic unmask_IO_APIC_vector 42.30 -#define disable_level_ioapic mask_IO_APIC_vector 42.31 -#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_vector 42.32 -#define end_level_ioapic end_level_ioapic_vector 42.33 -#define set_ioapic_affinity set_ioapic_affinity_vector 42.34 - 42.35 -#define startup_edge_ioapic startup_edge_ioapic_vector 42.36 -#define shutdown_edge_ioapic disable_edge_ioapic_vector 42.37 -#define enable_edge_ioapic unmask_IO_APIC_vector 42.38 -#define disable_edge_ioapic disable_edge_ioapic_vector 42.39 -#define ack_edge_ioapic ack_edge_ioapic_vector 42.40 -#define end_edge_ioapic end_edge_ioapic_vector 42.41 -#else 42.42 -static inline int use_pci_vector(void) {return 0;} 42.43 -static inline void disable_edge_ioapic_irq(unsigned int irq) { } 42.44 -static inline void mask_and_ack_level_ioapic_irq(unsigned int irq) { } 42.45 -static inline void end_edge_ioapic_irq (unsigned int irq) { } 42.46 -#define startup_level_ioapic startup_level_ioapic_irq 42.47 -#define shutdown_level_ioapic mask_IO_APIC_irq 42.48 -#define enable_level_ioapic unmask_IO_APIC_irq 42.49 -#define disable_level_ioapic mask_IO_APIC_irq 42.50 -#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_irq 42.51 -#define end_level_ioapic end_level_ioapic_irq 42.52 -#define set_ioapic_affinity set_ioapic_affinity_irq 42.53 - 42.54 -#define startup_edge_ioapic startup_edge_ioapic_irq 42.55 -#define shutdown_edge_ioapic disable_edge_ioapic_irq 42.56 -#define enable_edge_ioapic unmask_IO_APIC_irq 42.57 -#define disable_edge_ioapic disable_edge_ioapic_irq 42.58 -#define ack_edge_ioapic ack_edge_ioapic_irq 42.59 -#define end_edge_ioapic end_edge_ioapic_irq 42.60 -#endif 42.61 - 42.62 -#define IO_APIC_BASE(idx) \ 42.63 - ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ 42.64 - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) 42.65 - 42.66 -/* 42.67 - * The structure of the IO-APIC: 42.68 - */ 42.69 -union IO_APIC_reg_00 { 42.70 - u32 raw; 42.71 - struct { 42.72 - u32 __reserved_2 : 14, 42.73 - LTS : 1, 42.74 - delivery_type : 1, 42.75 - __reserved_1 : 8, 42.76 - ID : 8; 42.77 - } __attribute__ ((packed)) bits; 42.78 -}; 42.79 - 42.80 -union IO_APIC_reg_01 { 42.81 - u32 raw; 42.82 - struct { 42.83 - u32 version : 8, 42.84 - __reserved_2 : 7, 42.85 - PRQ : 1, 42.86 - entries : 8, 42.87 - __reserved_1 : 8; 42.88 - } __attribute__ ((packed)) bits; 42.89 -}; 42.90 - 42.91 -union IO_APIC_reg_02 { 42.92 - u32 raw; 
42.93 - struct { 42.94 - u32 __reserved_2 : 24, 42.95 - arbitration : 4, 42.96 - __reserved_1 : 4; 42.97 - } __attribute__ ((packed)) bits; 42.98 -}; 42.99 - 42.100 -union IO_APIC_reg_03 { 42.101 - u32 raw; 42.102 - struct { 42.103 - u32 boot_DT : 1, 42.104 - __reserved_1 : 31; 42.105 - } __attribute__ ((packed)) bits; 42.106 -}; 42.107 - 42.108 -/* 42.109 - * # of IO-APICs and # of IRQ routing registers 42.110 - */ 42.111 -extern int nr_ioapics; 42.112 -extern int nr_ioapic_registers[MAX_IO_APICS]; 42.113 - 42.114 -enum ioapic_irq_destination_types { 42.115 - dest_Fixed = 0, 42.116 - dest_LowestPrio = 1, 42.117 - dest_SMI = 2, 42.118 - dest__reserved_1 = 3, 42.119 - dest_NMI = 4, 42.120 - dest_INIT = 5, 42.121 - dest__reserved_2 = 6, 42.122 - dest_ExtINT = 7 42.123 -}; 42.124 - 42.125 -struct IO_APIC_route_entry { 42.126 - __u32 vector : 8, 42.127 - delivery_mode : 3, /* 000: FIXED 42.128 - * 001: lowest prio 42.129 - * 111: ExtINT 42.130 - */ 42.131 - dest_mode : 1, /* 0: physical, 1: logical */ 42.132 - delivery_status : 1, 42.133 - polarity : 1, 42.134 - irr : 1, 42.135 - trigger : 1, /* 0: edge, 1: level */ 42.136 - mask : 1, /* 0: enabled, 1: disabled */ 42.137 - __reserved_2 : 15; 42.138 - 42.139 - union { struct { __u32 42.140 - __reserved_1 : 24, 42.141 - physical_dest : 4, 42.142 - __reserved_2 : 4; 42.143 - } physical; 42.144 - 42.145 - struct { __u32 42.146 - __reserved_1 : 24, 42.147 - logical_dest : 8; 42.148 - } logical; 42.149 - } dest; 42.150 - 42.151 -} __attribute__ ((packed)); 42.152 - 42.153 -/* 42.154 - * MP-BIOS irq configuration table structures: 42.155 - */ 42.156 - 42.157 -/* I/O APIC entries */ 42.158 -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; 42.159 - 42.160 -/* # of MP IRQ source entries */ 42.161 -extern int mp_irq_entries; 42.162 - 42.163 -/* MP IRQ source entries */ 42.164 -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 42.165 - 42.166 -/* non-0 if default (table-less) MP configuration */ 42.167 -extern int mpc_default_type; 42.168 - 42.169 -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 42.170 -{ 42.171 - physdev_op_t op; 42.172 - int ret; 42.173 - 42.174 - op.cmd = PHYSDEVOP_APIC_READ; 42.175 - op.u.apic_op.apic = apic; 42.176 - op.u.apic_op.offset = reg; 42.177 - ret = HYPERVISOR_physdev_op(&op); 42.178 - if (ret) 42.179 - return ret; 42.180 - return op.u.apic_op.value; 42.181 -} 42.182 - 42.183 -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) 42.184 -{ 42.185 - physdev_op_t op; 42.186 - int ret; 42.187 - 42.188 - op.cmd = PHYSDEVOP_APIC_WRITE; 42.189 - op.u.apic_op.apic = apic; 42.190 - op.u.apic_op.offset = reg; 42.191 - op.u.apic_op.value = value; 42.192 - ret = HYPERVISOR_physdev_op(&op); 42.193 -} 42.194 - 42.195 -/* 42.196 - * Re-write a value: to be used for read-modify-write 42.197 - * cycles where the read already set up the index register. 42.198 - * 42.199 - * Older SiS APIC requires we rewrite the index regiser 42.200 - */ 42.201 -extern int sis_apic_bug; 42.202 -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) 42.203 -{ 42.204 - if (sis_apic_bug) 42.205 - *IO_APIC_BASE(apic) = reg; 42.206 - *(IO_APIC_BASE(apic)+4) = value; 42.207 -} 42.208 - 42.209 -/* 1 if "noapic" boot option passed */ 42.210 -extern int skip_ioapic_setup; 42.211 - 42.212 -/* 42.213 - * If we use the IO-APIC for IRQ routing, disable automatic 42.214 - * assignment of PCI IRQ's. 
42.215 - */ 42.216 -#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) 42.217 - 42.218 -#ifdef CONFIG_ACPI_BOOT 42.219 -extern int io_apic_get_unique_id (int ioapic, int apic_id); 42.220 -extern int io_apic_get_version (int ioapic); 42.221 -extern int io_apic_get_redir_entries (int ioapic); 42.222 -extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low); 42.223 -#endif /*CONFIG_ACPI_BOOT*/ 42.224 - 42.225 -extern int (*ioapic_renumber_irq)(int ioapic, int irq); 42.226 - 42.227 -#else /* !CONFIG_X86_IO_APIC */ 42.228 -#define io_apic_assign_pci_irqs 0 42.229 -#endif 42.230 - 42.231 -extern int assign_irq_vector(int irq); 42.232 - 42.233 -#endif
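Worth noting before this header disappears: it made every IO-APIC register access a PHYSDEVOP_APIC_READ/WRITE hypercall, since a guest cannot map the IO-APIC registers directly. A minimal sketch of reading the version/entry-count register through that interface, using the IO_APIC_reg_01 layout defined above:

physdev_op_t op;
union IO_APIC_reg_01 reg_01;

op.cmd = PHYSDEVOP_APIC_READ;
op.u.apic_op.apic = 0;      /* first IO-APIC */
op.u.apic_op.offset = 1;    /* register 1: version + #entries */
if (HYPERVISOR_physdev_op(&op) == 0) {
	reg_01.raw = op.u.apic_op.value;
	printk(KERN_INFO "IO-APIC version %02x, %d pins\n",
	       reg_01.bits.version, reg_01.bits.entries + 1);
}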
43.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h Tue May 03 14:42:00 2005 +0000 43.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h Tue May 10 08:49:59 2005 +0000 43.3 @@ -60,17 +60,8 @@ enum fixed_addresses { 43.4 extern void __set_fixmap (enum fixed_addresses idx, 43.5 unsigned long phys, pgprot_t flags); 43.6 43.7 -extern void __set_fixmap_ma (enum fixed_addresses idx, 43.8 - unsigned long mach, pgprot_t flags); 43.9 - 43.10 #define set_fixmap(idx, phys) \ 43.11 __set_fixmap(idx, phys, PAGE_KERNEL) 43.12 - 43.13 -#define set_fixmap_ma(idx, phys) \ 43.14 - __set_fixmap_ma(idx, phys, PAGE_KERNEL) 43.15 -#define set_fixmap_ma_ro(idx, phys) \ 43.16 - __set_fixmap_ma(idx, phys, PAGE_KERNEL_RO) 43.17 - 43.18 /* 43.19 * Some hardware wants to get fixmapped without caching. 43.20 */
44.1 --- a/linux-2.6.11-xen-sparse/mkbuildtree Tue May 03 14:42:00 2005 +0000 44.2 +++ b/linux-2.6.11-xen-sparse/mkbuildtree Tue May 10 08:49:59 2005 +0000 44.3 @@ -66,7 +66,7 @@ relative_lndir () 44.4 ( 44.5 cd $i 44.6 pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'` 44.7 - for j in `find . -type f -o -type l -maxdepth 1`; do 44.8 + for j in `find . -maxdepth 1 -type f -o -type l`; do 44.9 ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j 44.10 done 44.11 )
45.1 --- a/tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c Tue May 03 14:42:00 2005 +0000 45.2 +++ b/tools/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c Tue May 10 08:49:59 2005 +0000 45.3 @@ -338,7 +338,9 @@ regsets_store_inferior_registers () 45.4 } 45.5 else 45.6 { 45.7 +#ifdef DEBUG 45.8 perror ("Warning: ptrace(regsets_store_inferior_registers)"); 45.9 +#endif 45.10 } 45.11 } 45.12 regset ++; 45.13 @@ -364,7 +366,6 @@ linux_fetch_registers (int regno) 45.14 void 45.15 linux_store_registers (int regno) 45.16 { 45.17 - printf("store %d\n", regno); 45.18 if (use_regsets_p) 45.19 { 45.20 if (regsets_store_inferior_registers () == 0)
46.1 --- a/tools/libxc/Makefile Tue May 03 14:42:00 2005 +0000 46.2 +++ b/tools/libxc/Makefile Tue May 10 08:49:59 2005 +0000 46.3 @@ -16,6 +16,7 @@ vpath %c $(XEN_LIBXUTIL) 46.4 INCLUDES += -I $(XEN_LIBXUTIL) 46.5 46.6 SRCS := 46.7 +SRCS += xc_sedf.c 46.8 SRCS += xc_bvtsched.c 46.9 SRCS += xc_core.c 46.10 SRCS += xc_domain.c
47.1 --- a/tools/libxc/xc.h Tue May 03 14:42:00 2005 +0000 47.2 +++ b/tools/libxc/xc.h Tue May 10 08:49:59 2005 +0000 47.3 @@ -296,6 +296,14 @@ int xc_bvtsched_domain_get(int xc_handle 47.4 long long *warpl, 47.5 long long *warpu); 47.6 47.7 +int xc_sedf_domain_set(int xc_handle, 47.8 + u32 domid, 47.9 + u64 period, u64 slice, u64 latency, u16 extratime, u16 weight); 47.10 + 47.11 +int xc_sedf_domain_get(int xc_handle, 47.12 + u32 domid, 47.13 + u64* period, u64 *slice, u64 *latency, u16 *extratime, u16* weight); 47.14 + 47.15 typedef evtchn_status_t xc_evtchn_status_t; 47.16 47.17 /*
48.1 --- a/tools/libxc/xc_domain.c Tue May 03 14:42:00 2005 +0000 48.2 +++ b/tools/libxc/xc_domain.c Tue May 10 08:49:59 2005 +0000 48.3 @@ -43,7 +43,7 @@ int xc_domain_create(int xc_handle, 48.4 goto fail; 48.5 } 48.6 48.7 - return err; 48.8 + return 0; 48.9 48.10 fail: 48.11 errno_saved = errno; 48.12 @@ -103,6 +103,7 @@ int xc_domain_getinfo(int xc_handle, 48.13 unsigned int nr_doms; 48.14 u32 next_domid = first_domid; 48.15 dom0_op_t op; 48.16 + int rc = 0; 48.17 48.18 for ( nr_doms = 0; nr_doms < max_doms; nr_doms++ ) 48.19 { 48.20 @@ -110,7 +111,7 @@ int xc_domain_getinfo(int xc_handle, 48.21 op.u.getdomaininfo.domain = (domid_t)next_domid; 48.22 op.u.getdomaininfo.exec_domain = 0; // FIX ME?!? 48.23 op.u.getdomaininfo.ctxt = NULL; /* no exec context info, thanks. */ 48.24 - if ( do_dom0_op(xc_handle, &op) < 0 ) 48.25 + if ( (rc = do_dom0_op(xc_handle, &op)) < 0 ) 48.26 break; 48.27 info->domid = (u16)op.u.getdomaininfo.domain; 48.28 48.29 @@ -137,6 +138,8 @@ int xc_domain_getinfo(int xc_handle, 48.30 info++; 48.31 } 48.32 48.33 + if(!nr_doms) return rc; 48.34 + 48.35 return nr_doms; 48.36 } 48.37
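[Note on the xc_domain.c hunk: with this change xc_domain_getinfo() propagates the error code of a failed getdomaininfo dom0_op instead of silently returning 0 when not a single entry could be read. A minimal caller sketch, assuming the xc_dominfo_t array type this interface already fills in (handle and names are illustrative, not part of the hunk):

    #include <stdio.h>
    #include "xc.h"

    int count_domains(int xc_handle)
    {
        xc_dominfo_t info[16];
        int n = xc_domain_getinfo(xc_handle, 0, 16, info);
        if (n < 0)          /* new behaviour: rc from the failed dom0_op */
            perror("xc_domain_getinfo");
        return n;           /* otherwise: number of entries filled in */
    }
]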
49.1 --- a/tools/libxc/xc_physdev.c Tue May 03 14:42:00 2005 +0000 49.2 +++ b/tools/libxc/xc_physdev.c Tue May 10 08:49:59 2005 +0000 49.3 @@ -16,14 +16,6 @@ int xc_physdev_pci_access_modify(int xc_ 49.4 int func, 49.5 int enable) 49.6 { 49.7 - dom0_op_t op; 49.8 - 49.9 - op.cmd = DOM0_PCIDEV_ACCESS; 49.10 - op.u.pcidev_access.domain = (domid_t)domid; 49.11 - op.u.pcidev_access.bus = bus; 49.12 - op.u.pcidev_access.dev = dev; 49.13 - op.u.pcidev_access.func = func; 49.14 - op.u.pcidev_access.enable = enable; 49.15 - 49.16 - return do_dom0_op(xc_handle, &op); 49.17 + errno = ENOSYS; 49.18 + return -1; 49.19 }
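[Note on the xc_physdev.c hunk: the DOM0_PCIDEV_ACCESS operation goes away with the PCI code removed elsewhere in this changeset, so xc_physdev_pci_access_modify() is now a stub that fails unconditionally. A sketch of what callers should expect after this change (variable names illustrative):

    #include <errno.h>
    #include <stdio.h>
    #include "xc.h"

    void request_pci_access(int xc_handle, u32 domid)
    {
        /* Always fails after this change, with errno set to ENOSYS. */
        if (xc_physdev_pci_access_modify(xc_handle, domid, 0, 0, 0, 1) < 0 &&
            errno == ENOSYS)
            fprintf(stderr, "physdev PCI access control not implemented\n");
    }
]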
50.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 50.2 +++ b/tools/libxc/xc_sedf.c Tue May 10 08:49:59 2005 +0000 50.3 @@ -0,0 +1,51 @@ 50.4 +/****************************************************************************** 50.5 + * xc_sedf.c 50.6 + * 50.7 + * API for manipulating parameters of the Simple EDF scheduler. 50.8 + * 50.9 + * changes by Stephan Diestelhorst 50.10 + * based on code 50.11 + * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge. 50.12 + */ 50.13 + 50.14 +#include "xc_private.h" 50.15 + 50.16 +int xc_sedf_domain_set(int xc_handle, 50.17 + u32 domid, u64 period, u64 slice,u64 latency, u16 extratime,u16 weight) 50.18 +{ 50.19 + dom0_op_t op; 50.20 + struct sedf_adjdom *p = &op.u.adjustdom.u.sedf; 50.21 + 50.22 + op.cmd = DOM0_ADJUSTDOM; 50.23 + op.u.adjustdom.domain = (domid_t)domid; 50.24 + op.u.adjustdom.sched_id = SCHED_SEDF; 50.25 + op.u.adjustdom.direction = SCHED_INFO_PUT; 50.26 + 50.27 + p->period = period; 50.28 + p->slice = slice; 50.29 + p->latency = latency; 50.30 + p->extratime = extratime; 50.31 + p->weight = weight; 50.32 + return do_dom0_op(xc_handle, &op); 50.33 +} 50.34 + 50.35 +int xc_sedf_domain_get(int xc_handle, u32 domid, u64 *period, u64 *slice, u64* latency, u16* extratime, u16* weight) 50.36 +{ 50.37 + dom0_op_t op; 50.38 + int ret; 50.39 + struct sedf_adjdom *p = &op.u.adjustdom.u.sedf; 50.40 + 50.41 + op.cmd = DOM0_ADJUSTDOM; 50.42 + op.u.adjustdom.domain = (domid_t)domid; 50.43 + op.u.adjustdom.sched_id = SCHED_SEDF; 50.44 + op.u.adjustdom.direction = SCHED_INFO_GET; 50.45 + 50.46 + ret = do_dom0_op(xc_handle, &op); 50.47 + 50.48 + *period = p->period; 50.49 + *slice = p->slice; 50.50 + *latency = p->latency; 50.51 + *extratime = p->extratime; 50.52 + *weight = p->weight; 50.53 + return ret; 50.54 +}
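[Note on the new xc_sedf.c: a minimal sketch of driving the interface; the parameter values are illustrative only, since the patch does not document their units:

    #include <stdio.h>
    #include "xc.h"

    int sedf_roundtrip(int xc_handle, u32 domid)
    {
        u64 period, slice, latency;
        u16 extratime, weight;

        /* SCHED_INFO_PUT: push new parameters to the SEDF scheduler. */
        if (xc_sedf_domain_set(xc_handle, domid,
                               20000000ULL, 5000000ULL, 0ULL, 0, 0) != 0)
            return -1;

        /* SCHED_INFO_GET: read them back.  The getter copies out of the
         * dom0_op buffer even when the hypercall failed, so the outputs
         * are only meaningful on a zero return. */
        if (xc_sedf_domain_get(xc_handle, domid, &period, &slice,
                               &latency, &extratime, &weight) != 0)
            return -1;

        printf("period=%llu slice=%llu extratime=%u weight=%u\n",
               (unsigned long long)period, (unsigned long long)slice,
               extratime, weight);
        return 0;
    }

The same six-value tuple is threaded through the rest of this changeset: the Python binding in xc.c, xend's domain_cpu_sedf_set(), and the `xm sedf DOM PERIOD SLICE LATENCY EXTRATIME WEIGHT` command further below.]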
51.1 --- a/tools/libxutil/sxpr_parser.h Tue May 03 14:42:00 2005 +0000 51.2 +++ b/tools/libxutil/sxpr_parser.h Tue May 10 08:49:59 2005 +0000 51.3 @@ -28,7 +28,7 @@ 51.4 /** Size of a parser input buffer. 51.5 * Tokens read must fit into this size (including trailing null). 51.6 */ 51.7 -#define PARSER_BUF_SIZE 1024 51.8 +#define PARSER_BUF_SIZE 4096 51.9 51.10 struct Parser; 51.11 typedef int ParserStateFn(struct Parser *, char c);
52.1 --- a/tools/python/xen/lowlevel/xc/xc.c Tue May 03 14:42:00 2005 +0000 52.2 +++ b/tools/python/xen/lowlevel/xc/xc.c Tue May 10 08:49:59 2005 +0000 52.3 @@ -827,6 +827,52 @@ static PyObject *pyxc_physinfo(PyObject 52.4 "cpu_khz", info.cpu_khz); 52.5 } 52.6 52.7 +static PyObject *pyxc_sedf_domain_set(PyObject *self, 52.8 + PyObject *args, 52.9 + PyObject *kwds) 52.10 +{ 52.11 + XcObject *xc = (XcObject *)self; 52.12 + u32 domid; 52.13 + u64 period, slice, latency; 52.14 + u16 extratime, weight; 52.15 + static char *kwd_list[] = { "dom", "period", "slice", "latency", "extratime", "weight", NULL }; 52.16 + 52.17 + if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLLhh", kwd_list, &domid, 52.18 + &period, &slice, &latency, &extratime, &weight) ) 52.19 + return NULL; 52.20 + if ( xc_sedf_domain_set(xc->xc_handle, domid, period, slice, latency, extratime, weight) != 0 ) 52.21 + return PyErr_SetFromErrno(xc_error); 52.22 + 52.23 + Py_INCREF(zero); 52.24 + return zero; 52.25 +} 52.26 + 52.27 +static PyObject *pyxc_sedf_domain_get(PyObject *self, 52.28 + PyObject *args, 52.29 + PyObject *kwds) 52.30 +{ 52.31 + XcObject *xc = (XcObject *)self; 52.32 + u32 domid; 52.33 + u64 period, slice, latency; 52.34 + u16 weight, extratime; 52.35 + 52.36 + static char *kwd_list[] = { "dom", NULL }; 52.37 + 52.38 + if( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &domid) ) 52.39 + return NULL; 52.40 + 52.41 + if ( xc_sedf_domain_get( xc->xc_handle, domid, &period, 52.42 + &slice, &latency, &extratime, &weight) ) 52.43 + return PyErr_SetFromErrno(xc_error); 52.44 + 52.45 + return Py_BuildValue("{s:i,s:L,s:L,s:L,s:i,s:i}", 52.46 + "domain", domid, 52.47 + "period", period, 52.48 + "slice", slice, 52.49 + "latency", latency, 52.50 + "extratime", extratime, "weight", weight); 52.51 +} 52.52 + 52.53 static PyObject *pyxc_shadow_control(PyObject *self, 52.54 PyObject *args, 52.55 PyObject *kwds) 52.56 @@ -1027,6 +1073,30 @@ static PyMethodDef pyxc_methods[] = { 52.57 " warpu [long]: Unwarp requirement.\n" 52.58 " warpl [long]: Warp limit,\n" 52.59 }, 52.60 + 52.61 + { "sedf_domain_set", 52.62 + (PyCFunction)pyxc_sedf_domain_set, 52.63 + METH_KEYWORDS, "\n" 52.64 + "Set the scheduling parameters for a domain when running with the Simple EDF scheduler.\n" 52.65 + " dom [int]: domain to set\n" 52.66 + " period [long]: domain's scheduling period\n" 52.67 + " slice [long]: domain's slice per period\n" 52.68 + " latency [long]: domain's wakeup latency hint\n" 52.69 + " extratime [int]: domain aware of extratime?\n weight [int]: domain's extratime weight\n" 52.70 + "Returns: [int] 0 on success; -1 on error.\n" }, 52.71 + 52.72 + { "sedf_domain_get", 52.73 + (PyCFunction)pyxc_sedf_domain_get, 52.74 + METH_KEYWORDS, "\n" 52.75 + "Get the current scheduling parameters for a domain when running with\n" 52.76 + "the Simple EDF scheduler.\n" 52.77 + " dom [int]: domain to query\n" 52.78 + "Returns: [dict]\n" 52.79 + " domain [int]: domain ID\n" 52.80 + " period [long]: scheduler period\n" 52.81 + " slice [long]: CPU reservation per period\n" 52.82 + " latency [long]: domain's wakeup latency hint\n" 52.83 + " extratime [int]: domain aware of extratime?\n weight [int]: domain's extratime weight\n"}, 52.84 52.85 { "evtchn_alloc_unbound", 52.86 (PyCFunction)pyxc_evtchn_alloc_unbound,
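[Note on the xc.c hunk: per the Python 2 C API, the "iLLLhh" format string above converts 'i' to a C int (the u32 dom), 'L' to a long long (the u64 period, slice and latency) and 'h' to a short (the u16 extratime and weight). A sketch of the resulting Python-side calls, matching kwd_list; values are illustrative:

    /* Python-side usage corresponding to the keyword lists above:
     *
     *   xc.sedf_domain_set(dom=1, period=20000000, slice=5000000,
     *                      latency=0, extratime=0, weight=0)
     *   params = xc.sedf_domain_get(dom=1)
     */
]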
53.1 --- a/tools/python/xen/xend/XendClient.py Tue May 03 14:42:00 2005 +0000 53.2 +++ b/tools/python/xen/xend/XendClient.py Tue May 10 08:49:59 2005 +0000 53.3 @@ -260,6 +260,15 @@ class Xend: 53.4 'warpl' : warpl, 53.5 'warpu' : warpu }) 53.6 53.7 + def xend_domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight): 53.8 + return self.xendPost(self.domainurl(id), 53.9 + {'op' : 'cpu_sedf_set', 53.10 + 'period' : period, 53.11 + 'slice' : slice, 53.12 + 'latency' : latency, 53.13 + 'extratime' : extratime, 53.14 + 'weight' : weight }) 53.15 + 53.16 def xend_domain_maxmem_set(self, id, memory): 53.17 return self.xendPost(self.domainurl(id), 53.18 { 'op' : 'maxmem_set',
54.1 --- a/tools/python/xen/xend/XendDomain.py Tue May 03 14:42:00 2005 +0000 54.2 +++ b/tools/python/xen/xend/XendDomain.py Tue May 10 08:49:59 2005 +0000 54.3 @@ -644,6 +644,24 @@ class XendDomain: 54.4 except Exception, ex: 54.5 raise XendError(str(ex)) 54.6 54.7 + 54.8 + def domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight): 54.9 + """Set Simple EDF scheduler parameters for a domain. 54.10 + """ 54.11 + dominfo = self.domain_lookup(id) 54.12 + try: 54.13 + return xc.sedf_domain_set(dominfo.dom, period, slice, latency, extratime, weight) 54.14 + except Exception, ex: 54.15 + raise XendError(str(ex)) 54.16 + 54.17 + def domain_cpu_sedf_get(self, id): 54.18 + """Get Simple EDF scheduler parameters for a domain. 54.19 + """ 54.20 + dominfo = self.domain_lookup(id) 54.21 + try: 54.22 + return xc.sedf_domain_get(dominfo.dom) 54.23 + except Exception, ex: 54.24 + raise XendError(str(ex)) 54.25 def domain_device_create(self, id, devconfig): 54.26 """Create a new device for a domain. 54.27
55.1 --- a/tools/python/xen/xend/server/SrvDomain.py Tue May 03 14:42:00 2005 +0000 55.2 +++ b/tools/python/xen/xend/server/SrvDomain.py Tue May 10 08:49:59 2005 +0000 55.3 @@ -107,6 +107,18 @@ class SrvDomain(SrvDir): 55.4 val = fn(req.args, {'dom': self.dom.id}) 55.5 return val 55.6 55.7 + 55.8 + def op_cpu_sedf_set(self, op, req): 55.9 + fn = FormFn(self.xd.domain_cpu_sedf_set, 55.10 + [['dom', 'str'], 55.11 + ['period', 'int'], 55.12 + ['slice', 'int'], 55.13 + ['latency', 'int'], 55.14 + ['extratime', 'int'], 55.15 + ['weight', 'int']]) 55.16 + val = fn(req.args, {'dom': self.dom.id}) 55.17 + return val 55.18 + 55.19 def op_maxmem_set(self, op, req): 55.20 fn = FormFn(self.xd.domain_maxmem_set, 55.21 [['dom', 'str'],
56.1 --- a/tools/python/xen/xm/main.py Tue May 03 14:42:00 2005 +0000 56.2 +++ b/tools/python/xen/xm/main.py Tue May 10 08:49:59 2005 +0000 56.3 @@ -591,6 +591,23 @@ class ProgBvtslice(Prog): 56.4 56.5 xm.prog(ProgBvtslice) 56.6 56.7 +class ProgSedf(Prog): 56.8 + group = 'scheduler' 56.9 + name = "sedf" 56.10 + info = """Set simple EDF parameters.""" 56.11 + 56.12 + def help(self, args): 56.13 + print args[0], "DOM PERIOD SLICE LATENCY EXTRATIME WEIGHT" 56.14 + print "\nSet simple EDF parameters." 56.15 + 56.16 + def main(self, args): 56.17 + if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0]) 56.18 + dom = args[1] 56.19 + v = map(int, args[2:7]) 56.20 + server.xend_domain_cpu_sedf_set(dom, *v) 56.21 + 56.22 +xm.prog(ProgSedf) 56.23 + 56.24 class ProgInfo(Prog): 56.25 group = 'host' 56.26 name = "info"
57.1 --- a/xen/arch/ia64/domain.c Tue May 03 14:42:00 2005 +0000 57.2 +++ b/xen/arch/ia64/domain.c Tue May 10 08:49:59 2005 +0000 57.3 @@ -621,14 +621,12 @@ int construct_dom0(struct domain *d, 57.4 unsigned long pkern_entry; 57.5 unsigned long pkern_end; 57.6 57.7 - extern void physdev_init_dom0(struct domain *); 57.8 - 57.9 //printf("construct_dom0: starting\n"); 57.10 /* Sanity! */ 57.11 #ifndef CLONE_DOMAIN0 57.12 if ( d != dom0 ) 57.13 BUG(); 57.14 - if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 57.15 + if ( test_bit(DF_CONSTRUCTED, &d->flags) ) 57.16 BUG(); 57.17 #endif 57.18 57.19 @@ -755,13 +753,7 @@ int construct_dom0(struct domain *d, 57.20 #endif 57.21 console_endboot(strstr(cmdline, "tty0") != NULL); 57.22 57.23 - /* DOM0 gets access to everything. */ 57.24 -#ifdef CLONE_DOMAIN0 57.25 -if (d == dom0) 57.26 -#endif 57.27 - physdev_init_dom0(d); 57.28 - 57.29 - set_bit(DF_CONSTRUCTED, &d->d_flags); 57.30 + set_bit(DF_CONSTRUCTED, &d->flags); 57.31 57.32 new_thread(ed, pkern_entry, 0, 0); 57.33 // FIXME: Hack for keyboard input 57.34 @@ -791,7 +783,7 @@ int construct_domU(struct domain *d, 57.35 unsigned long pkern_entry; 57.36 57.37 #ifndef DOMU_AUTO_RESTART 57.38 - if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) BUG(); 57.39 + if ( test_bit(DF_CONSTRUCTED, &d->flags) ) BUG(); 57.40 #endif 57.41 57.42 printk("*** LOADING DOMAIN %d ***\n",d->id); 57.43 @@ -824,7 +816,7 @@ int construct_domU(struct domain *d, 57.44 loaddomainelfimage(d,image_start); 57.45 printk("loaddomainelfimage returns\n"); 57.46 57.47 - set_bit(DF_CONSTRUCTED, &d->d_flags); 57.48 + set_bit(DF_CONSTRUCTED, &d->flags); 57.49 57.50 printk("calling new_thread, entry=%p\n",pkern_entry); 57.51 #ifdef DOMU_AUTO_RESTART
58.1 --- a/xen/arch/ia64/idle0_task.c Tue May 03 14:42:00 2005 +0000 58.2 +++ b/xen/arch/ia64/idle0_task.c Tue May 10 08:49:59 2005 +0000 58.3 @@ -22,7 +22,7 @@ 58.4 #define IDLE0_DOMAIN(_t) \ 58.5 { \ 58.6 id: IDLE_DOMAIN_ID, \ 58.7 - d_flags: 1<<DF_IDLETASK, \ 58.8 + flags: 1<<DF_IDLETASK, \ 58.9 refcnt: ATOMIC_INIT(1) \ 58.10 } 58.11
59.1 --- a/xen/arch/ia64/xenmisc.c Tue May 03 14:42:00 2005 +0000 59.2 +++ b/xen/arch/ia64/xenmisc.c Tue May 10 08:49:59 2005 +0000 59.3 @@ -133,73 +133,6 @@ void free_page_type(struct pfn_info *pag 59.4 } 59.5 59.6 /////////////////////////////// 59.7 -// from arch/x86/pci.c 59.8 -/////////////////////////////// 59.9 - 59.10 -int 59.11 -pcibios_prep_mwi (struct pci_dev *dev) 59.12 -{ 59.13 - dummy(); 59.14 -} 59.15 - 59.16 -/////////////////////////////// 59.17 -// from arch/x86/pci-irq.c 59.18 -/////////////////////////////// 59.19 - 59.20 -void pcibios_enable_irq(struct pci_dev *dev) 59.21 -{ 59.22 - dummy(); 59.23 -} 59.24 - 59.25 -/////////////////////////////// 59.26 -// from arch/ia64/pci-pc.c 59.27 -/////////////////////////////// 59.28 - 59.29 -#include <xen/pci.h> 59.30 - 59.31 -int pcibios_enable_device(struct pci_dev *dev, int mask) 59.32 -{ 59.33 - dummy(); 59.34 - return 0; 59.35 -} 59.36 - 59.37 -int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL; 59.38 -int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL; 59.39 - 59.40 -//struct pci_fixup pcibios_fixups[] = { { 0 } }; 59.41 -struct pci_fixup pcibios_fixups[] = { { 0 } }; 59.42 - 59.43 -void 59.44 -pcibios_align_resource(void *data, struct resource *res, 59.45 - unsigned long size, unsigned long align) 59.46 -{ 59.47 - dummy(); 59.48 -} 59.49 - 59.50 -void 59.51 -pcibios_update_resource(struct pci_dev *dev, struct resource *root, 59.52 - struct resource *res, int resource) 59.53 -{ 59.54 - dummy(); 59.55 -} 59.56 - 59.57 -void __devinit pcibios_fixup_bus(struct pci_bus *b) 59.58 -{ 59.59 - dummy(); 59.60 -} 59.61 - 59.62 -void __init pcibios_init(void) 59.63 -{ 59.64 - dummy(); 59.65 -} 59.66 - 59.67 -char * __devinit pcibios_setup(char *str) 59.68 -{ 59.69 - dummy(); 59.70 - return 0; 59.71 -} 59.72 - 59.73 -/////////////////////////////// 59.74 // from arch/ia64/traps.c 59.75 /////////////////////////////// 59.76 59.77 @@ -217,33 +150,6 @@ void dump_pageframe_info(struct domain * 59.78 } 59.79 59.80 /////////////////////////////// 59.81 -// from common/physdev.c 59.82 -/////////////////////////////// 59.83 -void 59.84 -physdev_init_dom0(struct domain *d) 59.85 -{ 59.86 -} 59.87 - 59.88 -int 59.89 -physdev_pci_access_modify(domid_t id, int bus, int dev, int func, int enable) 59.90 -{ 59.91 - return -EINVAL; 59.92 -} 59.93 - 59.94 -void physdev_modify_ioport_access_range(struct domain *d, int enable, 59.95 - int port, int num) 59.96 -{ 59.97 - printk("physdev_modify_ioport_access_range not implemented\n"); 59.98 - dummy(); 59.99 -} 59.100 - 59.101 -void physdev_destroy_state(struct domain *d) 59.102 -{ 59.103 - printk("physdev_destroy_state not implemented\n"); 59.104 - dummy(); 59.105 -} 59.106 - 59.107 -/////////////////////////////// 59.108 // called from arch/ia64/head.S 59.109 /////////////////////////////// 59.110 59.111 @@ -334,7 +240,7 @@ int id = ((struct exec_domain *)current) 59.112 if (!cnt[id]--) { printk("%x",id); cnt[id] = 50; } 59.113 if (!i--) { printk("+",id); cnt[id] = 100; } 59.114 } 59.115 - clear_bit(EDF_RUNNING, &prev->ed_flags); 59.116 + clear_bit(EDF_RUNNING, &prev->flags); 59.117 //if (!is_idle_task(next->domain) ) 59.118 //send_guest_virq(next, VIRQ_TIMER); 59.119 load_region_regs(current); 59.120 @@ -364,7 +270,7 @@ loop: 59.121 printf(buf); 59.122 if (regs) show_registers(regs); 59.123 domain_pause_by_systemcontroller(current->domain); 59.124 - set_bit(DF_CRASHED, ed->domain->d_flags); 59.125 + 
set_bit(DF_CRASHED, ed->domain->flags); 59.126 if (ed->domain->id == 0) { 59.127 int i = 1000000000L; 59.128 // if domain0 crashes, just periodically print out panic
60.1 --- a/xen/arch/ia64/xensetup.c Tue May 03 14:42:00 2005 +0000 60.2 +++ b/xen/arch/ia64/xensetup.c Tue May 10 08:49:59 2005 +0000 60.3 @@ -69,9 +69,6 @@ unsigned char opt_pdb[10] = "none"; 60.4 unsigned int opt_tbuf_size = 10; 60.5 /* opt_sched: scheduler - default to Borrowed Virtual Time */ 60.6 char opt_sched[10] = "bvt"; 60.7 -/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */ 60.8 -/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */ 60.9 -char opt_physdev_dom0_hide[200] = ""; 60.10 /* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */ 60.11 /* level- or edge-triggered. */ 60.12 /* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */ 60.13 @@ -270,7 +267,7 @@ printk("About to call init_idle_task()\n 60.14 if ( dom0 == NULL ) 60.15 panic("Error creating domain 0\n"); 60.16 60.17 - set_bit(DF_PRIVILEGED, &dom0->d_flags); 60.18 + set_bit(DF_PRIVILEGED, &dom0->flags); 60.19 60.20 /* 60.21 * We're going to setup domain0 using the module(s) that we stashed safely
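[Note on the ia64 hunks above: they track a rename in the common headers, where struct domain's d_flags and struct exec_domain's ed_flags each become a plain flags field. The updated idiom, using only names from the hunks (a sketch, not a complete function):

    if (test_bit(DF_CONSTRUCTED, &d->flags))    /* was d->d_flags     */
        BUG();
    clear_bit(EDF_RUNNING, &prev->flags);       /* was prev->ed_flags */
    set_bit(DF_PRIVILEGED, &dom0->flags);       /* was dom0->d_flags  */
]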
61.1 --- a/xen/arch/x86/Makefile Tue May 03 14:42:00 2005 +0000 61.2 +++ b/xen/arch/x86/Makefile Tue May 10 08:49:59 2005 +0000 61.3 @@ -3,6 +3,7 @@ include $(BASEDIR)/Rules.mk 61.4 61.5 OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S)) 61.6 OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c)) 61.7 +OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c)) 61.8 OBJS += $(patsubst %.c,%.o,$(wildcard mtrr/*.c)) 61.9 61.10 OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS)) 61.11 @@ -11,8 +12,6 @@ ifneq ($(crash_debug),y) 61.12 OBJS := $(patsubst cdb%.o,,$(OBJS)) 61.13 endif 61.14 61.15 -OBJS := $(filter-out pci%,$(OBJS)) 61.16 - 61.17 default: $(TARGET) 61.18 61.19 $(TARGET): $(TARGET)-syms boot/mkelf32
62.1 --- a/xen/arch/x86/Rules.mk Tue May 03 14:42:00 2005 +0000 62.2 +++ b/xen/arch/x86/Rules.mk Tue May 10 08:49:59 2005 +0000 62.3 @@ -2,8 +2,8 @@ 62.4 # x86-specific definitions 62.5 62.6 CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing 62.7 -CFLAGS += -iwithprefix include -Wall -Werror -pipe 62.8 -CFLAGS += -I$(BASEDIR)/include -Wno-pointer-arith -Wredundant-decls 62.9 +CFLAGS += -iwithprefix include -Wall -Werror -Wno-pointer-arith -pipe 62.10 +CFLAGS += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-x86/mach-default 62.11 62.12 ifeq ($(optimize),y) 62.13 CFLAGS += -O3 -fomit-frame-pointer
63.1 --- a/xen/arch/x86/acpi.c Tue May 03 14:42:00 2005 +0000 63.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 63.3 @@ -1,721 +0,0 @@ 63.4 -/* 63.5 - * acpi.c - Architecture-Specific Low-Level ACPI Support 63.6 - * 63.7 - * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> 63.8 - * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com> 63.9 - * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> 63.10 - * 63.11 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 63.12 - * 63.13 - * This program is free software; you can redistribute it and/or modify 63.14 - * it under the terms of the GNU General Public License as published by 63.15 - * the Free Software Foundation; either version 2 of the License, or 63.16 - * (at your option) any later version. 63.17 - * 63.18 - * This program is distributed in the hope that it will be useful, 63.19 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 63.20 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 63.21 - * GNU General Public License for more details. 63.22 - * 63.23 - * You should have received a copy of the GNU General Public License 63.24 - * along with this program; if not, write to the Free Software 63.25 - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 63.26 - * 63.27 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 63.28 - */ 63.29 - 63.30 -#include <xen/config.h> 63.31 -#include <xen/kernel.h> 63.32 -#include <xen/init.h> 63.33 -#include <xen/types.h> 63.34 -#include <xen/slab.h> 63.35 -#include <xen/pci.h> 63.36 -#include <xen/irq.h> 63.37 -#include <xen/acpi.h> 63.38 -#include <asm/mpspec.h> 63.39 -#include <asm/io.h> 63.40 -#include <asm/apic.h> 63.41 -#include <asm/apicdef.h> 63.42 -#include <asm/page.h> 63.43 -#include <asm/io_apic.h> 63.44 -#include <asm/acpi.h> 63.45 -#include <asm/smpboot.h> 63.46 - 63.47 - 63.48 -#define PREFIX "ACPI: " 63.49 - 63.50 -int acpi_lapic; 63.51 -int acpi_ioapic; 63.52 -int acpi_strict; 63.53 - 63.54 -acpi_interrupt_flags acpi_sci_flags __initdata; 63.55 -int acpi_sci_override_gsi __initdata; 63.56 -/* -------------------------------------------------------------------------- 63.57 - Boot-time Configuration 63.58 - -------------------------------------------------------------------------- */ 63.59 - 63.60 -int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */ 63.61 -int acpi_ht __initdata = 1; /* enable HT */ 63.62 - 63.63 -enum acpi_irq_model_id acpi_irq_model; 63.64 - 63.65 - 63.66 -/* 63.67 - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, 63.68 - * to map the target physical address. The problem is that set_fixmap() 63.69 - * provides a single page, and it is possible that the page is not 63.70 - * sufficient. 63.71 - * By using this area, we can map up to MAX_IO_APICS pages temporarily, 63.72 - * i.e. until the next __va_range() call. 63.73 - * 63.74 - * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* 63.75 - * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and 63.76 - * count idx down while incrementing the phys address. 
63.77 - */ 63.78 -char *__acpi_map_table(unsigned long phys, unsigned long size) 63.79 -{ 63.80 - unsigned long base, offset, mapped_size; 63.81 - int idx; 63.82 - 63.83 - if (phys + size < 8*1024*1024) 63.84 - return __va(phys); 63.85 - 63.86 - offset = phys & (PAGE_SIZE - 1); 63.87 - mapped_size = PAGE_SIZE - offset; 63.88 - set_fixmap(FIX_ACPI_END, phys); 63.89 - base = fix_to_virt(FIX_ACPI_END); 63.90 - 63.91 - /* 63.92 - * Most cases can be covered by the below. 63.93 - */ 63.94 - idx = FIX_ACPI_END; 63.95 - while (mapped_size < size) { 63.96 - if (--idx < FIX_ACPI_BEGIN) 63.97 - return 0; /* cannot handle this */ 63.98 - phys += PAGE_SIZE; 63.99 - set_fixmap(idx, phys); 63.100 - mapped_size += PAGE_SIZE; 63.101 - } 63.102 - 63.103 - return ((char *) base + offset); 63.104 -} 63.105 - 63.106 - 63.107 -#ifdef CONFIG_X86_LOCAL_APIC 63.108 - 63.109 -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 63.110 - 63.111 - 63.112 -static int __init 63.113 -acpi_parse_madt ( 63.114 - unsigned long phys_addr, 63.115 - unsigned long size) 63.116 -{ 63.117 - struct acpi_table_madt *madt = NULL; 63.118 - 63.119 - if (!phys_addr || !size) 63.120 - return -EINVAL; 63.121 - 63.122 - madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size); 63.123 - if (!madt) { 63.124 - printk(KERN_WARNING PREFIX "Unable to map MADT\n"); 63.125 - return -ENODEV; 63.126 - } 63.127 - 63.128 - if (madt->lapic_address) 63.129 - acpi_lapic_addr = (u64) madt->lapic_address; 63.130 - 63.131 - printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n", 63.132 - madt->lapic_address); 63.133 - 63.134 - detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id); 63.135 - 63.136 - return 0; 63.137 -} 63.138 - 63.139 - 63.140 -static int __init 63.141 -acpi_parse_lapic ( 63.142 - acpi_table_entry_header *header) 63.143 -{ 63.144 - struct acpi_table_lapic *processor = NULL; 63.145 - 63.146 - processor = (struct acpi_table_lapic*) header; 63.147 - if (!processor) 63.148 - return -EINVAL; 63.149 - 63.150 - acpi_table_print_madt_entry(header); 63.151 - 63.152 - mp_register_lapic ( 63.153 - processor->id, /* APIC ID */ 63.154 - processor->flags.enabled); /* Enabled? 
*/ 63.155 - 63.156 - return 0; 63.157 -} 63.158 - 63.159 - 63.160 -static int __init 63.161 -acpi_parse_lapic_addr_ovr ( 63.162 - acpi_table_entry_header *header) 63.163 -{ 63.164 - struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; 63.165 - 63.166 - lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header; 63.167 - if (!lapic_addr_ovr) 63.168 - return -EINVAL; 63.169 - 63.170 - acpi_lapic_addr = lapic_addr_ovr->address; 63.171 - 63.172 - return 0; 63.173 -} 63.174 - 63.175 -static int __init 63.176 -acpi_parse_lapic_nmi ( 63.177 - acpi_table_entry_header *header) 63.178 -{ 63.179 - struct acpi_table_lapic_nmi *lapic_nmi = NULL; 63.180 - 63.181 - lapic_nmi = (struct acpi_table_lapic_nmi*) header; 63.182 - if (!lapic_nmi) 63.183 - return -EINVAL; 63.184 - 63.185 - acpi_table_print_madt_entry(header); 63.186 - 63.187 - if (lapic_nmi->lint != 1) 63.188 - printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); 63.189 - 63.190 - return 0; 63.191 -} 63.192 - 63.193 -#endif /*CONFIG_X86_LOCAL_APIC*/ 63.194 - 63.195 -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) 63.196 - 63.197 -static int __init 63.198 -acpi_parse_ioapic ( 63.199 - acpi_table_entry_header *header) 63.200 -{ 63.201 - struct acpi_table_ioapic *ioapic = NULL; 63.202 - 63.203 - ioapic = (struct acpi_table_ioapic*) header; 63.204 - if (!ioapic) 63.205 - return -EINVAL; 63.206 - 63.207 - acpi_table_print_madt_entry(header); 63.208 - 63.209 - mp_register_ioapic ( 63.210 - ioapic->id, 63.211 - ioapic->address, 63.212 - ioapic->global_irq_base); 63.213 - 63.214 - return 0; 63.215 -} 63.216 - 63.217 -/* 63.218 - * Parse Interrupt Source Override for the ACPI SCI 63.219 - */ 63.220 -static void 63.221 -acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) 63.222 -{ 63.223 - if (trigger == 0) /* compatible SCI trigger is level */ 63.224 - trigger = 3; 63.225 - 63.226 - if (polarity == 0) /* compatible SCI polarity is low */ 63.227 - polarity = 3; 63.228 - 63.229 - /* Command-line over-ride via acpi_sci= */ 63.230 - if (acpi_sci_flags.trigger) 63.231 - trigger = acpi_sci_flags.trigger; 63.232 - 63.233 - if (acpi_sci_flags.polarity) 63.234 - polarity = acpi_sci_flags.polarity; 63.235 - 63.236 - /* 63.237 - * mp_config_acpi_legacy_irqs() already setup IRQs < 16 63.238 - * If GSI is < 16, this will update its flags, 63.239 - * else it will create a new mp_irqs[] entry. 
63.240 - */ 63.241 - mp_override_legacy_irq(gsi, polarity, trigger, gsi); 63.242 - 63.243 - /* 63.244 - * stash over-ride to indicate we've been here 63.245 - * and for later update of acpi_fadt 63.246 - */ 63.247 - acpi_sci_override_gsi = gsi; 63.248 - return; 63.249 -} 63.250 - 63.251 -static int __init 63.252 -acpi_parse_fadt(unsigned long phys, unsigned long size) 63.253 -{ 63.254 - struct fadt_descriptor_rev2 *fadt =0; 63.255 - 63.256 - fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); 63.257 - if (!fadt) { 63.258 - printk(KERN_WARNING PREFIX "Unable to map FADT\n"); 63.259 - return 0; 63.260 - } 63.261 - 63.262 -#ifdef CONFIG_ACPI_INTERPRETER 63.263 - /* initialize sci_int early for INT_SRC_OVR MADT parsing */ 63.264 - acpi_fadt.sci_int = fadt->sci_int; 63.265 -#endif 63.266 - 63.267 - return 0; 63.268 -} 63.269 - 63.270 - 63.271 -static int __init 63.272 -acpi_parse_int_src_ovr ( 63.273 - acpi_table_entry_header *header) 63.274 -{ 63.275 - struct acpi_table_int_src_ovr *intsrc = NULL; 63.276 - 63.277 - intsrc = (struct acpi_table_int_src_ovr*) header; 63.278 - if (!intsrc) 63.279 - return -EINVAL; 63.280 - 63.281 - acpi_table_print_madt_entry(header); 63.282 - 63.283 - if (intsrc->bus_irq == acpi_fadt.sci_int) { 63.284 - acpi_sci_ioapic_setup(intsrc->global_irq, 63.285 - intsrc->flags.polarity, intsrc->flags.trigger); 63.286 - return 0; 63.287 - } 63.288 - 63.289 - mp_override_legacy_irq ( 63.290 - intsrc->bus_irq, 63.291 - intsrc->flags.polarity, 63.292 - intsrc->flags.trigger, 63.293 - intsrc->global_irq); 63.294 - 63.295 - return 0; 63.296 -} 63.297 - 63.298 - 63.299 -static int __init 63.300 -acpi_parse_nmi_src ( 63.301 - acpi_table_entry_header *header) 63.302 -{ 63.303 - struct acpi_table_nmi_src *nmi_src = NULL; 63.304 - 63.305 - nmi_src = (struct acpi_table_nmi_src*) header; 63.306 - if (!nmi_src) 63.307 - return -EINVAL; 63.308 - 63.309 - acpi_table_print_madt_entry(header); 63.310 - 63.311 - /* TBD: Support nimsrc entries? */ 63.312 - 63.313 - return 0; 63.314 -} 63.315 - 63.316 -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ 63.317 - 63.318 - 63.319 -static unsigned long __init 63.320 -acpi_scan_rsdp ( 63.321 - unsigned long start, 63.322 - unsigned long length) 63.323 -{ 63.324 - unsigned long offset = 0; 63.325 - unsigned long sig_len = sizeof("RSD PTR ") - 1; 63.326 - 63.327 - /* 63.328 - * Scan all 16-byte boundaries of the physical memory region for the 63.329 - * RSDP signature. 63.330 - */ 63.331 - for (offset = 0; offset < length; offset += 16) { 63.332 - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) 63.333 - continue; 63.334 - return (start + offset); 63.335 - } 63.336 - 63.337 - return 0; 63.338 -} 63.339 - 63.340 - 63.341 -unsigned long __init 63.342 -acpi_find_rsdp (void) 63.343 -{ 63.344 - unsigned long rsdp_phys = 0; 63.345 - 63.346 - /* 63.347 - * Scan memory looking for the RSDP signature. First search EBDA (low 63.348 - * memory) paragraphs and then search upper memory (E0000-FFFFF). 63.349 - */ 63.350 - rsdp_phys = acpi_scan_rsdp (0, 0x400); 63.351 - if (!rsdp_phys) 63.352 - rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000); 63.353 - 63.354 - return rsdp_phys; 63.355 -} 63.356 - 63.357 - 63.358 -/* 63.359 - * acpi_boot_init() 63.360 - * called from setup_arch(), always. 63.361 - * 1. maps ACPI tables for later use 63.362 - * 2. enumerates lapics 63.363 - * 3. 
enumerates io-apics 63.364 - * 63.365 - * side effects: 63.366 - * acpi_lapic = 1 if LAPIC found 63.367 - * acpi_ioapic = 1 if IOAPIC found 63.368 - * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; 63.369 - * if acpi_blacklisted() disable_acpi() 63.370 - * acpi_irq_model=... 63.371 - * ... 63.372 - * 63.373 - * return value: (currently ignored) 63.374 - * 0: success 63.375 - * !0: failure 63.376 - */ 63.377 -int __init 63.378 -acpi_boot_init (void) 63.379 -{ 63.380 - int result = 0; 63.381 - 63.382 - if (acpi_disabled && !acpi_ht) 63.383 - return(1); 63.384 - 63.385 - /* 63.386 - * The default interrupt routing model is PIC (8259). This gets 63.387 - * overriden if IOAPICs are enumerated (below). 63.388 - */ 63.389 - acpi_irq_model = ACPI_IRQ_MODEL_PIC; 63.390 - 63.391 - /* 63.392 - * Initialize the ACPI boot-time table parser. 63.393 - */ 63.394 - result = acpi_table_init(); 63.395 - if (result) { 63.396 - disable_acpi(); 63.397 - return result; 63.398 - } 63.399 - 63.400 - result = acpi_blacklisted(); 63.401 - if (result) { 63.402 - printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n"); 63.403 - disable_acpi(); 63.404 - return result; 63.405 - } 63.406 - 63.407 -#ifdef CONFIG_X86_LOCAL_APIC 63.408 - 63.409 - /* 63.410 - * MADT 63.411 - * ---- 63.412 - * Parse the Multiple APIC Description Table (MADT), if exists. 63.413 - * Note that this table provides platform SMP configuration 63.414 - * information -- the successor to MPS tables. 63.415 - */ 63.416 - 63.417 - result = acpi_table_parse(ACPI_APIC, acpi_parse_madt); 63.418 - if (!result) { 63.419 - return 0; 63.420 - } 63.421 - else if (result < 0) { 63.422 - printk(KERN_ERR PREFIX "Error parsing MADT\n"); 63.423 - return result; 63.424 - } 63.425 - else if (result > 1) 63.426 - printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n"); 63.427 - 63.428 - /* 63.429 - * Local APIC 63.430 - * ---------- 63.431 - * Note that the LAPIC address is obtained from the MADT (32-bit value) 63.432 - * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). 63.433 - */ 63.434 - 63.435 - result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr); 63.436 - if (result < 0) { 63.437 - printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); 63.438 - return result; 63.439 - } 63.440 - 63.441 - mp_register_lapic_address(acpi_lapic_addr); 63.442 - 63.443 - result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic); 63.444 - if (!result) { 63.445 - printk(KERN_ERR PREFIX "No LAPIC entries present\n"); 63.446 - /* TBD: Cleanup to allow fallback to MPS */ 63.447 - return -ENODEV; 63.448 - } 63.449 - else if (result < 0) { 63.450 - printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); 63.451 - /* TBD: Cleanup to allow fallback to MPS */ 63.452 - return result; 63.453 - } 63.454 - 63.455 - result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi); 63.456 - if (result < 0) { 63.457 - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); 63.458 - /* TBD: Cleanup to allow fallback to MPS */ 63.459 - return result; 63.460 - } 63.461 - 63.462 - acpi_lapic = 1; 63.463 - 63.464 -#endif /*CONFIG_X86_LOCAL_APIC*/ 63.465 - 63.466 -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) 63.467 - 63.468 - /* 63.469 - * I/O APIC 63.470 - * -------- 63.471 - */ 63.472 - 63.473 - /* 63.474 - * ACPI interpreter is required to complete interrupt setup, 63.475 - * so if it is off, don't enumerate the io-apics with ACPI. 
63.476 - * If MPS is present, it will handle them, 63.477 - * otherwise the system will stay in PIC mode 63.478 - */ 63.479 - if (acpi_disabled || acpi_noirq) { 63.480 - return 1; 63.481 - } 63.482 - 63.483 - /* 63.484 - * if "noapic" boot option, don't look for IO-APICs 63.485 - */ 63.486 - if (ioapic_setup_disabled()) { 63.487 - printk(KERN_INFO PREFIX "Skipping IOAPIC probe " 63.488 - "due to 'noapic' option.\n"); 63.489 - return 1; 63.490 - } 63.491 - 63.492 - 63.493 - result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic); 63.494 - if (!result) { 63.495 - printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); 63.496 - return -ENODEV; 63.497 - } 63.498 - else if (result < 0) { 63.499 - printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); 63.500 - return result; 63.501 - } 63.502 - 63.503 - /* Build a default routing table for legacy (ISA) interrupts. */ 63.504 - mp_config_acpi_legacy_irqs(); 63.505 - 63.506 - /* Record sci_int for use when looking for MADT sci_int override */ 63.507 - acpi_table_parse(ACPI_FADT, acpi_parse_fadt); 63.508 - 63.509 - result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr); 63.510 - if (result < 0) { 63.511 - printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); 63.512 - /* TBD: Cleanup to allow fallback to MPS */ 63.513 - return result; 63.514 - } 63.515 - 63.516 - /* 63.517 - * If BIOS did not supply an INT_SRC_OVR for the SCI 63.518 - * pretend we got one so we can set the SCI flags. 63.519 - */ 63.520 - if (!acpi_sci_override_gsi) 63.521 - acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0); 63.522 - 63.523 - result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src); 63.524 - if (result < 0) { 63.525 - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); 63.526 - /* TBD: Cleanup to allow fallback to MPS */ 63.527 - return result; 63.528 - } 63.529 - 63.530 - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; 63.531 - 63.532 - acpi_irq_balance_set(NULL); 63.533 - 63.534 - acpi_ioapic = 1; 63.535 - 63.536 - if (acpi_lapic && acpi_ioapic) 63.537 - smp_found_config = 1; 63.538 - 63.539 -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ 63.540 - 63.541 - return 0; 63.542 -} 63.543 - 63.544 - 63.545 -#ifdef CONFIG_ACPI_BUS 63.546 -/* 63.547 - * acpi_pic_sci_set_trigger() 63.548 - * 63.549 - * use ELCR to set PIC-mode trigger type for SCI 63.550 - * 63.551 - * If a PIC-mode SCI is not recognized or gives spurious IRQ7's 63.552 - * it may require Edge Trigger -- use "acpi_sci=edge" 63.553 - * 63.554 - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers 63.555 - * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. 
63.556 - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) 63.557 - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) 63.558 - */ 63.559 - 63.560 -void __init 63.561 -acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) 63.562 -{ 63.563 - unsigned char mask = 1 << (irq & 7); 63.564 - unsigned int port = 0x4d0 + (irq >> 3); 63.565 - unsigned char val = inb(port); 63.566 - 63.567 - 63.568 - printk(PREFIX "IRQ%d SCI:", irq); 63.569 - if (!(val & mask)) { 63.570 - printk(" Edge"); 63.571 - 63.572 - if (trigger == 3) { 63.573 - printk(" set to Level"); 63.574 - outb(val | mask, port); 63.575 - } 63.576 - } else { 63.577 - printk(" Level"); 63.578 - 63.579 - if (trigger == 1) { 63.580 - printk(" set to Edge"); 63.581 - outb(val & ~mask, port); 63.582 - } 63.583 - } 63.584 - printk(" Trigger.\n"); 63.585 -} 63.586 - 63.587 -#endif /* CONFIG_ACPI_BUS */ 63.588 - 63.589 - 63.590 -/* -------------------------------------------------------------------------- 63.591 - Low-Level Sleep Support 63.592 - -------------------------------------------------------------------------- */ 63.593 - 63.594 -#ifdef CONFIG_ACPI_SLEEP 63.595 - 63.596 -#define DEBUG 63.597 - 63.598 -#ifdef DEBUG 63.599 -#include <xen/serial.h> 63.600 -#endif 63.601 - 63.602 -/* address in low memory of the wakeup routine. */ 63.603 -unsigned long acpi_wakeup_address = 0; 63.604 - 63.605 -/* new page directory that we will be using */ 63.606 -static pmd_t *pmd; 63.607 - 63.608 -/* saved page directory */ 63.609 -static pmd_t saved_pmd; 63.610 - 63.611 -/* page which we'll use for the new page directory */ 63.612 -static pte_t *ptep; 63.613 - 63.614 -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); 63.615 - 63.616 -/* 63.617 - * acpi_create_identity_pmd 63.618 - * 63.619 - * Create a new, identity mapped pmd. 63.620 - * 63.621 - * Do this by creating new page directory, and marking all the pages as R/W 63.622 - * Then set it as the new Page Middle Directory. 63.623 - * And, of course, flush the TLB so it takes effect. 63.624 - * 63.625 - * We save the address of the old one, for later restoration. 63.626 - */ 63.627 -static void acpi_create_identity_pmd (void) 63.628 -{ 63.629 - pgd_t *pgd; 63.630 - int i; 63.631 - 63.632 - ptep = (pte_t*)__get_free_page(GFP_KERNEL); 63.633 - 63.634 - /* fill page with low mapping */ 63.635 - for (i = 0; i < PTRS_PER_PTE; i++) 63.636 - set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED)); 63.637 - 63.638 - pgd = pgd_offset(current->active_mm, 0); 63.639 - pmd = pmd_alloc(current->mm,pgd, 0); 63.640 - 63.641 - /* save the old pmd */ 63.642 - saved_pmd = *pmd; 63.643 - 63.644 - /* set the new one */ 63.645 - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep))); 63.646 - 63.647 - /* flush the TLB */ 63.648 - local_flush_tlb(); 63.649 -} 63.650 - 63.651 -/* 63.652 - * acpi_restore_pmd 63.653 - * 63.654 - * Restore the old pmd saved by acpi_create_identity_pmd and 63.655 - * free the page that said function alloc'd 63.656 - */ 63.657 -static void acpi_restore_pmd (void) 63.658 -{ 63.659 - set_pmd(pmd, saved_pmd); 63.660 - local_flush_tlb(); 63.661 - free_page((unsigned long)ptep); 63.662 -} 63.663 - 63.664 -/** 63.665 - * acpi_save_state_mem - save kernel state 63.666 - * 63.667 - * Create an identity mapped page table and copy the wakeup routine to 63.668 - * low memory. 
63.669 - */ 63.670 -int acpi_save_state_mem (void) 63.671 -{ 63.672 - acpi_create_identity_pmd(); 63.673 - acpi_copy_wakeup_routine(acpi_wakeup_address); 63.674 - 63.675 - return 0; 63.676 -} 63.677 - 63.678 -/** 63.679 - * acpi_save_state_disk - save kernel state to disk 63.680 - * 63.681 - */ 63.682 -int acpi_save_state_disk (void) 63.683 -{ 63.684 - return 1; 63.685 -} 63.686 - 63.687 -/* 63.688 - * acpi_restore_state 63.689 - */ 63.690 -void acpi_restore_state_mem (void) 63.691 -{ 63.692 - acpi_restore_pmd(); 63.693 -} 63.694 - 63.695 -/** 63.696 - * acpi_reserve_bootmem - do _very_ early ACPI initialisation 63.697 - * 63.698 - * We allocate a page in low memory for the wakeup 63.699 - * routine for when we come back from a sleep state. The 63.700 - * runtime allocator allows specification of <16M pages, but not 63.701 - * <1M pages. 63.702 - */ 63.703 -void __init acpi_reserve_bootmem(void) 63.704 -{ 63.705 - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); 63.706 - if (!acpi_wakeup_address) 63.707 - printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); 63.708 -} 63.709 - 63.710 -void do_suspend_lowlevel_s4bios(int resume) 63.711 -{ 63.712 - if (!resume) { 63.713 - save_processor_context(); 63.714 - acpi_save_register_state((unsigned long)&&acpi_sleep_done); 63.715 - acpi_enter_sleep_state_s4bios(); 63.716 - return; 63.717 - } 63.718 -acpi_sleep_done: 63.719 - restore_processor_context(); 63.720 -} 63.721 - 63.722 - 63.723 -#endif /*CONFIG_ACPI_SLEEP*/ 63.724 -
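[Note: where the deleted acpi.c only tested MADT subtable pointers for NULL before casting them, its replacement xen/arch/x86/acpi/boot.c in the next hunk bounds-checks every entry against the end of the mapped table. The guard it introduces, quoted from the hunk below:

    #define BAD_MADT_ENTRY(entry, end) ( \
        (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
        ((acpi_table_entry_header *)entry)->length != sizeof(*entry))

Each acpi_parse_* handler now takes the table end as an extra argument and returns -EINVAL when the entry is NULL, runs past the table, or carries a length field that disagrees with the expected structure size.]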
64.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 64.2 +++ b/xen/arch/x86/acpi/boot.c Tue May 10 08:49:59 2005 +0000 64.3 @@ -0,0 +1,912 @@ 64.4 +/* 64.5 + * boot.c - Architecture-Specific Low-Level ACPI Boot Support 64.6 + * 64.7 + * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> 64.8 + * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com> 64.9 + * 64.10 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 64.11 + * 64.12 + * This program is free software; you can redistribute it and/or modify 64.13 + * it under the terms of the GNU General Public License as published by 64.14 + * the Free Software Foundation; either version 2 of the License, or 64.15 + * (at your option) any later version. 64.16 + * 64.17 + * This program is distributed in the hope that it will be useful, 64.18 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 64.19 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 64.20 + * GNU General Public License for more details. 64.21 + * 64.22 + * You should have received a copy of the GNU General Public License 64.23 + * along with this program; if not, write to the Free Software 64.24 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 64.25 + * 64.26 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 64.27 + */ 64.28 + 64.29 +#include <xen/config.h> 64.30 +#include <xen/errno.h> 64.31 +#include <xen/init.h> 64.32 +#include <xen/acpi.h> 64.33 +#include <xen/irq.h> 64.34 +#include <xen/sched.h> 64.35 +#include <asm/page.h> 64.36 +#include <asm/apic.h> 64.37 +#include <asm/io_apic.h> 64.38 +#include <asm/apic.h> 64.39 +#include <asm/io.h> 64.40 +#include <asm/irq.h> 64.41 +#include <asm/mpspec.h> 64.42 + 64.43 +int sbf_port; /* XXX XEN */ 64.44 + 64.45 +#ifdef CONFIG_X86_64 64.46 + 64.47 +static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { } 64.48 +extern void __init clustered_apic_check(void); 64.49 +static inline int ioapic_setup_disabled(void) { return 0; } 64.50 +#include <asm/proto.h> 64.51 + 64.52 +#else /* X86 */ 64.53 + 64.54 +#ifdef CONFIG_X86_LOCAL_APIC 64.55 +#include <mach_apic.h> 64.56 +#include <mach_mpparse.h> 64.57 +#endif /* CONFIG_X86_LOCAL_APIC */ 64.58 + 64.59 +#endif /* X86 */ 64.60 + 64.61 +#define BAD_MADT_ENTRY(entry, end) ( \ 64.62 + (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ 64.63 + ((acpi_table_entry_header *)entry)->length != sizeof(*entry)) 64.64 + 64.65 +#define PREFIX "ACPI: " 64.66 + 64.67 +#ifdef CONFIG_ACPI_PCI 64.68 +int acpi_noirq __initdata; /* skip ACPI IRQ initialization */ 64.69 +int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ 64.70 +#else 64.71 +int acpi_noirq __initdata = 1; 64.72 +int acpi_pci_disabled __initdata = 1; 64.73 +#endif 64.74 +int acpi_ht __initdata = 1; /* enable HT */ 64.75 + 64.76 +int acpi_lapic; 64.77 +int acpi_ioapic; 64.78 +int acpi_strict; 64.79 +EXPORT_SYMBOL(acpi_strict); 64.80 + 64.81 +acpi_interrupt_flags acpi_sci_flags __initdata; 64.82 +int acpi_sci_override_gsi __initdata; 64.83 +int acpi_skip_timer_override __initdata; 64.84 + 64.85 +#ifdef CONFIG_X86_LOCAL_APIC 64.86 +static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 64.87 +#endif 64.88 + 64.89 +#ifndef __HAVE_ARCH_CMPXCHG 64.90 +#warning ACPI uses CMPXCHG, i486 and later hardware 64.91 +#endif 64.92 + 64.93 +#define MAX_MADT_ENTRIES 256 64.94 +u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] = 64.95 + { [0 ... 
MAX_MADT_ENTRIES-1] = 0xff }; 64.96 +EXPORT_SYMBOL(x86_acpiid_to_apicid); 64.97 + 64.98 +/* -------------------------------------------------------------------------- 64.99 + Boot-time Configuration 64.100 + -------------------------------------------------------------------------- */ 64.101 + 64.102 +/* 64.103 + * The default interrupt routing model is PIC (8259). This gets 64.104 + * overriden if IOAPICs are enumerated (below). 64.105 + */ 64.106 +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; 64.107 + 64.108 +#ifdef CONFIG_X86_64 64.109 + 64.110 +/* rely on all ACPI tables being in the direct mapping */ 64.111 +char *__acpi_map_table(unsigned long phys_addr, unsigned long size) 64.112 +{ 64.113 + if (!phys_addr || !size) 64.114 + return NULL; 64.115 + 64.116 + if (phys_addr < (end_pfn_map << PAGE_SHIFT)) 64.117 + return __va(phys_addr); 64.118 + 64.119 + return NULL; 64.120 +} 64.121 + 64.122 +#else 64.123 + 64.124 +/* 64.125 + * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, 64.126 + * to map the target physical address. The problem is that set_fixmap() 64.127 + * provides a single page, and it is possible that the page is not 64.128 + * sufficient. 64.129 + * By using this area, we can map up to MAX_IO_APICS pages temporarily, 64.130 + * i.e. until the next __va_range() call. 64.131 + * 64.132 + * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* 64.133 + * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and 64.134 + * count idx down while incrementing the phys address. 64.135 + */ 64.136 +char *__acpi_map_table(unsigned long phys, unsigned long size) 64.137 +{ 64.138 + unsigned long base, offset, mapped_size; 64.139 + int idx; 64.140 + 64.141 + if (phys + size < 8*1024*1024) 64.142 + return __va(phys); 64.143 + 64.144 + offset = phys & (PAGE_SIZE - 1); 64.145 + mapped_size = PAGE_SIZE - offset; 64.146 + set_fixmap(FIX_ACPI_END, phys); 64.147 + base = fix_to_virt(FIX_ACPI_END); 64.148 + 64.149 + /* 64.150 + * Most cases can be covered by the below. 
64.151 + */ 64.152 + idx = FIX_ACPI_END; 64.153 + while (mapped_size < size) { 64.154 + if (--idx < FIX_ACPI_BEGIN) 64.155 + return NULL; /* cannot handle this */ 64.156 + phys += PAGE_SIZE; 64.157 + set_fixmap(idx, phys); 64.158 + mapped_size += PAGE_SIZE; 64.159 + } 64.160 + 64.161 + return ((unsigned char *) base + offset); 64.162 +} 64.163 +#endif 64.164 + 64.165 +#ifdef CONFIG_PCI_MMCONFIG 64.166 +static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) 64.167 +{ 64.168 + struct acpi_table_mcfg *mcfg; 64.169 + 64.170 + if (!phys_addr || !size) 64.171 + return -EINVAL; 64.172 + 64.173 + mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size); 64.174 + if (!mcfg) { 64.175 + printk(KERN_WARNING PREFIX "Unable to map MCFG\n"); 64.176 + return -ENODEV; 64.177 + } 64.178 + 64.179 + if (mcfg->base_reserved) { 64.180 + printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n"); 64.181 + return -ENODEV; 64.182 + } 64.183 + 64.184 + pci_mmcfg_base_addr = mcfg->base_address; 64.185 + 64.186 + return 0; 64.187 +} 64.188 +#else 64.189 +#define acpi_parse_mcfg NULL 64.190 +#endif /* !CONFIG_PCI_MMCONFIG */ 64.191 + 64.192 +#ifdef CONFIG_X86_LOCAL_APIC 64.193 +static int __init 64.194 +acpi_parse_madt ( 64.195 + unsigned long phys_addr, 64.196 + unsigned long size) 64.197 +{ 64.198 + struct acpi_table_madt *madt = NULL; 64.199 + 64.200 + if (!phys_addr || !size) 64.201 + return -EINVAL; 64.202 + 64.203 + madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size); 64.204 + if (!madt) { 64.205 + printk(KERN_WARNING PREFIX "Unable to map MADT\n"); 64.206 + return -ENODEV; 64.207 + } 64.208 + 64.209 + if (madt->lapic_address) { 64.210 + acpi_lapic_addr = (u64) madt->lapic_address; 64.211 + 64.212 + printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", 64.213 + madt->lapic_address); 64.214 + } 64.215 + 64.216 + acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); 64.217 + 64.218 + return 0; 64.219 +} 64.220 + 64.221 + 64.222 +static int __init 64.223 +acpi_parse_lapic ( 64.224 + acpi_table_entry_header *header, const unsigned long end) 64.225 +{ 64.226 + struct acpi_table_lapic *processor = NULL; 64.227 + 64.228 + processor = (struct acpi_table_lapic*) header; 64.229 + 64.230 + if (BAD_MADT_ENTRY(processor, end)) 64.231 + return -EINVAL; 64.232 + 64.233 + acpi_table_print_madt_entry(header); 64.234 + 64.235 + /* no utility in registering a disabled processor */ 64.236 + if (processor->flags.enabled == 0) 64.237 + return 0; 64.238 + 64.239 + x86_acpiid_to_apicid[processor->acpi_id] = processor->id; 64.240 + 64.241 + mp_register_lapic ( 64.242 + processor->id, /* APIC ID */ 64.243 + processor->flags.enabled); /* Enabled? 
*/ 64.244 + 64.245 + return 0; 64.246 +} 64.247 + 64.248 +static int __init 64.249 +acpi_parse_lapic_addr_ovr ( 64.250 + acpi_table_entry_header *header, const unsigned long end) 64.251 +{ 64.252 + struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; 64.253 + 64.254 + lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header; 64.255 + 64.256 + if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) 64.257 + return -EINVAL; 64.258 + 64.259 + acpi_lapic_addr = lapic_addr_ovr->address; 64.260 + 64.261 + return 0; 64.262 +} 64.263 + 64.264 +static int __init 64.265 +acpi_parse_lapic_nmi ( 64.266 + acpi_table_entry_header *header, const unsigned long end) 64.267 +{ 64.268 + struct acpi_table_lapic_nmi *lapic_nmi = NULL; 64.269 + 64.270 + lapic_nmi = (struct acpi_table_lapic_nmi*) header; 64.271 + 64.272 + if (BAD_MADT_ENTRY(lapic_nmi, end)) 64.273 + return -EINVAL; 64.274 + 64.275 + acpi_table_print_madt_entry(header); 64.276 + 64.277 + if (lapic_nmi->lint != 1) 64.278 + printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); 64.279 + 64.280 + return 0; 64.281 +} 64.282 + 64.283 + 64.284 +#endif /*CONFIG_X86_LOCAL_APIC*/ 64.285 + 64.286 +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) 64.287 + 64.288 +static int __init 64.289 +acpi_parse_ioapic ( 64.290 + acpi_table_entry_header *header, const unsigned long end) 64.291 +{ 64.292 + struct acpi_table_ioapic *ioapic = NULL; 64.293 + 64.294 + ioapic = (struct acpi_table_ioapic*) header; 64.295 + 64.296 + if (BAD_MADT_ENTRY(ioapic, end)) 64.297 + return -EINVAL; 64.298 + 64.299 + acpi_table_print_madt_entry(header); 64.300 + 64.301 + mp_register_ioapic ( 64.302 + ioapic->id, 64.303 + ioapic->address, 64.304 + ioapic->global_irq_base); 64.305 + 64.306 + return 0; 64.307 +} 64.308 + 64.309 +/* 64.310 + * Parse Interrupt Source Override for the ACPI SCI 64.311 + */ 64.312 +static void 64.313 +acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) 64.314 +{ 64.315 + if (trigger == 0) /* compatible SCI trigger is level */ 64.316 + trigger = 3; 64.317 + 64.318 + if (polarity == 0) /* compatible SCI polarity is low */ 64.319 + polarity = 3; 64.320 + 64.321 + /* Command-line over-ride via acpi_sci= */ 64.322 + if (acpi_sci_flags.trigger) 64.323 + trigger = acpi_sci_flags.trigger; 64.324 + 64.325 + if (acpi_sci_flags.polarity) 64.326 + polarity = acpi_sci_flags.polarity; 64.327 + 64.328 + /* 64.329 + * mp_config_acpi_legacy_irqs() already setup IRQs < 16 64.330 + * If GSI is < 16, this will update its flags, 64.331 + * else it will create a new mp_irqs[] entry. 
64.332 + */ 64.333 + mp_override_legacy_irq(gsi, polarity, trigger, gsi); 64.334 + 64.335 + /* 64.336 + * stash over-ride to indicate we've been here 64.337 + * and for later update of acpi_fadt 64.338 + */ 64.339 + acpi_sci_override_gsi = gsi; 64.340 + return; 64.341 +} 64.342 + 64.343 +static int __init 64.344 +acpi_parse_int_src_ovr ( 64.345 + acpi_table_entry_header *header, const unsigned long end) 64.346 +{ 64.347 + struct acpi_table_int_src_ovr *intsrc = NULL; 64.348 + 64.349 + intsrc = (struct acpi_table_int_src_ovr*) header; 64.350 + 64.351 + if (BAD_MADT_ENTRY(intsrc, end)) 64.352 + return -EINVAL; 64.353 + 64.354 + acpi_table_print_madt_entry(header); 64.355 + 64.356 + if (intsrc->bus_irq == acpi_fadt.sci_int) { 64.357 + acpi_sci_ioapic_setup(intsrc->global_irq, 64.358 + intsrc->flags.polarity, intsrc->flags.trigger); 64.359 + return 0; 64.360 + } 64.361 + 64.362 + if (acpi_skip_timer_override && 64.363 + intsrc->bus_irq == 0 && intsrc->global_irq == 2) { 64.364 + printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); 64.365 + return 0; 64.366 + } 64.367 + 64.368 + mp_override_legacy_irq ( 64.369 + intsrc->bus_irq, 64.370 + intsrc->flags.polarity, 64.371 + intsrc->flags.trigger, 64.372 + intsrc->global_irq); 64.373 + 64.374 + return 0; 64.375 +} 64.376 + 64.377 + 64.378 +static int __init 64.379 +acpi_parse_nmi_src ( 64.380 + acpi_table_entry_header *header, const unsigned long end) 64.381 +{ 64.382 + struct acpi_table_nmi_src *nmi_src = NULL; 64.383 + 64.384 + nmi_src = (struct acpi_table_nmi_src*) header; 64.385 + 64.386 + if (BAD_MADT_ENTRY(nmi_src, end)) 64.387 + return -EINVAL; 64.388 + 64.389 + acpi_table_print_madt_entry(header); 64.390 + 64.391 + /* TBD: Support nimsrc entries? */ 64.392 + 64.393 + return 0; 64.394 +} 64.395 + 64.396 +#endif /* CONFIG_X86_IO_APIC */ 64.397 + 64.398 +#ifdef CONFIG_ACPI_BUS 64.399 + 64.400 +/* 64.401 + * acpi_pic_sci_set_trigger() 64.402 + * 64.403 + * use ELCR to set PIC-mode trigger type for SCI 64.404 + * 64.405 + * If a PIC-mode SCI is not recognized or gives spurious IRQ7's 64.406 + * it may require Edge Trigger -- use "acpi_sci=edge" 64.407 + * 64.408 + * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers 64.409 + * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. 64.410 + * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) 64.411 + * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) 64.412 + */ 64.413 + 64.414 +void __init 64.415 +acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) 64.416 +{ 64.417 + unsigned int mask = 1 << irq; 64.418 + unsigned int old, new; 64.419 + 64.420 + /* Real old ELCR mask */ 64.421 + old = inb(0x4d0) | (inb(0x4d1) << 8); 64.422 + 64.423 + /* 64.424 + * If we use ACPI to set PCI irq's, then we should clear ELCR 64.425 + * since we will set it correctly as we enable the PCI irq 64.426 + * routing. 64.427 + */ 64.428 + new = acpi_noirq ? old : 0; 64.429 + 64.430 + /* 64.431 + * Update SCI information in the ELCR, it isn't in the PCI 64.432 + * routing tables.. 
64.433 + */ 64.434 + switch (trigger) { 64.435 + case 1: /* Edge - clear */ 64.436 + new &= ~mask; 64.437 + break; 64.438 + case 3: /* Level - set */ 64.439 + new |= mask; 64.440 + break; 64.441 + } 64.442 + 64.443 + if (old == new) 64.444 + return; 64.445 + 64.446 + printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old); 64.447 + outb(new, 0x4d0); 64.448 + outb(new >> 8, 0x4d1); 64.449 +} 64.450 + 64.451 + 64.452 +#endif /* CONFIG_ACPI_BUS */ 64.453 + 64.454 +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) 64.455 +{ 64.456 +#ifdef CONFIG_X86_IO_APIC 64.457 + if (use_pci_vector() && !platform_legacy_irq(gsi)) 64.458 + *irq = IO_APIC_VECTOR(gsi); 64.459 + else 64.460 +#endif 64.461 + *irq = gsi; 64.462 + return 0; 64.463 +} 64.464 + 64.465 +unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) 64.466 +{ 64.467 + unsigned int irq; 64.468 + unsigned int plat_gsi = gsi; 64.469 + 64.470 +#ifdef CONFIG_PCI 64.471 + /* 64.472 + * Make sure all (legacy) PCI IRQs are set as level-triggered. 64.473 + */ 64.474 + if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { 64.475 + extern void eisa_set_level_irq(unsigned int irq); 64.476 + 64.477 + if (edge_level == ACPI_LEVEL_SENSITIVE) 64.478 + eisa_set_level_irq(gsi); 64.479 + } 64.480 +#endif 64.481 + 64.482 +#ifdef CONFIG_X86_IO_APIC 64.483 + if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { 64.484 + plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low); 64.485 + } 64.486 +#endif 64.487 + acpi_gsi_to_irq(plat_gsi, &irq); 64.488 + return irq; 64.489 +} 64.490 +EXPORT_SYMBOL(acpi_register_gsi); 64.491 + 64.492 +/* 64.493 + * ACPI based hotplug support for CPU 64.494 + */ 64.495 +#ifdef CONFIG_ACPI_HOTPLUG_CPU 64.496 +int 64.497 +acpi_map_lsapic(acpi_handle handle, int *pcpu) 64.498 +{ 64.499 + /* TBD */ 64.500 + return -EINVAL; 64.501 +} 64.502 +EXPORT_SYMBOL(acpi_map_lsapic); 64.503 + 64.504 + 64.505 +int 64.506 +acpi_unmap_lsapic(int cpu) 64.507 +{ 64.508 + /* TBD */ 64.509 + return -EINVAL; 64.510 +} 64.511 +EXPORT_SYMBOL(acpi_unmap_lsapic); 64.512 +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ 64.513 + 64.514 +static unsigned long __init 64.515 +acpi_scan_rsdp ( 64.516 + unsigned long start, 64.517 + unsigned long length) 64.518 +{ 64.519 + unsigned long offset = 0; 64.520 + unsigned long sig_len = sizeof("RSD PTR ") - 1; 64.521 + 64.522 + /* 64.523 + * Scan all 16-byte boundaries of the physical memory region for the 64.524 + * RSDP signature. 
64.525 + */ 64.526 + for (offset = 0; offset < length; offset += 16) { 64.527 + if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) 64.528 + continue; 64.529 + return (start + offset); 64.530 + } 64.531 + 64.532 + return 0; 64.533 +} 64.534 + 64.535 +static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size) 64.536 +{ 64.537 + struct acpi_table_sbf *sb; 64.538 + 64.539 + if (!phys_addr || !size) 64.540 + return -EINVAL; 64.541 + 64.542 + sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size); 64.543 + if (!sb) { 64.544 + printk(KERN_WARNING PREFIX "Unable to map SBF\n"); 64.545 + return -ENODEV; 64.546 + } 64.547 + 64.548 + sbf_port = sb->sbf_cmos; /* Save CMOS port */ 64.549 + 64.550 + return 0; 64.551 +} 64.552 + 64.553 + 64.554 +#ifdef CONFIG_HPET_TIMER 64.555 + 64.556 +static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) 64.557 +{ 64.558 + struct acpi_table_hpet *hpet_tbl; 64.559 + 64.560 + if (!phys || !size) 64.561 + return -EINVAL; 64.562 + 64.563 + hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size); 64.564 + if (!hpet_tbl) { 64.565 + printk(KERN_WARNING PREFIX "Unable to map HPET\n"); 64.566 + return -ENODEV; 64.567 + } 64.568 + 64.569 + if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) { 64.570 + printk(KERN_WARNING PREFIX "HPET timers must be located in " 64.571 + "memory.\n"); 64.572 + return -1; 64.573 + } 64.574 + 64.575 +#ifdef CONFIG_X86_64 64.576 + vxtime.hpet_address = hpet_tbl->addr.addrl | 64.577 + ((long) hpet_tbl->addr.addrh << 32); 64.578 + 64.579 + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", 64.580 + hpet_tbl->id, vxtime.hpet_address); 64.581 +#else /* X86 */ 64.582 + { 64.583 + extern unsigned long hpet_address; 64.584 + 64.585 + hpet_address = hpet_tbl->addr.addrl; 64.586 + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", 64.587 + hpet_tbl->id, hpet_address); 64.588 + } 64.589 +#endif /* X86 */ 64.590 + 64.591 + return 0; 64.592 +} 64.593 +#else 64.594 +#define acpi_parse_hpet NULL 64.595 +#endif 64.596 + 64.597 +#ifdef CONFIG_X86_PM_TIMER 64.598 +extern u32 pmtmr_ioport; 64.599 +#endif 64.600 + 64.601 +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) 64.602 +{ 64.603 + struct fadt_descriptor_rev2 *fadt = NULL; 64.604 + 64.605 + fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); 64.606 + if(!fadt) { 64.607 + printk(KERN_WARNING PREFIX "Unable to map FADT\n"); 64.608 + return 0; 64.609 + } 64.610 + 64.611 +#ifdef CONFIG_ACPI_INTERPRETER 64.612 + /* initialize sci_int early for INT_SRC_OVR MADT parsing */ 64.613 + acpi_fadt.sci_int = fadt->sci_int; 64.614 +#endif 64.615 + 64.616 +#ifdef CONFIG_X86_PM_TIMER 64.617 + /* detect the location of the ACPI PM Timer */ 64.618 + if (fadt->revision >= FADT2_REVISION_ID) { 64.619 + /* FADT rev. 2 */ 64.620 + if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) 64.621 + return 0; 64.622 + 64.623 + pmtmr_ioport = fadt->xpm_tmr_blk.address; 64.624 + } else { 64.625 + /* FADT rev. 
1 */ 64.626 + pmtmr_ioport = fadt->V1_pm_tmr_blk; 64.627 + } 64.628 + if (pmtmr_ioport) 64.629 + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); 64.630 +#endif 64.631 + return 0; 64.632 +} 64.633 + 64.634 + 64.635 +unsigned long __init 64.636 +acpi_find_rsdp (void) 64.637 +{ 64.638 + unsigned long rsdp_phys = 0; 64.639 + 64.640 +#if 0 64.641 + if (efi_enabled) { 64.642 + if (efi.acpi20) 64.643 + return __pa(efi.acpi20); 64.644 + else if (efi.acpi) 64.645 + return __pa(efi.acpi); 64.646 + } 64.647 +#endif 64.648 + /* 64.649 + * Scan memory looking for the RSDP signature. First search EBDA (low 64.650 + * memory) paragraphs and then search upper memory (E0000-FFFFF). 64.651 + */ 64.652 + rsdp_phys = acpi_scan_rsdp (0, 0x400); 64.653 + if (!rsdp_phys) 64.654 + rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF); 64.655 + 64.656 + return rsdp_phys; 64.657 +} 64.658 + 64.659 +#ifdef CONFIG_X86_LOCAL_APIC 64.660 +/* 64.661 + * Parse LAPIC entries in MADT 64.662 + * returns 0 on success, < 0 on error 64.663 + */ 64.664 +static int __init 64.665 +acpi_parse_madt_lapic_entries(void) 64.666 +{ 64.667 + int count; 64.668 + 64.669 + /* 64.670 + * Note that the LAPIC address is obtained from the MADT (32-bit value) 64.671 + * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). 64.672 + */ 64.673 + 64.674 + count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0); 64.675 + if (count < 0) { 64.676 + printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); 64.677 + return count; 64.678 + } 64.679 + 64.680 + mp_register_lapic_address(acpi_lapic_addr); 64.681 + 64.682 + count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic, 64.683 + MAX_APICS); 64.684 + if (!count) { 64.685 + printk(KERN_ERR PREFIX "No LAPIC entries present\n"); 64.686 + /* TBD: Cleanup to allow fallback to MPS */ 64.687 + return -ENODEV; 64.688 + } 64.689 + else if (count < 0) { 64.690 + printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); 64.691 + /* TBD: Cleanup to allow fallback to MPS */ 64.692 + return count; 64.693 + } 64.694 + 64.695 + count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0); 64.696 + if (count < 0) { 64.697 + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); 64.698 + /* TBD: Cleanup to allow fallback to MPS */ 64.699 + return count; 64.700 + } 64.701 + return 0; 64.702 +} 64.703 +#endif /* CONFIG_X86_LOCAL_APIC */ 64.704 + 64.705 +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) 64.706 +/* 64.707 + * Parse IOAPIC related entries in MADT 64.708 + * returns 0 on success, < 0 on error 64.709 + */ 64.710 +static int __init 64.711 +acpi_parse_madt_ioapic_entries(void) 64.712 +{ 64.713 + int count; 64.714 + 64.715 + /* 64.716 + * ACPI interpreter is required to complete interrupt setup, 64.717 + * so if it is off, don't enumerate the io-apics with ACPI. 
64.718 + * If MPS is present, it will handle them, 64.719 + * otherwise the system will stay in PIC mode 64.720 + */ 64.721 + if (acpi_disabled || acpi_noirq) { 64.722 + return -ENODEV; 64.723 + } 64.724 + 64.725 + /* 64.726 + * if the "noapic" boot option was given, don't look for IO-APICs 64.727 + */ 64.728 + if (skip_ioapic_setup) { 64.729 + printk(KERN_INFO PREFIX "Skipping IOAPIC probe " 64.730 + "due to 'noapic' option.\n"); 64.731 + return -ENODEV; 64.732 + } 64.733 + 64.734 + count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS); 64.735 + if (!count) { 64.736 + printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); 64.737 + return -ENODEV; 64.738 + } 64.739 + else if (count < 0) { 64.740 + printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); 64.741 + return count; 64.742 + } 64.743 + 64.744 + count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS); 64.745 + if (count < 0) { 64.746 + printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); 64.747 + /* TBD: Cleanup to allow fallback to MPS */ 64.748 + return count; 64.749 + } 64.750 + 64.751 + /* 64.752 + * If the BIOS did not supply an INT_SRC_OVR for the SCI, 64.753 + * pretend we got one so we can set the SCI flags. 64.754 + */ 64.755 + if (!acpi_sci_override_gsi) 64.756 + acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0); 64.757 + 64.758 + /* Fill in identity legacy mappings where there is no override */ 64.759 + mp_config_acpi_legacy_irqs(); 64.760 + 64.761 + count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS); 64.762 + if (count < 0) { 64.763 + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); 64.764 + /* TBD: Cleanup to allow fallback to MPS */ 64.765 + return count; 64.766 + } 64.767 + 64.768 + return 0; 64.769 +} 64.770 +#else 64.771 +static inline int acpi_parse_madt_ioapic_entries(void) 64.772 +{ 64.773 + return -1; 64.774 +} 64.775 +#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */ 64.776 + 64.777 + 64.778 +static void __init 64.779 +acpi_process_madt(void) 64.780 +{ 64.781 +#ifdef CONFIG_X86_LOCAL_APIC 64.782 + int count, error; 64.783 + 64.784 + count = acpi_table_parse(ACPI_APIC, acpi_parse_madt); 64.785 + if (count >= 1) { 64.786 + 64.787 + /* 64.788 + * Parse MADT LAPIC entries 64.789 + */ 64.790 + error = acpi_parse_madt_lapic_entries(); 64.791 + if (!error) { 64.792 + acpi_lapic = 1; 64.793 + 64.794 + /* 64.795 + * Parse MADT IO-APIC entries 64.796 + */ 64.797 + error = acpi_parse_madt_ioapic_entries(); 64.798 + if (!error) { 64.799 + acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; 64.800 + acpi_irq_balance_set(NULL); 64.801 + acpi_ioapic = 1; 64.802 + 64.803 + smp_found_config = 1; 64.804 + clustered_apic_check(); 64.805 + } 64.806 + } 64.807 + if (error == -EINVAL) { 64.808 + /* 64.809 + * Dell Precision Workstation 410, 610 come here. 64.810 + */ 64.811 + printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n"); 64.812 + disable_acpi(); 64.813 + } 64.814 + } 64.815 +#endif 64.816 + return; 64.817 +} 64.818 + 64.819 +/* 64.820 + * acpi_boot_table_init() and acpi_boot_init() 64.821 + * called from setup_arch(), always. 64.822 + * 1. checksums all tables 64.823 + * 2. enumerates lapics 64.824 + * 3. enumerates io-apics 64.825 + * 64.826 + * acpi_table_init() is separate to allow reading SRAT without 64.827 + * other side effects.
64.828 + * 64.829 + * side effects of acpi_boot_init: 64.830 + * acpi_lapic = 1 if LAPIC found 64.831 + * acpi_ioapic = 1 if IOAPIC found 64.832 + * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; 64.833 + * if acpi_blacklisted() acpi_disabled = 1; 64.834 + * acpi_irq_model=... 64.835 + * ... 64.836 + * 64.837 + * return value: (currently ignored) 64.838 + * 0: success 64.839 + * !0: failure 64.840 + */ 64.841 + 64.842 +int __init 64.843 +acpi_boot_table_init(void) 64.844 +{ 64.845 + int error; 64.846 + 64.847 + /* 64.848 + * If acpi_disabled, bail out 64.849 + * One exception: acpi=ht continues far enough to enumerate LAPICs 64.850 + */ 64.851 + if (acpi_disabled && !acpi_ht) 64.852 + return 1; 64.853 + 64.854 + /* 64.855 + * Initialize the ACPI boot-time table parser. 64.856 + */ 64.857 + error = acpi_table_init(); 64.858 + if (error) { 64.859 + disable_acpi(); 64.860 + return error; 64.861 + } 64.862 + 64.863 +#if 0 /*def __i386__*/ 64.864 + check_acpi_pci(); 64.865 +#endif 64.866 + 64.867 + acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); 64.868 + 64.869 + /* 64.870 + * blacklist may disable ACPI entirely 64.871 + */ 64.872 + error = acpi_blacklisted(); 64.873 + if (error) { 64.874 + extern int acpi_force; 64.875 + 64.876 + if (acpi_force) { 64.877 + printk(KERN_WARNING PREFIX "acpi=force override\n"); 64.878 + } else { 64.879 + printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); 64.880 + disable_acpi(); 64.881 + return error; 64.882 + } 64.883 + } 64.884 + 64.885 + return 0; 64.886 +} 64.887 + 64.888 + 64.889 +int __init acpi_boot_init(void) 64.890 +{ 64.891 + /* 64.892 + * If acpi_disabled, bail out 64.893 + * One exception: acpi=ht continues far enough to enumerate LAPICs 64.894 + */ 64.895 + if (acpi_disabled && !acpi_ht) 64.896 + return 1; 64.897 + 64.898 + acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); 64.899 + 64.900 + /* 64.901 + * set sci_int and PM timer address 64.902 + */ 64.903 + acpi_table_parse(ACPI_FADT, acpi_parse_fadt); 64.904 + 64.905 + /* 64.906 + * Process the Multiple APIC Description Table (MADT), if present 64.907 + */ 64.908 + acpi_process_madt(); 64.909 + 64.910 + acpi_table_parse(ACPI_HPET, acpi_parse_hpet); 64.911 + acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg); 64.912 + 64.913 + return 0; 64.914 +} 64.915 +
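The acpi_scan_rsdp() added in this hunk accepts candidates purely by matching the "RSD PTR " signature on 16-byte boundaries, which is what the contemporary Linux code did as well. The ACPI spec additionally requires that the 20 bytes of an ACPI 1.0 RSDP sum to zero modulo 256. A minimal sketch of that extra validation, assuming only a byte pointer into the already-mapped candidate region; the helper names here are illustrative, not part of this patch:

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical helper: an ACPI 1.0 RSDP is 20 bytes long and its
     * bytes must sum to zero modulo 256. */
    static int rsdp_checksum_ok(const uint8_t *candidate)
    {
        uint8_t sum = 0;
        int i;

        for (i = 0; i < 20; i++)
            sum += candidate[i];
        return sum == 0;
    }

    /* How a scanner in the style of acpi_scan_rsdp() could use it:
     * accept a 16-byte-aligned candidate only if both the signature
     * and the checksum hold. */
    static int rsdp_matches(const uint8_t *p)
    {
        return memcmp(p, "RSD PTR ", 8) == 0 && rsdp_checksum_ok(p);
    }

The signature-only scan is normally sufficient, but the checksum is the obvious hardening step if stray "RSD PTR " strings in the EBDA ever matter.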
65.1 --- a/xen/arch/x86/apic.c Tue May 03 14:42:00 2005 +0000 65.2 +++ b/xen/arch/x86/apic.c Tue May 10 08:49:59 2005 +0000 65.3 @@ -34,14 +34,27 @@ 65.4 #include <asm/hardirq.h> 65.5 #include <asm/apic.h> 65.6 #include <asm/io_apic.h> 65.7 -#include <asm/mach_apic.h> 65.8 -#include <asm/io_ports.h> 65.9 +#include <mach_apic.h> 65.10 +#include <io_ports.h> 65.11 65.12 /* Using APIC to generate smp_local_timer_interrupt? */ 65.13 int using_apic_timer = 0; 65.14 65.15 +int apic_verbosity; 65.16 + 65.17 static int enabled_via_apicbase; 65.18 65.19 +int get_physical_broadcast(void) 65.20 +{ 65.21 + unsigned int lvr, version; 65.22 + lvr = apic_read(APIC_LVR); 65.23 + version = GET_APIC_VERSION(lvr); 65.24 + if (!APIC_INTEGRATED(version) || version >= 0x14) 65.25 + return 0xff; 65.26 + else 65.27 + return 0xf; 65.28 +} 65.29 + 65.30 int get_maxlvt(void) 65.31 { 65.32 unsigned int v, ver, maxlvt; 65.33 @@ -907,7 +920,7 @@ int __init APIC_init_uniprocessor (void) 65.34 #ifdef CONFIG_SMP 65.35 cpu_online_map = 1; 65.36 #endif 65.37 - phys_cpu_present_map = 1; 65.38 + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 65.39 apic_write_around(APIC_ID, boot_cpu_physical_apicid); 65.40 65.41 setup_local_APIC();
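The new get_physical_broadcast() above decides between 4-bit and 8-bit physical destination IDs from the local APIC version register. A stand-alone restatement of that decode, assuming the conventional definitions of GET_APIC_VERSION and APIC_INTEGRATED (the low byte of LVR is the version; a non-zero high nibble marks an integrated APIC rather than an external 82489DX):

    #include <stdio.h>

    #define GET_APIC_VERSION(lvr)  ((lvr) & 0xFF)
    #define APIC_INTEGRATED(ver)   ((ver) & 0xF0)   /* 82489DX reports 0x0x */

    static int physical_broadcast_id(unsigned int lvr)
    {
        unsigned int version = GET_APIC_VERSION(lvr);

        /* Integrated APICs at version 0x14 or later support 8-bit
         * physical IDs, so broadcast is 0xff; older parts use 0xf. */
        if (!APIC_INTEGRATED(version) || version >= 0x14)
            return 0xff;
        return 0xf;
    }

    int main(void)
    {
        printf("%#x\n", physical_broadcast_id(0x00050014));  /* -> 0xff */
        printf("%#x\n", physical_broadcast_id(0x00030010));  /* -> 0xf  */
        return 0;
    }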
66.1 --- a/xen/arch/x86/audit.c Tue May 03 14:42:00 2005 +0000 66.2 +++ b/xen/arch/x86/audit.c Tue May 10 08:49:59 2005 +0000 66.3 @@ -49,7 +49,8 @@ static int l1, l2, oos_count, page_count 66.4 int audit_adjust_pgtables(struct domain *d, int dir, int noisy) 66.5 { 66.6 int errors = 0; 66.7 - int shadow_enabled = shadow_mode_enabled(d) ? 1 : 0; 66.8 + int shadow_refcounts = !!shadow_mode_refcounts(d); 66.9 + int shadow_enabled = !!shadow_mode_enabled(d); 66.10 int l2limit; 66.11 66.12 void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS) 66.13 @@ -119,7 +120,7 @@ int audit_adjust_pgtables(struct domain 66.14 page->count_info += dir; 66.15 } 66.16 66.17 - void adjust_l2_page(unsigned long mfn) 66.18 + void adjust_l2_page(unsigned long mfn, int shadow) 66.19 { 66.20 unsigned long *pt = map_domain_mem(mfn << PAGE_SHIFT); 66.21 int i; 66.22 @@ -133,7 +134,7 @@ int audit_adjust_pgtables(struct domain 66.23 66.24 if ( noisy ) 66.25 { 66.26 - if ( shadow_enabled ) 66.27 + if ( shadow ) 66.28 { 66.29 if ( page_get_owner(l1page) != NULL ) 66.30 { 66.31 @@ -145,6 +146,17 @@ int audit_adjust_pgtables(struct domain 66.32 errors++; 66.33 continue; 66.34 } 66.35 + 66.36 + u32 page_type = l1page->u.inuse.type_info & PGT_type_mask; 66.37 + 66.38 + if ( page_type != PGT_l1_shadow ) 66.39 + { 66.40 + printk("Audit %d: [Shadow L2 mfn=%lx i=%x] " 66.41 + "Expected Shadow L1 t=%x mfn=%lx\n", 66.42 + d->id, mfn, i, 66.43 + l1page->u.inuse.type_info, l1mfn); 66.44 + errors++; 66.45 + } 66.46 } 66.47 else 66.48 { 66.49 @@ -154,7 +166,9 @@ int audit_adjust_pgtables(struct domain 66.50 "belonging to other dom %p (id=%d)\n", 66.51 l1mfn, 66.52 page_get_owner(l1page), 66.53 - page_get_owner(l1page)->id); 66.54 + (page_get_owner(l1page) 66.55 + ? page_get_owner(l1page)->id 66.56 + : -1)); 66.57 errors++; 66.58 continue; 66.59 } 66.60 @@ -179,7 +193,7 @@ int audit_adjust_pgtables(struct domain 66.61 } 66.62 } 66.63 66.64 - adjust(l1page, !shadow_enabled); 66.65 + adjust(l1page, !shadow); 66.66 } 66.67 } 66.68 66.69 @@ -280,7 +294,7 @@ int audit_adjust_pgtables(struct domain 66.70 errors++; 66.71 } 66.72 66.73 - if ( shadow_enabled && 66.74 + if ( shadow_refcounts && 66.75 page_is_page_table(gpage) && 66.76 ! 
page_out_of_sync(gpage) ) 66.77 { 66.78 @@ -336,19 +350,21 @@ int audit_adjust_pgtables(struct domain 66.79 break; 66.80 case PGT_l1_shadow: 66.81 adjust(pfn_to_page(gmfn), 0); 66.82 - adjust_l1_page(smfn); 66.83 + if ( shadow_refcounts ) 66.84 + adjust_l1_page(smfn); 66.85 if ( page->u.inuse.type_info & PGT_pinned ) 66.86 adjust(page, 0); 66.87 break; 66.88 case PGT_hl2_shadow: 66.89 adjust(pfn_to_page(gmfn), 0); 66.90 - adjust_hl2_page(smfn); 66.91 + if ( shadow_refcounts ) 66.92 + adjust_hl2_page(smfn); 66.93 if ( page->u.inuse.type_info & PGT_pinned ) 66.94 adjust(page, 0); 66.95 break; 66.96 case PGT_l2_shadow: 66.97 adjust(pfn_to_page(gmfn), 0); 66.98 - adjust_l2_page(smfn); 66.99 + adjust_l2_page(smfn, 1); 66.100 if ( page->u.inuse.type_info & PGT_pinned ) 66.101 adjust(page, 0); 66.102 break; 66.103 @@ -391,45 +407,43 @@ int audit_adjust_pgtables(struct domain 66.104 struct exec_domain *ed; 66.105 66.106 for_each_exec_domain(d, ed) 66.107 - { 66.108 - if ( !shadow_enabled ) 66.109 - { 66.110 - if ( pagetable_val(ed->arch.guest_table) ) 66.111 - adjust(&frame_table[pagetable_val(ed->arch.guest_table) 66.112 - >> PAGE_SHIFT], 1); 66.113 - } 66.114 - else 66.115 - { 66.116 - if ( pagetable_val(ed->arch.guest_table) ) 66.117 - adjust(&frame_table[pagetable_val(ed->arch.guest_table) 66.118 - >> PAGE_SHIFT], 0); 66.119 - if ( pagetable_val(ed->arch.shadow_table) ) 66.120 - adjust(&frame_table[pagetable_val(ed->arch.shadow_table) 66.121 - >> PAGE_SHIFT], 0); 66.122 - if ( ed->arch.monitor_shadow_ref ) 66.123 - adjust(&frame_table[ed->arch.monitor_shadow_ref], 0); 66.124 - } 66.125 - } 66.126 + { 66.127 + if ( pagetable_val(ed->arch.guest_table) ) 66.128 + adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1); 66.129 + if ( pagetable_val(ed->arch.shadow_table) ) 66.130 + adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0); 66.131 + if ( ed->arch.monitor_shadow_ref ) 66.132 + adjust(&frame_table[ed->arch.monitor_shadow_ref], 0); 66.133 + } 66.134 } 66.135 66.136 void adjust_guest_pages() 66.137 { 66.138 struct list_head *list_ent = d->page_list.next; 66.139 struct pfn_info *page; 66.140 - unsigned long mfn; 66.141 + unsigned long mfn, snapshot_mfn; 66.142 66.143 while ( list_ent != &d->page_list ) 66.144 { 66.145 u32 page_type; 66.146 66.147 page = list_entry(list_ent, struct pfn_info, list); 66.148 - mfn = page_to_pfn(page); 66.149 + snapshot_mfn = mfn = page_to_pfn(page); 66.150 page_type = page->u.inuse.type_info & PGT_type_mask; 66.151 66.152 BUG_ON(page_get_owner(page) != d); 66.153 66.154 page_count++; 66.155 66.156 + if ( shadow_enabled && !shadow_refcounts && 66.157 + page_out_of_sync(page) ) 66.158 + { 66.159 + unsigned long gpfn = __mfn_to_gpfn(d, mfn); 66.160 + ASSERT( VALID_M2P(gpfn) ); 66.161 + snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot); 66.162 + ASSERT( snapshot_mfn ); 66.163 + } 66.164 + 66.165 switch ( page_type ) 66.166 { 66.167 case PGT_l2_page_table: 66.168 @@ -437,7 +451,7 @@ int audit_adjust_pgtables(struct domain 66.169 66.170 if ( noisy ) 66.171 { 66.172 - if ( shadow_enabled ) 66.173 + if ( shadow_refcounts ) 66.174 { 66.175 printk("Audit %d: found an L2 guest page " 66.176 "mfn=%lx t=%08x c=%08x while in shadow mode\n", 66.177 @@ -446,19 +460,22 @@ int audit_adjust_pgtables(struct domain 66.178 errors++; 66.179 } 66.180 66.181 - if ( (page->u.inuse.type_info & PGT_validated) != 66.182 - PGT_validated ) 66.183 + if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) 66.184 { 66.185 - printk("Audit %d: L2 mfn=%lx not validated 
%08x\n", 66.186 - d->id, mfn, page->u.inuse.type_info); 66.187 - errors++; 66.188 - } 66.189 + if ( (page->u.inuse.type_info & PGT_validated) != 66.190 + PGT_validated ) 66.191 + { 66.192 + printk("Audit %d: L2 mfn=%lx not validated %08x\n", 66.193 + d->id, mfn, page->u.inuse.type_info); 66.194 + errors++; 66.195 + } 66.196 66.197 - if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned ) 66.198 - { 66.199 - printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n", 66.200 - d->id, mfn, page->u.inuse.type_info); 66.201 - errors++; 66.202 + if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned ) 66.203 + { 66.204 + printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n", 66.205 + d->id, mfn, page->u.inuse.type_info); 66.206 + errors++; 66.207 + } 66.208 } 66.209 } 66.210 66.211 @@ -466,7 +483,7 @@ int audit_adjust_pgtables(struct domain 66.212 adjust(page, 1); 66.213 66.214 if ( page->u.inuse.type_info & PGT_validated ) 66.215 - adjust_l2_page(mfn); 66.216 + adjust_l2_page(snapshot_mfn, 0); 66.217 66.218 break; 66.219 66.220 @@ -475,7 +492,7 @@ int audit_adjust_pgtables(struct domain 66.221 66.222 if ( noisy ) 66.223 { 66.224 - if ( shadow_enabled ) 66.225 + if ( shadow_refcounts ) 66.226 { 66.227 printk("found an L1 guest page mfn=%lx t=%08x c=%08x " 66.228 "while in shadow mode\n", 66.229 @@ -483,21 +500,24 @@ int audit_adjust_pgtables(struct domain 66.230 errors++; 66.231 } 66.232 66.233 - if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated ) 66.234 + if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) 66.235 { 66.236 - printk("Audit %d: L1 not validated mfn=%lx t=%08x\n", 66.237 - d->id, mfn, page->u.inuse.type_info); 66.238 - errors++; 66.239 - } 66.240 - 66.241 - if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned ) 66.242 - { 66.243 - if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ) 66.244 + if ( (page->u.inuse.type_info & PGT_validated) != 66.245 + PGT_validated ) 66.246 { 66.247 - printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n", 66.248 + printk("Audit %d: L1 not validated mfn=%lx t=%08x\n", 66.249 d->id, mfn, page->u.inuse.type_info); 66.250 errors++; 66.251 } 66.252 + 66.253 + if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned ) 66.254 + { 66.255 + if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ) 66.256 + { 66.257 + printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n", 66.258 + d->id, mfn, page->u.inuse.type_info); 66.259 + } 66.260 + } 66.261 } 66.262 } 66.263 66.264 @@ -505,7 +525,7 @@ int audit_adjust_pgtables(struct domain 66.265 adjust(page, 1); 66.266 66.267 if ( page->u.inuse.type_info & PGT_validated ) 66.268 - adjust_l1_page(mfn); 66.269 + adjust_l1_page(snapshot_mfn); 66.270 66.271 break; 66.272 66.273 @@ -520,7 +540,7 @@ int audit_adjust_pgtables(struct domain 66.274 break; 66.275 66.276 case PGT_writable_page: 66.277 - if ( shadow_enabled ) 66.278 + if ( shadow_refcounts ) 66.279 { 66.280 // In shadow mode, writable pages can get pinned by 66.281 // paravirtualized guests that think they are pinning 66.282 @@ -589,6 +609,8 @@ void audit_pagelist(struct domain *d) 66.283 66.284 void _audit_domain(struct domain *d, int flags) 66.285 { 66.286 + int shadow_refcounts = !!shadow_mode_refcounts(d); 66.287 + 66.288 void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn, 66.289 unsigned long mfn) 66.290 { 66.291 @@ -608,8 +630,29 @@ void _audit_domain(struct domain *d, int 66.292 unmap_domain_mem(pt); 66.293 } 66.294 66.295 + void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn) 66.296 + { 66.297 + int i; 66.298 + 
active_grant_entry_t *act = d->grant_table->active; 66.299 + 66.300 + spin_lock(&d->grant_table->lock); 66.301 + 66.302 + for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) 66.303 + { 66.304 + if ( act[i].pin && (act[i].frame == xmfn) ) 66.305 + { 66.306 + printk(" found active grant table entry i=%d dom=%d pin=%d\n", 66.307 + i, act[i].domid, act[i].pin); 66.308 + } 66.309 + } 66.310 + 66.311 + spin_unlock(&d->grant_table->lock); 66.312 + } 66.313 + 66.314 void scan_for_pfn(struct domain *d, unsigned long xmfn) 66.315 { 66.316 + scan_for_pfn_in_grant_table(d, xmfn); 66.317 + 66.318 if ( !shadow_mode_enabled(d) ) 66.319 { 66.320 struct list_head *list_ent = d->page_list.next; 66.321 @@ -688,7 +731,7 @@ void _audit_domain(struct domain *d, int 66.322 66.323 // Maybe we should just be using BIGLOCK? 66.324 // 66.325 - if ( !(flags & AUDIT_ALREADY_LOCKED) ) 66.326 + if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) ) 66.327 shadow_lock(d); 66.328 66.329 spin_lock(&d->page_alloc_lock); 66.330 @@ -716,7 +759,7 @@ void _audit_domain(struct domain *d, int 66.331 errors++; 66.332 } 66.333 66.334 - if ( shadow_mode_enabled(d) && 66.335 + if ( shadow_mode_refcounts(d) && 66.336 (page_type == PGT_writable_page) && 66.337 !(page->u.inuse.type_info & PGT_validated) ) 66.338 { 66.339 @@ -764,7 +807,9 @@ void _audit_domain(struct domain *d, int 66.340 mfn); 66.341 errors++; 66.342 } 66.343 - if ( page_type != PGT_writable_page ) 66.344 + if ( shadow_refcounts 66.345 + ? (page_type != PGT_writable_page) 66.346 + : !(page_type && (page_type <= PGT_l4_page_table)) ) 66.347 { 66.348 printk("out of sync page mfn=%lx has strange type " 66.349 "t=%08x c=%08x\n", 66.350 @@ -821,7 +866,7 @@ void _audit_domain(struct domain *d, int 66.351 d->id, page->u.inuse.type_info, 66.352 page->tlbflush_timestamp, 66.353 page->count_info, mfn); 66.354 - errors++; 66.355 + //errors++; 66.356 } 66.357 break; 66.358 default: 66.359 @@ -835,7 +880,7 @@ void _audit_domain(struct domain *d, int 66.360 page->count_info, 66.361 page->u.inuse.type_info, 66.362 page->tlbflush_timestamp, mfn ); 66.363 - errors++; 66.364 + //errors++; 66.365 scan_for_pfn_remote(mfn); 66.366 } 66.367 66.368 @@ -870,6 +915,8 @@ void _audit_domain(struct domain *d, int 66.369 d->id, page_to_pfn(page), 66.370 page->u.inuse.type_info, 66.371 page->count_info); 66.372 + printk("a->gpfn_and_flags=%p\n", 66.373 + (void *)a->gpfn_and_flags); 66.374 errors++; 66.375 } 66.376 break; 66.377 @@ -905,7 +952,7 @@ void _audit_domain(struct domain *d, int 66.378 "pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n", 66.379 d->id, page_count, oos_count, l1, l2, ctot, ttot); 66.380 66.381 - if ( !(flags & AUDIT_ALREADY_LOCKED) ) 66.382 + if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) ) 66.383 shadow_unlock(d); 66.384 66.385 if ( d != current->domain )
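The scan_for_pfn_in_grant_table() helper added to the audit code walks every active grant entry under the grant-table lock and reports entries that still pin the frame being audited; this explains references that the page-table walks alone cannot account for. The same pattern in a self-contained form (stub types, locking elided; the field names mirror the hunk above):

    #include <stdio.h>

    /* Stand-in for active_grant_entry_t with the fields the scan uses. */
    typedef struct {
        unsigned int  pin;    /* non-zero while the grant is mapped */
        unsigned int  domid;  /* grantee domain */
        unsigned long frame;  /* machine frame the grant refers to */
    } active_entry_t;

    #define NR_ENTRIES 8

    /* Same shape as scan_for_pfn_in_grant_table(); the real code brackets
     * this loop with spin_lock/spin_unlock on d->grant_table->lock. */
    static int find_pins_on_frame(const active_entry_t *act, unsigned long mfn)
    {
        int i, hits = 0;

        for (i = 0; i < NR_ENTRIES; i++) {
            if (act[i].pin && act[i].frame == mfn) {
                printf("entry %d: dom %u pin %u\n",
                       i, act[i].domid, act[i].pin);
                hits++;
            }
        }
        return hits;
    }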
67.1 --- a/xen/arch/x86/dom0_ops.c Tue May 03 14:42:00 2005 +0000 67.2 +++ b/xen/arch/x86/dom0_ops.c Tue May 10 08:49:59 2005 +0000 67.3 @@ -397,7 +397,7 @@ void arch_getdomaininfo_ctxt( 67.4 #endif 67.5 67.6 c->flags = 0; 67.7 - if ( test_bit(EDF_DONEFPUINIT, &ed->ed_flags) ) 67.8 + if ( test_bit(EDF_DONEFPUINIT, &ed->flags) ) 67.9 c->flags |= VGCF_I387_VALID; 67.10 if ( KERNEL_MODE(ed, &ed->arch.guest_context.user_regs) ) 67.11 c->flags |= VGCF_IN_KERNEL;
68.1 --- a/xen/arch/x86/domain.c Tue May 03 14:42:00 2005 +0000 68.2 +++ b/xen/arch/x86/domain.c Tue May 10 08:49:59 2005 +0000 68.3 @@ -37,6 +37,7 @@ 68.4 #include <asm/vmx.h> 68.5 #include <asm/vmx_vmcs.h> 68.6 #include <asm/msr.h> 68.7 +#include <asm/physdev.h> 68.8 #include <xen/kernel.h> 68.9 #include <public/io/ioreq.h> 68.10 #include <xen/multicall.h> 68.11 @@ -251,7 +252,7 @@ void arch_do_createdomain(struct exec_do 68.12 68.13 d->shared_info = (void *)alloc_xenheap_page(); 68.14 memset(d->shared_info, 0, PAGE_SIZE); 68.15 - ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid]; 68.16 + ed->vcpu_info = &d->shared_info->vcpu_data[ed->id]; 68.17 SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d); 68.18 machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 68.19 PAGE_SHIFT] = INVALID_M2P_ENTRY; 68.20 @@ -293,7 +294,7 @@ void arch_do_boot_vcpu(struct exec_domai 68.21 struct domain *d = ed->domain; 68.22 ed->arch.schedule_tail = d->exec_domain[0]->arch.schedule_tail; 68.23 ed->arch.perdomain_ptes = 68.24 - d->arch.mm_perdomain_pt + (ed->eid << PDPT_VCPU_SHIFT); 68.25 + d->arch.mm_perdomain_pt + (ed->id << PDPT_VCPU_SHIFT); 68.26 ed->arch.flags = TF_kernel_mode; 68.27 } 68.28 68.29 @@ -363,7 +364,8 @@ static int vmx_final_setup_guest( 68.30 68.31 /* Put the domain in shadow mode even though we're going to be using 68.32 * the shared 1:1 page table initially. It shouldn't hurt */ 68.33 - shadow_mode_enable(ed->domain, SHM_enable|SHM_translate|SHM_external); 68.34 + shadow_mode_enable(ed->domain, 68.35 + SHM_enable|SHM_refcounts|SHM_translate|SHM_external); 68.36 } 68.37 68.38 return 0; 68.39 @@ -396,9 +398,9 @@ int arch_set_info_guest( 68.40 return -EINVAL; 68.41 } 68.42 68.43 - clear_bit(EDF_DONEFPUINIT, &ed->ed_flags); 68.44 + clear_bit(EDF_DONEFPUINIT, &ed->flags); 68.45 if ( c->flags & VGCF_I387_VALID ) 68.46 - set_bit(EDF_DONEFPUINIT, &ed->ed_flags); 68.47 + set_bit(EDF_DONEFPUINIT, &ed->flags); 68.48 68.49 ed->arch.flags &= ~TF_kernel_mode; 68.50 if ( c->flags & VGCF_IN_KERNEL ) 68.51 @@ -414,7 +416,7 @@ int arch_set_info_guest( 68.52 if ( !IS_PRIV(d) ) 68.53 ed->arch.guest_context.user_regs.eflags &= 0xffffcfff; 68.54 68.55 - if ( test_bit(EDF_DONEINIT, &ed->ed_flags) ) 68.56 + if ( test_bit(EDF_DONEINIT, &ed->flags) ) 68.57 return 0; 68.58 68.59 if ( (rc = (int)set_fast_trap(ed, c->fast_trap_idx)) != 0 ) 68.60 @@ -425,13 +427,13 @@ int arch_set_info_guest( 68.61 for ( i = 0; i < 8; i++ ) 68.62 (void)set_debugreg(ed, i, c->debugreg[i]); 68.63 68.64 - if ( ed->eid == 0 ) 68.65 + if ( ed->id == 0 ) 68.66 d->vm_assist = c->vm_assist; 68.67 68.68 phys_basetab = c->pt_base; 68.69 ed->arch.guest_table = mk_pagetable(phys_basetab); 68.70 68.71 - if ( shadow_mode_enabled(d) ) 68.72 + if ( shadow_mode_refcounts(d) ) 68.73 { 68.74 if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) ) 68.75 return -EINVAL; 68.76 @@ -477,7 +479,7 @@ int arch_set_info_guest( 68.77 update_pagetables(ed); 68.78 68.79 /* Don't redo final setup */ 68.80 - set_bit(EDF_DONEINIT, &ed->ed_flags); 68.81 + set_bit(EDF_DONEINIT, &ed->flags); 68.82 68.83 return 0; 68.84 } 68.85 @@ -795,7 +797,7 @@ void context_switch(struct exec_domain * 68.86 * 'prev' (after this point, a dying domain's info structure may be freed 68.87 * without warning). 
68.88 */ 68.89 - clear_bit(EDF_RUNNING, &prev->ed_flags); 68.90 + clear_bit(EDF_RUNNING, &prev->flags); 68.91 68.92 schedule_tail(next); 68.93 BUG(); 68.94 @@ -968,6 +970,8 @@ void domain_relinquish_resources(struct 68.95 68.96 BUG_ON(d->cpuset != 0); 68.97 68.98 + physdev_destroy_state(d); 68.99 + 68.100 ptwr_destroy(d); 68.101 68.102 /* Release device mappings of other domains */ 68.103 @@ -978,17 +982,21 @@ void domain_relinquish_resources(struct 68.104 { 68.105 if ( pagetable_val(ed->arch.guest_table) != 0 ) 68.106 { 68.107 - (shadow_mode_enabled(d) ? put_page : put_page_and_type) 68.108 - (&frame_table[pagetable_val( 68.109 - ed->arch.guest_table) >> PAGE_SHIFT]); 68.110 + if ( shadow_mode_refcounts(d) ) 68.111 + put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]); 68.112 + else 68.113 + put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]); 68.114 + 68.115 ed->arch.guest_table = mk_pagetable(0); 68.116 } 68.117 68.118 if ( pagetable_val(ed->arch.guest_table_user) != 0 ) 68.119 { 68.120 - (shadow_mode_enabled(d) ? put_page : put_page_and_type) 68.121 - (&frame_table[pagetable_val( 68.122 - ed->arch.guest_table_user) >> PAGE_SHIFT]); 68.123 + if ( shadow_mode_refcounts(d) ) 68.124 + put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]); 68.125 + else 68.126 + put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]); 68.127 + 68.128 ed->arch.guest_table_user = mk_pagetable(0); 68.129 } 68.130
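The domain_relinquish_resources() hunk above settles on one idiom: a domain in refcounted shadow mode (SHM_refcounts) holds only a general reference on its guest tables, while a native paravirtual domain also holds the page-type reference, so teardown must drop either one count or both. A toy two-counter model of that split; the struct and helpers here are simulated stand-ins, not Xen's:

    #include <assert.h>

    /* Simulated page with the two counts tracked per frame. */
    struct page { int count; int type_count; };

    static void put_page(struct page *p)          { p->count--; }
    static void put_page_and_type(struct page *p) { p->type_count--; p->count--; }

    /* Mirrors the branch added in domain_relinquish_resources(). */
    static void release_guest_table(struct page *p, int shadow_refcounts)
    {
        if (shadow_refcounts)
            put_page(p);            /* shadow mode took only a general ref */
        else
            put_page_and_type(p);   /* native mode also pinned the type */
    }

    int main(void)
    {
        struct page pt = { .count = 2, .type_count = 1 };
        release_guest_table(&pt, 0);
        assert(pt.count == 1 && pt.type_count == 0);
        return 0;
    }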
69.1 --- a/xen/arch/x86/domain_build.c Tue May 03 14:42:00 2005 +0000 69.2 +++ b/xen/arch/x86/domain_build.c Tue May 10 08:49:59 2005 +0000 69.3 @@ -114,7 +114,7 @@ int construct_dom0(struct domain *d, 69.4 /* Sanity! */ 69.5 if ( d->id != 0 ) 69.6 BUG(); 69.7 - if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 69.8 + if ( test_bit(DF_CONSTRUCTED, &d->flags) ) 69.9 BUG(); 69.10 69.11 memset(&dsi, 0, sizeof(struct domain_setup_info)); 69.12 @@ -540,14 +540,14 @@ int construct_dom0(struct domain *d, 69.13 /* DOM0 gets access to everything. */ 69.14 physdev_init_dom0(d); 69.15 69.16 - set_bit(DF_CONSTRUCTED, &d->d_flags); 69.17 + set_bit(DF_CONSTRUCTED, &d->flags); 69.18 69.19 new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start); 69.20 69.21 if ( opt_dom0_shadow || opt_dom0_translate ) 69.22 { 69.23 shadow_mode_enable(d, (opt_dom0_translate 69.24 - ? SHM_enable | SHM_translate 69.25 + ? SHM_enable | SHM_refcounts | SHM_translate 69.26 : SHM_enable)); 69.27 if ( opt_dom0_translate ) 69.28 { 69.29 @@ -570,7 +570,7 @@ int construct_dom0(struct domain *d, 69.30 idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table), 69.31 __PAGE_HYPERVISOR); 69.32 translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT), 69.33 - pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT); 69.34 + pagetable_get_pfn(ed->arch.guest_table)); 69.35 idle_pg_table[1] = root_empty(); 69.36 local_flush_tlb(); 69.37 }
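construct_dom0() (and vmx_final_setup_guest() earlier) now composes the shadow mode from SHM_* bits, and the rest of this changeset queries it through predicates such as shadow_mode_refcounts(). A sketch of that bit-test idiom; the flag values below are made up for illustration and are not taken from this patch:

    #include <assert.h>

    #define SHM_enable     (1 << 0)
    #define SHM_refcounts  (1 << 1)
    #define SHM_translate  (1 << 2)
    #define SHM_external   (1 << 3)

    /* Same shape as a shadow_mode_refcounts()-style predicate. */
    static int mode_refcounts(unsigned int mode)
    {
        return !!(mode & SHM_refcounts);
    }

    int main(void)
    {
        unsigned int dom0_translate = SHM_enable | SHM_refcounts | SHM_translate;
        unsigned int dom0_shadow    = SHM_enable;

        assert(mode_refcounts(dom0_translate));
        assert(!mode_refcounts(dom0_shadow));
        return 0;
    }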
70.1 --- a/xen/arch/x86/i387.c Tue May 03 14:42:00 2005 +0000 70.2 +++ b/xen/arch/x86/i387.c Tue May 10 08:49:59 2005 +0000 70.3 @@ -18,7 +18,7 @@ void init_fpu(void) 70.4 __asm__ __volatile__ ( "fninit" ); 70.5 if ( cpu_has_xmm ) 70.6 load_mxcsr(0x1f80); 70.7 - set_bit(EDF_DONEFPUINIT, ¤t->ed_flags); 70.8 + set_bit(EDF_DONEFPUINIT, ¤t->flags); 70.9 } 70.10 70.11 void save_init_fpu(struct exec_domain *tsk) 70.12 @@ -28,7 +28,7 @@ void save_init_fpu(struct exec_domain *t 70.13 * This causes us to set the real flag, so we'll need 70.14 * to temporarily clear it while saving f-p state. 70.15 */ 70.16 - if ( test_bit(EDF_GUEST_STTS, &tsk->ed_flags) ) 70.17 + if ( test_bit(EDF_GUEST_STTS, &tsk->flags) ) 70.18 clts(); 70.19 70.20 if ( cpu_has_fxsr ) 70.21 @@ -40,7 +40,7 @@ void save_init_fpu(struct exec_domain *t 70.22 "fnsave %0 ; fwait" 70.23 : "=m" (tsk->arch.guest_context.fpu_ctxt) ); 70.24 70.25 - clear_bit(EDF_USEDFPU, &tsk->ed_flags); 70.26 + clear_bit(EDF_USEDFPU, &tsk->flags); 70.27 stts(); 70.28 } 70.29
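The save_init_fpu() comment describes the lazy-FPU protocol: a guest's virtual TS is mirrored into the real CR0.TS, which must be cleared before fxsave/fnsave can execute and set again afterwards so the next FPU use traps. A simulated walk-through of that ordering (the flag names follow the patch; the hardware is stubbed):

    #include <stdio.h>

    static int cr0_ts;                     /* simulated CR0.TS */
    static void clts(void) { cr0_ts = 0; }
    static void stts(void) { cr0_ts = 1; }

    /* guest_stts plays the role of EDF_GUEST_STTS, used_fpu of EDF_USEDFPU. */
    static void save_init_fpu_model(int guest_stts, int *used_fpu)
    {
        if (guest_stts)
            clts();                        /* fxsave/fnsave fault if TS=1 */

        printf("save FPU state with TS=%d\n", cr0_ts);   /* must be TS=0 */

        *used_fpu = 0;                     /* state is out of the FPU now */
        stts();                            /* re-arm #NM for the next use */
    }

    int main(void)
    {
        int used_fpu = 1;
        stts();                            /* guest STTS mirrored into CR0 */
        save_init_fpu_model(1, &used_fpu);
        return used_fpu;                   /* 0 on the expected path */
    }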
71.1 --- a/xen/arch/x86/i8259.c Tue May 03 14:42:00 2005 +0000 71.2 +++ b/xen/arch/x86/i8259.c Tue May 10 08:49:59 2005 +0000 71.3 @@ -45,7 +45,7 @@ BUILD_COMMON_IRQ() 71.4 71.5 /* 71.6 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 71.7 - * (these are usually mapped to vectors 0x30-0x3f) 71.8 + * (these are usually mapped to vectors 0x20-0x2f) 71.9 */ 71.10 BUILD_16_IRQS(0x0) 71.11 71.12 @@ -58,7 +58,7 @@ BUILD_16_IRQS(0x0) 71.13 * of these. Plus, more powerful systems might have more than 64 71.14 * IO-APIC registers. 71.15 * 71.16 - * (these are usually mapped into the 0x30-0xff vector range) 71.17 + * (these are usually mapped into the 0x20-0xff vector range) 71.18 */ 71.19 BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) 71.20 BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) 71.21 @@ -101,7 +101,7 @@ BUILD_SMP_INTERRUPT(spurious_interrupt,S 71.22 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 71.23 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) 71.24 71.25 - void *interrupt[NR_IRQS] = { 71.26 + void (*interrupt[NR_IRQS])(void) = { 71.27 IRQLIST_16(0x0), 71.28 71.29 #ifdef CONFIG_X86_IO_APIC 71.30 @@ -341,7 +341,7 @@ void __init init_8259A(int auto_eoi) 71.31 * outb_p - this has to work on a wide range of PC hardware. 71.32 */ 71.33 outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ 71.34 - outb_p(0x30 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ 71.35 + outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ 71.36 outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ 71.37 if (auto_eoi) 71.38 outb_p(0x03, 0x21); /* master does Auto EOI */ 71.39 @@ -349,7 +349,7 @@ void __init init_8259A(int auto_eoi) 71.40 outb_p(0x01, 0x21); /* master expects normal EOI */ 71.41 71.42 outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ 71.43 - outb_p(0x30 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ 71.44 + outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ 71.45 outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ 71.46 outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode 71.47 is to be investigated) */
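With the ICW2 values above changed from 0x30/0x38 to 0x20/0x28, ISA IRQ n is now delivered on vector 0x20 + n, matching the updated BUILD_16_IRQS comments. A quick check of the arithmetic (the macro name is illustrative):

    #include <assert.h>

    #define LEGACY_VECTOR(irq)  (0x20 + (irq))

    int main(void)
    {
        assert(LEGACY_VECTOR(0)  == 0x20);  /* timer: master ICW2 = 0x20 */
        assert(LEGACY_VECTOR(8)  == 0x28);  /* RTC: slave ICW2 = 0x20 + 8 */
        assert(LEGACY_VECTOR(15) == 0x2f);
        return 0;
    }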
72.1 --- a/xen/arch/x86/idle0_task.c Tue May 03 14:42:00 2005 +0000 72.2 +++ b/xen/arch/x86/idle0_task.c Tue May 10 08:49:59 2005 +0000 72.3 @@ -5,7 +5,7 @@ 72.4 72.5 struct domain idle0_domain = { 72.6 id: IDLE_DOMAIN_ID, 72.7 - d_flags: 1<<DF_IDLETASK, 72.8 + flags: 1<<DF_IDLETASK, 72.9 refcnt: ATOMIC_INIT(1) 72.10 }; 72.11
73.1 --- a/xen/arch/x86/io_apic.c Tue May 03 14:42:00 2005 +0000 73.2 +++ b/xen/arch/x86/io_apic.c Tue May 10 08:49:59 2005 +0000 73.3 @@ -25,26 +25,20 @@ 73.4 #include <xen/irq.h> 73.5 #include <xen/delay.h> 73.6 #include <xen/sched.h> 73.7 -#include <xen/config.h> 73.8 -#include <asm/mc146818rtc.h> 73.9 +#include <xen/acpi.h> 73.10 #include <asm/io.h> 73.11 -#include <asm/mpspec.h> 73.12 -#include <asm/io_apic.h> 73.13 +#include <asm/mc146818rtc.h> 73.14 #include <asm/smp.h> 73.15 #include <asm/desc.h> 73.16 -#include <asm/smpboot.h> 73.17 - 73.18 -#ifdef CONFIG_X86_IO_APIC 73.19 - 73.20 -#undef APIC_LOCKUP_DEBUG 73.21 +#include <mach_apic.h> 73.22 +#include <io_ports.h> 73.23 73.24 -#define APIC_LOCKUP_DEBUG 73.25 - 73.26 -static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; 73.27 +int (*ioapic_renumber_irq)(int ioapic, int irq); 73.28 +atomic_t irq_mis_count; 73.29 73.30 -unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL; 73.31 -unsigned char int_delivery_mode = dest_LowestPrio; 73.32 +static DEFINE_SPINLOCK(ioapic_lock); 73.33 73.34 +int skip_ioapic_setup; 73.35 73.36 /* 73.37 * # of IRQ routing registers 73.38 @@ -69,12 +63,20 @@ static struct irq_pin_list { 73.39 int apic, pin, next; 73.40 } irq_2_pin[PIN_MAP_SIZE]; 73.41 73.42 +int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; 73.43 +#ifdef CONFIG_PCI_MSI 73.44 +#define vector_to_irq(vector) \ 73.45 + (platform_legacy_irq(vector) ? vector : vector_irq[vector]) 73.46 +#else 73.47 +#define vector_to_irq(vector) (vector) 73.48 +#endif 73.49 + 73.50 /* 73.51 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 73.52 * shared ISA-space IRQs, so we have to support them. We are super 73.53 * fast in the common case, and fast for shared ISA-space IRQs. 73.54 */ 73.55 -static void __init add_pin_to_irq(unsigned int irq, int apic, int pin) 73.56 +static void add_pin_to_irq(unsigned int irq, int apic, int pin) 73.57 { 73.58 static int first_free_entry = NR_IRQS; 73.59 struct irq_pin_list *entry = irq_2_pin + irq; 73.60 @@ -112,36 +114,48 @@ static void __init replace_pin_at_irq(un 73.61 } 73.62 } 73.63 73.64 -#define __DO_ACTION(R, ACTION, FINAL) \ 73.65 - \ 73.66 -{ \ 73.67 - int pin; \ 73.68 - struct irq_pin_list *entry = irq_2_pin + irq; \ 73.69 - \ 73.70 - for (;;) { \ 73.71 - unsigned int reg; \ 73.72 - pin = entry->pin; \ 73.73 - if (pin == -1) \ 73.74 - break; \ 73.75 - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ 73.76 - reg ACTION; \ 73.77 - io_apic_write(entry->apic, 0x10 + R + pin*2, reg); \ 73.78 - if (!entry->next) \ 73.79 - break; \ 73.80 - entry = irq_2_pin + entry->next; \ 73.81 - } \ 73.82 - FINAL; \ 73.83 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) 73.84 +{ 73.85 + struct irq_pin_list *entry = irq_2_pin + irq; 73.86 + unsigned int pin, reg; 73.87 + 73.88 + for (;;) { 73.89 + pin = entry->pin; 73.90 + if (pin == -1) 73.91 + break; 73.92 + reg = io_apic_read(entry->apic, 0x10 + pin*2); 73.93 + reg &= ~disable; 73.94 + reg |= enable; 73.95 + io_apic_modify(entry->apic, 0x10 + pin*2, reg); 73.96 + if (!entry->next) 73.97 + break; 73.98 + entry = irq_2_pin + entry->next; 73.99 + } 73.100 } 73.101 73.102 -#define DO_ACTION(name,R,ACTION, FINAL) \ 73.103 - \ 73.104 - static void name##_IO_APIC_irq (unsigned int irq) \ 73.105 - __DO_ACTION(R, ACTION, FINAL) 73.106 +/* mask = 1 */ 73.107 +static void __mask_IO_APIC_irq (unsigned int irq) 73.108 +{ 73.109 + __modify_IO_APIC_irq(irq, 0x00010000, 0); 73.110 +} 73.111 + 73.112 +/* mask = 0 */ 73.113 
+static void __unmask_IO_APIC_irq (unsigned int irq) 73.114 +{ 73.115 + __modify_IO_APIC_irq(irq, 0, 0x00010000); 73.116 +} 73.117 73.118 -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) 73.119 -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) 73.120 -DO_ACTION( __edge, 0, &= 0xffff7fff, ) 73.121 -DO_ACTION( __level, 0, |= 0x00008000, ) 73.122 +/* mask = 1, trigger = 0 */ 73.123 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq) 73.124 +{ 73.125 + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); 73.126 +} 73.127 + 73.128 +/* mask = 0, trigger = 1 */ 73.129 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq) 73.130 +{ 73.131 + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); 73.132 +} 73.133 73.134 static void mask_IO_APIC_irq (unsigned int irq) 73.135 { 73.136 @@ -165,7 +179,7 @@ void clear_IO_APIC_pin(unsigned int apic 73.137 { 73.138 struct IO_APIC_route_entry entry; 73.139 unsigned long flags; 73.140 - 73.141 + 73.142 /* Check delivery_mode to be sure we're not clearing an SMI pin */ 73.143 spin_lock_irqsave(&ioapic_lock, flags); 73.144 *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); 73.145 @@ -194,85 +208,467 @@ static void clear_IO_APIC (void) 73.146 clear_IO_APIC_pin(apic, pin); 73.147 } 73.148 73.149 -static void set_ioapic_affinity (unsigned int irq, unsigned long mask) 73.150 +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) 73.151 { 73.152 unsigned long flags; 73.153 - 73.154 - /* 73.155 - * Only the first 8 bits are valid. 73.156 - */ 73.157 - mask = mask << 24; 73.158 + int pin; 73.159 + struct irq_pin_list *entry = irq_2_pin + irq; 73.160 + unsigned int apicid_value; 73.161 + 73.162 + apicid_value = cpu_mask_to_apicid(cpumask); 73.163 + /* Prepare to do the io_apic_write */ 73.164 + apicid_value = apicid_value << 24; 73.165 spin_lock_irqsave(&ioapic_lock, flags); 73.166 - __DO_ACTION(1, = mask, ) 73.167 + for (;;) { 73.168 + pin = entry->pin; 73.169 + if (pin == -1) 73.170 + break; 73.171 + io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); 73.172 + if (!entry->next) 73.173 + break; 73.174 + entry = irq_2_pin + entry->next; 73.175 + } 73.176 spin_unlock_irqrestore(&ioapic_lock, flags); 73.177 } 73.178 73.179 -#define balance_irq(_irq) ((void)0) 73.180 +#if defined(CONFIG_IRQBALANCE) 73.181 +# include <asm/processor.h> /* kernel_thread() */ 73.182 +# include <xen/kernel_stat.h> /* kstat */ 73.183 +# include <xen/slab.h> /* kmalloc() */ 73.184 +# include <xen/timer.h> /* time_after() */ 73.185 + 73.186 +# ifdef CONFIG_BALANCED_IRQ_DEBUG 73.187 +# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) 73.188 +# define Dprintk(x...) do { TDprintk(x); } while (0) 73.189 +# else 73.190 +# define TDprintk(x...) 73.191 +# define Dprintk(x...) 73.192 +# endif 73.193 + 73.194 +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; 73.195 73.196 -/* 73.197 - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to 73.198 - * specific CPU-side IRQs. 
73.199 - */ 73.200 +#define IRQBALANCE_CHECK_ARCH -999 73.201 +static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; 73.202 +static int physical_balance = 0; 73.203 + 73.204 +struct irq_cpu_info { 73.205 + unsigned long * last_irq; 73.206 + unsigned long * irq_delta; 73.207 + unsigned long irq; 73.208 +} irq_cpu_data[NR_CPUS]; 73.209 + 73.210 +#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) 73.211 +#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) 73.212 +#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) 73.213 + 73.214 +#define IDLE_ENOUGH(cpu,now) \ 73.215 + (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) 73.216 + 73.217 +#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) 73.218 73.219 -#define MAX_PIRQS 8 73.220 -int pirq_entries [MAX_PIRQS]; 73.221 -int pirqs_enabled; 73.222 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) 73.223 + 73.224 +#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) 73.225 +#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) 73.226 +#define BALANCED_IRQ_MORE_DELTA (HZ/10) 73.227 +#define BALANCED_IRQ_LESS_DELTA (HZ) 73.228 + 73.229 +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL; 73.230 + 73.231 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask, 73.232 + unsigned long now, int direction) 73.233 +{ 73.234 + int search_idle = 1; 73.235 + int cpu = curr_cpu; 73.236 + 73.237 + goto inside; 73.238 73.239 -int skip_ioapic_setup; 73.240 -#if 0 73.241 + do { 73.242 + if (unlikely(cpu == curr_cpu)) 73.243 + search_idle = 0; 73.244 +inside: 73.245 + if (direction == 1) { 73.246 + cpu++; 73.247 + if (cpu >= NR_CPUS) 73.248 + cpu = 0; 73.249 + } else { 73.250 + cpu--; 73.251 + if (cpu == -1) 73.252 + cpu = NR_CPUS-1; 73.253 + } 73.254 + } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || 73.255 + (search_idle && !IDLE_ENOUGH(cpu,now))); 73.256 73.257 -static int __init noioapic_setup(char *str) 73.258 -{ 73.259 - skip_ioapic_setup = 1; 73.260 - return 1; 73.261 + return cpu; 73.262 } 73.263 73.264 -__setup("noapic", noioapic_setup); 73.265 +static inline void balance_irq(int cpu, int irq) 73.266 +{ 73.267 + unsigned long now = jiffies; 73.268 + cpumask_t allowed_mask; 73.269 + unsigned int new_cpu; 73.270 + 73.271 + if (irqbalance_disabled) 73.272 + return; 73.273 + 73.274 + cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); 73.275 + new_cpu = move(cpu, allowed_mask, now, 1); 73.276 + if (cpu != new_cpu) { 73.277 + irq_desc_t *desc = irq_desc + irq; 73.278 + unsigned long flags; 73.279 73.280 -static int __init ioapic_setup(char *str) 73.281 + spin_lock_irqsave(&desc->lock, flags); 73.282 + pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); 73.283 + spin_unlock_irqrestore(&desc->lock, flags); 73.284 + } 73.285 +} 73.286 + 73.287 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) 73.288 { 73.289 - skip_ioapic_setup = 0; 73.290 - return 1; 73.291 + int i, j; 73.292 + Dprintk("Rotating IRQs among CPUs.\n"); 73.293 + for (i = 0; i < NR_CPUS; i++) { 73.294 + for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) { 73.295 + if (!irq_desc[j].action) 73.296 + continue; 73.297 + /* Is it a significant load ? 
*/ 73.298 + if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < 73.299 + useful_load_threshold) 73.300 + continue; 73.301 + balance_irq(i, j); 73.302 + } 73.303 + } 73.304 + balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, 73.305 + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); 73.306 + return; 73.307 } 73.308 73.309 -__setup("apic", ioapic_setup); 73.310 - 73.311 - 73.312 - 73.313 -static int __init ioapic_pirq_setup(char *str) 73.314 +static void do_irq_balance(void) 73.315 { 73.316 - int i, max; 73.317 - int ints[MAX_PIRQS+1]; 73.318 + int i, j; 73.319 + unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); 73.320 + unsigned long move_this_load = 0; 73.321 + int max_loaded = 0, min_loaded = 0; 73.322 + int load; 73.323 + unsigned long useful_load_threshold = balanced_irq_interval + 10; 73.324 + int selected_irq; 73.325 + int tmp_loaded, first_attempt = 1; 73.326 + unsigned long tmp_cpu_irq; 73.327 + unsigned long imbalance = 0; 73.328 + cpumask_t allowed_mask, target_cpu_mask, tmp; 73.329 + 73.330 + for (i = 0; i < NR_CPUS; i++) { 73.331 + int package_index; 73.332 + CPU_IRQ(i) = 0; 73.333 + if (!cpu_online(i)) 73.334 + continue; 73.335 + package_index = CPU_TO_PACKAGEINDEX(i); 73.336 + for (j = 0; j < NR_IRQS; j++) { 73.337 + unsigned long value_now, delta; 73.338 + /* Is this an active IRQ? */ 73.339 + if (!irq_desc[j].action) 73.340 + continue; 73.341 + if ( package_index == i ) 73.342 + IRQ_DELTA(package_index,j) = 0; 73.343 + /* Determine the total count per processor per IRQ */ 73.344 + value_now = (unsigned long) kstat_cpu(i).irqs[j]; 73.345 + 73.346 + /* Determine the activity per processor per IRQ */ 73.347 + delta = value_now - LAST_CPU_IRQ(i,j); 73.348 + 73.349 + /* Update last_cpu_irq[][] for the next time */ 73.350 + LAST_CPU_IRQ(i,j) = value_now; 73.351 + 73.352 + /* Ignore IRQs whose rate is less than the clock */ 73.353 + if (delta < useful_load_threshold) 73.354 + continue; 73.355 + /* update the load for the processor or package total */ 73.356 + IRQ_DELTA(package_index,j) += delta; 73.357 73.358 - get_options(str, ARRAY_SIZE(ints), ints); 73.359 + /* Keep track of the higher numbered sibling as well */ 73.360 + if (i != package_index) 73.361 + CPU_IRQ(i) += delta; 73.362 + /* 73.363 + * We have sibling A and sibling B in the package 73.364 + * 73.365 + * cpu_irq[A] = load for cpu A + load for cpu B 73.366 + * cpu_irq[B] = load for cpu B 73.367 + */ 73.368 + CPU_IRQ(package_index) += delta; 73.369 + } 73.370 + } 73.371 + /* Find the least loaded processor package */ 73.372 + for (i = 0; i < NR_CPUS; i++) { 73.373 + if (!cpu_online(i)) 73.374 + continue; 73.375 + if (i != CPU_TO_PACKAGEINDEX(i)) 73.376 + continue; 73.377 + if (min_cpu_irq > CPU_IRQ(i)) { 73.378 + min_cpu_irq = CPU_IRQ(i); 73.379 + min_loaded = i; 73.380 + } 73.381 + } 73.382 + max_cpu_irq = ULONG_MAX; 73.383 73.384 - for (i = 0; i < MAX_PIRQS; i++) 73.385 - pirq_entries[i] = -1; 73.386 +tryanothercpu: 73.387 + /* Look for heaviest loaded processor. 73.388 + * We may come back to get the next heaviest loaded processor. 73.389 + * Skip processors with trivial loads. 
73.390 + */ 73.391 + tmp_cpu_irq = 0; 73.392 + tmp_loaded = -1; 73.393 + for (i = 0; i < NR_CPUS; i++) { 73.394 + if (!cpu_online(i)) 73.395 + continue; 73.396 + if (i != CPU_TO_PACKAGEINDEX(i)) 73.397 + continue; 73.398 + if (max_cpu_irq <= CPU_IRQ(i)) 73.399 + continue; 73.400 + if (tmp_cpu_irq < CPU_IRQ(i)) { 73.401 + tmp_cpu_irq = CPU_IRQ(i); 73.402 + tmp_loaded = i; 73.403 + } 73.404 + } 73.405 73.406 - pirqs_enabled = 1; 73.407 - printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n"); 73.408 - max = MAX_PIRQS; 73.409 - if (ints[0] < MAX_PIRQS) 73.410 - max = ints[0]; 73.411 + if (tmp_loaded == -1) { 73.412 + /* In the case of small number of heavy interrupt sources, 73.413 + * loading some of the cpus too much. We use Ingo's original 73.414 + * approach to rotate them around. 73.415 + */ 73.416 + if (!first_attempt && imbalance >= useful_load_threshold) { 73.417 + rotate_irqs_among_cpus(useful_load_threshold); 73.418 + return; 73.419 + } 73.420 + goto not_worth_the_effort; 73.421 + } 73.422 + 73.423 + first_attempt = 0; /* heaviest search */ 73.424 + max_cpu_irq = tmp_cpu_irq; /* load */ 73.425 + max_loaded = tmp_loaded; /* processor */ 73.426 + imbalance = (max_cpu_irq - min_cpu_irq) / 2; 73.427 + 73.428 + Dprintk("max_loaded cpu = %d\n", max_loaded); 73.429 + Dprintk("min_loaded cpu = %d\n", min_loaded); 73.430 + Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); 73.431 + Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); 73.432 + Dprintk("load imbalance = %lu\n", imbalance); 73.433 + 73.434 + /* if imbalance is less than approx 10% of max load, then 73.435 + * observe diminishing returns action. - quit 73.436 + */ 73.437 + if (imbalance < (max_cpu_irq >> 3)) { 73.438 + Dprintk("Imbalance too trivial\n"); 73.439 + goto not_worth_the_effort; 73.440 + } 73.441 + 73.442 +tryanotherirq: 73.443 + /* if we select an IRQ to move that can't go where we want, then 73.444 + * see if there is another one to try. 73.445 + */ 73.446 + move_this_load = 0; 73.447 + selected_irq = -1; 73.448 + for (j = 0; j < NR_IRQS; j++) { 73.449 + /* Is this an active IRQ? */ 73.450 + if (!irq_desc[j].action) 73.451 + continue; 73.452 + if (imbalance <= IRQ_DELTA(max_loaded,j)) 73.453 + continue; 73.454 + /* Try to find the IRQ that is closest to the imbalance 73.455 + * without going over. 73.456 + */ 73.457 + if (move_this_load < IRQ_DELTA(max_loaded,j)) { 73.458 + move_this_load = IRQ_DELTA(max_loaded,j); 73.459 + selected_irq = j; 73.460 + } 73.461 + } 73.462 + if (selected_irq == -1) { 73.463 + goto tryanothercpu; 73.464 + } 73.465 73.466 - for (i = 0; i < max; i++) { 73.467 - printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); 73.468 - /* 73.469 - * PIRQs are mapped upside down, usually. 73.470 + imbalance = move_this_load; 73.471 + 73.472 + /* For physical_balance case, we accumlated both load 73.473 + * values in the one of the siblings cpu_irq[], 73.474 + * to use the same code for physical and logical processors 73.475 + * as much as possible. 73.476 + * 73.477 + * NOTE: the cpu_irq[] array holds the sum of the load for 73.478 + * sibling A and sibling B in the slot for the lowest numbered 73.479 + * sibling (A), _AND_ the load for sibling B in the slot for 73.480 + * the higher numbered sibling. 
73.481 + * 73.482 + * We seek the least loaded sibling by making the comparison 73.483 + * (A+B)/2 vs B 73.484 + */ 73.485 + load = CPU_IRQ(min_loaded) >> 1; 73.486 + for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { 73.487 + if (load > CPU_IRQ(j)) { 73.488 + /* This won't change cpu_sibling_map[min_loaded] */ 73.489 + load = CPU_IRQ(j); 73.490 + min_loaded = j; 73.491 + } 73.492 + } 73.493 + 73.494 + cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); 73.495 + target_cpu_mask = cpumask_of_cpu(min_loaded); 73.496 + cpus_and(tmp, target_cpu_mask, allowed_mask); 73.497 + 73.498 + if (!cpus_empty(tmp)) { 73.499 + irq_desc_t *desc = irq_desc + selected_irq; 73.500 + unsigned long flags; 73.501 + 73.502 + Dprintk("irq = %d moved to cpu = %d\n", 73.503 + selected_irq, min_loaded); 73.504 + /* mark for change destination */ 73.505 + spin_lock_irqsave(&desc->lock, flags); 73.506 + pending_irq_balance_cpumask[selected_irq] = 73.507 + cpumask_of_cpu(min_loaded); 73.508 + spin_unlock_irqrestore(&desc->lock, flags); 73.509 + /* Since we made a change, come back sooner to 73.510 + * check for more variation. 73.511 */ 73.512 - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; 73.513 + balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, 73.514 + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); 73.515 + return; 73.516 } 73.517 - return 1; 73.518 + goto tryanotherirq; 73.519 + 73.520 +not_worth_the_effort: 73.521 + /* 73.522 + * if we did not find an IRQ to move, then adjust the time interval 73.523 + * upward 73.524 + */ 73.525 + balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, 73.526 + balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); 73.527 + Dprintk("IRQ worth rotating not found\n"); 73.528 + return; 73.529 } 73.530 73.531 -__setup("pirq=", ioapic_pirq_setup); 73.532 +static int balanced_irq(void *unused) 73.533 +{ 73.534 + int i; 73.535 + unsigned long prev_balance_time = jiffies; 73.536 + long time_remaining = balanced_irq_interval; 73.537 + 73.538 + daemonize("kirqd"); 73.539 + 73.540 + /* push everything to CPU 0 to give us a starting point. */ 73.541 + for (i = 0 ; i < NR_IRQS ; i++) { 73.542 + pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); 73.543 + } 73.544 + 73.545 + for ( ; ; ) { 73.546 + set_current_state(TASK_INTERRUPTIBLE); 73.547 + time_remaining = schedule_timeout(time_remaining); 73.548 + try_to_freeze(PF_FREEZE); 73.549 + if (time_after(jiffies, 73.550 + prev_balance_time+balanced_irq_interval)) { 73.551 + do_irq_balance(); 73.552 + prev_balance_time = jiffies; 73.553 + time_remaining = balanced_irq_interval; 73.554 + } 73.555 + } 73.556 + return 0; 73.557 +} 73.558 + 73.559 +static int __init balanced_irq_init(void) 73.560 +{ 73.561 + int i; 73.562 + struct cpuinfo_x86 *c; 73.563 + cpumask_t tmp; 73.564 + 73.565 + cpus_shift_right(tmp, cpu_online_map, 2); 73.566 + c = &boot_cpu_data; 73.567 + /* When not overwritten by the command line ask subarchitecture. 
*/ 73.568 + if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) 73.569 + irqbalance_disabled = NO_BALANCE_IRQ; 73.570 + if (irqbalance_disabled) 73.571 + return 0; 73.572 + 73.573 + /* disable irqbalance completely if there is only one processor online */ 73.574 + if (num_online_cpus() < 2) { 73.575 + irqbalance_disabled = 1; 73.576 + return 0; 73.577 + } 73.578 + /* 73.579 + * Enable physical balance only if more than 1 physical processor 73.580 + * is present 73.581 + */ 73.582 + if (smp_num_siblings > 1 && !cpus_empty(tmp)) 73.583 + physical_balance = 1; 73.584 73.585 -#endif 73.586 + for (i = 0; i < NR_CPUS; i++) { 73.587 + if (!cpu_online(i)) 73.588 + continue; 73.589 + irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); 73.590 + irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); 73.591 + if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { 73.592 + printk(KERN_ERR "balanced_irq_init: out of memory"); 73.593 + goto failed; 73.594 + } 73.595 + memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); 73.596 + memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); 73.597 + } 73.598 + 73.599 + printk(KERN_INFO "Starting balanced_irq\n"); 73.600 + if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 73.601 + return 0; 73.602 + else 73.603 + printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); 73.604 +failed: 73.605 + for (i = 0; i < NR_CPUS; i++) { 73.606 + if(irq_cpu_data[i].irq_delta) 73.607 + kfree(irq_cpu_data[i].irq_delta); 73.608 + if(irq_cpu_data[i].last_irq) 73.609 + kfree(irq_cpu_data[i].last_irq); 73.610 + } 73.611 + return 0; 73.612 +} 73.613 + 73.614 +int __init irqbalance_disable(char *str) 73.615 +{ 73.616 + irqbalance_disabled = 1; 73.617 + return 0; 73.618 +} 73.619 + 73.620 +__setup("noirqbalance", irqbalance_disable); 73.621 + 73.622 +static inline void move_irq(int irq) 73.623 +{ 73.624 + /* note - we hold the desc->lock */ 73.625 + if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { 73.626 + set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); 73.627 + cpus_clear(pending_irq_balance_cpumask[irq]); 73.628 + } 73.629 +} 73.630 + 73.631 +late_initcall(balanced_irq_init); 73.632 + 73.633 +#else /* !CONFIG_IRQBALANCE */ 73.634 +static inline void move_irq(int irq) { } 73.635 +#endif /* CONFIG_IRQBALANCE */ 73.636 + 73.637 +#ifndef CONFIG_SMP 73.638 +void fastcall send_IPI_self(int vector) 73.639 +{ 73.640 + unsigned int cfg; 73.641 + 73.642 + /* 73.643 + * Wait for idle. 73.644 + */ 73.645 + apic_wait_icr_idle(); 73.646 + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; 73.647 + /* 73.648 + * Send the IPI. The write to APIC_ICR fires this off. 73.649 + */ 73.650 + apic_write_around(APIC_ICR, cfg); 73.651 +} 73.652 +#endif /* !CONFIG_SMP */ 73.653 73.654 /* 73.655 * Find the IRQ entry number of a certain pin. 
73.656 */ 73.657 -static int __init find_irq_entry(int apic, int pin, int type) 73.658 +static int find_irq_entry(int apic, int pin, int type) 73.659 { 73.660 int i; 73.661 73.662 @@ -289,7 +685,7 @@ static int __init find_irq_entry(int api 73.663 /* 73.664 * Find the pin to which IRQ[irq] (ISA) is connected 73.665 */ 73.666 -static int __init find_isa_irq_pin(int irq, int type) 73.667 +static int find_isa_irq_pin(int irq, int type) 73.668 { 73.669 int i; 73.670 73.671 @@ -298,7 +694,9 @@ static int __init find_isa_irq_pin(int i 73.672 73.673 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || 73.674 mp_bus_id_to_type[lbus] == MP_BUS_EISA || 73.675 - mp_bus_id_to_type[lbus] == MP_BUS_MCA) && 73.676 + mp_bus_id_to_type[lbus] == MP_BUS_MCA || 73.677 + mp_bus_id_to_type[lbus] == MP_BUS_NEC98 73.678 + ) && 73.679 (mp_irqs[i].mpc_irqtype == type) && 73.680 (mp_irqs[i].mpc_srcbusirq == irq)) 73.681 73.682 @@ -317,9 +715,9 @@ int IO_APIC_get_PCI_irq_vector(int bus, 73.683 { 73.684 int apic, i, best_guess = -1; 73.685 73.686 - Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", 73.687 - bus, slot, pin); 73.688 - if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) { 73.689 + apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " 73.690 + "slot:%d, pin:%d.\n", bus, slot, pin); 73.691 + if (mp_bus_id_to_pci_bus[bus] == -1) { 73.692 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); 73.693 return -1; 73.694 } 73.695 @@ -354,15 +752,40 @@ int IO_APIC_get_PCI_irq_vector(int bus, 73.696 } 73.697 73.698 /* 73.699 + * This function currently is only a helper for the i386 smp boot process where 73.700 + * we need to reprogram the ioredtbls to cater for the cpus which have come online 73.701 + * so mask in all cases should simply be TARGET_CPUS 73.702 + */ 73.703 +void __init setup_ioapic_dest(void) 73.704 +{ 73.705 + int pin, ioapic, irq, irq_entry; 73.706 + 73.707 + if (skip_ioapic_setup == 1) 73.708 + return; 73.709 + 73.710 + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { 73.711 + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { 73.712 + irq_entry = find_irq_entry(ioapic, pin, mp_INT); 73.713 + if (irq_entry == -1) 73.714 + continue; 73.715 + irq = pin_2_irq(irq_entry, ioapic, pin); 73.716 + set_ioapic_affinity_irq(irq, TARGET_CPUS); 73.717 + } 73.718 + 73.719 + } 73.720 +} 73.721 + 73.722 +/* 73.723 * EISA Edge/Level control register, ELCR 73.724 */ 73.725 -static int __init EISA_ELCR(unsigned int irq) 73.726 +static int EISA_ELCR(unsigned int irq) 73.727 { 73.728 if (irq < 16) { 73.729 unsigned int port = 0x4d0 + (irq >> 3); 73.730 return (inb(port) >> (irq & 7)) & 1; 73.731 } 73.732 - printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq); 73.733 + apic_printk(APIC_VERBOSE, KERN_INFO 73.734 + "Broken MPtable reports ISA irq %d\n", irq); 73.735 return 0; 73.736 } 73.737 73.738 @@ -392,6 +815,12 @@ static int __init EISA_ELCR(unsigned int 73.739 #define default_MCA_trigger(idx) (1) 73.740 #define default_MCA_polarity(idx) (0) 73.741 73.742 +/* NEC98 interrupts are always polarity zero edge triggered, 73.743 + * when listed as conforming in the MP table. 
*/ 73.744 + 73.745 +#define default_NEC98_trigger(idx) (0) 73.746 +#define default_NEC98_polarity(idx) (0) 73.747 + 73.748 static int __init MPBIOS_polarity(int idx) 73.749 { 73.750 int bus = mp_irqs[idx].mpc_srcbus; 73.751 @@ -426,6 +855,11 @@ static int __init MPBIOS_polarity(int id 73.752 polarity = default_MCA_polarity(idx); 73.753 break; 73.754 } 73.755 + case MP_BUS_NEC98: /* NEC 98 pin */ 73.756 + { 73.757 + polarity = default_NEC98_polarity(idx); 73.758 + break; 73.759 + } 73.760 default: 73.761 { 73.762 printk(KERN_WARNING "broken BIOS!!\n"); 73.763 @@ -461,7 +895,7 @@ static int __init MPBIOS_polarity(int id 73.764 return polarity; 73.765 } 73.766 73.767 -static int __init MPBIOS_trigger(int idx) 73.768 +static int MPBIOS_trigger(int idx) 73.769 { 73.770 int bus = mp_irqs[idx].mpc_srcbus; 73.771 int trigger; 73.772 @@ -495,6 +929,11 @@ static int __init MPBIOS_trigger(int idx 73.773 trigger = default_MCA_trigger(idx); 73.774 break; 73.775 } 73.776 + case MP_BUS_NEC98: /* NEC 98 pin */ 73.777 + { 73.778 + trigger = default_NEC98_trigger(idx); 73.779 + break; 73.780 + } 73.781 default: 73.782 { 73.783 printk(KERN_WARNING "broken BIOS!!\n"); 73.784 @@ -556,6 +995,7 @@ static int pin_2_irq(int idx, int apic, 73.785 case MP_BUS_ISA: /* ISA pin */ 73.786 case MP_BUS_EISA: 73.787 case MP_BUS_MCA: 73.788 + case MP_BUS_NEC98: 73.789 { 73.790 irq = mp_irqs[idx].mpc_srcbusirq; 73.791 break; 73.792 @@ -569,6 +1009,13 @@ static int pin_2_irq(int idx, int apic, 73.793 while (i < apic) 73.794 irq += nr_ioapic_registers[i++]; 73.795 irq += pin; 73.796 + 73.797 + /* 73.798 + * For MPS mode, so far only needed by ES7000 platform 73.799 + */ 73.800 + if (ioapic_renumber_irq) 73.801 + irq = ioapic_renumber_irq(apic, irq); 73.802 + 73.803 break; 73.804 } 73.805 default: 73.806 @@ -579,20 +1026,6 @@ static int pin_2_irq(int idx, int apic, 73.807 } 73.808 } 73.809 73.810 - /* 73.811 - * PCI IRQ command line redirection. Yes, limits are hardcoded. 73.812 - */ 73.813 - if ((pin >= 16) && (pin <= 23)) { 73.814 - if (pirq_entries[pin-16] != -1) { 73.815 - if (!pirq_entries[pin-16]) { 73.816 - printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16); 73.817 - } else { 73.818 - irq = pirq_entries[pin-16]; 73.819 - printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n", 73.820 - pin-16, irq); 73.821 - } 73.822 - } 73.823 - } 73.824 return irq; 73.825 } 73.826 73.827 @@ -613,93 +1046,74 @@ static inline int IO_APIC_irq_trigger(in 73.828 return 0; 73.829 } 73.830 73.831 -int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 }; 73.832 - 73.833 -#ifdef CONFIG_VMX 73.834 -int vector_irq[256]; 73.835 -#endif 73.836 +/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ 73.837 +u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; 73.838 73.839 int assign_irq_vector(int irq) 73.840 { 73.841 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 73.842 - if (IO_APIC_VECTOR(irq) > 0) 73.843 + 73.844 + BUG_ON(irq >= NR_IRQ_VECTORS); 73.845 + if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 73.846 return IO_APIC_VECTOR(irq); 73.847 next: 73.848 current_vector += 8; 73.849 73.850 - /* Skip the hypercall vector. */ 73.851 - if (current_vector == HYPERCALL_VECTOR) goto next; 73.852 - /* Skip the Linux/BSD fast-trap vector. */ 73.853 - if (current_vector == 0x80) goto next; 73.854 + /* Skip the hypercall vector. */ 73.855 + if (current_vector == HYPERCALL_VECTOR) 73.856 + goto next; 73.857 73.858 - if (current_vector > FIRST_SYSTEM_VECTOR) { 73.859 + /* Skip the Linux/BSD fast-trap vector. 
*/ 73.860 + if (current_vector == 0x80) 73.861 + goto next; 73.862 + 73.863 + if (current_vector >= FIRST_SYSTEM_VECTOR) { 73.864 offset++; 73.865 + if (!(offset%8)) 73.866 + return -ENOSPC; 73.867 current_vector = FIRST_DEVICE_VECTOR + offset; 73.868 } 73.869 73.870 - if (current_vector == FIRST_SYSTEM_VECTOR) 73.871 - panic("ran out of interrupt sources!"); 73.872 + vector_irq[current_vector] = irq; 73.873 + if (irq != AUTO_ASSIGN) 73.874 + IO_APIC_VECTOR(irq) = current_vector; 73.875 73.876 - IO_APIC_VECTOR(irq) = current_vector; 73.877 -#ifdef CONFIG_VMX 73.878 - vector_irq[current_vector] = irq; 73.879 - printk("vector_irq[%x] = %d\n", current_vector, irq); 73.880 -#endif 73.881 return current_vector; 73.882 } 73.883 73.884 -extern void (*interrupt[NR_IRQS])(void); 73.885 +static struct hw_interrupt_type ioapic_level_type; 73.886 +static struct hw_interrupt_type ioapic_edge_type; 73.887 73.888 -/* 73.889 - * Level and edge triggered IO-APIC interrupts need different handling, 73.890 - * so we use two separate IRQ descriptors. Edge triggered IRQs can be 73.891 - * handled with the level-triggered descriptor, but that one has slightly 73.892 - * more overhead. Level-triggered interrupts cannot be handled with the 73.893 - * edge-triggered handler, without risking IRQ storms and other ugly 73.894 - * races. 73.895 - */ 73.896 +#define IOAPIC_AUTO -1 73.897 +#define IOAPIC_EDGE 0 73.898 +#define IOAPIC_LEVEL 1 73.899 73.900 -static unsigned int startup_edge_ioapic_irq(unsigned int irq); 73.901 -#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq 73.902 -#define enable_edge_ioapic_irq unmask_IO_APIC_irq 73.903 -static void disable_edge_ioapic_irq (unsigned int irq); 73.904 -static void ack_edge_ioapic_irq(unsigned int irq); 73.905 -static void end_edge_ioapic_irq (unsigned int i); 73.906 -static struct hw_interrupt_type ioapic_edge_irq_type = { 73.907 - "IO-APIC-edge", 73.908 - startup_edge_ioapic_irq, 73.909 - shutdown_edge_ioapic_irq, 73.910 - enable_edge_ioapic_irq, 73.911 - disable_edge_ioapic_irq, 73.912 - ack_edge_ioapic_irq, 73.913 - end_edge_ioapic_irq, 73.914 - set_ioapic_affinity, 73.915 -}; 73.916 - 73.917 -static unsigned int startup_level_ioapic_irq (unsigned int irq); 73.918 -#define shutdown_level_ioapic_irq mask_IO_APIC_irq 73.919 -#define enable_level_ioapic_irq unmask_IO_APIC_irq 73.920 -#define disable_level_ioapic_irq mask_IO_APIC_irq 73.921 -static void mask_and_ack_level_ioapic_irq (unsigned int irq); 73.922 -static void end_level_ioapic_irq (unsigned int irq); 73.923 -static struct hw_interrupt_type ioapic_level_irq_type = { 73.924 - "IO-APIC-level", 73.925 - startup_level_ioapic_irq, 73.926 - shutdown_level_ioapic_irq, 73.927 - enable_level_ioapic_irq, 73.928 - disable_level_ioapic_irq, 73.929 - mask_and_ack_level_ioapic_irq, 73.930 - end_level_ioapic_irq, 73.931 - set_ioapic_affinity, 73.932 -}; 73.933 +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) 73.934 +{ 73.935 + if (use_pci_vector() && !platform_legacy_irq(irq)) { 73.936 + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 73.937 + trigger == IOAPIC_LEVEL) 73.938 + irq_desc[vector].handler = &ioapic_level_type; 73.939 + else 73.940 + irq_desc[vector].handler = &ioapic_edge_type; 73.941 + set_intr_gate(vector, interrupt[vector]); 73.942 + } else { 73.943 + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 73.944 + trigger == IOAPIC_LEVEL) 73.945 + irq_desc[irq].handler = &ioapic_level_type; 73.946 + else 73.947 + irq_desc[irq].handler = 
&ioapic_edge_type; 73.948 + set_intr_gate(vector, interrupt[irq]); 73.949 + } 73.950 +} 73.951 73.952 void __init setup_IO_APIC_irqs(void) 73.953 { 73.954 struct IO_APIC_route_entry entry; 73.955 - int apic, pin, idx, irq, vector; 73.956 + int apic, pin, idx, irq, first_notcon = 1, vector; 73.957 unsigned long flags; 73.958 73.959 - printk(KERN_DEBUG "init IO_APIC IRQs\n"); 73.960 + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 73.961 73.962 for (apic = 0; apic < nr_ioapics; apic++) { 73.963 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 73.964 @@ -710,13 +1124,24 @@ void __init setup_IO_APIC_irqs(void) 73.965 memset(&entry,0,sizeof(entry)); 73.966 73.967 entry.delivery_mode = INT_DELIVERY_MODE; 73.968 - entry.dest_mode = (INT_DEST_ADDR_MODE != 0); 73.969 + entry.dest_mode = INT_DEST_MODE; 73.970 entry.mask = 0; /* enable IRQ */ 73.971 - entry.dest.logical.logical_dest = target_cpus(); 73.972 + entry.dest.logical.logical_dest = 73.973 + cpu_mask_to_apicid(TARGET_CPUS); 73.974 73.975 idx = find_irq_entry(apic,pin,mp_INT); 73.976 - if (idx == -1) 73.977 + if (idx == -1) { 73.978 + if (first_notcon) { 73.979 + apic_printk(APIC_VERBOSE, KERN_DEBUG 73.980 + " IO-APIC (apicid-pin) %d-%d", 73.981 + mp_ioapics[apic].mpc_apicid, 73.982 + pin); 73.983 + first_notcon = 0; 73.984 + } else 73.985 + apic_printk(APIC_VERBOSE, ", %d-%d", 73.986 + mp_ioapics[apic].mpc_apicid, pin); 73.987 continue; 73.988 + } 73.989 73.990 entry.trigger = irq_trigger(idx); 73.991 entry.polarity = irq_polarity(idx); 73.992 @@ -731,8 +1156,7 @@ void __init setup_IO_APIC_irqs(void) 73.993 * skip adding the timer int on secondary nodes, which causes 73.994 * a small but painful rift in the time-space continuum 73.995 */ 73.996 - if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) 73.997 - && (apic != 0) && (irq == 0)) 73.998 + if (multi_timer_check(apic, irq)) 73.999 continue; 73.1000 else 73.1001 add_pin_to_irq(irq, apic, pin); 73.1002 @@ -743,13 +1167,7 @@ void __init setup_IO_APIC_irqs(void) 73.1003 if (IO_APIC_IRQ(irq)) { 73.1004 vector = assign_irq_vector(irq); 73.1005 entry.vector = vector; 73.1006 - 73.1007 - if (IO_APIC_irq_trigger(irq)) 73.1008 - irq_desc[irq].handler = &ioapic_level_irq_type; 73.1009 - else 73.1010 - irq_desc[irq].handler = &ioapic_edge_irq_type; 73.1011 - 73.1012 - set_intr_gate(vector, interrupt[irq]); 73.1013 + ioapic_register_intr(irq, vector, IOAPIC_AUTO); 73.1014 73.1015 if (!apic && (irq < 16)) 73.1016 disable_8259A_irq(irq); 73.1017 @@ -760,11 +1178,13 @@ void __init setup_IO_APIC_irqs(void) 73.1018 spin_unlock_irqrestore(&ioapic_lock, flags); 73.1019 } 73.1020 } 73.1021 + 73.1022 + if (!first_notcon) 73.1023 + apic_printk(APIC_VERBOSE, " not connected.\n"); 73.1024 } 73.1025 73.1026 /* 73.1027 - * Set up the 8259A-master output pin as broadcast to all 73.1028 - * CPUs. 73.1029 + * Set up the 8259A-master output pin: 73.1030 */ 73.1031 void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) 73.1032 { 73.1033 @@ -782,9 +1202,9 @@ void __init setup_ExtINT_IRQ0_pin(unsign 73.1034 * We use logical delivery to get the timer IRQ 73.1035 * to the first CPU. 
73.1036 */ 73.1037 - entry.dest_mode = (INT_DEST_ADDR_MODE != 0); 73.1038 + entry.dest_mode = INT_DEST_MODE; 73.1039 entry.mask = 0; /* unmask IRQ now */ 73.1040 - entry.dest.logical.logical_dest = target_cpus(); 73.1041 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); 73.1042 entry.delivery_mode = INT_DELIVERY_MODE; 73.1043 entry.polarity = 0; 73.1044 entry.trigger = 0; 73.1045 @@ -794,7 +1214,7 @@ void __init setup_ExtINT_IRQ0_pin(unsign 73.1046 * The timer IRQ doesn't have to know that behind the 73.1047 * scene we have a 8259A-master in AEOI mode ... 73.1048 */ 73.1049 - irq_desc[0].handler = &ioapic_edge_irq_type; 73.1050 + irq_desc[0].handler = &ioapic_edge_type; 73.1051 73.1052 /* 73.1053 * Add it to the IO-APIC irq-routing table: 73.1054 @@ -807,23 +1227,23 @@ void __init setup_ExtINT_IRQ0_pin(unsign 73.1055 enable_8259A_irq(0); 73.1056 } 73.1057 73.1058 -void __init UNEXPECTED_IO_APIC(void) 73.1059 +static inline void UNEXPECTED_IO_APIC(void) 73.1060 { 73.1061 - printk(KERN_WARNING 73.1062 - "An unexpected IO-APIC was found. If this kernel release is less than\n" 73.1063 - "three months old please report this to linux-smp@vger.kernel.org\n"); 73.1064 } 73.1065 73.1066 void __init print_IO_APIC(void) 73.1067 { 73.1068 -#ifdef VERBOSE 73.1069 +#ifndef NDEBUG 73.1070 int apic, i; 73.1071 - struct IO_APIC_reg_00 reg_00; 73.1072 - struct IO_APIC_reg_01 reg_01; 73.1073 - struct IO_APIC_reg_02 reg_02; 73.1074 - struct IO_APIC_reg_03 reg_03; 73.1075 + union IO_APIC_reg_00 reg_00; 73.1076 + union IO_APIC_reg_01 reg_01; 73.1077 + union IO_APIC_reg_02 reg_02; 73.1078 + union IO_APIC_reg_03 reg_03; 73.1079 unsigned long flags; 73.1080 73.1081 + if (apic_verbosity == APIC_QUIET) 73.1082 + return; 73.1083 + 73.1084 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 73.1085 for (i = 0; i < nr_ioapics; i++) 73.1086 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 73.1087 @@ -838,47 +1258,46 @@ void __init print_IO_APIC(void) 73.1088 for (apic = 0; apic < nr_ioapics; apic++) { 73.1089 73.1090 spin_lock_irqsave(&ioapic_lock, flags); 73.1091 - *(int *)&reg_00 = io_apic_read(apic, 0); 73.1092 - *(int *)&reg_01 = io_apic_read(apic, 1); 73.1093 - if (reg_01.version >= 0x10) 73.1094 - *(int *)&reg_02 = io_apic_read(apic, 2); 73.1095 - if (reg_01.version >= 0x20) 73.1096 - *(int *)&reg_03 = io_apic_read(apic, 3); 73.1097 + reg_00.raw = io_apic_read(apic, 0); 73.1098 + reg_01.raw = io_apic_read(apic, 1); 73.1099 + if (reg_01.bits.version >= 0x10) 73.1100 + reg_02.raw = io_apic_read(apic, 2); 73.1101 + if (reg_01.bits.version >= 0x20) 73.1102 + reg_03.raw = io_apic_read(apic, 3); 73.1103 spin_unlock_irqrestore(&ioapic_lock, flags); 73.1104 73.1105 - printk("\n"); 73.1106 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 73.1107 - printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00); 73.1108 - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID); 73.1109 - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.delivery_type); 73.1110 - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.LTS); 73.1111 - if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2) 73.1112 + printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 73.1113 + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 73.1114 + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 73.1115 + printk(KERN_DEBUG ".......
: LTS : %X\n", reg_00.bits.LTS); 73.1116 + if (reg_00.bits.ID >= get_physical_broadcast()) 73.1117 + UNEXPECTED_IO_APIC(); 73.1118 + if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) 73.1119 UNEXPECTED_IO_APIC(); 73.1120 73.1121 - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); 73.1122 - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries); 73.1123 - if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */ 73.1124 - (reg_01.entries != 0x17) && /* typical ISA+PCI boards */ 73.1125 - (reg_01.entries != 0x1b) && /* Compaq Proliant boards */ 73.1126 - (reg_01.entries != 0x1f) && /* dual Xeon boards */ 73.1127 - (reg_01.entries != 0x22) && /* bigger Xeon boards */ 73.1128 - (reg_01.entries != 0x2E) && 73.1129 - (reg_01.entries != 0x3F) 73.1130 + printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); 73.1131 + printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 73.1132 + if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ 73.1133 + (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ 73.1134 + (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ 73.1135 + (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ 73.1136 + (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ 73.1137 + (reg_01.bits.entries != 0x2E) && 73.1138 + (reg_01.bits.entries != 0x3F) 73.1139 ) 73.1140 UNEXPECTED_IO_APIC(); 73.1141 73.1142 - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ); 73.1143 - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version); 73.1144 - if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */ 73.1145 - (reg_01.version != 0x02) && /* VIA */ 73.1146 - (reg_01.version != 0x03) && /* later VIA */ 73.1147 - (reg_01.version != 0x10) && /* oldest IO-APICs */ 73.1148 - (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */ 73.1149 - (reg_01.version != 0x13) && /* Xeon IO-APICs */ 73.1150 - (reg_01.version != 0x20) /* Intel P64H (82806 AA) */ 73.1151 + printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 73.1152 + printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 73.1153 + if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ 73.1154 + (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ 73.1155 + (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ 73.1156 + (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ 73.1157 + (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ 73.1158 ) 73.1159 UNEXPECTED_IO_APIC(); 73.1160 - if (reg_01.__reserved_1 || reg_01.__reserved_2) 73.1161 + if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) 73.1162 UNEXPECTED_IO_APIC(); 73.1163 73.1164 /* 73.1165 @@ -886,10 +1305,10 @@ void __init print_IO_APIC(void) 73.1166 * but the value of reg_02 is read as the previous read register 73.1167 * value, so ignore it if reg_02 == reg_01. 73.1168 */ 73.1169 - if (reg_01.version >= 0x10 && *(int *)&reg_02 != *(int *)&reg_01) { 73.1170 - printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02); 73.1171 - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration); 73.1172 - if (reg_02.__reserved_1 || reg_02.__reserved_2) 73.1173 + if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { 73.1174 + printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 73.1175 + printk(KERN_DEBUG ".......
: arbitration: %02X\n", reg_02.bits.arbitration); 73.1176 + if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) 73.1177 UNEXPECTED_IO_APIC(); 73.1178 } 73.1179 73.1180 @@ -898,11 +1317,11 @@ void __init print_IO_APIC(void) 73.1181 * or reg_03, but the value of reg_0[23] is read as the previous read 73.1182 * register value, so ignore it if reg_03 == reg_0[12]. 73.1183 */ 73.1184 - if (reg_01.version >= 0x20 && *(int *)&reg_03 != *(int *)&reg_02 && 73.1185 - *(int *)&reg_03 != *(int *)&reg_01) { 73.1186 - printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)&reg_03); 73.1187 - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.boot_DT); 73.1188 - if (reg_03.__reserved_1) 73.1189 + if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && 73.1190 + reg_03.raw != reg_01.raw) { 73.1191 + printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); 73.1192 + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); 73.1193 + if (reg_03.bits.__reserved_1) 73.1194 UNEXPECTED_IO_APIC(); 73.1195 } 73.1196 73.1197 @@ -911,7 +1330,7 @@ void __init print_IO_APIC(void) 73.1198 printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" 73.1199 " Stat Dest Deli Vect: \n"); 73.1200 73.1201 - for (i = 0; i <= reg_01.entries; i++) { 73.1202 + for (i = 0; i <= reg_01.bits.entries; i++) { 73.1203 struct IO_APIC_route_entry entry; 73.1204 73.1205 spin_lock_irqsave(&ioapic_lock, flags); 73.1206 @@ -937,12 +1356,17 @@ void __init print_IO_APIC(void) 73.1207 ); 73.1208 } 73.1209 } 73.1210 + if (use_pci_vector()) 73.1211 + printk(KERN_INFO "Using vector-based indexing\n"); 73.1212 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 73.1213 for (i = 0; i < NR_IRQS; i++) { 73.1214 struct irq_pin_list *entry = irq_2_pin + i; 73.1215 if (entry->pin < 0) 73.1216 continue; 73.1217 - printk(KERN_DEBUG "IRQ%d ", i); 73.1218 + if (use_pci_vector() && !platform_legacy_irq(i)) 73.1219 + printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); 73.1220 + else 73.1221 + printk(KERN_DEBUG "IRQ%d ", i); 73.1222 for (;;) { 73.1223 printk("-> %d:%d", entry->apic, entry->pin); 73.1224 if (!entry->next) 73.1225 break; 73.1226 @@ -953,152 +1377,13 @@ void __init print_IO_APIC(void) 73.1227 } 73.1228 73.1229 printk(KERN_INFO ".................................... done.\n"); 73.1230 -#endif 73.1231 -} 73.1232 - 73.1233 -#if 0 /* Maybe useful for debugging, but not currently used anywhere. */ 73.1234 - 73.1235 -static void print_APIC_bitfield (int base) 73.1236 -{ 73.1237 - unsigned int v; 73.1238 - int i, j; 73.1239 - 73.1240 - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); 73.1241 - for (i = 0; i < 8; i++) { 73.1242 - v = apic_read(base + i*0x10); 73.1243 - for (j = 0; j < 32; j++) { 73.1244 - if (v & (1<<j)) 73.1245 - printk("1"); 73.1246 - else 73.1247 - printk("0"); 73.1248 - } 73.1249 - printk("\n"); 73.1250 - } 73.1251 +#endif /* !NDEBUG */ 73.1252 + return; 73.1253 } 73.1254 73.1255 - 73.1256 -void /*__init*/ print_local_APIC(void * dummy) 73.1257 -{ 73.1258 - unsigned int v, ver, maxlvt; 73.1259 - 73.1260 - printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 73.1261 - smp_processor_id(), hard_smp_processor_id()); 73.1262 - v = apic_read(APIC_ID); 73.1263 - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v)); 73.1264 - v = apic_read(APIC_LVR); 73.1265 - printk(KERN_INFO "... APIC VERSION: %08x\n", v); 73.1266 - ver = GET_APIC_VERSION(v); 73.1267 - maxlvt = get_maxlvt(); 73.1268 - 73.1269 - v = apic_read(APIC_TASKPRI); 73.1270 - printk(KERN_DEBUG "...
APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); 73.1271 - 73.1272 - if (APIC_INTEGRATED(ver)) { /* !82489DX */ 73.1273 - v = apic_read(APIC_ARBPRI); 73.1274 - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, 73.1275 - v & APIC_ARBPRI_MASK); 73.1276 - v = apic_read(APIC_PROCPRI); 73.1277 - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); 73.1278 - } 73.1279 - 73.1280 - v = apic_read(APIC_EOI); 73.1281 - printk(KERN_DEBUG "... APIC EOI: %08x\n", v); 73.1282 - v = apic_read(APIC_RRR); 73.1283 - printk(KERN_DEBUG "... APIC RRR: %08x\n", v); 73.1284 - v = apic_read(APIC_LDR); 73.1285 - printk(KERN_DEBUG "... APIC LDR: %08x\n", v); 73.1286 - v = apic_read(APIC_DFR); 73.1287 - printk(KERN_DEBUG "... APIC DFR: %08x\n", v); 73.1288 - v = apic_read(APIC_SPIV); 73.1289 - printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); 73.1290 - 73.1291 - printk(KERN_DEBUG "... APIC ISR field:\n"); 73.1292 - print_APIC_bitfield(APIC_ISR); 73.1293 - printk(KERN_DEBUG "... APIC TMR field:\n"); 73.1294 - print_APIC_bitfield(APIC_TMR); 73.1295 - printk(KERN_DEBUG "... APIC IRR field:\n"); 73.1296 - print_APIC_bitfield(APIC_IRR); 73.1297 - 73.1298 - if (APIC_INTEGRATED(ver)) { /* !82489DX */ 73.1299 - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 73.1300 - apic_write(APIC_ESR, 0); 73.1301 - v = apic_read(APIC_ESR); 73.1302 - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 73.1303 - } 73.1304 - 73.1305 - v = apic_read(APIC_ICR); 73.1306 - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 73.1307 - v = apic_read(APIC_ICR2); 73.1308 - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); 73.1309 - 73.1310 - v = apic_read(APIC_LVTT); 73.1311 - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 73.1312 - 73.1313 - if (maxlvt > 3) { /* PC is LVT#4. */ 73.1314 - v = apic_read(APIC_LVTPC); 73.1315 - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); 73.1316 - } 73.1317 - v = apic_read(APIC_LVT0); 73.1318 - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); 73.1319 - v = apic_read(APIC_LVT1); 73.1320 - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); 73.1321 - 73.1322 - if (maxlvt > 2) { /* ERR is LVT#3. */ 73.1323 - v = apic_read(APIC_LVTERR); 73.1324 - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); 73.1325 - } 73.1326 - 73.1327 - v = apic_read(APIC_TMICT); 73.1328 - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); 73.1329 - v = apic_read(APIC_TMCCT); 73.1330 - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); 73.1331 - v = apic_read(APIC_TDCR); 73.1332 - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); 73.1333 - printk("\n"); 73.1334 -} 73.1335 - 73.1336 -void print_all_local_APICs (void) 73.1337 -{ 73.1338 - smp_call_function(print_local_APIC, NULL, 1, 1); 73.1339 - print_local_APIC(NULL); 73.1340 -} 73.1341 - 73.1342 -void /*__init*/ print_PIC(void) 73.1343 -{ 73.1344 - extern spinlock_t i8259A_lock; 73.1345 - unsigned int v, flags; 73.1346 - 73.1347 - printk(KERN_DEBUG "\nprinting PIC contents\n"); 73.1348 - 73.1349 - spin_lock_irqsave(&i8259A_lock, flags); 73.1350 - 73.1351 - v = inb(0xa1) << 8 | inb(0x21); 73.1352 - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); 73.1353 - 73.1354 - v = inb(0xa0) << 8 | inb(0x20); 73.1355 - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); 73.1356 - 73.1357 - outb(0x0b,0xa0); 73.1358 - outb(0x0b,0x20); 73.1359 - v = inb(0xa0) << 8 | inb(0x20); 73.1360 - outb(0x0a,0xa0); 73.1361 - outb(0x0a,0x20); 73.1362 - 73.1363 - spin_unlock_irqrestore(&i8259A_lock, flags); 73.1364 - 73.1365 - printk(KERN_DEBUG "... 
PIC ISR: %04x\n", v); 73.1366 - 73.1367 - v = inb(0x4d1) << 8 | inb(0x4d0); 73.1368 - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 73.1369 -} 73.1370 - 73.1371 -#endif /* 0 */ 73.1372 - 73.1373 - 73.1374 static void __init enable_IO_APIC(void) 73.1375 { 73.1376 - struct IO_APIC_reg_01 reg_01; 73.1377 + union IO_APIC_reg_01 reg_01; 73.1378 int i; 73.1379 unsigned long flags; 73.1380 73.1381 @@ -1106,18 +1391,15 @@ static void __init enable_IO_APIC(void) 73.1382 irq_2_pin[i].pin = -1; 73.1383 irq_2_pin[i].next = 0; 73.1384 } 73.1385 - if (!pirqs_enabled) 73.1386 - for (i = 0; i < MAX_PIRQS; i++) 73.1387 - pirq_entries[i] = -1; 73.1388 73.1389 /* 73.1390 * The number of IO-APIC IRQ registers (== #pins): 73.1391 */ 73.1392 for (i = 0; i < nr_ioapics; i++) { 73.1393 spin_lock_irqsave(&ioapic_lock, flags); 73.1394 - *(int *)&reg_01 = io_apic_read(i, 1); 73.1395 + reg_01.raw = io_apic_read(i, 1); 73.1396 spin_unlock_irqrestore(&ioapic_lock, flags); 73.1397 - nr_ioapic_registers[i] = reg_01.entries+1; 73.1398 + nr_ioapic_registers[i] = reg_01.bits.entries+1; 73.1399 } 73.1400 73.1401 /* 73.1402 @@ -1146,18 +1428,22 @@ void disable_IO_APIC(void) 73.1403 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 73.1404 */ 73.1405 73.1406 -static void __init setup_ioapic_ids_from_mpc (void) 73.1407 +#ifndef CONFIG_X86_NUMAQ 73.1408 +static void __init setup_ioapic_ids_from_mpc(void) 73.1409 { 73.1410 - struct IO_APIC_reg_00 reg_00; 73.1411 - unsigned long phys_id_present_map = phys_cpu_present_map; 73.1412 + union IO_APIC_reg_00 reg_00; 73.1413 + physid_mask_t phys_id_present_map; 73.1414 int apic; 73.1415 int i; 73.1416 unsigned char old_id; 73.1417 unsigned long flags; 73.1418 73.1419 - if (clustered_apic_mode) 73.1420 - /* We don't have a good way to do this yet - hack */ 73.1421 - phys_id_present_map = (u_long) 0xf; 73.1422 + /* 73.1423 + * This is broken; anything with a real cpu count has to 73.1424 + * circumvent this idiocy regardless. 73.1425 + */ 73.1426 + phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); 73.1427 + 73.1428 /* 73.1429 * Set the IOAPIC ID to the value stored in the MPC table. 73.1430 */ 73.1431 @@ -1165,41 +1451,48 @@ static void __init setup_ioapic_ids_from 73.1432 73.1433 /* Read the register 0 value */ 73.1434 spin_lock_irqsave(&ioapic_lock, flags); 73.1435 - *(int *)&reg_00 = io_apic_read(apic, 0); 73.1436 + reg_00.raw = io_apic_read(apic, 0); 73.1437 spin_unlock_irqrestore(&ioapic_lock, flags); 73.1438 73.1439 old_id = mp_ioapics[apic].mpc_apicid; 73.1440 73.1441 - if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) { 73.1442 + if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { 73.1443 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 73.1444 apic, mp_ioapics[apic].mpc_apicid); 73.1445 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 73.1446 - reg_00.ID); 73.1447 - mp_ioapics[apic].mpc_apicid = reg_00.ID; 73.1448 + reg_00.bits.ID); 73.1449 + mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; 73.1450 } 73.1451 73.1452 + /* Don't check I/O APIC IDs for some xAPIC systems. They have 73.1453 + * no meaning without the serial APIC bus. */ 73.1454 + if (NO_IOAPIC_CHECK) 73.1455 + continue; 73.1456 /* 73.1457 * Sanity check, is the ID really free? Every APIC in a 73.1458 * system must have a unique ID or we get lots of nice 73.1459 * 'stuck on smp_invalidate_needed IPI wait' messages. 73.1460 - * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
73.1461 */ 73.1462 - if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) && 73.1463 - (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) { 73.1464 + if (check_apicid_used(phys_id_present_map, 73.1465 + mp_ioapics[apic].mpc_apicid)) { 73.1466 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 73.1467 apic, mp_ioapics[apic].mpc_apicid); 73.1468 - for (i = 0; i < 0xf; i++) 73.1469 - if (!(phys_id_present_map & (1 << i))) 73.1470 + for (i = 0; i < get_physical_broadcast(); i++) 73.1471 + if (!physid_isset(i, phys_id_present_map)) 73.1472 break; 73.1473 - if (i >= apic_broadcast_id) 73.1474 + if (i >= get_physical_broadcast()) 73.1475 panic("Max APIC ID exceeded!\n"); 73.1476 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 73.1477 i); 73.1478 - phys_id_present_map |= 1 << i; 73.1479 + physid_set(i, phys_id_present_map); 73.1480 mp_ioapics[apic].mpc_apicid = i; 73.1481 } else { 73.1482 - printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid); 73.1483 - phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid; 73.1484 + physid_mask_t tmp; 73.1485 + tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); 73.1486 + apic_printk(APIC_VERBOSE, "Setting %d in the " 73.1487 + "phys_id_present_map\n", 73.1488 + mp_ioapics[apic].mpc_apicid); 73.1489 + physids_or(phys_id_present_map, phys_id_present_map, tmp); 73.1490 } 73.1491 73.1492 73.1493 @@ -1217,26 +1510,30 @@ static void __init setup_ioapic_ids_from 73.1494 * Read the right value from the MPC table and 73.1495 * write it into the ID register. 73.1496 */ 73.1497 - printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", 73.1498 - mp_ioapics[apic].mpc_apicid); 73.1499 + apic_printk(APIC_VERBOSE, KERN_INFO 73.1500 + "...changing IO-APIC physical APIC ID to %d ...", 73.1501 + mp_ioapics[apic].mpc_apicid); 73.1502 73.1503 - reg_00.ID = mp_ioapics[apic].mpc_apicid; 73.1504 + reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; 73.1505 spin_lock_irqsave(&ioapic_lock, flags); 73.1506 - io_apic_write(apic, 0, *(int *)®_00); 73.1507 + io_apic_write(apic, 0, reg_00.raw); 73.1508 spin_unlock_irqrestore(&ioapic_lock, flags); 73.1509 73.1510 /* 73.1511 * Sanity check 73.1512 */ 73.1513 spin_lock_irqsave(&ioapic_lock, flags); 73.1514 - *(int *)®_00 = io_apic_read(apic, 0); 73.1515 + reg_00.raw = io_apic_read(apic, 0); 73.1516 spin_unlock_irqrestore(&ioapic_lock, flags); 73.1517 - if (reg_00.ID != mp_ioapics[apic].mpc_apicid) 73.1518 - panic("could not set ID!\n"); 73.1519 + if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) 73.1520 + printk("could not set ID!\n"); 73.1521 else 73.1522 - printk(" ok.\n"); 73.1523 + apic_printk(APIC_VERBOSE, " ok.\n"); 73.1524 } 73.1525 } 73.1526 +#else 73.1527 +static void __init setup_ioapic_ids_from_mpc(void) { } 73.1528 +#endif 73.1529 73.1530 /* 73.1531 * There is a nasty bug in some older SMP boards, their mptable lies 73.1532 @@ -1248,9 +1545,9 @@ static void __init setup_ioapic_ids_from 73.1533 */ 73.1534 static int __init timer_irq_works(void) 73.1535 { 73.1536 - unsigned int t1 = jiffies; 73.1537 + unsigned long t1 = jiffies; 73.1538 73.1539 - __sti(); 73.1540 + local_irq_enable(); 73.1541 /* Let ten ticks pass... 
*/ 73.1542 mdelay((10 * 1000) / HZ); 73.1543 73.1544 @@ -1267,7 +1564,18 @@ static int __init timer_irq_works(void) 73.1545 return 0; 73.1546 } 73.1547 73.1548 -static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } 73.1549 +/* 73.1550 + * In the SMP+IOAPIC case it might happen that there are an unspecified 73.1551 + * number of pending IRQ events unhandled. These cases are very rare, 73.1552 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much 73.1553 + * better to do it this way as thus we do not have to be aware of 73.1554 + * 'pending' interrupts in the IRQ path, except at this point. 73.1555 + */ 73.1556 +/* 73.1557 + * Edge triggered needs to resend any interrupt 73.1558 + * that was delayed but this is now handled in the device 73.1559 + * independent code. 73.1560 + */ 73.1561 73.1562 /* 73.1563 * Starting up a edge-triggered IO-APIC interrupt is 73.1564 @@ -1278,7 +1586,6 @@ static void disable_edge_ioapic_irq (uns 73.1565 * This is not complete - we should be able to fake 73.1566 * an edge even if it isn't on the 8259A... 73.1567 */ 73.1568 - 73.1569 static unsigned int startup_edge_ioapic_irq(unsigned int irq) 73.1570 { 73.1571 int was_pending = 0; 73.1572 @@ -1303,16 +1610,13 @@ static unsigned int startup_edge_ioapic_ 73.1573 */ 73.1574 static void ack_edge_ioapic_irq(unsigned int irq) 73.1575 { 73.1576 - balance_irq(irq); 73.1577 + move_irq(irq); 73.1578 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) 73.1579 == (IRQ_PENDING | IRQ_DISABLED)) 73.1580 mask_IO_APIC_irq(irq); 73.1581 ack_APIC_irq(); 73.1582 } 73.1583 73.1584 -static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } 73.1585 - 73.1586 - 73.1587 /* 73.1588 * Level triggered interrupts can just be masked, 73.1589 * and shutting down and starting up the interrupt 73.1590 @@ -1334,15 +1638,12 @@ static unsigned int startup_level_ioapic 73.1591 return 0; /* don't check for pending */ 73.1592 } 73.1593 73.1594 -static void mask_and_ack_level_ioapic_irq(unsigned int irq) 73.1595 +static void end_level_ioapic_irq (unsigned int irq) 73.1596 { 73.1597 unsigned long v; 73.1598 int i; 73.1599 73.1600 - balance_irq(irq); 73.1601 - 73.1602 - mask_IO_APIC_irq(irq); 73.1603 - 73.1604 + move_irq(irq); 73.1605 /* 73.1606 * It appears there is an erratum which affects at least version 0x11 73.1607 * of I/O APIC (that's the 82093AA and cores integrated into various 73.1608 @@ -1363,45 +1664,102 @@ static void mask_and_ack_level_ioapic_ir 73.1609 * The idea is from Manfred Spraul. --macro 73.1610 */ 73.1611 i = IO_APIC_VECTOR(irq); 73.1612 + 73.1613 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 73.1614 73.1615 ack_APIC_irq(); 73.1616 73.1617 if (!(v & (1 << (i & 0x1f)))) { 73.1618 -#ifdef APIC_LOCKUP_DEBUG 73.1619 - struct irq_pin_list *entry; 73.1620 -#endif 73.1621 - 73.1622 -#ifdef APIC_MISMATCH_DEBUG 73.1623 atomic_inc(&irq_mis_count); 73.1624 -#endif 73.1625 spin_lock(&ioapic_lock); 73.1626 - __edge_IO_APIC_irq(irq); 73.1627 -#ifdef APIC_LOCKUP_DEBUG 73.1628 - for (entry = irq_2_pin + irq;;) { 73.1629 - unsigned int reg; 73.1630 - 73.1631 - if (entry->pin == -1) 73.1632 - break; 73.1633 - reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2); 73.1634 - if (reg & 0x00004000) 73.1635 - printk(KERN_CRIT "Aieee!!! 
Remote IRR" 73.1636 - " still set after unlock!\n"); 73.1637 - if (!entry->next) 73.1638 - break; 73.1639 - entry = irq_2_pin + entry->next; 73.1640 - } 73.1641 -#endif 73.1642 - __level_IO_APIC_irq(irq); 73.1643 + __mask_and_edge_IO_APIC_irq(irq); 73.1644 + __unmask_and_level_IO_APIC_irq(irq); 73.1645 spin_unlock(&ioapic_lock); 73.1646 } 73.1647 } 73.1648 73.1649 -static void end_level_ioapic_irq(unsigned int irq) 73.1650 +#ifdef CONFIG_PCI_MSI 73.1651 +static unsigned int startup_edge_ioapic_vector(unsigned int vector) 73.1652 +{ 73.1653 + int irq = vector_to_irq(vector); 73.1654 + 73.1655 + return startup_edge_ioapic_irq(irq); 73.1656 +} 73.1657 + 73.1658 +static void ack_edge_ioapic_vector(unsigned int vector) 73.1659 +{ 73.1660 + int irq = vector_to_irq(vector); 73.1661 + 73.1662 + ack_edge_ioapic_irq(irq); 73.1663 +} 73.1664 + 73.1665 +static unsigned int startup_level_ioapic_vector (unsigned int vector) 73.1666 { 73.1667 + int irq = vector_to_irq(vector); 73.1668 + 73.1669 + return startup_level_ioapic_irq (irq); 73.1670 +} 73.1671 + 73.1672 +static void end_level_ioapic_vector (unsigned int vector) 73.1673 +{ 73.1674 + int irq = vector_to_irq(vector); 73.1675 + 73.1676 + end_level_ioapic_irq(irq); 73.1677 +} 73.1678 + 73.1679 +static void mask_IO_APIC_vector (unsigned int vector) 73.1680 +{ 73.1681 + int irq = vector_to_irq(vector); 73.1682 + 73.1683 + mask_IO_APIC_irq(irq); 73.1684 +} 73.1685 + 73.1686 +static void unmask_IO_APIC_vector (unsigned int vector) 73.1687 +{ 73.1688 + int irq = vector_to_irq(vector); 73.1689 + 73.1690 unmask_IO_APIC_irq(irq); 73.1691 } 73.1692 73.1693 +static void set_ioapic_affinity_vector (unsigned int vector, 73.1694 + cpumask_t cpu_mask) 73.1695 +{ 73.1696 + int irq = vector_to_irq(vector); 73.1697 + 73.1698 + set_ioapic_affinity_irq(irq, cpu_mask); 73.1699 +} 73.1700 +#endif 73.1701 + 73.1702 +/* 73.1703 + * Level and edge triggered IO-APIC interrupts need different handling, 73.1704 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be 73.1705 + * handled with the level-triggered descriptor, but that one has slightly 73.1706 + * more overhead. Level-triggered interrupts cannot be handled with the 73.1707 + * edge-triggered handler, without risking IRQ storms and other ugly 73.1708 + * races. 73.1709 + */ 73.1710 +static struct hw_interrupt_type ioapic_edge_type = { 73.1711 + .typename = "IO-APIC-edge", 73.1712 + .startup = startup_edge_ioapic, 73.1713 + .shutdown = shutdown_edge_ioapic, 73.1714 + .enable = enable_edge_ioapic, 73.1715 + .disable = disable_edge_ioapic, 73.1716 + .ack = ack_edge_ioapic, 73.1717 + .end = end_edge_ioapic, 73.1718 + .set_affinity = set_ioapic_affinity, 73.1719 +}; 73.1720 + 73.1721 +static struct hw_interrupt_type ioapic_level_type = { 73.1722 + .typename = "IO-APIC-level", 73.1723 + .startup = startup_level_ioapic, 73.1724 + .shutdown = shutdown_level_ioapic, 73.1725 + .enable = enable_level_ioapic, 73.1726 + .disable = disable_level_ioapic, 73.1727 + .ack = mask_and_ack_level_ioapic, 73.1728 + .end = end_level_ioapic, 73.1729 + .set_affinity = set_ioapic_affinity, 73.1730 +}; 73.1731 + 73.1732 static inline void init_IO_APIC_traps(void) 73.1733 { 73.1734 int irq; 73.1735 @@ -1418,7 +1776,13 @@ static inline void init_IO_APIC_traps(vo 73.1736 * 0x80, because int 0x80 is hm, kind of importantish. 
;) 73.1737 */ 73.1738 for (irq = 0; irq < NR_IRQS ; irq++) { 73.1739 - if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { 73.1740 + int tmp = irq; 73.1741 + if (use_pci_vector()) { 73.1742 + if (!platform_legacy_irq(tmp)) 73.1743 + if ((tmp = vector_to_irq(tmp)) == -1) 73.1744 + continue; 73.1745 + } 73.1746 + if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { 73.1747 /* 73.1748 * Hmm.. We don't have an entry for this, 73.1749 * so default to an old-fashioned 8259 73.1750 @@ -1457,15 +1821,35 @@ static void ack_lapic_irq (unsigned int 73.1751 static void end_lapic_irq (unsigned int i) { /* nothing */ } 73.1752 73.1753 static struct hw_interrupt_type lapic_irq_type = { 73.1754 - "local-APIC-edge", 73.1755 - NULL, /* startup_irq() not used for IRQ0 */ 73.1756 - NULL, /* shutdown_irq() not used for IRQ0 */ 73.1757 - enable_lapic_irq, 73.1758 - disable_lapic_irq, 73.1759 - ack_lapic_irq, 73.1760 - end_lapic_irq 73.1761 + .typename = "local-APIC-edge", 73.1762 + .startup = NULL, /* startup_irq() not used for IRQ0 */ 73.1763 + .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ 73.1764 + .enable = enable_lapic_irq, 73.1765 + .disable = disable_lapic_irq, 73.1766 + .ack = ack_lapic_irq, 73.1767 + .end = end_lapic_irq 73.1768 }; 73.1769 73.1770 +#if 0 73.1771 +static void setup_nmi (void) 73.1772 +{ 73.1773 + /* 73.1774 + * Dirty trick to enable the NMI watchdog ... 73.1775 + * We put the 8259A master into AEOI mode and 73.1776 + * unmask on all local APICs LVT0 as NMI. 73.1777 + * 73.1778 + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') 73.1779 + * is from Maciej W. Rozycki - so we do not have to EOI from 73.1780 + * the NMI handler or the timer interrupt. 73.1781 + */ 73.1782 + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); 73.1783 + 73.1784 + on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); 73.1785 + 73.1786 + apic_printk(APIC_VERBOSE, " done.\n"); 73.1787 +} 73.1788 +#endif 73.1789 + 73.1790 /* 73.1791 * This looks a bit hackish but it's about the only one way of sending 73.1792 * a few INTA cycles to 8259As and any associated glue logic. ICR does 73.1793 @@ -1536,7 +1920,6 @@ static inline void unlock_ExtINT_logic(v 73.1794 */ 73.1795 static inline void check_timer(void) 73.1796 { 73.1797 - extern int timer_ack; 73.1798 int pin1, pin2; 73.1799 int vector; 73.1800 73.1801 @@ -1569,8 +1952,17 @@ static inline void check_timer(void) 73.1802 * Ok, does IRQ0 through the IOAPIC work? 
73.1803 */ 73.1804 unmask_IO_APIC_irq(0); 73.1805 - if (timer_irq_works()) 73.1806 + if (timer_irq_works()) { 73.1807 +#if 0 73.1808 + if (nmi_watchdog == NMI_IO_APIC) { 73.1809 + disable_8259A_irq(0); 73.1810 + setup_nmi(); 73.1811 + enable_8259A_irq(0); 73.1812 + check_nmi_watchdog(); 73.1813 + } 73.1814 +#endif 73.1815 return; 73.1816 + } 73.1817 clear_IO_APIC_pin(0, pin1); 73.1818 printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); 73.1819 } 73.1820 @@ -1588,6 +1980,12 @@ static inline void check_timer(void) 73.1821 replace_pin_at_irq(0, 0, pin1, 0, pin2); 73.1822 else 73.1823 add_pin_to_irq(0, 0, pin2); 73.1824 +#if 0 73.1825 + if (nmi_watchdog == NMI_IO_APIC) { 73.1826 + setup_nmi(); 73.1827 + check_nmi_watchdog(); 73.1828 + } 73.1829 +#endif 73.1830 return; 73.1831 } 73.1832 /* 73.1833 @@ -1597,6 +1995,11 @@ static inline void check_timer(void) 73.1834 } 73.1835 printk(" failed.\n"); 73.1836 73.1837 + if (nmi_watchdog == NMI_IO_APIC) { 73.1838 + printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 73.1839 + nmi_watchdog = 0; 73.1840 + } 73.1841 + 73.1842 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); 73.1843 73.1844 disable_8259A_irq(0); 73.1845 @@ -1613,6 +2016,7 @@ static inline void check_timer(void) 73.1846 73.1847 printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); 73.1848 73.1849 + timer_ack = 0; 73.1850 init_8259A(0); 73.1851 make_8259A_irq(0); 73.1852 apic_write_around(APIC_LVT0, APIC_DM_EXTINT); 73.1853 @@ -1624,31 +2028,40 @@ static inline void check_timer(void) 73.1854 return; 73.1855 } 73.1856 printk(" failed :(.\n"); 73.1857 - panic("IO-APIC + timer doesn't work! pester mingo@redhat.com"); 73.1858 + panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 73.1859 + "report. Then try booting with the 'noapic' option"); 73.1860 +} 73.1861 + 73.1862 +#define NR_IOAPIC_BIOSIDS 256 73.1863 +static u8 ioapic_biosid_to_apic_enum[NR_IOAPIC_BIOSIDS]; 73.1864 +static void store_ioapic_biosid_mapping(void) 73.1865 +{ 73.1866 + u8 apic; 73.1867 + memset(ioapic_biosid_to_apic_enum, ~0, NR_IOAPIC_BIOSIDS); 73.1868 + for ( apic = 0; apic < nr_ioapics; apic++ ) 73.1869 + ioapic_biosid_to_apic_enum[mp_ioapics[apic].mpc_apicid] = apic; 73.1870 } 73.1871 73.1872 /* 73.1873 * 73.1874 - * IRQ's that are handled by the old PIC in all cases: 73.1875 + * IRQ's that are handled by the PIC in the MPS IOAPIC case. 73.1876 * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. 73.1877 * Linux doesn't really care, as it's not actually used 73.1878 * for any interrupt handling anyway. 73.1879 - * - There used to be IRQ13 here as well, but all 73.1880 - * MPS-compliant must not use it for FPU coupling and we 73.1881 - * want to use exception 16 anyway. And there are 73.1882 - * systems who connect it to an I/O APIC for other uses. 73.1883 - * Thus we don't mark it special any longer. 73.1884 - * 73.1885 - * Additionally, something is definitely wrong with irq9 73.1886 - * on PIIX4 boards. 
73.1887 */ 73.1888 -#define PIC_IRQS (1<<2) 73.1889 +#define PIC_IRQS (1 << PIC_CASCADE_IR) 73.1890 73.1891 void __init setup_IO_APIC(void) 73.1892 { 73.1893 + store_ioapic_biosid_mapping(); 73.1894 + 73.1895 enable_IO_APIC(); 73.1896 73.1897 - io_apic_irqs = ~PIC_IRQS; 73.1898 + if (acpi_ioapic) 73.1899 + io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ 73.1900 + else 73.1901 + io_apic_irqs = ~PIC_IRQS; 73.1902 + 73.1903 printk("ENABLING IO-APIC IRQs\n"); 73.1904 73.1905 /* 73.1906 @@ -1660,26 +2073,20 @@ void __init setup_IO_APIC(void) 73.1907 setup_IO_APIC_irqs(); 73.1908 init_IO_APIC_traps(); 73.1909 check_timer(); 73.1910 - if (!acpi_ioapic) 73.1911 - print_IO_APIC(); 73.1912 + print_IO_APIC(); 73.1913 } 73.1914 73.1915 -#endif /* CONFIG_X86_IO_APIC */ 73.1916 - 73.1917 - 73.1918 - 73.1919 /* -------------------------------------------------------------------------- 73.1920 ACPI-based IOAPIC Configuration 73.1921 -------------------------------------------------------------------------- */ 73.1922 73.1923 #ifdef CONFIG_ACPI_BOOT 73.1924 73.1925 -#define IO_APIC_MAX_ID 15 73.1926 - 73.1927 int __init io_apic_get_unique_id (int ioapic, int apic_id) 73.1928 { 73.1929 - struct IO_APIC_reg_00 reg_00; 73.1930 - static unsigned long apic_id_map = 0; 73.1931 + union IO_APIC_reg_00 reg_00; 73.1932 + static physid_mask_t apic_id_map = PHYSID_MASK_NONE; 73.1933 + physid_mask_t tmp; 73.1934 unsigned long flags; 73.1935 int i = 0; 73.1936 73.1937 @@ -1692,38 +2099,31 @@ int __init io_apic_get_unique_id (int io 73.1938 * advantage of new APIC bus architecture. 73.1939 */ 73.1940 73.1941 - if (!apic_id_map) 73.1942 - apic_id_map = phys_cpu_present_map; 73.1943 + if (physids_empty(apic_id_map)) 73.1944 + apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); 73.1945 73.1946 spin_lock_irqsave(&ioapic_lock, flags); 73.1947 - *(int *)®_00 = io_apic_read(ioapic, 0); 73.1948 + reg_00.raw = io_apic_read(ioapic, 0); 73.1949 spin_unlock_irqrestore(&ioapic_lock, flags); 73.1950 73.1951 - if (apic_id >= IO_APIC_MAX_ID) { 73.1952 + if (apic_id >= get_physical_broadcast()) { 73.1953 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " 73.1954 - "%d\n", ioapic, apic_id, reg_00.ID); 73.1955 - apic_id = reg_00.ID; 73.1956 - } 73.1957 - 73.1958 - /* XAPICs do not need unique IDs */ 73.1959 - if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){ 73.1960 - printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", 73.1961 - ioapic, apic_id); 73.1962 - return apic_id; 73.1963 + "%d\n", ioapic, apic_id, reg_00.bits.ID); 73.1964 + apic_id = reg_00.bits.ID; 73.1965 } 73.1966 73.1967 /* 73.1968 * Every APIC in a system must have a unique ID or we get lots of nice 73.1969 * 'stuck on smp_invalidate_needed IPI wait' messages. 
73.1970 */ 73.1971 - if (apic_id_map & (1 << apic_id)) { 73.1972 + if (check_apicid_used(apic_id_map, apic_id)) { 73.1973 73.1974 - for (i = 0; i < IO_APIC_MAX_ID; i++) { 73.1975 - if (!(apic_id_map & (1 << i))) 73.1976 + for (i = 0; i < get_physical_broadcast(); i++) { 73.1977 + if (!check_apicid_used(apic_id_map, i)) 73.1978 break; 73.1979 } 73.1980 73.1981 - if (i == IO_APIC_MAX_ID) 73.1982 + if (i == get_physical_broadcast()) 73.1983 panic("Max apic_id exceeded!\n"); 73.1984 73.1985 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " 73.1986 @@ -1732,22 +2132,24 @@ int __init io_apic_get_unique_id (int io 73.1987 apic_id = i; 73.1988 } 73.1989 73.1990 - apic_id_map |= (1 << apic_id); 73.1991 + tmp = apicid_to_cpu_present(apic_id); 73.1992 + physids_or(apic_id_map, apic_id_map, tmp); 73.1993 73.1994 - if (reg_00.ID != apic_id) { 73.1995 - reg_00.ID = apic_id; 73.1996 + if (reg_00.bits.ID != apic_id) { 73.1997 + reg_00.bits.ID = apic_id; 73.1998 73.1999 spin_lock_irqsave(&ioapic_lock, flags); 73.2000 - io_apic_write(ioapic, 0, *(int *)®_00); 73.2001 - *(int *)®_00 = io_apic_read(ioapic, 0); 73.2002 + io_apic_write(ioapic, 0, reg_00.raw); 73.2003 + reg_00.raw = io_apic_read(ioapic, 0); 73.2004 spin_unlock_irqrestore(&ioapic_lock, flags); 73.2005 73.2006 /* Sanity check */ 73.2007 - if (reg_00.ID != apic_id) 73.2008 + if (reg_00.bits.ID != apic_id) 73.2009 panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); 73.2010 } 73.2011 73.2012 - printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); 73.2013 + apic_printk(APIC_VERBOSE, KERN_INFO 73.2014 + "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); 73.2015 73.2016 return apic_id; 73.2017 } 73.2018 @@ -1755,27 +2157,27 @@ int __init io_apic_get_unique_id (int io 73.2019 73.2020 int __init io_apic_get_version (int ioapic) 73.2021 { 73.2022 - struct IO_APIC_reg_01 reg_01; 73.2023 + union IO_APIC_reg_01 reg_01; 73.2024 unsigned long flags; 73.2025 73.2026 spin_lock_irqsave(&ioapic_lock, flags); 73.2027 - *(int *)®_01 = io_apic_read(ioapic, 1); 73.2028 + reg_01.raw = io_apic_read(ioapic, 1); 73.2029 spin_unlock_irqrestore(&ioapic_lock, flags); 73.2030 73.2031 - return reg_01.version; 73.2032 + return reg_01.bits.version; 73.2033 } 73.2034 73.2035 73.2036 int __init io_apic_get_redir_entries (int ioapic) 73.2037 { 73.2038 - struct IO_APIC_reg_01 reg_01; 73.2039 + union IO_APIC_reg_01 reg_01; 73.2040 unsigned long flags; 73.2041 73.2042 spin_lock_irqsave(&ioapic_lock, flags); 73.2043 - *(int *)®_01 = io_apic_read(ioapic, 1); 73.2044 + reg_01.raw = io_apic_read(ioapic, 1); 73.2045 spin_unlock_irqrestore(&ioapic_lock, flags); 73.2046 73.2047 - return reg_01.entries; 73.2048 + return reg_01.bits.entries; 73.2049 } 73.2050 73.2051 73.2052 @@ -1785,7 +2187,7 @@ int io_apic_set_pci_routing (int ioapic, 73.2053 unsigned long flags; 73.2054 73.2055 if (!IO_APIC_IRQ(irq)) { 73.2056 - printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n", 73.2057 + printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 73.2058 ioapic); 73.2059 return -EINVAL; 73.2060 } 73.2061 @@ -1798,12 +2200,12 @@ int io_apic_set_pci_routing (int ioapic, 73.2062 73.2063 memset(&entry,0,sizeof(entry)); 73.2064 73.2065 - entry.delivery_mode = dest_LowestPrio; 73.2066 - entry.dest_mode = INT_DELIVERY_MODE; 73.2067 - entry.dest.logical.logical_dest = target_cpus(); 73.2068 - entry.mask = 1; /* Disabled (masked) */ 73.2069 + entry.delivery_mode = INT_DELIVERY_MODE; 73.2070 + entry.dest_mode = INT_DEST_MODE; 73.2071 + entry.dest.logical.logical_dest = 
cpu_mask_to_apicid(TARGET_CPUS); 73.2072 entry.trigger = edge_level; 73.2073 entry.polarity = active_high_low; 73.2074 + entry.mask = 1; 73.2075 73.2076 /* 73.2077 * IRQs < 16 are already in the irq_2_pin[] map 73.2078 @@ -1813,17 +2215,12 @@ int io_apic_set_pci_routing (int ioapic, 73.2079 73.2080 entry.vector = assign_irq_vector(irq); 73.2081 73.2082 - printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " 73.2083 - "IRQ %d Mode:%i Active:%i)\n", ioapic, 73.2084 - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); 73.2085 + apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " 73.2086 + "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, 73.2087 + mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, 73.2088 + edge_level, active_high_low); 73.2089 73.2090 - if (edge_level) { 73.2091 - irq_desc[irq].handler = &ioapic_level_irq_type; 73.2092 - } else { 73.2093 - irq_desc[irq].handler = &ioapic_edge_irq_type; 73.2094 - } 73.2095 - 73.2096 - set_intr_gate(entry.vector, interrupt[irq]); 73.2097 + ioapic_register_intr(irq, entry.vector, edge_level); 73.2098 73.2099 if (!ioapic && (irq < 16)) 73.2100 disable_8259A_irq(irq); 73.2101 @@ -1838,114 +2235,82 @@ int io_apic_set_pci_routing (int ioapic, 73.2102 73.2103 #endif /*CONFIG_ACPI_BOOT*/ 73.2104 73.2105 -/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */ 73.2106 -/* level- or edge-triggered. */ 73.2107 -/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */ 73.2108 -static char opt_leveltrigger[30] = "", opt_edgetrigger[30] = ""; 73.2109 -string_param("leveltrigger", opt_leveltrigger); 73.2110 -string_param("edgetrigger", opt_edgetrigger); 73.2111 73.2112 -static int __init ioapic_trigger_setup(void) 73.2113 +int ioapic_guest_read(int apicid, int address, u32 *pval) 73.2114 { 73.2115 - char *p; 73.2116 - irq_desc_t *desc; 73.2117 - long irq; 73.2118 - 73.2119 - p = opt_leveltrigger; 73.2120 - while ( *p != '\0' ) 73.2121 - { 73.2122 - irq = simple_strtol(p, &p, 10); 73.2123 - if ( (irq <= 0) || (irq >= NR_IRQS) ) 73.2124 - { 73.2125 - printk("IRQ '%ld' out of range in level-trigger list '%s'\n", 73.2126 - irq, opt_leveltrigger); 73.2127 - break; 73.2128 - } 73.2129 - 73.2130 - printk("Forcing IRQ %ld to level-trigger: ", irq); 73.2131 - 73.2132 - desc = &irq_desc[irq]; 73.2133 - spin_lock_irq(&desc->lock); 73.2134 + u32 val; 73.2135 + int apicenum; 73.2136 + union IO_APIC_reg_00 reg_00; 73.2137 + unsigned long flags; 73.2138 73.2139 - if ( desc->handler == &ioapic_level_irq_type ) 73.2140 - { 73.2141 - printk("already level-triggered (no force applied).\n"); 73.2142 - } 73.2143 - else if ( desc->handler != &ioapic_edge_irq_type ) 73.2144 - { 73.2145 - printk("cannot force (can only force IO-APIC-edge IRQs).\n"); 73.2146 - } 73.2147 - else 73.2148 - { 73.2149 - desc->handler = &ioapic_level_irq_type; 73.2150 - __mask_IO_APIC_irq(irq); 73.2151 - __level_IO_APIC_irq(irq); 73.2152 - printk("done.\n"); 73.2153 - } 73.2154 + if ( (apicid >= NR_IOAPIC_BIOSIDS) || 73.2155 + ((apicenum = ioapic_biosid_to_apic_enum[apicid]) >= nr_ioapics) ) 73.2156 + return -EINVAL; 73.2157 73.2158 - spin_unlock_irq(&desc->lock); 73.2159 - 73.2160 - if ( *p == '\0' ) 73.2161 - break; 73.2162 + spin_lock_irqsave(&ioapic_lock, flags); 73.2163 + val = io_apic_read(apicenum, address); 73.2164 + spin_unlock_irqrestore(&ioapic_lock, flags); 73.2165 73.2166 - if ( *p != ',' ) 73.2167 - { 73.2168 - printk("Unexpected character '%c' in level-trigger list '%s'\n", 73.2169 - *p, 
opt_leveltrigger); 73.2170 - break; 73.2171 - } 73.2172 - 73.2173 - p++; 73.2174 + /* Rewrite APIC ID to what the BIOS originally specified. */ 73.2175 + if ( address == 0 ) 73.2176 + { 73.2177 + reg_00.raw = val; 73.2178 + reg_00.bits.ID = apicid; 73.2179 + val = reg_00.raw; 73.2180 } 73.2181 73.2182 - p = opt_edgetrigger; 73.2183 - while ( *p != '\0' ) 73.2184 - { 73.2185 - irq = simple_strtol(p, &p, 10); 73.2186 - if ( (irq <= 0) || (irq >= NR_IRQS) ) 73.2187 - { 73.2188 - printk("IRQ '%ld' out of range in edge-trigger list '%s'\n", 73.2189 - irq, opt_edgetrigger); 73.2190 - break; 73.2191 - } 73.2192 + *pval = val; 73.2193 + return 0; 73.2194 +} 73.2195 + 73.2196 +int ioapic_guest_write(int apicid, int address, u32 val) 73.2197 +{ 73.2198 + int apicenum, pin, irq; 73.2199 + struct IO_APIC_route_entry rte = { 0 }; 73.2200 + struct irq_pin_list *entry; 73.2201 + unsigned long flags; 73.2202 + 73.2203 + if ( (apicid >= NR_IOAPIC_BIOSIDS) || 73.2204 + ((apicenum = ioapic_biosid_to_apic_enum[apicid]) >= nr_ioapics) ) 73.2205 + return -EINVAL; 73.2206 73.2207 - printk("Forcing IRQ %ld to edge-trigger: ", irq); 73.2208 + /* Only write to the first half of a route entry. */ 73.2209 + if ( (address < 0x10) || (address & 1) ) 73.2210 + return 0; 73.2211 + 73.2212 + pin = (address - 0x10) >> 1; 73.2213 73.2214 - desc = &irq_desc[irq]; 73.2215 - spin_lock_irq(&desc->lock); 73.2216 + rte.dest.logical.logical_dest = target_cpus(); 73.2217 + *(int *)&rte = val; 73.2218 73.2219 - if ( desc->handler == &ioapic_edge_irq_type ) 73.2220 - { 73.2221 - printk("already edge-triggered (no force applied).\n"); 73.2222 - } 73.2223 - else if ( desc->handler != &ioapic_level_irq_type ) 73.2224 + if ( rte.vector >= FIRST_DEVICE_VECTOR ) 73.2225 + { 73.2226 + /* Is there a valid irq mapped to this vector? */ 73.2227 + irq = vector_irq[rte.vector]; 73.2228 + if ( !IO_APIC_IRQ(irq) ) 73.2229 + return 0; 73.2230 + 73.2231 + /* Set the correct irq-handling type. */ 73.2232 + irq_desc[irq].handler = rte.trigger ? 73.2233 + &ioapic_level_type: &ioapic_edge_type; 73.2234 + 73.2235 + /* Record the pin<->irq mapping. */ 73.2236 + for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] ) 73.2237 { 73.2238 - printk("cannot force (can only force IO-APIC-level IRQs).\n"); 73.2239 + if ( (entry->apic == apicenum) && (entry->pin == pin) ) 73.2240 + break; 73.2241 + if ( !entry->next ) 73.2242 + { 73.2243 + add_pin_to_irq(irq, apicenum, pin); 73.2244 + break; 73.2245 + } 73.2246 } 73.2247 - else 73.2248 - { 73.2249 - desc->handler = &ioapic_edge_irq_type; 73.2250 - __edge_IO_APIC_irq(irq); 73.2251 - desc->status |= IRQ_PENDING; /* may have lost a masked edge */ 73.2252 - printk("done.\n"); 73.2253 - } 73.2254 - 73.2255 - spin_unlock_irq(&desc->lock); 73.2256 + } 73.2257 73.2258 - if ( *p == '\0' ) 73.2259 - break; 73.2260 - 73.2261 - if ( *p != ',' ) 73.2262 - { 73.2263 - printk("Unexpected character '%c' in edge-trigger list '%s'\n", 73.2264 - *p, opt_edgetrigger); 73.2265 - break; 73.2266 - } 73.2267 - 73.2268 - p++; 73.2269 - } 73.2270 + spin_lock_irqsave(&ioapic_lock, flags); 73.2271 + io_apic_write(apicenum, 0x10 + 2 * pin, *(((int *)&rte) + 0)); 73.2272 + io_apic_write(apicenum, 0x11 + 2 * pin, *(((int *)&rte) + 1)); 73.2273 + spin_unlock_irqrestore(&ioapic_lock, flags); 73.2274 73.2275 return 0; 73.2276 } 73.2277 - 73.2278 -__initcall(ioapic_trigger_setup);
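[Editor's note] The io_apic.c changes above do three things: they fold the Linux 2.6.11 IO-APIC rework into Xen (the kirqd balancing thread, apic_printk verbosity levels, NEC98 bus support, union-based register accessors), they replace the old pirq=/leveltrigger=/edgetrigger= command-line machinery with the ioapic_guest_read/ioapic_guest_write entry points keyed by BIOS APIC ID, and they retire the struct-cast register access. That last pattern is the most reusable one: every *(int *)&reg_00 type-pun becomes a union exposing the 32-bit register both as a raw word and as named bitfields. A minimal user-space sketch of the pattern follows; the field widths are abbreviated from the 2.6-era io_apic.h and the layout assumes GCC's little-endian bitfield ordering, so treat it as illustrative rather than a copy of the real header:

    #include <stdint.h>
    #include <stdio.h>

    /* One 32-bit IO-APIC register, viewed either as a raw word (what
     * io_apic_read/io_apic_write traffic in) or as named fields (what
     * print_IO_APIC and the sanity checks want). */
    union io_apic_reg_00 {
        uint32_t raw;
        struct {                        /* low-order fields declared first */
            uint32_t __reserved_2  : 14;
            uint32_t LTS           :  1;
            uint32_t delivery_type :  1;
            uint32_t __reserved_1  :  8;
            uint32_t ID            :  8;
        } bits;
    };

    int main(void)
    {
        union io_apic_reg_00 reg_00;
        reg_00.raw = 0x0f000000;        /* stand-in for io_apic_read(apic, 0) */
        printf("physical APIC id: %02X\n", reg_00.bits.ID);   /* prints 0F */
        return 0;
    }

Compared with the old *(int *)& cast, all raw access is explicit through .raw, so nothing relies on type-punning a struct, and the field names document the register layout at every use site.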
74.1 --- a/xen/arch/x86/irq.c Tue May 03 14:42:00 2005 +0000
74.2 +++ b/xen/arch/x86/irq.c Tue May 10 08:49:59 2005 +0000
74.3 @@ -42,7 +42,6 @@ struct hw_interrupt_type no_irq_type = {
74.4 };
74.5
74.6 atomic_t irq_err_count;
74.7 -atomic_t irq_mis_count;
74.8
74.9 inline void disable_irq_nosync(unsigned int irq)
74.10 {
75.1 --- a/xen/arch/x86/microcode.c Tue May 03 14:42:00 2005 +0000
75.2 +++ b/xen/arch/x86/microcode.c Tue May 10 08:49:59 2005 +0000
75.3 @@ -87,13 +87,6 @@
75.4 #define vmalloc(_s) xmalloc_bytes(_s)
75.5 #define vfree(_p) xfree(_p)
75.6 #define num_online_cpus() smp_num_cpus
75.7 -static inline int on_each_cpu(
75.8 - void (*func) (void *info), void *info, int retry, int wait)
75.9 -{
75.10 - int ret = smp_call_function(func, info, retry, wait);
75.11 - func(info);
75.12 - return ret;
75.13 -}
75.14
75.15 #if 0
75.16 MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
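
The on_each_cpu() shim deleted above follows the standard pattern: smp_call_function() runs the handler on every CPU except the caller, so the caller must also invoke it locally for the update to reach all CPUs. A runnable sketch of the same pattern, with the cross-call faked by a loop so the control flow can be inspected; NR_FAKE_CPUS and both stub names are invented for illustration and are not part of this changeset.

#include <stdio.h>

#define NR_FAKE_CPUS 4 /* hypothetical stand-in for the real CPU count */

/* Stand-in for smp_call_function(): runs func on every "other" CPU. */
static int smp_call_function_stub(void (*func)(void *), void *info)
{
    for ( int cpu = 1; cpu < NR_FAKE_CPUS; cpu++ )
        func(info);
    return 0;
}

static void apply_microcode_stub(void *info)
{
    (void)info;
    printf("microcode update applied on one cpu\n");
}

static int on_each_cpu_sketch(void (*func)(void *), void *info)
{
    int ret = smp_call_function_stub(func, info); /* remote CPUs... */
    func(info);                                   /* ...and the local one */
    return ret;
}

int main(void)
{
    return on_each_cpu_sketch(apply_microcode_stub, NULL);
}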
76.1 --- a/xen/arch/x86/mm.c Tue May 03 14:42:00 2005 +0000
76.2 +++ b/xen/arch/x86/mm.c Tue May 10 08:49:59 2005 +0000
76.3 @@ -316,7 +316,7 @@ int map_ldt_shadow_page(unsigned int off
76.4
76.5 res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
76.6
76.7 - if ( !res && unlikely(shadow_mode_enabled(d)) )
76.8 + if ( !res && unlikely(shadow_mode_refcounts(d)) )
76.9 {
76.10 shadow_lock(d);
76.11 shadow_remove_all_write_access(d, gpfn, gmfn);
76.12 @@ -392,7 +392,7 @@ get_linear_pagetable(
76.13 struct pfn_info *page;
76.14 unsigned long pfn;
76.15
76.16 - ASSERT( !shadow_mode_enabled(d) );
76.17 + ASSERT( !shadow_mode_refcounts(d) );
76.18
76.19 if ( (root_get_flags(re) & _PAGE_RW) )
76.20 {
76.21 @@ -482,7 +482,7 @@ get_page_from_l2e(
76.22 {
76.23 int rc;
76.24
76.25 - ASSERT(!shadow_mode_enabled(d));
76.26 + ASSERT(!shadow_mode_refcounts(d));
76.27
76.28 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
76.29 return 1;
76.30 @@ -512,6 +512,8 @@ static int
76.31 get_page_from_l3e(
76.32 l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
76.33 {
76.34 + ASSERT( !shadow_mode_refcounts(d) );
76.35 +
76.36 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
76.37 return 1;
76.38
76.39 @@ -533,6 +535,8 @@ get_page_from_l4e(
76.40 {
76.41 int rc;
76.42
76.43 + ASSERT( !shadow_mode_refcounts(d) );
76.44 +
76.45 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
76.46 return 1;
76.47
76.48 @@ -641,7 +645,7 @@ static int alloc_l1_table(struct pfn_inf
76.49 l1_pgentry_t *pl1e;
76.50 int i;
76.51
76.52 - ASSERT(!shadow_mode_enabled(d));
76.53 + ASSERT(!shadow_mode_refcounts(d));
76.54
76.55 pl1e = map_domain_mem(pfn << PAGE_SHIFT);
76.56
76.57 @@ -670,10 +674,12 @@ static int alloc_l2_table(struct pfn_inf
76.58 l2_pgentry_t *pl2e;
76.59 int i;
76.60
76.61 + // See the code in shadow_promote() to understand why this is here...
76.62 if ( (PGT_base_page_table == PGT_l2_page_table) &&
76.63 - shadow_mode_enabled(d) )
76.64 + unlikely(shadow_mode_refcounts(d)) )
76.65 return 1;
76.66 - ASSERT( !shadow_mode_enabled(d) );
76.67 +
76.68 + ASSERT( !shadow_mode_refcounts(d) );
76.69
76.70 pl2e = map_domain_mem(pfn << PAGE_SHIFT);
76.71
76.72 @@ -716,7 +722,7 @@ static int alloc_l3_table(struct pfn_inf
76.73 l3_pgentry_t *pl3e = page_to_virt(page);
76.74 int i;
76.75
76.76 - ASSERT( !shadow_mode_enabled(d) );
76.77 + ASSERT( !shadow_mode_refcounts(d) );
76.78
76.79 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
76.80 if ( is_guest_l3_slot(i) &&
76.81 @@ -741,10 +747,12 @@ static int alloc_l4_table(struct pfn_inf
76.82 l4_pgentry_t *pl4e = page_to_virt(page);
76.83 int i;
76.84
76.85 + // See the code in shadow_promote() to understand why this is here...
76.86 if ( (PGT_base_page_table == PGT_l4_page_table) &&
76.87 - shadow_mode_enabled(d) )
76.88 + shadow_mode_refcounts(d) )
76.89 return 1;
76.90 - ASSERT( !shadow_mode_enabled(d) );
76.91 +
76.92 + ASSERT( !shadow_mode_refcounts(d) );
76.93
76.94 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
76.95 if ( is_guest_l4_slot(i) &&
76.96 @@ -861,11 +869,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
76.97 l1_pgentry_t ol1e;
76.98 struct domain *d = current->domain;
76.99
76.100 - ASSERT( !shadow_mode_enabled(d) );
76.101 -
76.102 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
76.103 return 0;
76.104
76.105 + if ( unlikely(shadow_mode_refcounts(d)) )
76.106 + return update_l1e(pl1e, ol1e, nl1e);
76.107 +
76.108 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
76.109 {
76.110 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
76.111 @@ -893,7 +902,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
76.112 if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
76.113 return 0;
76.114 }
76.115 -
76.116 +
76.117 put_page_from_l1e(ol1e, d);
76.118 return 1;
76.119 }
76.120 @@ -1095,8 +1104,19 @@ int alloc_page_type(struct pfn_info *pag
76.121 void free_page_type(struct pfn_info *page, unsigned int type)
76.122 {
76.123 struct domain *owner = page_get_owner(page);
76.124 - if ( likely(owner != NULL) && unlikely(shadow_mode_enabled(owner)) )
76.125 - return;
76.126 + unsigned long gpfn;
76.127 +
76.128 + if ( owner != NULL )
76.129 + {
76.130 + if ( unlikely(shadow_mode_refcounts(owner)) )
76.131 + return;
76.132 + if ( unlikely(shadow_mode_enabled(owner)) )
76.133 + {
76.134 + gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
76.135 + ASSERT(VALID_M2P(gpfn));
76.136 + remove_shadow(owner, gpfn, type);
76.137 + }
76.138 + }
76.139
76.140 switch ( type )
76.141 {
76.142 @@ -1142,7 +1162,7 @@ void put_page_type(struct pfn_info *page
76.143 * See domain.c:relinquish_list().
76.144 */
76.145 ASSERT((x & PGT_validated) ||
76.146 - test_bit(DF_DYING, &page_get_owner(page)->d_flags));
76.147 + test_bit(DF_DYING, &page_get_owner(page)->flags));
76.148
76.149 if ( unlikely((nx & PGT_count_mask) == 0) )
76.150 {
76.151 @@ -1287,7 +1307,7 @@ int new_guest_cr3(unsigned long mfn)
76.152 int okay;
76.153 unsigned long old_base_mfn;
76.154
76.155 - if ( shadow_mode_enabled(d) )
76.156 + if ( shadow_mode_refcounts(d) )
76.157 okay = get_page_from_pagenr(mfn, d);
76.158 else
76.159 okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
76.160 @@ -1296,24 +1316,24 @@ int new_guest_cr3(unsigned long mfn)
76.161 {
76.162 invalidate_shadow_ldt(ed);
76.163
76.164 - old_base_mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
76.165 + old_base_mfn = pagetable_get_pfn(ed->arch.guest_table);
76.166 ed->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
76.167 update_pagetables(ed); /* update shadow_table and monitor_table */
76.168
76.169 write_ptbase(ed);
76.170
76.171 - if ( shadow_mode_enabled(d) )
76.172 + if ( shadow_mode_refcounts(d) )
76.173 put_page(&frame_table[old_base_mfn]);
76.174 else
76.175 put_page_and_type(&frame_table[old_base_mfn]);
76.176
76.177 - /* CR3 holds its own ref to its shadow. */
76.178 + /* CR3 also holds a ref to its shadow... */
76.179 if ( shadow_mode_enabled(d) )
76.180 {
76.181 if ( ed->arch.monitor_shadow_ref )
76.182 put_shadow_ref(ed->arch.monitor_shadow_ref);
76.183 ed->arch.monitor_shadow_ref =
76.184 - pagetable_val(ed->arch.monitor_table) >> PAGE_SHIFT;
76.185 + pagetable_get_pfn(ed->arch.monitor_table);
76.186 ASSERT(!page_get_owner(&frame_table[ed->arch.monitor_shadow_ref]));
76.187 get_shadow_ref(ed->arch.monitor_shadow_ref);
76.188 }
76.189 @@ -1486,7 +1506,7 @@ int do_mmuext_op(
76.190 type = PGT_l1_page_table | PGT_va_mutable;
76.191
76.192 pin_page:
76.193 - if ( shadow_mode_enabled(FOREIGNDOM) )
76.194 + if ( shadow_mode_refcounts(FOREIGNDOM) )
76.195 type = PGT_writable_page;
76.196
76.197 okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
76.198 @@ -1557,7 +1577,7 @@ int do_mmuext_op(
76.199 else
76.200 {
76.201 unsigned long old_mfn =
76.202 - pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
76.203 + pagetable_get_pfn(ed->arch.guest_table_user);
76.204 ed->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT);
76.205 if ( old_mfn != 0 )
76.206 put_page_and_type(&frame_table[old_mfn]);
76.207 @@ -1691,13 +1711,13 @@ int do_mmuext_op(
76.208 * it is dying.
76.209 */
76.210 ASSERT(e->tot_pages <= e->max_pages);
76.211 - if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
76.212 unlikely(e->tot_pages == e->max_pages) ||
76.213 unlikely(IS_XEN_HEAP_FRAME(page)) )
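
Most of the mm.c churn above narrows shadow_mode_enabled() tests to the new shadow_mode_refcounts() predicate: the normal page-table reference-counting and validation paths are bypassed only when the shadow code owns the refcounts, not whenever any shadow mode (for example, log-dirty tracking) happens to be active. A minimal sketch of that predicate split follows; the struct, flag names, and bit values are illustrative stand-ins, not Xen's actual definitions.

#include <stdio.h>

#define SHM_enable    (1u << 0) /* some shadow mode is active */
#define SHM_refcounts (1u << 1) /* shadow code manages page refcounts */

struct fake_domain { unsigned int shadow_mode; };

/* "Is any shadow mode active?" -- the old, too-broad test. */
static int shadow_mode_enabled(const struct fake_domain *d)
{
    return d->shadow_mode != 0;
}

/* "Does the shadow code, not the page-type machinery, own refcounts?" */
static int shadow_mode_refcounts(const struct fake_domain *d)
{
    return (d->shadow_mode & SHM_refcounts) != 0;
}

int main(void)
{
    /* e.g. log-dirty: shadows exist, but the guest still holds normal
     * page-table refcounts, so get_page_from_l2e() etc. must still run. */
    struct fake_domain logdirty  = { SHM_enable };
    struct fake_domain translate = { SHM_enable | SHM_refcounts };

    printf("log-dirty: enabled=%d refcounts=%d\n",
           shadow_mode_enabled(&logdirty), shadow_mode_refcounts(&logdirty));
    printf("translate: enabled=%d refcounts=%d\n",
           shadow_mode_enabled(&translate), shadow_mode_refcounts(&translate));
    return 0;
}

The log-dirty case prints enabled=1 refcounts=0, which is exactly the combination the old shadow_mode_enabled() tests got wrong: they skipped refcounting for a domain that still relied on it.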