ia64/linux-2.6.18-xen.hg

changeset 26:a533be77c572

Imported linux-2.6-xen-sparse from xen-unstable.hg 15200:bd3d6b4c52ec
author Ian Campbell <ian.campbell@xensource.com>
date Mon Jun 04 10:05:28 2007 +0100 (2007-06-04)
parents 42f6970e18c9
children 28b48e63eba6
files arch/i386/Kconfig arch/i386/Kconfig.cpu arch/i386/Kconfig.debug arch/i386/Makefile arch/i386/boot-xen/Makefile arch/i386/kernel/Makefile arch/i386/kernel/acpi/Makefile arch/i386/kernel/acpi/boot-xen.c arch/i386/kernel/apic-xen.c arch/i386/kernel/asm-offsets.c arch/i386/kernel/cpu/Makefile arch/i386/kernel/cpu/common-xen.c arch/i386/kernel/cpu/mtrr/Makefile arch/i386/kernel/cpu/mtrr/main-xen.c arch/i386/kernel/crash.c arch/i386/kernel/early_printk-xen.c arch/i386/kernel/entry-xen.S arch/i386/kernel/fixup.c arch/i386/kernel/head-xen.S arch/i386/kernel/init_task-xen.c arch/i386/kernel/io_apic-xen.c arch/i386/kernel/ioport-xen.c arch/i386/kernel/irq-xen.c arch/i386/kernel/ldt-xen.c arch/i386/kernel/machine_kexec.c arch/i386/kernel/microcode-xen.c arch/i386/kernel/mpparse-xen.c arch/i386/kernel/pci-dma-xen.c arch/i386/kernel/process-xen.c arch/i386/kernel/quirks-xen.c arch/i386/kernel/setup-xen.c arch/i386/kernel/smp-xen.c arch/i386/kernel/swiotlb.c arch/i386/kernel/sysenter.c arch/i386/kernel/time-xen.c arch/i386/kernel/traps-xen.c arch/i386/kernel/vm86.c arch/i386/kernel/vsyscall-note-xen.S arch/i386/mach-xen/Makefile arch/i386/mach-xen/setup.c arch/i386/mm/Makefile arch/i386/mm/fault-xen.c arch/i386/mm/highmem-xen.c arch/i386/mm/hypervisor.c arch/i386/mm/init-xen.c arch/i386/mm/ioremap-xen.c arch/i386/mm/pgtable-xen.c arch/i386/oprofile/Makefile arch/i386/oprofile/xenoprof.c arch/i386/pci/Makefile arch/i386/pci/irq-xen.c arch/i386/pci/pcifront.c arch/i386/power/Makefile arch/ia64/Kconfig arch/ia64/Makefile arch/ia64/hp/common/sba_iommu.c arch/ia64/kernel/acpi.c arch/ia64/kernel/asm-offsets.c arch/ia64/kernel/entry.S arch/ia64/kernel/fsys.S arch/ia64/kernel/gate.S arch/ia64/kernel/gate.lds.S arch/ia64/kernel/head.S arch/ia64/kernel/iosapic.c arch/ia64/kernel/irq_ia64.c arch/ia64/kernel/pal.S arch/ia64/kernel/patch.c arch/ia64/kernel/perfmon.c arch/ia64/kernel/setup.c arch/ia64/kernel/time.c arch/ia64/mm/ioremap.c arch/ia64/oprofile/Makefile arch/ia64/oprofile/init.c arch/ia64/oprofile/oprofile_perfmon.h arch/ia64/oprofile/perfmon.c arch/ia64/oprofile/xenoprof.c arch/ia64/pci/pci.c arch/ia64/xen/Makefile arch/ia64/xen/hypercall.S arch/ia64/xen/hypervisor.c arch/ia64/xen/machvec.c arch/ia64/xen/mem.c arch/ia64/xen/swiotlb.c arch/ia64/xen/util.c arch/ia64/xen/xcom_hcall.c arch/ia64/xen/xcom_mini.c arch/ia64/xen/xcom_privcmd.c arch/ia64/xen/xen_dma.c arch/ia64/xen/xencomm.c arch/ia64/xen/xenentry.S arch/ia64/xen/xenhpski.c arch/ia64/xen/xenivt.S arch/ia64/xen/xenminstate.h arch/ia64/xen/xenpal.S arch/ia64/xen/xensetup.S arch/um/kernel/physmem.c arch/x86_64/Kconfig arch/x86_64/Makefile arch/x86_64/ia32/Makefile arch/x86_64/ia32/ia32entry-xen.S arch/x86_64/ia32/syscall32-xen.c arch/x86_64/ia32/syscall32_syscall-xen.S arch/x86_64/ia32/vsyscall-int80.S arch/x86_64/ia32/vsyscall-sigreturn.S arch/x86_64/kernel/Makefile arch/x86_64/kernel/acpi/Makefile arch/x86_64/kernel/apic-xen.c arch/x86_64/kernel/asm-offsets.c arch/x86_64/kernel/crash.c arch/x86_64/kernel/e820-xen.c arch/x86_64/kernel/early_printk-xen.c arch/x86_64/kernel/entry-xen.S arch/x86_64/kernel/genapic-xen.c arch/x86_64/kernel/genapic_xen.c arch/x86_64/kernel/head-xen.S arch/x86_64/kernel/head64-xen.c arch/x86_64/kernel/init_task.c arch/x86_64/kernel/io_apic-xen.c arch/x86_64/kernel/ioport-xen.c arch/x86_64/kernel/irq-xen.c arch/x86_64/kernel/ldt-xen.c arch/x86_64/kernel/machine_kexec.c arch/x86_64/kernel/mpparse-xen.c arch/x86_64/kernel/pci-swiotlb-xen.c arch/x86_64/kernel/process-xen.c arch/x86_64/kernel/setup-xen.c 
arch/x86_64/kernel/setup64-xen.c arch/x86_64/kernel/smp-xen.c arch/x86_64/kernel/traps-xen.c arch/x86_64/kernel/vsyscall-xen.c arch/x86_64/kernel/xen_entry.S arch/x86_64/mm/Makefile arch/x86_64/mm/fault-xen.c arch/x86_64/mm/init-xen.c arch/x86_64/mm/pageattr-xen.c arch/x86_64/oprofile/Makefile arch/x86_64/pci/Makefile drivers/Makefile drivers/acpi/Kconfig drivers/char/mem.c drivers/char/tpm/Kconfig drivers/char/tpm/Makefile drivers/char/tpm/tpm.h drivers/char/tpm/tpm_vtpm.c drivers/char/tpm/tpm_vtpm.h drivers/char/tpm/tpm_xen.c drivers/char/tty_io.c drivers/firmware/Kconfig drivers/pci/Kconfig drivers/serial/Kconfig drivers/video/console/Kconfig drivers/xen/Kconfig drivers/xen/Makefile drivers/xen/balloon/Makefile drivers/xen/balloon/balloon.c drivers/xen/balloon/common.h drivers/xen/balloon/sysfs.c drivers/xen/blkback/Makefile drivers/xen/blkback/blkback.c drivers/xen/blkback/common.h drivers/xen/blkback/interface.c drivers/xen/blkback/vbd.c drivers/xen/blkback/xenbus.c drivers/xen/blkfront/Makefile drivers/xen/blkfront/blkfront.c drivers/xen/blkfront/block.h drivers/xen/blkfront/vbd.c drivers/xen/blktap/Makefile drivers/xen/blktap/blktap.c drivers/xen/blktap/common.h drivers/xen/blktap/interface.c drivers/xen/blktap/xenbus.c drivers/xen/char/Makefile drivers/xen/char/mem.c drivers/xen/console/Makefile drivers/xen/console/console.c drivers/xen/console/xencons_ring.c drivers/xen/core/Makefile drivers/xen/core/cpu_hotplug.c drivers/xen/core/evtchn.c drivers/xen/core/features.c drivers/xen/core/gnttab.c drivers/xen/core/hypervisor_sysfs.c drivers/xen/core/machine_kexec.c drivers/xen/core/machine_reboot.c drivers/xen/core/reboot.c drivers/xen/core/smpboot.c drivers/xen/core/xen_proc.c drivers/xen/core/xen_sysfs.c drivers/xen/evtchn/Makefile drivers/xen/evtchn/evtchn.c drivers/xen/fbfront/Makefile drivers/xen/fbfront/xenfb.c drivers/xen/fbfront/xenkbd.c drivers/xen/gntdev/Makefile drivers/xen/gntdev/gntdev.c drivers/xen/netback/Makefile drivers/xen/netback/common.h drivers/xen/netback/interface.c drivers/xen/netback/loopback.c drivers/xen/netback/netback.c drivers/xen/netback/xenbus.c drivers/xen/netfront/Makefile drivers/xen/netfront/netfront.c drivers/xen/pciback/Makefile drivers/xen/pciback/conf_space.c drivers/xen/pciback/conf_space.h drivers/xen/pciback/conf_space_capability.c drivers/xen/pciback/conf_space_capability.h drivers/xen/pciback/conf_space_capability_pm.c drivers/xen/pciback/conf_space_capability_vpd.c drivers/xen/pciback/conf_space_header.c drivers/xen/pciback/conf_space_quirks.c drivers/xen/pciback/conf_space_quirks.h drivers/xen/pciback/passthrough.c drivers/xen/pciback/pci_stub.c drivers/xen/pciback/pciback.h drivers/xen/pciback/pciback_ops.c drivers/xen/pciback/slot.c drivers/xen/pciback/vpci.c drivers/xen/pciback/xenbus.c drivers/xen/pcifront/Makefile drivers/xen/pcifront/pci.c drivers/xen/pcifront/pci_op.c drivers/xen/pcifront/pcifront.h drivers/xen/pcifront/xenbus.c drivers/xen/privcmd/Makefile drivers/xen/privcmd/privcmd.c drivers/xen/tpmback/Makefile drivers/xen/tpmback/common.h drivers/xen/tpmback/interface.c drivers/xen/tpmback/tpmback.c drivers/xen/tpmback/xenbus.c drivers/xen/util.c drivers/xen/xenbus/Makefile drivers/xen/xenbus/xenbus_backend_client.c drivers/xen/xenbus/xenbus_client.c drivers/xen/xenbus/xenbus_comms.c drivers/xen/xenbus/xenbus_comms.h drivers/xen/xenbus/xenbus_dev.c drivers/xen/xenbus/xenbus_probe.c drivers/xen/xenbus/xenbus_probe.h drivers/xen/xenbus/xenbus_probe_backend.c drivers/xen/xenbus/xenbus_xs.c drivers/xen/xenoprof/xenoprofile.c 
fs/Kconfig include/asm-i386/apic.h include/asm-i386/kexec.h include/asm-i386/mach-xen/asm/agp.h include/asm-i386/mach-xen/asm/desc.h include/asm-i386/mach-xen/asm/dma-mapping.h include/asm-i386/mach-xen/asm/fixmap.h include/asm-i386/mach-xen/asm/floppy.h include/asm-i386/mach-xen/asm/highmem.h include/asm-i386/mach-xen/asm/hw_irq.h include/asm-i386/mach-xen/asm/hypercall.h include/asm-i386/mach-xen/asm/hypervisor.h include/asm-i386/mach-xen/asm/io.h include/asm-i386/mach-xen/asm/irqflags.h include/asm-i386/mach-xen/asm/maddr.h include/asm-i386/mach-xen/asm/mmu.h include/asm-i386/mach-xen/asm/mmu_context.h include/asm-i386/mach-xen/asm/page.h include/asm-i386/mach-xen/asm/param.h include/asm-i386/mach-xen/asm/pci.h include/asm-i386/mach-xen/asm/pgalloc.h include/asm-i386/mach-xen/asm/pgtable-2level-defs.h include/asm-i386/mach-xen/asm/pgtable-2level.h include/asm-i386/mach-xen/asm/pgtable-3level-defs.h include/asm-i386/mach-xen/asm/pgtable-3level.h include/asm-i386/mach-xen/asm/pgtable.h include/asm-i386/mach-xen/asm/processor.h include/asm-i386/mach-xen/asm/ptrace.h include/asm-i386/mach-xen/asm/scatterlist.h include/asm-i386/mach-xen/asm/segment.h include/asm-i386/mach-xen/asm/setup.h include/asm-i386/mach-xen/asm/smp.h include/asm-i386/mach-xen/asm/spinlock.h include/asm-i386/mach-xen/asm/swiotlb.h include/asm-i386/mach-xen/asm/synch_bitops.h include/asm-i386/mach-xen/asm/system.h include/asm-i386/mach-xen/asm/tlbflush.h include/asm-i386/mach-xen/asm/vga.h include/asm-i386/mach-xen/asm/xenoprof.h include/asm-i386/mach-xen/irq_vectors.h include/asm-i386/mach-xen/mach_traps.h include/asm-i386/mach-xen/setup_arch.h include/asm-ia64/agp.h include/asm-ia64/dma-mapping.h include/asm-ia64/gcc_intrin.h include/asm-ia64/hw_irq.h include/asm-ia64/hypercall.h include/asm-ia64/hypervisor.h include/asm-ia64/intel_intrin.h include/asm-ia64/io.h include/asm-ia64/iosapic.h include/asm-ia64/irq.h include/asm-ia64/machvec.h include/asm-ia64/machvec_xen.h include/asm-ia64/maddr.h include/asm-ia64/meminit.h include/asm-ia64/page.h include/asm-ia64/pal.h include/asm-ia64/pgalloc.h include/asm-ia64/privop.h include/asm-ia64/processor.h include/asm-ia64/sal.h include/asm-ia64/swiotlb.h include/asm-ia64/synch_bitops.h include/asm-ia64/system.h include/asm-ia64/uaccess.h include/asm-ia64/xen/privop.h include/asm-ia64/xen/xcom_hcall.h include/asm-ia64/xen/xencomm.h include/asm-ia64/xenoprof.h include/asm-um/page.h include/asm-x86_64/apic.h include/asm-x86_64/kexec.h include/asm-x86_64/mach-xen/asm/agp.h include/asm-x86_64/mach-xen/asm/arch_hooks.h include/asm-x86_64/mach-xen/asm/bootsetup.h include/asm-x86_64/mach-xen/asm/desc.h include/asm-x86_64/mach-xen/asm/dma-mapping.h include/asm-x86_64/mach-xen/asm/e820.h include/asm-x86_64/mach-xen/asm/fixmap.h include/asm-x86_64/mach-xen/asm/floppy.h include/asm-x86_64/mach-xen/asm/hw_irq.h include/asm-x86_64/mach-xen/asm/hypercall.h include/asm-x86_64/mach-xen/asm/hypervisor.h include/asm-x86_64/mach-xen/asm/io.h include/asm-x86_64/mach-xen/asm/irq.h include/asm-x86_64/mach-xen/asm/irqflags.h include/asm-x86_64/mach-xen/asm/maddr.h include/asm-x86_64/mach-xen/asm/mmu.h include/asm-x86_64/mach-xen/asm/mmu_context.h include/asm-x86_64/mach-xen/asm/msr.h include/asm-x86_64/mach-xen/asm/nmi.h include/asm-x86_64/mach-xen/asm/page.h include/asm-x86_64/mach-xen/asm/pci.h include/asm-x86_64/mach-xen/asm/pgalloc.h include/asm-x86_64/mach-xen/asm/pgtable.h include/asm-x86_64/mach-xen/asm/processor.h include/asm-x86_64/mach-xen/asm/ptrace.h include/asm-x86_64/mach-xen/asm/smp.h 
include/asm-x86_64/mach-xen/asm/synch_bitops.h include/asm-x86_64/mach-xen/asm/system.h include/asm-x86_64/mach-xen/asm/timer.h include/asm-x86_64/mach-xen/asm/tlbflush.h include/asm-x86_64/mach-xen/asm/vga.h include/asm-x86_64/mach-xen/asm/xenoprof.h include/asm-x86_64/mach-xen/asm/xor.h include/asm-x86_64/mach-xen/irq_vectors.h include/asm-x86_64/mach-xen/mach_time.h include/asm-x86_64/mach-xen/mach_timer.h include/asm-x86_64/mach-xen/setup_arch_post.h include/asm-x86_64/mach-xen/setup_arch_pre.h include/linux/gfp.h include/linux/highmem.h include/linux/interrupt.h include/linux/kexec.h include/linux/mm.h include/linux/page-flags.h include/linux/skbuff.h include/xen/balloon.h include/xen/blkif.h include/xen/cpu_hotplug.h include/xen/driver_util.h include/xen/evtchn.h include/xen/features.h include/xen/gnttab.h include/xen/hvm.h include/xen/hypercall.h include/xen/hypervisor_sysfs.h include/xen/interface/COPYING include/xen/interface/acm.h include/xen/interface/acm_ops.h include/xen/interface/arch-ia64.h include/xen/interface/arch-powerpc.h include/xen/interface/arch-x86/xen-x86_32.h include/xen/interface/arch-x86/xen-x86_64.h include/xen/interface/arch-x86/xen.h include/xen/interface/arch-x86_32.h include/xen/interface/arch-x86_64.h include/xen/interface/callback.h include/xen/interface/dom0_ops.h include/xen/interface/domctl.h include/xen/interface/elfnote.h include/xen/interface/elfstructs.h include/xen/interface/event_channel.h include/xen/interface/features.h include/xen/interface/foreign/Makefile include/xen/interface/foreign/mkchecker.py include/xen/interface/foreign/mkheader.py include/xen/interface/foreign/reference.size include/xen/interface/foreign/structs.py include/xen/interface/grant_table.h include/xen/interface/hvm/e820.h include/xen/interface/hvm/hvm_info_table.h include/xen/interface/hvm/hvm_op.h include/xen/interface/hvm/ioreq.h include/xen/interface/hvm/params.h include/xen/interface/hvm/save.h include/xen/interface/hvm/vmx_assist.h include/xen/interface/io/blkif.h include/xen/interface/io/console.h include/xen/interface/io/fbif.h include/xen/interface/io/kbdif.h include/xen/interface/io/netif.h include/xen/interface/io/pciif.h include/xen/interface/io/protocols.h include/xen/interface/io/ring.h include/xen/interface/io/tpmif.h include/xen/interface/io/xenbus.h include/xen/interface/io/xs_wire.h include/xen/interface/kexec.h include/xen/interface/libelf.h include/xen/interface/memory.h include/xen/interface/nmi.h include/xen/interface/physdev.h include/xen/interface/platform.h include/xen/interface/sched.h include/xen/interface/sysctl.h include/xen/interface/trace.h include/xen/interface/vcpu.h include/xen/interface/version.h include/xen/interface/xen-compat.h include/xen/interface/xen.h include/xen/interface/xencomm.h include/xen/interface/xenoprof.h include/xen/pcifront.h include/xen/public/evtchn.h include/xen/public/gntdev.h include/xen/public/privcmd.h include/xen/xen_proc.h include/xen/xenbus.h include/xen/xencons.h include/xen/xenoprof.h kernel/Kconfig.preempt kernel/fork.c kernel/irq/spurious.c kernel/kexec.c lib/Makefile mm/Kconfig mm/highmem.c mm/memory.c mm/mmap.c mm/page_alloc.c net/core/dev.c net/core/skbuff.c scripts/Makefile.xen
line diff
     1.1 --- a/arch/i386/Kconfig	Mon Jun 04 10:05:24 2007 +0100
     1.2 +++ b/arch/i386/Kconfig	Mon Jun 04 10:05:28 2007 +0100
     1.3 @@ -16,6 +16,7 @@ config X86_32
     1.4  
     1.5  config GENERIC_TIME
     1.6  	bool
     1.7 +	depends on !X86_XEN
     1.8  	default y
     1.9  
    1.10  config LOCKDEP_SUPPORT
    1.11 @@ -103,6 +104,15 @@ config X86_PC
    1.12  	help
    1.13  	  Choose this option if your computer is a standard PC or compatible.
    1.14  
    1.15 +config X86_XEN
    1.16 +	bool "Xen-compatible"
    1.17 +	select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST
    1.18 +	select X86_UP_IOAPIC if !SMP && XEN_PRIVILEGED_GUEST
    1.19 +	select SWIOTLB
    1.20 +	help
    1.21 +	  Choose this option if you plan to run this kernel on top of the
    1.22 +	  Xen Hypervisor.
    1.23 +
    1.24  config X86_ELAN
    1.25  	bool "AMD Elan"
    1.26  	help
    1.27 @@ -213,6 +223,7 @@ source "arch/i386/Kconfig.cpu"
    1.28  
    1.29  config HPET_TIMER
    1.30  	bool "HPET Timer Support"
    1.31 +	depends on !X86_XEN
    1.32  	help
    1.33  	  This enables the use of the HPET for the kernel's internal timer.
    1.34  	  HPET is the next generation timer replacing legacy 8254s.
    1.35 @@ -263,7 +274,7 @@ source "kernel/Kconfig.preempt"
    1.36  
    1.37  config X86_UP_APIC
    1.38  	bool "Local APIC support on uniprocessors"
    1.39 -	depends on !SMP && !(X86_VISWS || X86_VOYAGER)
    1.40 +	depends on !SMP && !(X86_VISWS || X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
    1.41  	help
    1.42  	  A local APIC (Advanced Programmable Interrupt Controller) is an
    1.43  	  integrated interrupt controller in the CPU. If you have a single-CPU
    1.44 @@ -288,12 +299,12 @@ config X86_UP_IOAPIC
    1.45  
    1.46  config X86_LOCAL_APIC
    1.47  	bool
    1.48 -	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
    1.49 +	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST))
    1.50  	default y
    1.51  
    1.52  config X86_IO_APIC
    1.53  	bool
    1.54 -	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
    1.55 +	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER || XEN_UNPRIVILEGED_GUEST))
    1.56  	default y
    1.57  
    1.58  config X86_VISWS_APIC
    1.59 @@ -303,7 +314,7 @@ config X86_VISWS_APIC
    1.60  
    1.61  config X86_MCE
    1.62  	bool "Machine Check Exception"
    1.63 -	depends on !X86_VOYAGER
    1.64 +	depends on !(X86_VOYAGER || X86_XEN)
    1.65  	---help---
    1.66  	  Machine Check Exception support allows the processor to notify the
    1.67  	  kernel if it detects a problem (e.g. overheating, component failure).
    1.68 @@ -402,6 +413,7 @@ config X86_REBOOTFIXUPS
    1.69  
    1.70  config MICROCODE
    1.71  	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
    1.72 +	depends on !XEN_UNPRIVILEGED_GUEST
    1.73  	---help---
    1.74  	  If you say Y here and also to "/dev file system support" in the
    1.75  	  'File systems' section, you will be able to update the microcode on
    1.76 @@ -419,6 +431,7 @@ config MICROCODE
    1.77  
    1.78  config X86_MSR
    1.79  	tristate "/dev/cpu/*/msr - Model-specific register support"
    1.80 +	depends on !X86_XEN
    1.81  	help
    1.82  	  This device gives privileged processes access to the x86
    1.83  	  Model-Specific Registers (MSRs).  It is a character device with
    1.84 @@ -434,6 +447,10 @@ config X86_CPUID
    1.85  	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
    1.86  	  /dev/cpu/31/cpuid.
    1.87  
    1.88 +config SWIOTLB
    1.89 +	bool
    1.90 +	default n
    1.91 +
    1.92  source "drivers/firmware/Kconfig"
    1.93  
    1.94  choice
    1.95 @@ -616,6 +633,7 @@ config HIGHPTE
    1.96  
    1.97  config MATH_EMULATION
    1.98  	bool "Math emulation"
    1.99 +	depends on !X86_XEN
   1.100  	---help---
   1.101  	  Linux can emulate a math coprocessor (used for floating point
   1.102  	  operations) if you don't have one. 486DX and Pentium processors have
   1.103 @@ -641,6 +659,8 @@ config MATH_EMULATION
   1.104  
   1.105  config MTRR
   1.106  	bool "MTRR (Memory Type Range Register) support"
   1.107 +	depends on !XEN_UNPRIVILEGED_GUEST
   1.108 +	default y if X86_XEN
   1.109  	---help---
   1.110  	  On Intel P6 family processors (Pentium Pro, Pentium II and later)
   1.111  	  the Memory Type Range Registers (MTRRs) may be used to control
   1.112 @@ -675,7 +695,7 @@ config MTRR
   1.113  
   1.114  config EFI
   1.115  	bool "Boot from EFI support"
   1.116 -	depends on ACPI
   1.117 +	depends on ACPI && !X86_XEN
   1.118  	default n
   1.119  	---help---
    1.120  	This enables the kernel to boot on EFI platforms using
   1.121 @@ -693,7 +713,7 @@ config EFI
   1.122  
   1.123  config IRQBALANCE
   1.124   	bool "Enable kernel irq balancing"
   1.125 -	depends on SMP && X86_IO_APIC
   1.126 +	depends on SMP && X86_IO_APIC && !X86_XEN
   1.127  	default y
   1.128  	help
   1.129   	  The default yes will allow the kernel to do irq load balancing.
   1.130 @@ -741,7 +761,7 @@ source kernel/Kconfig.hz
   1.131  
   1.132  config KEXEC
   1.133  	bool "kexec system call (EXPERIMENTAL)"
   1.134 -	depends on EXPERIMENTAL
   1.135 +	depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
   1.136  	help
   1.137  	  kexec is a system call that implements the ability to shutdown your
   1.138  	  current kernel, and to start another kernel.  It is like a reboot
   1.139 @@ -793,6 +813,7 @@ config HOTPLUG_CPU
   1.140  
   1.141  config COMPAT_VDSO
   1.142  	bool "Compat VDSO support"
   1.143 +	depends on !X86_XEN
   1.144  	default y
   1.145  	help
   1.146  	  Map the VDSO to the predictable old-style address too.
   1.147 @@ -810,18 +831,20 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
   1.148  	depends on HIGHMEM
   1.149  
   1.150  menu "Power management options (ACPI, APM)"
   1.151 -	depends on !X86_VOYAGER
   1.152 +	depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
   1.153  
   1.154 +if !X86_XEN
   1.155  source kernel/power/Kconfig
   1.156 +endif
   1.157  
   1.158  source "drivers/acpi/Kconfig"
   1.159  
   1.160  menu "APM (Advanced Power Management) BIOS Support"
   1.161 -depends on PM && !X86_VISWS
   1.162 +depends on PM && !(X86_VISWS || X86_XEN)
   1.163  
   1.164  config APM
   1.165  	tristate "APM (Advanced Power Management) BIOS support"
   1.166 -	depends on PM
   1.167 +	depends on PM && PM_LEGACY
   1.168  	---help---
   1.169  	  APM is a BIOS specification for saving power using several different
   1.170  	  techniques. This is mostly useful for battery powered laptops with
   1.171 @@ -1006,6 +1029,7 @@ choice
   1.172  
   1.173  config PCI_GOBIOS
   1.174  	bool "BIOS"
   1.175 +	depends on !X86_XEN
   1.176  
   1.177  config PCI_GOMMCONFIG
   1.178  	bool "MMConfig"
   1.179 @@ -1013,6 +1037,13 @@ config PCI_GOMMCONFIG
   1.180  config PCI_GODIRECT
   1.181  	bool "Direct"
   1.182  
   1.183 +config PCI_GOXEN_FE
   1.184 +	bool "Xen PCI Frontend"
   1.185 +	depends on X86_XEN
   1.186 +	help
   1.187 +	  The PCI device frontend driver allows the kernel to import arbitrary
   1.188 +	  PCI devices from a PCI backend to support PCI driver domains.
   1.189 +
   1.190  config PCI_GOANY
   1.191  	bool "Any"
   1.192  
   1.193 @@ -1020,7 +1051,7 @@ endchoice
   1.194  
   1.195  config PCI_BIOS
   1.196  	bool
   1.197 -	depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
   1.198 +	depends on !(X86_VISWS || X86_XEN) && PCI && (PCI_GOBIOS || PCI_GOANY)
   1.199  	default y
   1.200  
   1.201  config PCI_DIRECT
   1.202 @@ -1033,6 +1064,18 @@ config PCI_MMCONFIG
   1.203  	depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
   1.204  	default y
   1.205  
   1.206 +config XEN_PCIDEV_FRONTEND
   1.207 +	bool
   1.208 +	depends on PCI && X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)
   1.209 +	default y
   1.210 +
   1.211 +config XEN_PCIDEV_FE_DEBUG
   1.212 +	bool "Xen PCI Frontend Debugging"
   1.213 +	depends on XEN_PCIDEV_FRONTEND
   1.214 +	default n
   1.215 +	help
   1.216 +	  Enables some debug statements within the PCI Frontend.
   1.217 +
   1.218  source "drivers/pci/pcie/Kconfig"
   1.219  
   1.220  source "drivers/pci/Kconfig"
   1.221 @@ -1043,7 +1086,7 @@ config ISA_DMA_API
   1.222  
   1.223  config ISA
   1.224  	bool "ISA support"
   1.225 -	depends on !(X86_VOYAGER || X86_VISWS)
   1.226 +	depends on !(X86_VOYAGER || X86_VISWS || X86_XEN)
   1.227  	help
   1.228  	  Find out whether you have ISA slots on your motherboard.  ISA is the
   1.229  	  name of a bus system, i.e. the way the CPU talks to the other stuff
   1.230 @@ -1070,7 +1113,7 @@ config EISA
   1.231  source "drivers/eisa/Kconfig"
   1.232  
   1.233  config MCA
   1.234 -	bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
   1.235 +	bool "MCA support" if !(X86_VISWS || X86_VOYAGER || X86_XEN)
   1.236  	default y if X86_VOYAGER
   1.237  	help
   1.238  	  MicroChannel Architecture is found in some IBM PS/2 machines and
   1.239 @@ -1146,6 +1189,8 @@ source "security/Kconfig"
   1.240  
   1.241  source "crypto/Kconfig"
   1.242  
   1.243 +source "drivers/xen/Kconfig"
   1.244 +
   1.245  source "lib/Kconfig"
   1.246  
   1.247  #
   1.248 @@ -1171,7 +1216,7 @@ config X86_SMP
   1.249  
   1.250  config X86_HT
   1.251  	bool
   1.252 -	depends on SMP && !(X86_VISWS || X86_VOYAGER)
   1.253 +	depends on SMP && !(X86_VISWS || X86_VOYAGER || X86_XEN)
   1.254  	default y
   1.255  
   1.256  config X86_BIOS_REBOOT
   1.257 @@ -1184,6 +1229,16 @@ config X86_TRAMPOLINE
   1.258  	depends on X86_SMP || (X86_VOYAGER && SMP)
   1.259  	default y
   1.260  
   1.261 +config X86_NO_TSS
   1.262 +	bool
   1.263 +	depends on X86_XEN
   1.264 +	default y
   1.265 +
   1.266 +config X86_NO_IDT
   1.267 +	bool
   1.268 +	depends on X86_XEN
   1.269 +	default y
   1.270 +
   1.271  config KTIME_SCALAR
   1.272  	bool
   1.273  	default y
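
Each bool introduced or constrained above surfaces at build time as a CONFIG_* preprocessor macro (pulled in through include/linux/autoconf.h; -D on the command line has the same effect), which is how the Xen port compiles out features the hypervisor does not expose (direct MSR access, MCE, HPET, ISA, and so on) instead of failing at run time. A minimal, self-contained sketch of that mechanism; the demo is illustrative only and not part of this changeset:

    #include <stdio.h>

    /* Kconfig bools become CONFIG_<NAME> macros at compile time,
     * e.g. -DCONFIG_X86_XEN=1, so code can branch when it is built.
     * Hypothetical demo, not taken from the patch. */
    #ifdef CONFIG_X86_XEN
    static const char *subarch = "Xen paravirtualized guest";
    #else
    static const char *subarch = "native x86";
    #endif

    int main(void)
    {
            printf("built for: %s\n", subarch);
            return 0;
    }

Compiling once with and once without -DCONFIG_X86_XEN exercises both paths.
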
     2.1 --- a/arch/i386/Kconfig.cpu	Mon Jun 04 10:05:24 2007 +0100
     2.2 +++ b/arch/i386/Kconfig.cpu	Mon Jun 04 10:05:28 2007 +0100
     2.3 @@ -251,7 +251,7 @@ config X86_PPRO_FENCE
     2.4  
     2.5  config X86_F00F_BUG
     2.6  	bool
     2.7 -	depends on M586MMX || M586TSC || M586 || M486 || M386
     2.8 +	depends on (M586MMX || M586TSC || M586 || M486 || M386) && !X86_NO_IDT
     2.9  	default y
    2.10  
    2.11  config X86_WP_WORKS_OK
    2.12 @@ -311,5 +311,5 @@ config X86_OOSTORE
    2.13  
    2.14  config X86_TSC
    2.15  	bool
    2.16 -	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
    2.17 +	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ && !X86_XEN
    2.18  	default y
     3.1 --- a/arch/i386/Kconfig.debug	Mon Jun 04 10:05:24 2007 +0100
     3.2 +++ b/arch/i386/Kconfig.debug	Mon Jun 04 10:05:28 2007 +0100
     3.3 @@ -79,6 +79,7 @@ config X86_MPPARSE
     3.4  config DOUBLEFAULT
     3.5  	default y
     3.6  	bool "Enable doublefault exception handler" if EMBEDDED
     3.7 +	depends on !X86_NO_TSS
     3.8  	help
     3.9            This option allows trapping of rare doublefault exceptions that
    3.10            would otherwise cause a system to silently reboot. Disabling this
     4.1 --- a/arch/i386/Makefile	Mon Jun 04 10:05:24 2007 +0100
     4.2 +++ b/arch/i386/Makefile	Mon Jun 04 10:05:28 2007 +0100
     4.3 @@ -48,6 +48,11 @@ CFLAGS				+= $(shell if [ $(call cc-vers
     4.4  
     4.5  CFLAGS += $(cflags-y)
     4.6  
     4.7 +cppflags-$(CONFIG_XEN) += \
     4.8 +	-D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
     4.9 +
    4.10 +CPPFLAGS += $(cppflags-y)
    4.11 +
    4.12  # Default subarch .c files
    4.13  mcore-y  := mach-default
    4.14  
    4.15 @@ -71,6 +76,10 @@ mcore-$(CONFIG_X86_BIGSMP)	:= mach-defau
    4.16  mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit
    4.17  mcore-$(CONFIG_X86_SUMMIT)  := mach-default
    4.18  
    4.19 +# Xen subarch support
    4.20 +mflags-$(CONFIG_X86_XEN)	:= -Iinclude/asm-i386/mach-xen
    4.21 +mcore-$(CONFIG_X86_XEN)		:= mach-xen
    4.22 +
    4.23  # generic subarchitecture
    4.24  mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic
    4.25  mcore-$(CONFIG_X86_GENERICARCH) := mach-default
    4.26 @@ -105,6 +114,19 @@ boot := arch/i386/boot
    4.27  PHONY += zImage bzImage compressed zlilo bzlilo \
    4.28           zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
    4.29  
    4.30 +ifdef CONFIG_XEN
    4.31 +CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
    4.32 +head-y := arch/i386/kernel/head-xen.o arch/i386/kernel/init_task-xen.o
    4.33 +boot := arch/i386/boot-xen
    4.34 +.PHONY: vmlinuz
    4.35 +all: vmlinuz
    4.36 +
    4.37 +vmlinuz: vmlinux
    4.38 +	$(Q)$(MAKE) $(build)=$(boot) $@
    4.39 +
    4.40 +install:
    4.41 +	$(Q)$(MAKE) $(build)=$(boot) $@
    4.42 +else
    4.43  all: bzImage
    4.44  
    4.45  # KBUILD_IMAGE specify target image being built
    4.46 @@ -127,6 +149,7 @@ fdimage fdimage144 fdimage288 isoimage: 
    4.47  
    4.48  install:
    4.49  	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
    4.50 +endif
    4.51  
    4.52  archclean:
    4.53  	$(Q)$(MAKE) $(clean)=arch/i386/boot
    4.54 @@ -145,3 +168,4 @@ endef
    4.55  CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
    4.56  	       arch/$(ARCH)/boot/image.iso \
    4.57  	       arch/$(ARCH)/boot/mtools.conf
    4.58 +CLEAN_FILES += vmlinuz vmlinux-stripped
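
The cppflags-$(CONFIG_XEN) hunk above pins the guest/hypervisor ABI: __XEN_INTERFACE_VERSION__ is injected on every compile so that the headers under include/xen/interface can select structure layouts compatible with the configured hypervisor. A hedged illustration of how such a version macro is typically consumed; the fallback value and the version check are examples only:

    #include <stdio.h>

    /* Normally supplied by the build system, as in the Makefile hunk:
     *   -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
     * The fallback below is purely illustrative. */
    #ifndef __XEN_INTERFACE_VERSION__
    #define __XEN_INTERFACE_VERSION__ 0x00030205
    #endif

    int main(void)
    {
            printf("compiled against Xen interface %#lx\n",
                   (unsigned long)__XEN_INTERFACE_VERSION__);
    #if __XEN_INTERFACE_VERSION__ >= 0x00030203
            /* hypothetical: code requiring the newer ABI would go here */
    #endif
            return 0;
    }
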
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/arch/i386/boot-xen/Makefile	Mon Jun 04 10:05:28 2007 +0100
     5.3 @@ -0,0 +1,19 @@
     5.4 +
     5.5 +OBJCOPYFLAGS := -g --strip-unneeded
     5.6 +
     5.7 +vmlinuz: vmlinux-stripped FORCE
     5.8 +	$(call if_changed,gzip)
     5.9 +
    5.10 +vmlinux-stripped: vmlinux FORCE
    5.11 +	$(call if_changed,objcopy)
    5.12 +
    5.13 +INSTALL_ROOT := $(patsubst %/boot,%,$(INSTALL_PATH))
    5.14 +
    5.15 +XINSTALL_NAME ?= $(KERNELRELEASE)
    5.16 +install:
    5.17 +	mkdir -p $(INSTALL_ROOT)/boot
    5.18 +	install -m0644 vmlinuz $(INSTALL_ROOT)/boot/vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
    5.19 +	install -m0644 vmlinux $(INSTALL_ROOT)/boot/vmlinux-syms-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
    5.20 +	install -m0664 .config $(INSTALL_ROOT)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
    5.21 +	install -m0664 System.map $(INSTALL_ROOT)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
    5.22 +	ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_ROOT)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(shell [ -e $(objtree)/localversion-xen ] && cat $(objtree)/localversion-xen)$(INSTALL_SUFFIX)
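
Together with the arch/i386/Makefile hunk above, this makes vmlinuz the default Xen target: vmlinux is objcopy-stripped to vmlinux-stripped, gzipped to vmlinuz, and install then copies the compressed image, the unstripped vmlinux-syms-* companion, .config and System.map into $(INSTALL_ROOT)/boot. Assuming a .config with CONFIG_XEN enabled, a typical build-and-install sequence would be (paths illustrative):

    make ARCH=i386 vmlinuz
    make ARCH=i386 INSTALL_PATH=/boot install
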
     6.1 --- a/arch/i386/kernel/Makefile	Mon Jun 04 10:05:24 2007 +0100
     6.2 +++ b/arch/i386/kernel/Makefile	Mon Jun 04 10:05:28 2007 +0100
     6.3 @@ -44,6 +44,12 @@ EXTRA_AFLAGS   := -traditional
     6.4  
     6.5  obj-$(CONFIG_SCx200)		+= scx200.o
     6.6  
     6.7 +ifdef CONFIG_XEN
     6.8 +vsyscall_note := vsyscall-note-xen.o
     6.9 +else
    6.10 +vsyscall_note := vsyscall-note.o
    6.11 +endif
    6.12 +
    6.13  # vsyscall.o contains the vsyscall DSO images as __initdata.
    6.14  # We must build both images before we can assemble it.
    6.15  # Note: kbuild does not track this dependency due to usage of .incbin
    6.16 @@ -65,7 +71,7 @@ SYSCFLAGS_vsyscall-int80.so	= $(vsyscall
    6.17  
    6.18  $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
    6.19  $(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
    6.20 -		      $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
    6.21 +		      $(obj)/vsyscall-%.o $(obj)/$(vsyscall_note) FORCE
    6.22  	$(call if_changed,syscall)
    6.23  
    6.24  # We also create a special relocatable object that should mirror the symbol
    6.25 @@ -77,8 +83,20 @@ extra-y += vsyscall-syms.o
    6.26  
    6.27  SYSCFLAGS_vsyscall-syms.o = -r
    6.28  $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
    6.29 -			$(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
    6.30 +			$(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE
    6.31  	$(call if_changed,syscall)
    6.32  
    6.33  k8-y                      += ../../x86_64/kernel/k8.o
    6.34  
    6.35 +ifdef CONFIG_XEN
    6.36 +include $(srctree)/scripts/Makefile.xen
    6.37 +
    6.38 +obj-y += fixup.o
    6.39 +microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
    6.40 +n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
    6.41 +
    6.42 +obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
    6.43 +obj-y := $(call cherrypickxen, $(obj-y))
    6.44 +extra-y := $(call cherrypickxen, $(extra-y))
    6.45 +%/head-xen.o %/head-xen.s: EXTRA_AFLAGS :=
    6.46 +endif
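
scripts/Makefile.xen itself does not appear in the hunks shown here, but its two helpers can be inferred from their call sites: filterxen removes objects that make no sense under the hypervisor (i8259.o, smpboot.o, ...), and cherrypickxen rewrites foo.o to foo-xen.o wherever a -xen source variant exists. A speculative sketch of what such helpers could look like; the real definitions may differ:

    # Hypothetical reconstruction -- see scripts/Makefile.xen for the
    # authoritative definitions.
    # $(call filterxen, object-list, objects-to-drop)
    filterxen = $(filter-out $(2), $(1))

    # $(call cherrypickxen, object-list): substitute foo-xen.o for foo.o
    # whenever a foo-xen.c or foo-xen.S source exists ($(src) is the
    # Kbuild source directory of the current Makefile).
    cherrypickxen = $(foreach obj, $(1), \
            $(if $(wildcard $(srctree)/$(src)/$(basename $(obj))-xen.[cS]), \
                    $(basename $(obj))-xen.o, \
                    $(obj)))

The acpi/Makefile hunk that follows passes $(src) explicitly as a second argument, which suggests the real cherrypickxen also accepts an optional source-directory parameter.
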
     7.1 --- a/arch/i386/kernel/acpi/Makefile	Mon Jun 04 10:05:24 2007 +0100
     7.2 +++ b/arch/i386/kernel/acpi/Makefile	Mon Jun 04 10:05:28 2007 +0100
     7.3 @@ -6,3 +6,7 @@ ifneq ($(CONFIG_ACPI_PROCESSOR),)
     7.4  obj-y				+= cstate.o processor.o
     7.5  endif
     7.6  
     7.7 +ifdef CONFIG_XEN
     7.8 +include $(srctree)/scripts/Makefile.xen
     7.9 +obj-y := $(call cherrypickxen, $(obj-y), $(src))
    7.10 +endif
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/arch/i386/kernel/acpi/boot-xen.c	Mon Jun 04 10:05:28 2007 +0100
     8.3 @@ -0,0 +1,1168 @@
     8.4 +/*
     8.5 + *  boot.c - Architecture-Specific Low-Level ACPI Boot Support
     8.6 + *
     8.7 + *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
     8.8 + *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
     8.9 + *
    8.10 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    8.11 + *
    8.12 + *  This program is free software; you can redistribute it and/or modify
    8.13 + *  it under the terms of the GNU General Public License as published by
    8.14 + *  the Free Software Foundation; either version 2 of the License, or
    8.15 + *  (at your option) any later version.
    8.16 + *
    8.17 + *  This program is distributed in the hope that it will be useful,
    8.18 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
    8.19 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    8.20 + *  GNU General Public License for more details.
    8.21 + *
    8.22 + *  You should have received a copy of the GNU General Public License
    8.23 + *  along with this program; if not, write to the Free Software
    8.24 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    8.25 + *
    8.26 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    8.27 + */
    8.28 +
    8.29 +#include <linux/init.h>
    8.30 +#include <linux/acpi.h>
    8.31 +#include <linux/efi.h>
    8.32 +#include <linux/module.h>
    8.33 +#include <linux/dmi.h>
    8.34 +#include <linux/irq.h>
    8.35 +
    8.36 +#include <asm/pgtable.h>
    8.37 +#include <asm/io_apic.h>
    8.38 +#include <asm/apic.h>
    8.39 +#include <asm/io.h>
    8.40 +#include <asm/mpspec.h>
    8.41 +
    8.42 +#ifdef	CONFIG_X86_64
    8.43 +
    8.44 +extern void __init clustered_apic_check(void);
    8.45 +
    8.46 +extern int gsi_irq_sharing(int gsi);
    8.47 +#include <asm/proto.h>
    8.48 +
    8.49 +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
    8.50 +
    8.51 +
    8.52 +#else				/* X86 */
    8.53 +
    8.54 +#ifdef	CONFIG_X86_LOCAL_APIC
    8.55 +#include <mach_apic.h>
    8.56 +#include <mach_mpparse.h>
    8.57 +#endif				/* CONFIG_X86_LOCAL_APIC */
    8.58 +
    8.59 +static inline int gsi_irq_sharing(int gsi) { return gsi; }
    8.60 +
    8.61 +#endif				/* X86 */
    8.62 +
    8.63 +#define BAD_MADT_ENTRY(entry, end) (					    \
    8.64 +		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
    8.65 +		((acpi_table_entry_header *)entry)->length < sizeof(*entry))
    8.66 +
    8.67 +#define PREFIX			"ACPI: "
    8.68 +
    8.69 +int acpi_noirq __initdata;	/* skip ACPI IRQ initialization */
    8.70 +int acpi_pci_disabled __initdata;	/* skip ACPI PCI scan and IRQ initialization */
    8.71 +int acpi_ht __initdata = 1;	/* enable HT */
    8.72 +
    8.73 +int acpi_lapic;
    8.74 +int acpi_ioapic;
    8.75 +int acpi_strict;
    8.76 +EXPORT_SYMBOL(acpi_strict);
    8.77 +
    8.78 +acpi_interrupt_flags acpi_sci_flags __initdata;
    8.79 +int acpi_sci_override_gsi __initdata;
    8.80 +int acpi_skip_timer_override __initdata;
    8.81 +
    8.82 +#ifdef CONFIG_X86_LOCAL_APIC
    8.83 +static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
    8.84 +#endif
    8.85 +
    8.86 +#ifndef __HAVE_ARCH_CMPXCHG
    8.87 +#warning ACPI uses CMPXCHG, i486 and later hardware
    8.88 +#endif
    8.89 +
    8.90 +#define MAX_MADT_ENTRIES	256
    8.91 +u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
    8.92 +    {[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
    8.93 +EXPORT_SYMBOL(x86_acpiid_to_apicid);
    8.94 +
    8.95 +/* --------------------------------------------------------------------------
    8.96 +                              Boot-time Configuration
    8.97 +   -------------------------------------------------------------------------- */
    8.98 +
    8.99 +/*
   8.100 + * The default interrupt routing model is PIC (8259).  This gets
    8.101 + * overridden if IOAPICs are enumerated (below).
   8.102 + */
   8.103 +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
   8.104 +
   8.105 +#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
   8.106 +
   8.107 +/* rely on all ACPI tables being in the direct mapping */
   8.108 +char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
   8.109 +{
   8.110 +	if (!phys_addr || !size)
   8.111 +		return NULL;
   8.112 +
   8.113 +	if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE)
   8.114 +		return __va(phys_addr);
   8.115 +
   8.116 +	return NULL;
   8.117 +}
   8.118 +
   8.119 +#else
   8.120 +
   8.121 +/*
   8.122 + * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
   8.123 + * to map the target physical address. The problem is that set_fixmap()
   8.124 + * provides a single page, and it is possible that the page is not
   8.125 + * sufficient.
   8.126 + * By using this area, we can map up to MAX_IO_APICS pages temporarily,
   8.127 + * i.e. until the next __va_range() call.
   8.128 + *
   8.129 + * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
   8.130 + * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
   8.131 + * count idx down while incrementing the phys address.
   8.132 + */
   8.133 +char *__acpi_map_table(unsigned long phys, unsigned long size)
   8.134 +{
   8.135 +	unsigned long base, offset, mapped_size;
   8.136 +	int idx;
   8.137 +
   8.138 +#ifndef CONFIG_XEN
   8.139 +	if (phys + size < 8 * 1024 * 1024)
   8.140 +		return __va(phys);
   8.141 +#endif
   8.142 +
   8.143 +	offset = phys & (PAGE_SIZE - 1);
   8.144 +	mapped_size = PAGE_SIZE - offset;
   8.145 +	set_fixmap(FIX_ACPI_END, phys);
   8.146 +	base = fix_to_virt(FIX_ACPI_END);
   8.147 +
   8.148 +	/*
   8.149 +	 * Most cases can be covered by the below.
   8.150 +	 */
   8.151 +	idx = FIX_ACPI_END;
   8.152 +	while (mapped_size < size) {
   8.153 +		if (--idx < FIX_ACPI_BEGIN)
   8.154 +			return NULL;	/* cannot handle this */
   8.155 +		phys += PAGE_SIZE;
   8.156 +		set_fixmap(idx, phys);
   8.157 +		mapped_size += PAGE_SIZE;
   8.158 +	}
   8.159 +
   8.160 +	return ((unsigned char *)base + offset);
   8.161 +}
   8.162 +#endif
   8.163 +
   8.164 +#ifdef CONFIG_PCI_MMCONFIG
   8.165 +/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
   8.166 +struct acpi_table_mcfg_config *pci_mmcfg_config;
   8.167 +int pci_mmcfg_config_num;
   8.168 +
   8.169 +int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
   8.170 +{
   8.171 +	struct acpi_table_mcfg *mcfg;
   8.172 +	unsigned long i;
   8.173 +	int config_size;
   8.174 +
   8.175 +	if (!phys_addr || !size)
   8.176 +		return -EINVAL;
   8.177 +
   8.178 +	mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size);
   8.179 +	if (!mcfg) {
   8.180 +		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
   8.181 +		return -ENODEV;
   8.182 +	}
   8.183 +
   8.184 +	/* how many config structures do we have */
   8.185 +	pci_mmcfg_config_num = 0;
   8.186 +	i = size - sizeof(struct acpi_table_mcfg);
   8.187 +	while (i >= sizeof(struct acpi_table_mcfg_config)) {
   8.188 +		++pci_mmcfg_config_num;
   8.189 +		i -= sizeof(struct acpi_table_mcfg_config);
   8.190 +	};
   8.191 +	if (pci_mmcfg_config_num == 0) {
   8.192 +		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
   8.193 +		return -ENODEV;
   8.194 +	}
   8.195 +
   8.196 +	config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
   8.197 +	pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
   8.198 +	if (!pci_mmcfg_config) {
   8.199 +		printk(KERN_WARNING PREFIX
   8.200 +		       "No memory for MCFG config tables\n");
   8.201 +		return -ENOMEM;
   8.202 +	}
   8.203 +
   8.204 +	memcpy(pci_mmcfg_config, &mcfg->config, config_size);
   8.205 +	for (i = 0; i < pci_mmcfg_config_num; ++i) {
   8.206 +		if (mcfg->config[i].base_reserved) {
   8.207 +			printk(KERN_ERR PREFIX
   8.208 +			       "MMCONFIG not in low 4GB of memory\n");
   8.209 +			kfree(pci_mmcfg_config);
   8.210 +			pci_mmcfg_config_num = 0;
   8.211 +			return -ENODEV;
   8.212 +		}
   8.213 +	}
   8.214 +
   8.215 +	return 0;
   8.216 +}
   8.217 +#endif				/* CONFIG_PCI_MMCONFIG */
   8.218 +
   8.219 +#ifdef CONFIG_X86_LOCAL_APIC
   8.220 +static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
   8.221 +{
   8.222 +	struct acpi_table_madt *madt = NULL;
   8.223 +
   8.224 +	if (!phys_addr || !size || !cpu_has_apic)
   8.225 +		return -EINVAL;
   8.226 +
   8.227 +	madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size);
   8.228 +	if (!madt) {
   8.229 +		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
   8.230 +		return -ENODEV;
   8.231 +	}
   8.232 +
   8.233 +	if (madt->lapic_address) {
   8.234 +		acpi_lapic_addr = (u64) madt->lapic_address;
   8.235 +
   8.236 +		printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
   8.237 +		       madt->lapic_address);
   8.238 +	}
   8.239 +
   8.240 +	acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
   8.241 +
   8.242 +	return 0;
   8.243 +}
   8.244 +
   8.245 +static int __init
   8.246 +acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end)
   8.247 +{
   8.248 +	struct acpi_table_lapic *processor = NULL;
   8.249 +
   8.250 +	processor = (struct acpi_table_lapic *)header;
   8.251 +
   8.252 +	if (BAD_MADT_ENTRY(processor, end))
   8.253 +		return -EINVAL;
   8.254 +
   8.255 +	acpi_table_print_madt_entry(header);
   8.256 +
   8.257 +	/* Record local apic id only when enabled */
   8.258 +	if (processor->flags.enabled)
   8.259 +		x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
   8.260 +
   8.261 +	/*
    8.262 +	 * We need to register disabled CPUs as well to permit
    8.263 +	 * counting them. This allows us to size
    8.264 +	 * cpus_possible_map more accurately, so that we do not
    8.265 +	 * preallocate memory for all NR_CPUS
    8.266 +	 * when we use CPU hotplug.
   8.267 +	 */
   8.268 +	mp_register_lapic(processor->id,	/* APIC ID */
   8.269 +			  processor->flags.enabled);	/* Enabled? */
   8.270 +
   8.271 +	return 0;
   8.272 +}
   8.273 +
   8.274 +static int __init
   8.275 +acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
   8.276 +			  const unsigned long end)
   8.277 +{
   8.278 +	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
   8.279 +
   8.280 +	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header;
   8.281 +
   8.282 +	if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
   8.283 +		return -EINVAL;
   8.284 +
   8.285 +	acpi_lapic_addr = lapic_addr_ovr->address;
   8.286 +
   8.287 +	return 0;
   8.288 +}
   8.289 +
   8.290 +static int __init
   8.291 +acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
   8.292 +{
   8.293 +	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
   8.294 +
   8.295 +	lapic_nmi = (struct acpi_table_lapic_nmi *)header;
   8.296 +
   8.297 +	if (BAD_MADT_ENTRY(lapic_nmi, end))
   8.298 +		return -EINVAL;
   8.299 +
   8.300 +	acpi_table_print_madt_entry(header);
   8.301 +
   8.302 +	if (lapic_nmi->lint != 1)
   8.303 +		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
   8.304 +
   8.305 +	return 0;
   8.306 +}
   8.307 +
   8.308 +#endif				/*CONFIG_X86_LOCAL_APIC */
   8.309 +
   8.310 +#ifdef CONFIG_X86_IO_APIC
   8.311 +
   8.312 +static int __init
   8.313 +acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end)
   8.314 +{
   8.315 +	struct acpi_table_ioapic *ioapic = NULL;
   8.316 +
   8.317 +	ioapic = (struct acpi_table_ioapic *)header;
   8.318 +
   8.319 +	if (BAD_MADT_ENTRY(ioapic, end))
   8.320 +		return -EINVAL;
   8.321 +
   8.322 +	acpi_table_print_madt_entry(header);
   8.323 +
   8.324 +	mp_register_ioapic(ioapic->id,
   8.325 +			   ioapic->address, ioapic->global_irq_base);
   8.326 +
   8.327 +	return 0;
   8.328 +}
   8.329 +
   8.330 +/*
   8.331 + * Parse Interrupt Source Override for the ACPI SCI
   8.332 + */
   8.333 +static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
   8.334 +{
   8.335 +	if (trigger == 0)	/* compatible SCI trigger is level */
   8.336 +		trigger = 3;
   8.337 +
   8.338 +	if (polarity == 0)	/* compatible SCI polarity is low */
   8.339 +		polarity = 3;
   8.340 +
   8.341 +	/* Command-line over-ride via acpi_sci= */
   8.342 +	if (acpi_sci_flags.trigger)
   8.343 +		trigger = acpi_sci_flags.trigger;
   8.344 +
   8.345 +	if (acpi_sci_flags.polarity)
   8.346 +		polarity = acpi_sci_flags.polarity;
   8.347 +
   8.348 +	/*
   8.349 +	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
   8.350 +	 * If GSI is < 16, this will update its flags,
   8.351 +	 * else it will create a new mp_irqs[] entry.
   8.352 +	 */
   8.353 +	mp_override_legacy_irq(gsi, polarity, trigger, gsi);
   8.354 +
   8.355 +	/*
   8.356 +	 * stash over-ride to indicate we've been here
   8.357 +	 * and for later update of acpi_fadt
   8.358 +	 */
   8.359 +	acpi_sci_override_gsi = gsi;
   8.360 +	return;
   8.361 +}
   8.362 +
   8.363 +static int __init
   8.364 +acpi_parse_int_src_ovr(acpi_table_entry_header * header,
   8.365 +		       const unsigned long end)
   8.366 +{
   8.367 +	struct acpi_table_int_src_ovr *intsrc = NULL;
   8.368 +
   8.369 +	intsrc = (struct acpi_table_int_src_ovr *)header;
   8.370 +
   8.371 +	if (BAD_MADT_ENTRY(intsrc, end))
   8.372 +		return -EINVAL;
   8.373 +
   8.374 +	acpi_table_print_madt_entry(header);
   8.375 +
   8.376 +	if (intsrc->bus_irq == acpi_fadt.sci_int) {
   8.377 +		acpi_sci_ioapic_setup(intsrc->global_irq,
   8.378 +				      intsrc->flags.polarity,
   8.379 +				      intsrc->flags.trigger);
   8.380 +		return 0;
   8.381 +	}
   8.382 +
   8.383 +	if (acpi_skip_timer_override &&
   8.384 +	    intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
   8.385 +		printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
   8.386 +		return 0;
   8.387 +	}
   8.388 +
   8.389 +	mp_override_legacy_irq(intsrc->bus_irq,
   8.390 +			       intsrc->flags.polarity,
   8.391 +			       intsrc->flags.trigger, intsrc->global_irq);
   8.392 +
   8.393 +	return 0;
   8.394 +}
   8.395 +
   8.396 +static int __init
   8.397 +acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
   8.398 +{
   8.399 +	struct acpi_table_nmi_src *nmi_src = NULL;
   8.400 +
   8.401 +	nmi_src = (struct acpi_table_nmi_src *)header;
   8.402 +
   8.403 +	if (BAD_MADT_ENTRY(nmi_src, end))
   8.404 +		return -EINVAL;
   8.405 +
   8.406 +	acpi_table_print_madt_entry(header);
   8.407 +
    8.408 +	/* TBD: Support nmi_src entries? */
   8.409 +
   8.410 +	return 0;
   8.411 +}
   8.412 +
   8.413 +#endif				/* CONFIG_X86_IO_APIC */
   8.414 +
   8.415 +/*
   8.416 + * acpi_pic_sci_set_trigger()
   8.417 + * 
   8.418 + * use ELCR to set PIC-mode trigger type for SCI
   8.419 + *
   8.420 + * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
   8.421 + * it may require Edge Trigger -- use "acpi_sci=edge"
   8.422 + *
    8.423 + * Ports 0x4d0-0x4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers
    8.424 + * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
    8.425 + * ELCR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0)
    8.426 + * ELCR2 is IRQs 8-15 (IRQ 8, 13 must be 0)
   8.427 + */
   8.428 +
   8.429 +void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
   8.430 +{
   8.431 +	unsigned int mask = 1 << irq;
   8.432 +	unsigned int old, new;
   8.433 +
   8.434 +	/* Real old ELCR mask */
   8.435 +	old = inb(0x4d0) | (inb(0x4d1) << 8);
   8.436 +
   8.437 +	/*
   8.438 +	 * If we use ACPI to set PCI irq's, then we should clear ELCR
   8.439 +	 * since we will set it correctly as we enable the PCI irq
   8.440 +	 * routing.
   8.441 +	 */
   8.442 +	new = acpi_noirq ? old : 0;
   8.443 +
   8.444 +	/*
   8.445 +	 * Update SCI information in the ELCR, it isn't in the PCI
   8.446 +	 * routing tables..
   8.447 +	 */
   8.448 +	switch (trigger) {
   8.449 +	case 1:		/* Edge - clear */
   8.450 +		new &= ~mask;
   8.451 +		break;
   8.452 +	case 3:		/* Level - set */
   8.453 +		new |= mask;
   8.454 +		break;
   8.455 +	}
   8.456 +
   8.457 +	if (old == new)
   8.458 +		return;
   8.459 +
   8.460 +	printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
   8.461 +	outb(new, 0x4d0);
   8.462 +	outb(new >> 8, 0x4d1);
   8.463 +}
   8.464 +
   8.465 +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
   8.466 +{
   8.467 +#ifdef CONFIG_X86_IO_APIC
   8.468 +	if (use_pci_vector() && !platform_legacy_irq(gsi))
   8.469 +		*irq = IO_APIC_VECTOR(gsi);
   8.470 +	else
   8.471 +#endif
   8.472 +		*irq = gsi_irq_sharing(gsi);
   8.473 +	return 0;
   8.474 +}
   8.475 +
   8.476 +/*
   8.477 + * success: return IRQ number (>=0)
   8.478 + * failure: return < 0
   8.479 + */
   8.480 +int acpi_register_gsi(u32 gsi, int triggering, int polarity)
   8.481 +{
   8.482 +	unsigned int irq;
   8.483 +	unsigned int plat_gsi = gsi;
   8.484 +
   8.485 +#ifdef CONFIG_PCI
   8.486 +	/*
   8.487 +	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
   8.488 +	 */
   8.489 +	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
   8.490 +		extern void eisa_set_level_irq(unsigned int irq);
   8.491 +
   8.492 +		if (triggering == ACPI_LEVEL_SENSITIVE)
   8.493 +			eisa_set_level_irq(gsi);
   8.494 +	}
   8.495 +#endif
   8.496 +
   8.497 +#ifdef CONFIG_X86_IO_APIC
   8.498 +	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
   8.499 +		plat_gsi = mp_register_gsi(gsi, triggering, polarity);
   8.500 +	}
   8.501 +#endif
   8.502 +	acpi_gsi_to_irq(plat_gsi, &irq);
   8.503 +	return irq;
   8.504 +}
   8.505 +
   8.506 +EXPORT_SYMBOL(acpi_register_gsi);
   8.507 +
   8.508 +/*
   8.509 + *  ACPI based hotplug support for CPU
   8.510 + */
   8.511 +#ifdef CONFIG_ACPI_HOTPLUG_CPU
   8.512 +int acpi_map_lsapic(acpi_handle handle, int *pcpu)
   8.513 +{
   8.514 +	/* TBD */
   8.515 +	return -EINVAL;
   8.516 +}
   8.517 +
   8.518 +EXPORT_SYMBOL(acpi_map_lsapic);
   8.519 +
   8.520 +int acpi_unmap_lsapic(int cpu)
   8.521 +{
   8.522 +	/* TBD */
   8.523 +	return -EINVAL;
   8.524 +}
   8.525 +
   8.526 +EXPORT_SYMBOL(acpi_unmap_lsapic);
   8.527 +#endif				/* CONFIG_ACPI_HOTPLUG_CPU */
   8.528 +
   8.529 +int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
   8.530 +{
   8.531 +	/* TBD */
   8.532 +	return -EINVAL;
   8.533 +}
   8.534 +
   8.535 +EXPORT_SYMBOL(acpi_register_ioapic);
   8.536 +
   8.537 +int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
   8.538 +{
   8.539 +	/* TBD */
   8.540 +	return -EINVAL;
   8.541 +}
   8.542 +
   8.543 +EXPORT_SYMBOL(acpi_unregister_ioapic);
   8.544 +
   8.545 +static unsigned long __init
   8.546 +acpi_scan_rsdp(unsigned long start, unsigned long length)
   8.547 +{
   8.548 +	unsigned long offset = 0;
   8.549 +	unsigned long sig_len = sizeof("RSD PTR ") - 1;
   8.550 +	unsigned long vstart = (unsigned long)isa_bus_to_virt(start);
   8.551 +
   8.552 +	/*
   8.553 +	 * Scan all 16-byte boundaries of the physical memory region for the
   8.554 +	 * RSDP signature.
   8.555 +	 */
   8.556 +	for (offset = 0; offset < length; offset += 16) {
   8.557 +		if (strncmp((char *)(vstart + offset), "RSD PTR ", sig_len))
   8.558 +			continue;
   8.559 +		return (start + offset);
   8.560 +	}
   8.561 +
   8.562 +	return 0;
   8.563 +}
   8.564 +
   8.565 +static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
   8.566 +{
   8.567 +	struct acpi_table_sbf *sb;
   8.568 +
   8.569 +	if (!phys_addr || !size)
   8.570 +		return -EINVAL;
   8.571 +
   8.572 +	sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size);
   8.573 +	if (!sb) {
   8.574 +		printk(KERN_WARNING PREFIX "Unable to map SBF\n");
   8.575 +		return -ENODEV;
   8.576 +	}
   8.577 +
   8.578 +	sbf_port = sb->sbf_cmos;	/* Save CMOS port */
   8.579 +
   8.580 +	return 0;
   8.581 +}
   8.582 +
   8.583 +#ifdef CONFIG_HPET_TIMER
   8.584 +
   8.585 +static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
   8.586 +{
   8.587 +	struct acpi_table_hpet *hpet_tbl;
   8.588 +
   8.589 +	if (!phys || !size)
   8.590 +		return -EINVAL;
   8.591 +
   8.592 +	hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size);
   8.593 +	if (!hpet_tbl) {
   8.594 +		printk(KERN_WARNING PREFIX "Unable to map HPET\n");
   8.595 +		return -ENODEV;
   8.596 +	}
   8.597 +
   8.598 +	if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
   8.599 +		printk(KERN_WARNING PREFIX "HPET timers must be located in "
   8.600 +		       "memory.\n");
   8.601 +		return -1;
   8.602 +	}
   8.603 +#ifdef	CONFIG_X86_64
   8.604 +	vxtime.hpet_address = hpet_tbl->addr.addrl |
   8.605 +	    ((long)hpet_tbl->addr.addrh << 32);
   8.606 +
   8.607 +	printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
   8.608 +	       hpet_tbl->id, vxtime.hpet_address);
   8.609 +#else				/* X86 */
   8.610 +	{
   8.611 +		extern unsigned long hpet_address;
   8.612 +
   8.613 +		hpet_address = hpet_tbl->addr.addrl;
   8.614 +		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
   8.615 +		       hpet_tbl->id, hpet_address);
   8.616 +	}
   8.617 +#endif				/* X86 */
   8.618 +
   8.619 +	return 0;
   8.620 +}
   8.621 +#else
   8.622 +#define	acpi_parse_hpet	NULL
   8.623 +#endif
   8.624 +
   8.625 +#ifdef CONFIG_X86_PM_TIMER
   8.626 +extern u32 pmtmr_ioport;
   8.627 +#endif
   8.628 +
   8.629 +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
   8.630 +{
   8.631 +	struct fadt_descriptor *fadt = NULL;
   8.632 +
   8.633 +	fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size);
   8.634 +	if (!fadt) {
   8.635 +		printk(KERN_WARNING PREFIX "Unable to map FADT\n");
   8.636 +		return 0;
   8.637 +	}
   8.638 +	/* initialize sci_int early for INT_SRC_OVR MADT parsing */
   8.639 +	acpi_fadt.sci_int = fadt->sci_int;
   8.640 +
   8.641 +	/* initialize rev and apic_phys_dest_mode for x86_64 genapic */
   8.642 +	acpi_fadt.revision = fadt->revision;
   8.643 +	acpi_fadt.force_apic_physical_destination_mode =
   8.644 +	    fadt->force_apic_physical_destination_mode;
   8.645 +
   8.646 +#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN)
   8.647 +	/* detect the location of the ACPI PM Timer */
   8.648 +	if (fadt->revision >= FADT2_REVISION_ID) {
   8.649 +		/* FADT rev. 2 */
   8.650 +		if (fadt->xpm_tmr_blk.address_space_id !=
   8.651 +		    ACPI_ADR_SPACE_SYSTEM_IO)
   8.652 +			return 0;
   8.653 +
   8.654 +		pmtmr_ioport = fadt->xpm_tmr_blk.address;
   8.655 +		/*
   8.656 +		 * "X" fields are optional extensions to the original V1.0
   8.657 +		 * fields, so we must selectively expand V1.0 fields if the
   8.658 +		 * corresponding X field is zero.
    8.659 +		 */
   8.660 +		if (!pmtmr_ioport)
   8.661 +			pmtmr_ioport = fadt->V1_pm_tmr_blk;
   8.662 +	} else {
   8.663 +		/* FADT rev. 1 */
   8.664 +		pmtmr_ioport = fadt->V1_pm_tmr_blk;
   8.665 +	}
   8.666 +	if (pmtmr_ioport)
   8.667 +		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
   8.668 +		       pmtmr_ioport);
   8.669 +#endif
   8.670 +	return 0;
   8.671 +}
   8.672 +
   8.673 +unsigned long __init acpi_find_rsdp(void)
   8.674 +{
   8.675 +	unsigned long rsdp_phys = 0;
   8.676 +
   8.677 +	if (efi_enabled) {
   8.678 +		if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
   8.679 +			return efi.acpi20;
   8.680 +		else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
   8.681 +			return efi.acpi;
   8.682 +	}
   8.683 +	/*
   8.684 +	 * Scan memory looking for the RSDP signature. First search EBDA (low
   8.685 +	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
   8.686 +	 */
   8.687 +	rsdp_phys = acpi_scan_rsdp(0, 0x400);
   8.688 +	if (!rsdp_phys)
   8.689 +		rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
   8.690 +
   8.691 +	return rsdp_phys;
   8.692 +}
   8.693 +
   8.694 +#ifdef	CONFIG_X86_LOCAL_APIC
   8.695 +/*
   8.696 + * Parse LAPIC entries in MADT
   8.697 + * returns 0 on success, < 0 on error
   8.698 + */
   8.699 +static int __init acpi_parse_madt_lapic_entries(void)
   8.700 +{
   8.701 +	int count;
   8.702 +
   8.703 +	if (!cpu_has_apic)
   8.704 +		return -ENODEV;
   8.705 +
   8.706 +	/* 
   8.707 +	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
    8.708 +	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
   8.709 +	 */
   8.710 +
   8.711 +	count =
   8.712 +	    acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR,
   8.713 +				  acpi_parse_lapic_addr_ovr, 0);
   8.714 +	if (count < 0) {
   8.715 +		printk(KERN_ERR PREFIX
   8.716 +		       "Error parsing LAPIC address override entry\n");
   8.717 +		return count;
   8.718 +	}
   8.719 +
   8.720 +	mp_register_lapic_address(acpi_lapic_addr);
   8.721 +
   8.722 +	count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
   8.723 +				      MAX_APICS);
   8.724 +	if (!count) {
   8.725 +		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
   8.726 +		/* TBD: Cleanup to allow fallback to MPS */
   8.727 +		return -ENODEV;
   8.728 +	} else if (count < 0) {
   8.729 +		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
   8.730 +		/* TBD: Cleanup to allow fallback to MPS */
   8.731 +		return count;
   8.732 +	}
   8.733 +
   8.734 +	count =
   8.735 +	    acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
   8.736 +	if (count < 0) {
   8.737 +		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
   8.738 +		/* TBD: Cleanup to allow fallback to MPS */
   8.739 +		return count;
   8.740 +	}
   8.741 +	return 0;
   8.742 +}
   8.743 +#endif				/* CONFIG_X86_LOCAL_APIC */
   8.744 +
   8.745 +#ifdef	CONFIG_X86_IO_APIC
   8.746 +/*
   8.747 + * Parse IOAPIC related entries in MADT
   8.748 + * returns 0 on success, < 0 on error
   8.749 + */
   8.750 +static int __init acpi_parse_madt_ioapic_entries(void)
   8.751 +{
   8.752 +	int count;
   8.753 +
   8.754 +	/*
   8.755 +	 * ACPI interpreter is required to complete interrupt setup,
   8.756 +	 * so if it is off, don't enumerate the io-apics with ACPI.
   8.757 +	 * If MPS is present, it will handle them,
    8.758 +	 * otherwise the system will stay in PIC mode.
   8.759 +	 */
   8.760 +	if (acpi_disabled || acpi_noirq) {
   8.761 +		return -ENODEV;
   8.762 +	}
   8.763 +
   8.764 +	if (!cpu_has_apic) 
   8.765 +		return -ENODEV;
   8.766 +
   8.767 +	/*
   8.768 +	 * if "noapic" boot option, don't look for IO-APICs
   8.769 +	 */
   8.770 +	if (skip_ioapic_setup) {
   8.771 +		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
   8.772 +		       "due to 'noapic' option.\n");
   8.773 +		return -ENODEV;
   8.774 +	}
   8.775 +
   8.776 +	count =
   8.777 +	    acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic,
   8.778 +				  MAX_IO_APICS);
   8.779 +	if (!count) {
   8.780 +		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
   8.781 +		return -ENODEV;
   8.782 +	} else if (count < 0) {
   8.783 +		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
   8.784 +		return count;
   8.785 +	}
   8.786 +
   8.787 +	count =
   8.788 +	    acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr,
   8.789 +				  NR_IRQ_VECTORS);
   8.790 +	if (count < 0) {
   8.791 +		printk(KERN_ERR PREFIX
   8.792 +		       "Error parsing interrupt source overrides entry\n");
   8.793 +		/* TBD: Cleanup to allow fallback to MPS */
   8.794 +		return count;
   8.795 +	}
   8.796 +
   8.797 +	/*
    8.798 +	 * If the BIOS did not supply an INT_SRC_OVR for the SCI,
   8.799 +	 * pretend we got one so we can set the SCI flags.
   8.800 +	 */
   8.801 +	if (!acpi_sci_override_gsi)
   8.802 +		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
   8.803 +
    8.804 +	/* Fill in identity legacy mappings where there is no override */
   8.805 +	mp_config_acpi_legacy_irqs();
   8.806 +
   8.807 +	count =
   8.808 +	    acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src,
   8.809 +				  NR_IRQ_VECTORS);
   8.810 +	if (count < 0) {
   8.811 +		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
   8.812 +		/* TBD: Cleanup to allow fallback to MPS */
   8.813 +		return count;
   8.814 +	}
   8.815 +
   8.816 +	return 0;
   8.817 +}
   8.818 +#else
   8.819 +static inline int acpi_parse_madt_ioapic_entries(void)
   8.820 +{
   8.821 +	return -1;
   8.822 +}
   8.823 +#endif	/* !CONFIG_X86_IO_APIC */
   8.824 +
   8.825 +static void __init acpi_process_madt(void)
   8.826 +{
   8.827 +#ifdef CONFIG_X86_LOCAL_APIC
   8.828 +	int count, error;
   8.829 +
   8.830 +	count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
   8.831 +	if (count >= 1) {
   8.832 +
   8.833 +		/*
   8.834 +		 * Parse MADT LAPIC entries
   8.835 +		 */
   8.836 +		error = acpi_parse_madt_lapic_entries();
   8.837 +		if (!error) {
   8.838 +			acpi_lapic = 1;
   8.839 +
   8.840 +#ifdef CONFIG_X86_GENERICARCH
   8.841 +			generic_bigsmp_probe();
   8.842 +#endif
   8.843 +			/*
   8.844 +			 * Parse MADT IO-APIC entries
   8.845 +			 */
   8.846 +			error = acpi_parse_madt_ioapic_entries();
   8.847 +			if (!error) {
   8.848 +				acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
   8.849 +				acpi_irq_balance_set(NULL);
   8.850 +				acpi_ioapic = 1;
   8.851 +
   8.852 +				smp_found_config = 1;
   8.853 +				clustered_apic_check();
   8.854 +			}
   8.855 +		}
   8.856 +		if (error == -EINVAL) {
   8.857 +			/*
   8.858 +			 * Dell Precision Workstation 410, 610 come here.
   8.859 +			 */
   8.860 +			printk(KERN_ERR PREFIX
   8.861 +			       "Invalid BIOS MADT, disabling ACPI\n");
   8.862 +			disable_acpi();
   8.863 +		}
   8.864 +	}
   8.865 +#endif
   8.866 +	return;
   8.867 +}
   8.868 +
   8.869 +extern int acpi_force;
   8.870 +
   8.871 +#ifdef __i386__
   8.872 +
   8.873 +static int __init disable_acpi_irq(struct dmi_system_id *d)
   8.874 +{
   8.875 +	if (!acpi_force) {
   8.876 +		printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
   8.877 +		       d->ident);
   8.878 +		acpi_noirq_set();
   8.879 +	}
   8.880 +	return 0;
   8.881 +}
   8.882 +
   8.883 +static int __init disable_acpi_pci(struct dmi_system_id *d)
   8.884 +{
   8.885 +	if (!acpi_force) {
   8.886 +		printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
   8.887 +		       d->ident);
   8.888 +		acpi_disable_pci();
   8.889 +	}
   8.890 +	return 0;
   8.891 +}
   8.892 +
   8.893 +static int __init dmi_disable_acpi(struct dmi_system_id *d)
   8.894 +{
   8.895 +	if (!acpi_force) {
   8.896 +		printk(KERN_NOTICE "%s detected: acpi off\n", d->ident);
   8.897 +		disable_acpi();
   8.898 +	} else {
   8.899 +		printk(KERN_NOTICE
   8.900 +		       "Warning: DMI blacklist says broken, but acpi forced\n");
   8.901 +	}
   8.902 +	return 0;
   8.903 +}
   8.904 +
   8.905 +/*
   8.906 + * Limit ACPI to CPU enumeration for HT
   8.907 + */
   8.908 +static int __init force_acpi_ht(struct dmi_system_id *d)
   8.909 +{
   8.910 +	if (!acpi_force) {
   8.911 +		printk(KERN_NOTICE "%s detected: force use of acpi=ht\n",
   8.912 +		       d->ident);
   8.913 +		disable_acpi();
   8.914 +		acpi_ht = 1;
   8.915 +	} else {
   8.916 +		printk(KERN_NOTICE
   8.917 +		       "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
   8.918 +	}
   8.919 +	return 0;
   8.920 +}
   8.921 +
   8.922 +/*
   8.923 + * If your system is blacklisted here, but you find that acpi=force
   8.924 + * works for you, please contact acpi-devel@sourceforge.net
   8.925 + */
   8.926 +static struct dmi_system_id __initdata acpi_dmi_table[] = {
   8.927 +	/*
   8.928 +	 * Boxes that need ACPI disabled
   8.929 +	 */
   8.930 +	{
   8.931 +	 .callback = dmi_disable_acpi,
   8.932 +	 .ident = "IBM Thinkpad",
   8.933 +	 .matches = {
   8.934 +		     DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
   8.935 +		     DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
   8.936 +		     },
   8.937 +	 },
   8.938 +
   8.939 +	/*
   8.940 +	 * Boxes that need acpi=ht
   8.941 +	 */
   8.942 +	{
   8.943 +	 .callback = force_acpi_ht,
   8.944 +	 .ident = "FSC Primergy T850",
   8.945 +	 .matches = {
   8.946 +		     DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
   8.947 +		     DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
   8.948 +		     },
   8.949 +	 },
   8.950 +	{
   8.951 +	 .callback = force_acpi_ht,
   8.952 +	 .ident = "DELL GX240",
   8.953 +	 .matches = {
   8.954 +		     DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
   8.955 +		     DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
   8.956 +		     },
   8.957 +	 },
   8.958 +	{
   8.959 +	 .callback = force_acpi_ht,
   8.960 +	 .ident = "HP VISUALIZE NT Workstation",
   8.961 +	 .matches = {
   8.962 +		     DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
   8.963 +		     DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
   8.964 +		     },
   8.965 +	 },
   8.966 +	{
   8.967 +	 .callback = force_acpi_ht,
   8.968 +	 .ident = "Compaq Workstation W8000",
   8.969 +	 .matches = {
   8.970 +		     DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
   8.971 +		     DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
   8.972 +		     },
   8.973 +	 },
   8.974 +	{
   8.975 +	 .callback = force_acpi_ht,
   8.976 +	 .ident = "ASUS P4B266",
   8.977 +	 .matches = {
   8.978 +		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
   8.979 +		     DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
   8.980 +		     },
   8.981 +	 },
   8.982 +	{
   8.983 +	 .callback = force_acpi_ht,
   8.984 +	 .ident = "ASUS P2B-DS",
   8.985 +	 .matches = {
   8.986 +		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
   8.987 +		     DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
   8.988 +		     },
   8.989 +	 },
   8.990 +	{
   8.991 +	 .callback = force_acpi_ht,
   8.992 +	 .ident = "ASUS CUR-DLS",
   8.993 +	 .matches = {
   8.994 +		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
   8.995 +		     DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
   8.996 +		     },
   8.997 +	 },
   8.998 +	{
   8.999 +	 .callback = force_acpi_ht,
  8.1000 +	 .ident = "ABIT i440BX-W83977",
  8.1001 +	 .matches = {
  8.1002 +		     DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
  8.1003 +		     DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
  8.1004 +		     },
  8.1005 +	 },
  8.1006 +	{
  8.1007 +	 .callback = force_acpi_ht,
  8.1008 +	 .ident = "IBM Bladecenter",
  8.1009 +	 .matches = {
  8.1010 +		     DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
  8.1011 +		     DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
  8.1012 +		     },
  8.1013 +	 },
  8.1014 +	{
  8.1015 +	 .callback = force_acpi_ht,
  8.1016 +	 .ident = "IBM eServer xSeries 360",
  8.1017 +	 .matches = {
  8.1018 +		     DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
  8.1019 +		     DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
  8.1020 +		     },
  8.1021 +	 },
  8.1022 +	{
  8.1023 +	 .callback = force_acpi_ht,
  8.1024 +	 .ident = "IBM eserver xSeries 330",
  8.1025 +	 .matches = {
  8.1026 +		     DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
  8.1027 +		     DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
  8.1028 +		     },
  8.1029 +	 },
  8.1030 +	{
  8.1031 +	 .callback = force_acpi_ht,
  8.1032 +	 .ident = "IBM eserver xSeries 440",
  8.1033 +	 .matches = {
  8.1034 +		     DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
  8.1035 +		     DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
  8.1036 +		     },
  8.1037 +	 },
  8.1038 +
  8.1039 +	/*
  8.1040 +	 * Boxes that need ACPI PCI IRQ routing disabled
  8.1041 +	 */
  8.1042 +	{
  8.1043 +	 .callback = disable_acpi_irq,
  8.1044 +	 .ident = "ASUS A7V",
  8.1045 +	 .matches = {
  8.1046 +		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
  8.1047 +		     DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
  8.1048 +		     /* newer BIOS, Revision 1011, does work */
  8.1049 +		     DMI_MATCH(DMI_BIOS_VERSION,
  8.1050 +			       "ASUS A7V ACPI BIOS Revision 1007"),
  8.1051 +		     },
  8.1052 +	 },
  8.1053 +
  8.1054 +	/*
  8.1055 +	 * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
  8.1056 +	 */
  8.1057 +	{			/* _BBN 0 bug */
  8.1058 +	 .callback = disable_acpi_pci,
  8.1059 +	 .ident = "ASUS PR-DLS",
  8.1060 +	 .matches = {
  8.1061 +		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
  8.1062 +		     DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
  8.1063 +		     DMI_MATCH(DMI_BIOS_VERSION,
  8.1064 +			       "ASUS PR-DLS ACPI BIOS Revision 1010"),
  8.1065 +		     DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
  8.1066 +		     },
  8.1067 +	 },
  8.1068 +	{
  8.1069 +	 .callback = disable_acpi_pci,
  8.1070 +	 .ident = "Acer TravelMate 36x Laptop",
  8.1071 +	 .matches = {
  8.1072 +		     DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
  8.1073 +		     DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
  8.1074 +		     },
  8.1075 +	 },
  8.1076 +	{}
  8.1077 +};
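
dmi_check_system() walks this table and fires an entry's .callback once every populated .matches slot is found as a substring of the corresponding firmware DMI string; a single miss disqualifies the entry. A hedged sketch of that per-entry rule (shown here via the dmi_get_system_info() accessor; the in-tree matcher reads its internal ident table directly):

	/* Sketch only: how one dmi_system_id entry is matched. */
	static int dmi_entry_matches(const struct dmi_system_id *d)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(d->matches); i++) {
			int slot = d->matches[i].slot;
			const char *s;

			if (slot == DMI_NONE)
				break;		/* end of populated slots */
			s = dmi_get_system_info(slot);
			if (!s || !strstr(s, d->matches[i].substr))
				return 0;	/* one miss fails the entry */
		}
		return 1;	/* all populated slots matched: run callback */
	}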
  8.1078 +
  8.1079 +#endif				/* __i386__ */
  8.1080 +
  8.1081 +/*
  8.1082 + * acpi_boot_table_init() and acpi_boot_init()
  8.1083 + *  called from setup_arch(), always.
  8.1084 + *	1. checksums all tables
  8.1085 + *	2. enumerates lapics
  8.1086 + *	3. enumerates io-apics
  8.1087 + *
  8.1088 + * acpi_table_init() is separate to allow reading SRAT without
  8.1089 + * other side effects.
  8.1090 + *
  8.1091 + * side effects of acpi_boot_init:
  8.1092 + *	acpi_lapic = 1 if LAPIC found
  8.1093 + *	acpi_ioapic = 1 if IOAPIC found
  8.1094 + *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
  8.1095 + *	if acpi_blacklisted() acpi_disabled = 1;
  8.1096 + *	acpi_irq_model=...
  8.1097 + *	...
  8.1098 + *
  8.1099 + * return value: (currently ignored)
  8.1100 + *	0: success
  8.1101 + *	!0: failure
  8.1102 + */
  8.1103 +
  8.1104 +int __init acpi_boot_table_init(void)
  8.1105 +{
  8.1106 +	int error;
  8.1107 +
  8.1108 +#ifdef __i386__
  8.1109 +	dmi_check_system(acpi_dmi_table);
  8.1110 +#endif
  8.1111 +
  8.1112 +	/*
  8.1113 +	 * If acpi_disabled, bail out
  8.1114 +	 * One exception: acpi=ht continues far enough to enumerate LAPICs
  8.1115 +	 */
  8.1116 +	if (acpi_disabled && !acpi_ht)
  8.1117 +		return 1;
  8.1118 +
  8.1119 +	/* 
  8.1120 +	 * Initialize the ACPI boot-time table parser.
  8.1121 +	 */
  8.1122 +	error = acpi_table_init();
  8.1123 +	if (error) {
  8.1124 +		disable_acpi();
  8.1125 +		return error;
  8.1126 +	}
  8.1127 +
  8.1128 +	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  8.1129 +
  8.1130 +	/*
  8.1131 +	 * blacklist may disable ACPI entirely
  8.1132 +	 */
  8.1133 +	error = acpi_blacklisted();
  8.1134 +	if (error) {
  8.1135 +		if (acpi_force) {
  8.1136 +			printk(KERN_WARNING PREFIX "acpi=force override\n");
  8.1137 +		} else {
  8.1138 +			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
  8.1139 +			disable_acpi();
  8.1140 +			return error;
  8.1141 +		}
  8.1142 +	}
  8.1143 +
  8.1144 +	return 0;
  8.1145 +}
  8.1146 +
  8.1147 +int __init acpi_boot_init(void)
  8.1148 +{
  8.1149 +	/*
  8.1150 +	 * If acpi_disabled, bail out
  8.1151 +	 * One exception: acpi=ht continues far enough to enumerate LAPICs
  8.1152 +	 */
  8.1153 +	if (acpi_disabled && !acpi_ht)
  8.1154 +		return 1;
  8.1155 +
  8.1156 +	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  8.1157 +
  8.1158 +	/*
  8.1159 +	 * set sci_int and PM timer address
  8.1160 +	 */
  8.1161 +	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
  8.1162 +
  8.1163 +	/*
  8.1164 +	 * Process the Multiple APIC Description Table (MADT), if present
  8.1165 +	 */
  8.1166 +	acpi_process_madt();
  8.1167 +
  8.1168 +	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
  8.1169 +
  8.1170 +	return 0;
  8.1171 +}
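
For orientation, the two entry points documented above are invoked from setup_arch() in this order (a sketch of the caller side, not the verbatim setup code):

	acpi_boot_table_init();	/* map + checksum tables; may disable ACPI */
	/* ... memory setup may consume SRAT in between ... */
	acpi_boot_init();	/* SBF, FADT, MADT (LAPICs/IO-APICs), HPET */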
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/arch/i386/kernel/apic-xen.c	Mon Jun 04 10:05:28 2007 +0100
     9.3 @@ -0,0 +1,155 @@
     9.4 +/*
     9.5 + *	Local APIC handling, local APIC timers
     9.6 + *
     9.7 + *	(c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
     9.8 + *
     9.9 + *	Fixes
    9.10 + *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
    9.11 + *					thanks to Eric Gilmore
    9.12 + *					and Rolf G. Tews
    9.13 + *					for testing these extensively.
    9.14 + *	Maciej W. Rozycki	:	Various updates and fixes.
    9.15 + *	Mikael Pettersson	:	Power Management for UP-APIC.
    9.16 + *	Pavel Machek and
    9.17 + *	Mikael Pettersson	:	PM converted to driver model.
    9.18 + */
    9.19 +
    9.20 +#include <linux/init.h>
    9.21 +
    9.22 +#include <linux/mm.h>
    9.23 +#include <linux/delay.h>
    9.24 +#include <linux/bootmem.h>
    9.25 +#include <linux/smp_lock.h>
    9.26 +#include <linux/interrupt.h>
    9.27 +#include <linux/mc146818rtc.h>
    9.28 +#include <linux/kernel_stat.h>
    9.29 +#include <linux/sysdev.h>
    9.30 +#include <linux/cpu.h>
    9.31 +#include <linux/module.h>
    9.32 +
    9.33 +#include <asm/atomic.h>
    9.34 +#include <asm/smp.h>
    9.35 +#include <asm/mtrr.h>
    9.36 +#include <asm/mpspec.h>
    9.37 +#include <asm/desc.h>
    9.38 +#include <asm/arch_hooks.h>
    9.39 +#include <asm/hpet.h>
    9.40 +#include <asm/i8253.h>
    9.41 +#include <asm/nmi.h>
    9.42 +
    9.43 +#include <mach_apic.h>
    9.44 +#include <mach_apicdef.h>
    9.45 +#include <mach_ipi.h>
    9.46 +
    9.47 +#include "io_ports.h"
    9.48 +
    9.49 +#ifndef CONFIG_XEN
    9.50 +/*
     9.51 + * cpu_mask that denotes the CPUs that need the timer interrupt coming in as
    9.52 + * IPIs in place of local APIC timers
    9.53 + */
    9.54 +static cpumask_t timer_bcast_ipi;
    9.55 +#endif
    9.56 +
    9.57 +/*
    9.58 + * Knob to control our willingness to enable the local APIC.
    9.59 + */
    9.60 +int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
    9.61 +
    9.62 +/*
    9.63 + * Debug level
    9.64 + */
    9.65 +int apic_verbosity;
    9.66 +
    9.67 +#ifndef CONFIG_XEN
    9.68 +static int modern_apic(void)
    9.69 +{
    9.70 +	unsigned int lvr, version;
    9.71 +	/* AMD systems use old APIC versions, so check the CPU */
    9.72 +	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
    9.73 +		boot_cpu_data.x86 >= 0xf)
    9.74 +		return 1;
    9.75 +	lvr = apic_read(APIC_LVR);
    9.76 +	version = GET_APIC_VERSION(lvr);
    9.77 +	return version >= 0x14;
    9.78 +}
    9.79 +#endif /* !CONFIG_XEN */
    9.80 +
    9.81 +/*
    9.82 + * 'what should we do if we get a hw irq event on an illegal vector'.
    9.83 + * each architecture has to answer this themselves.
    9.84 + */
    9.85 +void ack_bad_irq(unsigned int irq)
    9.86 +{
    9.87 +	printk("unexpected IRQ trap at vector %02x\n", irq);
    9.88 +	/*
    9.89 +	 * Currently unexpected vectors happen only on SMP and APIC.
    9.90 +	 * We _must_ ack these because every local APIC has only N
    9.91 +	 * irq slots per priority level, and a 'hanging, unacked' IRQ
    9.92 +	 * holds up an irq slot - in excessive cases (when multiple
    9.93 +	 * unexpected vectors occur) that might lock up the APIC
    9.94 +	 * completely.
    9.95 +	 * But only ack when the APIC is enabled -AK
    9.96 +	 */
    9.97 +	if (cpu_has_apic)
    9.98 +		ack_APIC_irq();
    9.99 +}
   9.100 +
   9.101 +int get_physical_broadcast(void)
   9.102 +{
    9.103 +	return 0xff;
   9.104 +}
   9.105 +
   9.106 +#ifndef CONFIG_XEN
   9.107 +#ifndef CONFIG_SMP
   9.108 +static void up_apic_timer_interrupt_call(struct pt_regs *regs)
   9.109 +{
   9.110 +	int cpu = smp_processor_id();
   9.111 +
   9.112 +	/*
   9.113 +	 * the NMI deadlock-detector uses this.
   9.114 +	 */
   9.115 +	per_cpu(irq_stat, cpu).apic_timer_irqs++;
   9.116 +
   9.117 +	smp_local_timer_interrupt(regs);
   9.118 +}
   9.119 +#endif
   9.120 +
   9.121 +void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
   9.122 +{
   9.123 +	cpumask_t mask;
   9.124 +
   9.125 +	cpus_and(mask, cpu_online_map, timer_bcast_ipi);
   9.126 +	if (!cpus_empty(mask)) {
   9.127 +#ifdef CONFIG_SMP
   9.128 +		send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
   9.129 +#else
   9.130 +		/*
   9.131 +		 * We can directly call the apic timer interrupt handler
    9.132 +		 * in the UP case, minus all IRQ-related bookkeeping.
   9.133 +		 */
   9.134 +		up_apic_timer_interrupt_call(regs);
   9.135 +#endif
   9.136 +	}
   9.137 +}
   9.138 +#endif
   9.139 +
   9.140 +int setup_profiling_timer(unsigned int multiplier)
   9.141 +{
   9.142 +	return -EINVAL;
   9.143 +}
   9.144 +
   9.145 +/*
   9.146 + * This initializes the IO-APIC and APIC hardware if this is
   9.147 + * a UP kernel.
   9.148 + */
   9.149 +int __init APIC_init_uniprocessor (void)
   9.150 +{
   9.151 +#ifdef CONFIG_X86_IO_APIC
   9.152 +	if (smp_found_config)
   9.153 +		if (!skip_ioapic_setup && nr_ioapics)
   9.154 +			setup_IO_APIC();
   9.155 +#endif
   9.156 +
   9.157 +	return 0;
   9.158 +}
    10.1 --- a/arch/i386/kernel/asm-offsets.c	Mon Jun 04 10:05:24 2007 +0100
    10.2 +++ b/arch/i386/kernel/asm-offsets.c	Mon Jun 04 10:05:28 2007 +0100
    10.3 @@ -66,9 +66,14 @@ void foo(void)
    10.4  	OFFSET(pbe_orig_address, pbe, orig_address);
    10.5  	OFFSET(pbe_next, pbe, next);
    10.6  
    10.7 +#ifndef CONFIG_X86_NO_TSS
    10.8  	/* Offset from the sysenter stack to tss.esp0 */
    10.9 -	DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
   10.10 +	DEFINE(SYSENTER_stack_esp0, offsetof(struct tss_struct, esp0) -
   10.11  		 sizeof(struct tss_struct));
   10.12 +#else
   10.13 +	/* sysenter stack points directly to esp0 */
   10.14 +	DEFINE(SYSENTER_stack_esp0, 0);
   10.15 +#endif
   10.16  
   10.17  	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
   10.18  	DEFINE(VDSO_PRELINK, VDSO_PRELINK);
    11.1 --- a/arch/i386/kernel/cpu/Makefile	Mon Jun 04 10:05:24 2007 +0100
    11.2 +++ b/arch/i386/kernel/cpu/Makefile	Mon Jun 04 10:05:28 2007 +0100
    11.3 @@ -17,3 +17,8 @@ obj-$(CONFIG_X86_MCE)	+=	mcheck/
    11.4  
    11.5  obj-$(CONFIG_MTRR)	+= 	mtrr/
    11.6  obj-$(CONFIG_CPU_FREQ)	+=	cpufreq/
    11.7 +
    11.8 +ifdef CONFIG_XEN
    11.9 +include $(srctree)/scripts/Makefile.xen
   11.10 +obj-y := $(call cherrypickxen, $(obj-y), $(src))
   11.11 +endif
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/arch/i386/kernel/cpu/common-xen.c	Mon Jun 04 10:05:28 2007 +0100
    12.3 @@ -0,0 +1,743 @@
    12.4 +#include <linux/init.h>
    12.5 +#include <linux/string.h>
    12.6 +#include <linux/delay.h>
    12.7 +#include <linux/smp.h>
    12.8 +#include <linux/module.h>
    12.9 +#include <linux/percpu.h>
   12.10 +#include <linux/bootmem.h>
   12.11 +#include <asm/semaphore.h>
   12.12 +#include <asm/processor.h>
   12.13 +#include <asm/i387.h>
   12.14 +#include <asm/msr.h>
   12.15 +#include <asm/io.h>
   12.16 +#include <asm/mmu_context.h>
   12.17 +#include <asm/mtrr.h>
   12.18 +#include <asm/mce.h>
   12.19 +#ifdef CONFIG_X86_LOCAL_APIC
   12.20 +#include <asm/mpspec.h>
   12.21 +#include <asm/apic.h>
   12.22 +#include <mach_apic.h>
   12.23 +#else
   12.24 +#ifdef CONFIG_XEN
   12.25 +#define phys_pkg_id(a,b) a
   12.26 +#endif
   12.27 +#endif
   12.28 +#include <asm/hypervisor.h>
   12.29 +
   12.30 +#include "cpu.h"
   12.31 +
   12.32 +DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
   12.33 +EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
   12.34 +
   12.35 +#ifndef CONFIG_XEN
   12.36 +DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
   12.37 +EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
   12.38 +#endif
   12.39 +
   12.40 +static int cachesize_override __cpuinitdata = -1;
   12.41 +static int disable_x86_fxsr __cpuinitdata;
   12.42 +static int disable_x86_serial_nr __cpuinitdata = 1;
   12.43 +static int disable_x86_sep __cpuinitdata;
   12.44 +
   12.45 +struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
   12.46 +
   12.47 +extern int disable_pse;
   12.48 +
   12.49 +static void default_init(struct cpuinfo_x86 * c)
   12.50 +{
   12.51 +	/* Not much we can do here... */
   12.52 +	/* Check if at least it has cpuid */
   12.53 +	if (c->cpuid_level == -1) {
   12.54 +		/* No cpuid. It must be an ancient CPU */
   12.55 +		if (c->x86 == 4)
   12.56 +			strcpy(c->x86_model_id, "486");
   12.57 +		else if (c->x86 == 3)
   12.58 +			strcpy(c->x86_model_id, "386");
   12.59 +	}
   12.60 +}
   12.61 +
   12.62 +static struct cpu_dev default_cpu = {
   12.63 +	.c_init	= default_init,
   12.64 +	.c_vendor = "Unknown",
   12.65 +};
   12.66 +static struct cpu_dev * this_cpu = &default_cpu;
   12.67 +
   12.68 +static int __init cachesize_setup(char *str)
   12.69 +{
   12.70 +	get_option (&str, &cachesize_override);
   12.71 +	return 1;
   12.72 +}
   12.73 +__setup("cachesize=", cachesize_setup);
   12.74 +
   12.75 +int __cpuinit get_model_name(struct cpuinfo_x86 *c)
   12.76 +{
   12.77 +	unsigned int *v;
   12.78 +	char *p, *q;
   12.79 +
   12.80 +	if (cpuid_eax(0x80000000) < 0x80000004)
   12.81 +		return 0;
   12.82 +
   12.83 +	v = (unsigned int *) c->x86_model_id;
   12.84 +	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
   12.85 +	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
   12.86 +	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
   12.87 +	c->x86_model_id[48] = 0;
   12.88 +
   12.89 +	/* Intel chips right-justify this string for some dumb reason;
   12.90 +	   undo that brain damage */
   12.91 +	p = q = &c->x86_model_id[0];
   12.92 +	while ( *p == ' ' )
   12.93 +	     p++;
   12.94 +	if ( p != q ) {
   12.95 +	     while ( *p )
   12.96 +		  *q++ = *p++;
   12.97 +	     while ( q <= &c->x86_model_id[48] )
   12.98 +		  *q++ = '\0';	/* Zero-pad the rest */
   12.99 +	}
  12.100 +
  12.101 +	return 1;
  12.102 +}
  12.103 +
  12.104 +
  12.105 +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
  12.106 +{
  12.107 +	unsigned int n, dummy, ecx, edx, l2size;
  12.108 +
  12.109 +	n = cpuid_eax(0x80000000);
  12.110 +
  12.111 +	if (n >= 0x80000005) {
  12.112 +		cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
  12.113 +		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
  12.114 +			edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
  12.115 +		c->x86_cache_size=(ecx>>24)+(edx>>24);	
  12.116 +	}
  12.117 +
   12.118 +	if (n < 0x80000006)	/* Some chips just have a large L1. */
  12.119 +		return;
  12.120 +
  12.121 +	ecx = cpuid_ecx(0x80000006);
  12.122 +	l2size = ecx >> 16;
  12.123 +	
  12.124 +	/* do processor-specific cache resizing */
  12.125 +	if (this_cpu->c_size_cache)
  12.126 +		l2size = this_cpu->c_size_cache(c,l2size);
  12.127 +
  12.128 +	/* Allow user to override all this if necessary. */
  12.129 +	if (cachesize_override != -1)
  12.130 +		l2size = cachesize_override;
  12.131 +
  12.132 +	if ( l2size == 0 )
  12.133 +		return;		/* Again, no L2 cache is possible */
  12.134 +
  12.135 +	c->x86_cache_size = l2size;
  12.136 +
  12.137 +	printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
  12.138 +	       l2size, ecx & 0xFF);
  12.139 +}
  12.140 +
  12.141 +/* Naming convention should be: <Name> [(<Codename>)] */
   12.142 +/* This table is only used if init_<vendor>() below doesn't set it; */
  12.143 +/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
  12.144 +
  12.145 +/* Look up CPU names by table lookup. */
  12.146 +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
  12.147 +{
  12.148 +	struct cpu_model_info *info;
  12.149 +
  12.150 +	if ( c->x86_model >= 16 )
  12.151 +		return NULL;	/* Range check */
  12.152 +
  12.153 +	if (!this_cpu)
  12.154 +		return NULL;
  12.155 +
  12.156 +	info = this_cpu->c_models;
  12.157 +
  12.158 +	while (info && info->family) {
  12.159 +		if (info->family == c->x86)
  12.160 +			return info->model_names[c->x86_model];
  12.161 +		info++;
  12.162 +	}
  12.163 +	return NULL;		/* Not found */
  12.164 +}
  12.165 +
  12.166 +
  12.167 +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
  12.168 +{
  12.169 +	char *v = c->x86_vendor_id;
  12.170 +	int i;
  12.171 +	static int printed;
  12.172 +
  12.173 +	for (i = 0; i < X86_VENDOR_NUM; i++) {
  12.174 +		if (cpu_devs[i]) {
  12.175 +			if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
  12.176 +			    (cpu_devs[i]->c_ident[1] && 
  12.177 +			     !strcmp(v,cpu_devs[i]->c_ident[1]))) {
  12.178 +				c->x86_vendor = i;
  12.179 +				if (!early)
  12.180 +					this_cpu = cpu_devs[i];
  12.181 +				return;
  12.182 +			}
  12.183 +		}
  12.184 +	}
  12.185 +	if (!printed) {
  12.186 +		printed++;
  12.187 +		printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
  12.188 +		printk(KERN_ERR "CPU: Your system may be unstable.\n");
  12.189 +	}
  12.190 +	c->x86_vendor = X86_VENDOR_UNKNOWN;
  12.191 +	this_cpu = &default_cpu;
  12.192 +}
  12.193 +
  12.194 +
  12.195 +static int __init x86_fxsr_setup(char * s)
  12.196 +{
  12.197 +	disable_x86_fxsr = 1;
  12.198 +	return 1;
  12.199 +}
  12.200 +__setup("nofxsr", x86_fxsr_setup);
  12.201 +
  12.202 +
  12.203 +static int __init x86_sep_setup(char * s)
  12.204 +{
  12.205 +	disable_x86_sep = 1;
  12.206 +	return 1;
  12.207 +}
  12.208 +__setup("nosep", x86_sep_setup);
  12.209 +
  12.210 +
  12.211 +/* Standard macro to see if a specific flag is changeable */
  12.212 +static inline int flag_is_changeable_p(u32 flag)
  12.213 +{
  12.214 +	u32 f1, f2;
  12.215 +
  12.216 +	asm("pushfl\n\t"
  12.217 +	    "pushfl\n\t"
  12.218 +	    "popl %0\n\t"
  12.219 +	    "movl %0,%1\n\t"
  12.220 +	    "xorl %2,%0\n\t"
  12.221 +	    "pushl %0\n\t"
  12.222 +	    "popfl\n\t"
  12.223 +	    "pushfl\n\t"
  12.224 +	    "popl %0\n\t"
  12.225 +	    "popfl\n\t"
  12.226 +	    : "=&r" (f1), "=&r" (f2)
  12.227 +	    : "ir" (flag));
  12.228 +
  12.229 +	return ((f1^f2) & flag) != 0;
  12.230 +}
  12.231 +
  12.232 +
  12.233 +/* Probe for the CPUID instruction */
  12.234 +static int __cpuinit have_cpuid_p(void)
  12.235 +{
  12.236 +	return flag_is_changeable_p(X86_EFLAGS_ID);
  12.237 +}
  12.238 +
  12.239 +/* Do minimum CPU detection early.
  12.240 +   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
  12.241 +   The others are not touched to avoid unwanted side effects.
  12.242 +
  12.243 +   WARNING: this function is only called on the BP.  Don't add code here
  12.244 +   that is supposed to run on all CPUs. */
  12.245 +static void __init early_cpu_detect(void)
  12.246 +{
  12.247 +	struct cpuinfo_x86 *c = &boot_cpu_data;
  12.248 +
  12.249 +	c->x86_cache_alignment = 32;
  12.250 +
  12.251 +	if (!have_cpuid_p())
  12.252 +		return;
  12.253 +
  12.254 +	/* Get vendor name */
  12.255 +	cpuid(0x00000000, &c->cpuid_level,
  12.256 +	      (int *)&c->x86_vendor_id[0],
  12.257 +	      (int *)&c->x86_vendor_id[8],
  12.258 +	      (int *)&c->x86_vendor_id[4]);
  12.259 +
  12.260 +	get_cpu_vendor(c, 1);
  12.261 +
  12.262 +	c->x86 = 4;
  12.263 +	if (c->cpuid_level >= 0x00000001) {
  12.264 +		u32 junk, tfms, cap0, misc;
  12.265 +		cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
  12.266 +		c->x86 = (tfms >> 8) & 15;
  12.267 +		c->x86_model = (tfms >> 4) & 15;
  12.268 +		if (c->x86 == 0xf)
  12.269 +			c->x86 += (tfms >> 20) & 0xff;
  12.270 +		if (c->x86 >= 0x6)
  12.271 +			c->x86_model += ((tfms >> 16) & 0xF) << 4;
  12.272 +		c->x86_mask = tfms & 15;
  12.273 +		if (cap0 & (1<<19))
  12.274 +			c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
  12.275 +	}
  12.276 +}
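
The signature decode above splices CPUID's extended family/model fields onto the base nibbles. A worked example with a hypothetical tfms value of 0x000106a5:

	u32 tfms = 0x000106a5;			/* hypothetical CPUID.1 EAX */
	u8 family = (tfms >> 8) & 15;		/* 0x6 */
	u8 model  = (tfms >> 4) & 15;		/* 0xa */
	if (family == 0xf)			/* not taken: family is 0x6 */
		family += (tfms >> 20) & 0xff;
	if (family >= 0x6)			/* taken */
		model += ((tfms >> 16) & 0xF) << 4;	/* 0xa -> 0x1a */
	/* result: family 0x6, model 0x1a, stepping (tfms & 15) = 5 */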
  12.277 +
  12.278 +void __cpuinit generic_identify(struct cpuinfo_x86 * c)
  12.279 +{
  12.280 +	u32 tfms, xlvl;
  12.281 +	int ebx;
  12.282 +
  12.283 +	if (have_cpuid_p()) {
  12.284 +		/* Get vendor name */
  12.285 +		cpuid(0x00000000, &c->cpuid_level,
  12.286 +		      (int *)&c->x86_vendor_id[0],
  12.287 +		      (int *)&c->x86_vendor_id[8],
  12.288 +		      (int *)&c->x86_vendor_id[4]);
  12.289 +		
  12.290 +		get_cpu_vendor(c, 0);
  12.291 +		/* Initialize the standard set of capabilities */
  12.292 +		/* Note that the vendor-specific code below might override */
  12.293 +	
  12.294 +		/* Intel-defined flags: level 0x00000001 */
  12.295 +		if ( c->cpuid_level >= 0x00000001 ) {
  12.296 +			u32 capability, excap;
  12.297 +			cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
  12.298 +			c->x86_capability[0] = capability;
  12.299 +			c->x86_capability[4] = excap;
  12.300 +			c->x86 = (tfms >> 8) & 15;
  12.301 +			c->x86_model = (tfms >> 4) & 15;
  12.302 +			if (c->x86 == 0xf)
  12.303 +				c->x86 += (tfms >> 20) & 0xff;
  12.304 +			if (c->x86 >= 0x6)
  12.305 +				c->x86_model += ((tfms >> 16) & 0xF) << 4;
  12.306 +			c->x86_mask = tfms & 15;
  12.307 +#ifdef CONFIG_X86_HT
  12.308 +			c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
  12.309 +#else
  12.310 +			c->apicid = (ebx >> 24) & 0xFF;
  12.311 +#endif
  12.312 +		} else {
  12.313 +			/* Have CPUID level 0 only - unheard of */
  12.314 +			c->x86 = 4;
  12.315 +		}
  12.316 +
  12.317 +		/* AMD-defined flags: level 0x80000001 */
  12.318 +		xlvl = cpuid_eax(0x80000000);
  12.319 +		if ( (xlvl & 0xffff0000) == 0x80000000 ) {
  12.320 +			if ( xlvl >= 0x80000001 ) {
  12.321 +				c->x86_capability[1] = cpuid_edx(0x80000001);
  12.322 +				c->x86_capability[6] = cpuid_ecx(0x80000001);
  12.323 +			}
  12.324 +			if ( xlvl >= 0x80000004 )
  12.325 +				get_model_name(c); /* Default name */
  12.326 +		}
  12.327 +	}
  12.328 +
  12.329 +	early_intel_workaround(c);
  12.330 +
  12.331 +#ifdef CONFIG_X86_HT
  12.332 +	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
  12.333 +#endif
  12.334 +}
  12.335 +
  12.336 +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
  12.337 +{
  12.338 +	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
  12.339 +		/* Disable processor serial number */
  12.340 +		unsigned long lo,hi;
  12.341 +		rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
  12.342 +		lo |= 0x200000;
  12.343 +		wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
  12.344 +		printk(KERN_NOTICE "CPU serial number disabled.\n");
  12.345 +		clear_bit(X86_FEATURE_PN, c->x86_capability);
  12.346 +
  12.347 +		/* Disabling the serial number may affect the cpuid level */
  12.348 +		c->cpuid_level = cpuid_eax(0);
  12.349 +	}
  12.350 +}
  12.351 +
  12.352 +static int __init x86_serial_nr_setup(char *s)
  12.353 +{
  12.354 +	disable_x86_serial_nr = 0;
  12.355 +	return 1;
  12.356 +}
  12.357 +__setup("serialnumber", x86_serial_nr_setup);
  12.358 +
  12.359 +
  12.360 +
  12.361 +/*
  12.362 + * This does the hard work of actually picking apart the CPU stuff...
  12.363 + */
  12.364 +void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
  12.365 +{
  12.366 +	int i;
  12.367 +
  12.368 +	c->loops_per_jiffy = loops_per_jiffy;
  12.369 +	c->x86_cache_size = -1;
  12.370 +	c->x86_vendor = X86_VENDOR_UNKNOWN;
  12.371 +	c->cpuid_level = -1;	/* CPUID not detected */
  12.372 +	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
  12.373 +	c->x86_vendor_id[0] = '\0'; /* Unset */
  12.374 +	c->x86_model_id[0] = '\0';  /* Unset */
  12.375 +	c->x86_max_cores = 1;
  12.376 +	memset(&c->x86_capability, 0, sizeof c->x86_capability);
  12.377 +
  12.378 +	if (!have_cpuid_p()) {
  12.379 +		/* First of all, decide if this is a 486 or higher */
  12.380 +		/* It's a 486 if we can modify the AC flag */
  12.381 +		if ( flag_is_changeable_p(X86_EFLAGS_AC) )
  12.382 +			c->x86 = 4;
  12.383 +		else
  12.384 +			c->x86 = 3;
  12.385 +	}
  12.386 +
  12.387 +	generic_identify(c);
  12.388 +
  12.389 +	printk(KERN_DEBUG "CPU: After generic identify, caps:");
  12.390 +	for (i = 0; i < NCAPINTS; i++)
  12.391 +		printk(" %08lx", c->x86_capability[i]);
  12.392 +	printk("\n");
  12.393 +
  12.394 +	if (this_cpu->c_identify) {
  12.395 +		this_cpu->c_identify(c);
  12.396 +
  12.397 +		printk(KERN_DEBUG "CPU: After vendor identify, caps:");
  12.398 +		for (i = 0; i < NCAPINTS; i++)
  12.399 +			printk(" %08lx", c->x86_capability[i]);
  12.400 +		printk("\n");
  12.401 +	}
  12.402 +
  12.403 +	/*
  12.404 +	 * Vendor-specific initialization.  In this section we
  12.405 +	 * canonicalize the feature flags, meaning if there are
  12.406 +	 * features a certain CPU supports which CPUID doesn't
  12.407 +	 * tell us, CPUID claiming incorrect flags, or other bugs,
  12.408 +	 * we handle them here.
  12.409 +	 *
  12.410 +	 * At the end of this section, c->x86_capability better
  12.411 +	 * indicate the features this CPU genuinely supports!
  12.412 +	 */
  12.413 +	if (this_cpu->c_init)
  12.414 +		this_cpu->c_init(c);
  12.415 +
  12.416 +	/* Disable the PN if appropriate */
  12.417 +	squash_the_stupid_serial_number(c);
  12.418 +
  12.419 +	/*
  12.420 +	 * The vendor-specific functions might have changed features.  Now
  12.421 +	 * we do "generic changes."
  12.422 +	 */
  12.423 +
  12.424 +	/* TSC disabled? */
  12.425 +	if ( tsc_disable )
  12.426 +		clear_bit(X86_FEATURE_TSC, c->x86_capability);
  12.427 +
  12.428 +	/* FXSR disabled? */
  12.429 +	if (disable_x86_fxsr) {
  12.430 +		clear_bit(X86_FEATURE_FXSR, c->x86_capability);
  12.431 +		clear_bit(X86_FEATURE_XMM, c->x86_capability);
  12.432 +	}
  12.433 +
  12.434 +	/* SEP disabled? */
  12.435 +	if (disable_x86_sep)
  12.436 +		clear_bit(X86_FEATURE_SEP, c->x86_capability);
  12.437 +
  12.438 +	if (disable_pse)
  12.439 +		clear_bit(X86_FEATURE_PSE, c->x86_capability);
  12.440 +
  12.441 +	/* If the model name is still unset, do table lookup. */
  12.442 +	if ( !c->x86_model_id[0] ) {
  12.443 +		char *p;
  12.444 +		p = table_lookup_model(c);
  12.445 +		if ( p )
  12.446 +			strcpy(c->x86_model_id, p);
  12.447 +		else
  12.448 +			/* Last resort... */
  12.449 +			sprintf(c->x86_model_id, "%02x/%02x",
  12.450 +				c->x86, c->x86_model);
  12.451 +	}
  12.452 +
  12.453 +	/* Now the feature flags better reflect actual CPU features! */
  12.454 +
  12.455 +	printk(KERN_DEBUG "CPU: After all inits, caps:");
  12.456 +	for (i = 0; i < NCAPINTS; i++)
  12.457 +		printk(" %08lx", c->x86_capability[i]);
  12.458 +	printk("\n");
  12.459 +
  12.460 +	/*
  12.461 +	 * On SMP, boot_cpu_data holds the common feature set between
  12.462 +	 * all CPUs; so make sure that we indicate which features are
  12.463 +	 * common between the CPUs.  The first time this routine gets
  12.464 +	 * executed, c == &boot_cpu_data.
  12.465 +	 */
  12.466 +	if ( c != &boot_cpu_data ) {
  12.467 +		/* AND the already accumulated flags with these */
  12.468 +		for ( i = 0 ; i < NCAPINTS ; i++ )
  12.469 +			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
  12.470 +	}
  12.471 +
  12.472 +	/* Init Machine Check Exception if available. */
  12.473 +	mcheck_init(c);
  12.474 +
  12.475 +	if (c == &boot_cpu_data)
  12.476 +		sysenter_setup();
  12.477 +	enable_sep_cpu();
  12.478 +
  12.479 +	if (c == &boot_cpu_data)
  12.480 +		mtrr_bp_init();
  12.481 +	else
  12.482 +		mtrr_ap_init();
  12.483 +}
  12.484 +
  12.485 +#ifdef CONFIG_X86_HT
  12.486 +void __cpuinit detect_ht(struct cpuinfo_x86 *c)
  12.487 +{
  12.488 +	u32 	eax, ebx, ecx, edx;
  12.489 +	int 	index_msb, core_bits;
  12.490 +
  12.491 +	cpuid(1, &eax, &ebx, &ecx, &edx);
  12.492 +
  12.493 +	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
  12.494 +		return;
  12.495 +
  12.496 +	smp_num_siblings = (ebx & 0xff0000) >> 16;
  12.497 +
  12.498 +	if (smp_num_siblings == 1) {
  12.499 +		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
  12.500 +	} else if (smp_num_siblings > 1 ) {
  12.501 +
  12.502 +		if (smp_num_siblings > NR_CPUS) {
   12.503 +			printk(KERN_WARNING "CPU: Unsupported number of "
  12.504 +					"siblings %d", smp_num_siblings);
  12.505 +			smp_num_siblings = 1;
  12.506 +			return;
  12.507 +		}
  12.508 +
  12.509 +		index_msb = get_count_order(smp_num_siblings);
  12.510 +		c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
  12.511 +
  12.512 +		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
  12.513 +		       c->phys_proc_id);
  12.514 +
  12.515 +		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
  12.516 +
   12.517 +		index_msb = get_count_order(smp_num_siblings);
  12.518 +
  12.519 +		core_bits = get_count_order(c->x86_max_cores);
  12.520 +
  12.521 +		c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
  12.522 +					       ((1 << core_bits) - 1);
  12.523 +
  12.524 +		if (c->x86_max_cores > 1)
  12.525 +			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
  12.526 +			       c->cpu_core_id);
  12.527 +	}
  12.528 +}
  12.529 +#endif
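
The shifts in detect_ht() carve the initial APIC ID into thread, core and package fields. A worked example, assuming the default (flat) subarchitecture where phys_pkg_id(apicid, n) reduces to apicid >> n, with apicid = 5, smp_num_siblings = 4 and x86_max_cores = 2:

	/* index_msb = get_count_order(4) = 2
	 *   phys_proc_id = 5 >> 2 = 1                    (package)
	 * threads per core = 4 / 2 = 2
	 * index_msb = get_count_order(2) = 1
	 * core_bits = get_count_order(2) = 1
	 *   cpu_core_id = (5 >> 1) & ((1 << 1) - 1) = 0  (core in package)
	 * the remaining low bit, 5 & 1 = 1, is the thread within the core
	 */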
  12.530 +
  12.531 +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
  12.532 +{
  12.533 +	char *vendor = NULL;
  12.534 +
  12.535 +	if (c->x86_vendor < X86_VENDOR_NUM)
  12.536 +		vendor = this_cpu->c_vendor;
  12.537 +	else if (c->cpuid_level >= 0)
  12.538 +		vendor = c->x86_vendor_id;
  12.539 +
  12.540 +	if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
  12.541 +		printk("%s ", vendor);
  12.542 +
  12.543 +	if (!c->x86_model_id[0])
  12.544 +		printk("%d86", c->x86);
  12.545 +	else
  12.546 +		printk("%s", c->x86_model_id);
  12.547 +
  12.548 +	if (c->x86_mask || c->cpuid_level >= 0) 
  12.549 +		printk(" stepping %02x\n", c->x86_mask);
  12.550 +	else
  12.551 +		printk("\n");
  12.552 +}
  12.553 +
  12.554 +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
  12.555 +
  12.556 +/* This is hacky. :)
  12.557 + * We're emulating future behavior.
  12.558 + * In the future, the cpu-specific init functions will be called implicitly
  12.559 + * via the magic of initcalls.
  12.560 + * They will insert themselves into the cpu_devs structure.
  12.561 + * Then, when cpu_init() is called, we can just iterate over that array.
  12.562 + */
  12.563 +
  12.564 +extern int intel_cpu_init(void);
  12.565 +extern int cyrix_init_cpu(void);
  12.566 +extern int nsc_init_cpu(void);
  12.567 +extern int amd_init_cpu(void);
  12.568 +extern int centaur_init_cpu(void);
  12.569 +extern int transmeta_init_cpu(void);
  12.570 +extern int rise_init_cpu(void);
  12.571 +extern int nexgen_init_cpu(void);
  12.572 +extern int umc_init_cpu(void);
  12.573 +
  12.574 +void __init early_cpu_init(void)
  12.575 +{
  12.576 +	intel_cpu_init();
  12.577 +	cyrix_init_cpu();
  12.578 +	nsc_init_cpu();
  12.579 +	amd_init_cpu();
  12.580 +	centaur_init_cpu();
  12.581 +	transmeta_init_cpu();
  12.582 +	rise_init_cpu();
  12.583 +	nexgen_init_cpu();
  12.584 +	umc_init_cpu();
  12.585 +	early_cpu_detect();
  12.586 +
  12.587 +#ifdef CONFIG_DEBUG_PAGEALLOC
  12.588 +	/* pse is not compatible with on-the-fly unmapping,
  12.589 +	 * disable it even if the cpus claim to support it.
  12.590 +	 */
  12.591 +	clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
  12.592 +	disable_pse = 1;
  12.593 +#endif
  12.594 +}
  12.595 +
  12.596 +void __cpuinit cpu_gdt_init(struct Xgt_desc_struct *gdt_descr)
  12.597 +{
  12.598 +	unsigned long frames[16];
  12.599 +	unsigned long va;
  12.600 +	int f;
  12.601 +
  12.602 +	for (va = gdt_descr->address, f = 0;
  12.603 +	     va < gdt_descr->address + gdt_descr->size;
  12.604 +	     va += PAGE_SIZE, f++) {
  12.605 +		frames[f] = virt_to_mfn(va);
  12.606 +		make_lowmem_page_readonly(
  12.607 +			(void *)va, XENFEAT_writable_descriptor_tables);
  12.608 +	}
  12.609 +	if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
  12.610 +		BUG();
  12.611 +}
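
HYPERVISOR_set_gdt() hands the GDT frames to Xen, which validates them and therefore requires them to be read-only in the guest, hence the make_lowmem_page_readonly() pass above. The second argument counts 8-byte descriptors, which is why size is divided by 8:

	/* Sketch of the hypercall contract (see xen/interface/xen.h):
	 *   set_gdt(frame_list, entries)
	 *   frame_list: machine frame numbers of the GDT pages (read-only)
	 *   entries:    number of 8-byte descriptors, i.e. size / 8
	 */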
  12.612 +
  12.613 +/*
  12.614 + * cpu_init() initializes state that is per-CPU. Some data is already
  12.615 + * initialized (naturally) in the bootstrap process, such as the GDT
  12.616 + * and IDT. We reload them nevertheless, this function acts as a
  12.617 + * 'CPU state barrier', nothing should get across.
  12.618 + */
  12.619 +void __cpuinit cpu_init(void)
  12.620 +{
  12.621 +	int cpu = smp_processor_id();
  12.622 +#ifndef CONFIG_X86_NO_TSS
  12.623 +	struct tss_struct * t = &per_cpu(init_tss, cpu);
  12.624 +#endif
  12.625 +	struct thread_struct *thread = &current->thread;
  12.626 +	struct desc_struct *gdt;
  12.627 +	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
  12.628 +
  12.629 +	if (cpu_test_and_set(cpu, cpu_initialized)) {
  12.630 +		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
  12.631 +		for (;;) local_irq_enable();
  12.632 +	}
  12.633 +	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
  12.634 +
  12.635 +	if (cpu_has_vme || cpu_has_de)
  12.636 +		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
  12.637 +	if (tsc_disable && cpu_has_tsc) {
  12.638 +		printk(KERN_NOTICE "Disabling TSC...\n");
  12.639 +		/**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
  12.640 +		clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
  12.641 +		set_in_cr4(X86_CR4_TSD);
  12.642 +	}
  12.643 +
  12.644 +#ifndef CONFIG_XEN
  12.645 +	/* The CPU hotplug case */
  12.646 +	if (cpu_gdt_descr->address) {
  12.647 +		gdt = (struct desc_struct *)cpu_gdt_descr->address;
  12.648 +		memset(gdt, 0, PAGE_SIZE);
  12.649 +		goto old_gdt;
  12.650 +	}
  12.651 +	/*
  12.652 +	 * This is a horrible hack to allocate the GDT.  The problem
  12.653 +	 * is that cpu_init() is called really early for the boot CPU
  12.654 +	 * (and hence needs bootmem) but much later for the secondary
  12.655 +	 * CPUs, when bootmem will have gone away
  12.656 +	 */
  12.657 +	if (NODE_DATA(0)->bdata->node_bootmem_map) {
  12.658 +		gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
  12.659 +		/* alloc_bootmem_pages panics on failure, so no check */
  12.660 +		memset(gdt, 0, PAGE_SIZE);
  12.661 +	} else {
  12.662 +		gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
  12.663 +		if (unlikely(!gdt)) {
  12.664 +			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
  12.665 +			for (;;)
  12.666 +				local_irq_enable();
  12.667 +		}
  12.668 +	}
  12.669 +old_gdt:
  12.670 +	/*
  12.671 +	 * Initialize the per-CPU GDT with the boot GDT,
  12.672 +	 * and set up the GDT descriptor:
  12.673 +	 */
  12.674 + 	memcpy(gdt, cpu_gdt_table, GDT_SIZE);
  12.675 +
  12.676 +	/* Set up GDT entry for 16bit stack */
  12.677 + 	*(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
  12.678 +		((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
  12.679 +		((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
  12.680 +		(CPU_16BIT_STACK_SIZE - 1);
  12.681 +
  12.682 +	cpu_gdt_descr->size = GDT_SIZE - 1;
  12.683 + 	cpu_gdt_descr->address = (unsigned long)gdt;
  12.684 +#else
  12.685 +	if (cpu == 0 && cpu_gdt_descr->address == 0) {
  12.686 +		gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
  12.687 +		/* alloc_bootmem_pages panics on failure, so no check */
  12.688 +		memset(gdt, 0, PAGE_SIZE);
  12.689 +
  12.690 +		memcpy(gdt, cpu_gdt_table, GDT_SIZE);
  12.691 +		
  12.692 +		cpu_gdt_descr->size = GDT_SIZE;
  12.693 +		cpu_gdt_descr->address = (unsigned long)gdt;
  12.694 +	}
  12.695 +#endif
  12.696 +
  12.697 +	cpu_gdt_init(cpu_gdt_descr);
  12.698 +
  12.699 +	/*
  12.700 +	 * Set up and load the per-CPU TSS and LDT
  12.701 +	 */
  12.702 +	atomic_inc(&init_mm.mm_count);
  12.703 +	current->active_mm = &init_mm;
  12.704 +	if (current->mm)
  12.705 +		BUG();
  12.706 +	enter_lazy_tlb(&init_mm, current);
  12.707 +
  12.708 +	load_esp0(t, thread);
  12.709 +
  12.710 +	load_LDT(&init_mm.context);
  12.711 +
  12.712 +#ifdef CONFIG_DOUBLEFAULT
  12.713 +	/* Set up doublefault TSS pointer in the GDT */
  12.714 +	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
  12.715 +#endif
  12.716 +
  12.717 +	/* Clear %fs and %gs. */
  12.718 +	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
  12.719 +
  12.720 +	/* Clear all 6 debug registers: */
  12.721 +	set_debugreg(0, 0);
  12.722 +	set_debugreg(0, 1);
  12.723 +	set_debugreg(0, 2);
  12.724 +	set_debugreg(0, 3);
  12.725 +	set_debugreg(0, 6);
  12.726 +	set_debugreg(0, 7);
  12.727 +
  12.728 +	/*
  12.729 +	 * Force FPU initialization:
  12.730 +	 */
  12.731 +	current_thread_info()->status = 0;
  12.732 +	clear_used_math();
  12.733 +	mxcsr_feature_mask_init();
  12.734 +}
  12.735 +
  12.736 +#ifdef CONFIG_HOTPLUG_CPU
  12.737 +void __cpuinit cpu_uninit(void)
  12.738 +{
  12.739 +	int cpu = raw_smp_processor_id();
  12.740 +	cpu_clear(cpu, cpu_initialized);
  12.741 +
  12.742 +	/* lazy TLB state */
  12.743 +	per_cpu(cpu_tlbstate, cpu).state = 0;
  12.744 +	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
  12.745 +}
  12.746 +#endif
    13.1 --- a/arch/i386/kernel/cpu/mtrr/Makefile	Mon Jun 04 10:05:24 2007 +0100
    13.2 +++ b/arch/i386/kernel/cpu/mtrr/Makefile	Mon Jun 04 10:05:28 2007 +0100
    13.3 @@ -3,3 +3,10 @@ obj-y		+= amd.o
    13.4  obj-y		+= cyrix.o
    13.5  obj-y		+= centaur.o
    13.6  
    13.7 +ifdef CONFIG_XEN
    13.8 +include $(srctree)/scripts/Makefile.xen
    13.9 +n-obj-xen := generic.o state.o amd.o cyrix.o centaur.o
   13.10 +
   13.11 +obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
   13.12 +obj-y := $(call cherrypickxen, $(obj-y))
   13.13 +endif
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/arch/i386/kernel/cpu/mtrr/main-xen.c	Mon Jun 04 10:05:28 2007 +0100
    14.3 @@ -0,0 +1,197 @@
    14.4 +#include <linux/init.h>
    14.5 +#include <linux/proc_fs.h>
    14.6 +#include <linux/ctype.h>
    14.7 +#include <linux/module.h>
    14.8 +#include <linux/seq_file.h>
    14.9 +#include <asm/uaccess.h>
   14.10 +#include <linux/mutex.h>
   14.11 +
   14.12 +#include <asm/mtrr.h>
   14.13 +#include "mtrr.h"
   14.14 +
   14.15 +static DEFINE_MUTEX(mtrr_mutex);
   14.16 +
   14.17 +void generic_get_mtrr(unsigned int reg, unsigned long *base,
   14.18 +		      unsigned int *size, mtrr_type * type)
   14.19 +{
   14.20 +	struct xen_platform_op op;
   14.21 +
   14.22 +	op.cmd = XENPF_read_memtype;
   14.23 +	op.u.read_memtype.reg = reg;
   14.24 +	(void)HYPERVISOR_platform_op(&op);
   14.25 +
   14.26 +	*size = op.u.read_memtype.nr_mfns;
   14.27 +	*base = op.u.read_memtype.mfn;
   14.28 +	*type = op.u.read_memtype.type;
   14.29 +}
   14.30 +
   14.31 +struct mtrr_ops generic_mtrr_ops = {
   14.32 +	.use_intel_if      = 1,
   14.33 +	.get               = generic_get_mtrr,
   14.34 +};
   14.35 +
   14.36 +struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
   14.37 +unsigned int num_var_ranges;
   14.38 +unsigned int *usage_table;
   14.39 +
   14.40 +static void __init set_num_var_ranges(void)
   14.41 +{
   14.42 +	struct xen_platform_op op;
   14.43 +
   14.44 +	for (num_var_ranges = 0; ; num_var_ranges++) {
   14.45 +		op.cmd = XENPF_read_memtype;
   14.46 +		op.u.read_memtype.reg = num_var_ranges;
   14.47 +		if (HYPERVISOR_platform_op(&op) != 0)
   14.48 +			break;
   14.49 +	}
   14.50 +}
   14.51 +
   14.52 +static void __init init_table(void)
   14.53 +{
   14.54 +	int i, max;
   14.55 +
   14.56 +	max = num_var_ranges;
   14.57 +	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
   14.58 +	    == NULL) {
   14.59 +		printk(KERN_ERR "mtrr: could not allocate\n");
   14.60 +		return;
   14.61 +	}
   14.62 +	for (i = 0; i < max; i++)
   14.63 +		usage_table[i] = 0;
   14.64 +}
   14.65 +
   14.66 +int mtrr_add_page(unsigned long base, unsigned long size, 
   14.67 +		  unsigned int type, char increment)
   14.68 +{
   14.69 +	int error;
   14.70 +	struct xen_platform_op op;
   14.71 +
   14.72 +	mutex_lock(&mtrr_mutex);
   14.73 +
   14.74 +	op.cmd = XENPF_add_memtype;
   14.75 +	op.u.add_memtype.mfn     = base;
   14.76 +	op.u.add_memtype.nr_mfns = size;
   14.77 +	op.u.add_memtype.type    = type;
   14.78 +	error = HYPERVISOR_platform_op(&op);
   14.79 +	if (error) {
   14.80 +		mutex_unlock(&mtrr_mutex);
   14.81 +		BUG_ON(error > 0);
   14.82 +		return error;
   14.83 +	}
   14.84 +
   14.85 +	if (increment)
   14.86 +		++usage_table[op.u.add_memtype.reg];
   14.87 +
   14.88 +	mutex_unlock(&mtrr_mutex);
   14.89 +
   14.90 +	return op.u.add_memtype.reg;
   14.91 +}
   14.92 +
   14.93 +static int mtrr_check(unsigned long base, unsigned long size)
   14.94 +{
   14.95 +	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
   14.96 +		printk(KERN_WARNING
   14.97 +			"mtrr: size and base must be multiples of 4 kiB\n");
   14.98 +		printk(KERN_DEBUG
   14.99 +			"mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
  14.100 +		dump_stack();
  14.101 +		return -1;
  14.102 +	}
  14.103 +	return 0;
  14.104 +}
  14.105 +
  14.106 +int
  14.107 +mtrr_add(unsigned long base, unsigned long size, unsigned int type,
  14.108 +	 char increment)
  14.109 +{
  14.110 +	if (mtrr_check(base, size))
  14.111 +		return -EINVAL;
  14.112 +	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
  14.113 +			     increment);
  14.114 +}
  14.115 +
  14.116 +int mtrr_del_page(int reg, unsigned long base, unsigned long size)
  14.117 +{
  14.118 +	unsigned i;
  14.119 +	mtrr_type ltype;
  14.120 +	unsigned long lbase;
  14.121 +	unsigned int lsize;
  14.122 +	int error = -EINVAL;
  14.123 +	struct xen_platform_op op;
  14.124 +
  14.125 +	mutex_lock(&mtrr_mutex);
  14.126 +
  14.127 +	if (reg < 0) {
  14.128 +		/*  Search for existing MTRR  */
  14.129 +		for (i = 0; i < num_var_ranges; ++i) {
  14.130 +			mtrr_if->get(i, &lbase, &lsize, &ltype);
  14.131 +			if (lbase == base && lsize == size) {
  14.132 +				reg = i;
  14.133 +				break;
  14.134 +			}
  14.135 +		}
  14.136 +		if (reg < 0) {
  14.137 +			printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
  14.138 +			       size);
  14.139 +			goto out;
  14.140 +		}
  14.141 +	}
  14.142 +	if (usage_table[reg] < 1) {
  14.143 +		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
  14.144 +		goto out;
  14.145 +	}
  14.146 +	if (--usage_table[reg] < 1) {
  14.147 +		op.cmd = XENPF_del_memtype;
  14.148 +		op.u.del_memtype.handle = 0;
  14.149 +		op.u.del_memtype.reg    = reg;
  14.150 +		error = HYPERVISOR_platform_op(&op);
  14.151 +		if (error) {
  14.152 +			BUG_ON(error > 0);
  14.153 +			goto out;
  14.154 +		}
  14.155 +	}
  14.156 +	error = reg;
  14.157 + out:
  14.158 +	mutex_unlock(&mtrr_mutex);
  14.159 +	return error;
  14.160 +}
  14.161 +
  14.162 +int
  14.163 +mtrr_del(int reg, unsigned long base, unsigned long size)
  14.164 +{
  14.165 +	if (mtrr_check(base, size))
  14.166 +		return -EINVAL;
  14.167 +	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
  14.168 +}
  14.169 +
  14.170 +EXPORT_SYMBOL(mtrr_add);
  14.171 +EXPORT_SYMBOL(mtrr_del);
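
Typical use of this exported pair is marking a device aperture write-combining; a sketch with a hypothetical framebuffer address:

	/* Sketch only: map a 4 MiB framebuffer aperture write-combining. */
	int reg = mtrr_add(0xd0000000UL, 0x400000UL, MTRR_TYPE_WRCOMB, 1);
	if (reg < 0)
		return reg;		/* fall back to uncached access */
	/* ... use the aperture ... */
	mtrr_del(reg, 0xd0000000UL, 0x400000UL);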
  14.172 +
  14.173 +void __init mtrr_bp_init(void)
  14.174 +{
  14.175 +}
  14.176 +
  14.177 +void mtrr_ap_init(void)
  14.178 +{
  14.179 +}
  14.180 +
  14.181 +static int __init mtrr_init(void)
  14.182 +{
  14.183 +	struct cpuinfo_x86 *c = &boot_cpu_data;
  14.184 +
  14.185 +	if (!is_initial_xendomain())
  14.186 +		return -ENODEV;
  14.187 +
  14.188 +	if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
  14.189 +	    (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
  14.190 +	    (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
  14.191 +	    (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
  14.192 +		return -ENODEV;
  14.193 +
  14.194 +	set_num_var_ranges();
  14.195 +	init_table();
  14.196 +
  14.197 +	return 0;
  14.198 +}
  14.199 +
  14.200 +subsys_initcall(mtrr_init);
    15.1 --- a/arch/i386/kernel/crash.c	Mon Jun 04 10:05:24 2007 +0100
    15.2 +++ b/arch/i386/kernel/crash.c	Mon Jun 04 10:05:28 2007 +0100
    15.3 @@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re
    15.4  	crash_save_this_cpu(regs, cpu);
    15.5  }
    15.6  
    15.7 +#ifndef CONFIG_XEN
    15.8  #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
    15.9  static atomic_t waiting_for_crash_ipi;
   15.10  
   15.11 @@ -154,6 +155,7 @@ static void nmi_shootdown_cpus(void)
   15.12  	/* There are no cpus to shootdown */
   15.13  }
   15.14  #endif
   15.15 +#endif /* CONFIG_XEN */
   15.16  
   15.17  void machine_crash_shutdown(struct pt_regs *regs)
   15.18  {
   15.19 @@ -170,10 +172,12 @@ void machine_crash_shutdown(struct pt_re
   15.20  
   15.21  	/* Make a note of crashing cpu. Will be used in NMI callback.*/
   15.22  	crashing_cpu = smp_processor_id();
   15.23 +#ifndef CONFIG_XEN
   15.24  	nmi_shootdown_cpus();
   15.25  	lapic_shutdown();
   15.26  #if defined(CONFIG_X86_IO_APIC)
   15.27  	disable_IO_APIC();
   15.28  #endif
   15.29 +#endif /* CONFIG_XEN */
   15.30  	crash_save_self(regs);
   15.31  }
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/arch/i386/kernel/early_printk-xen.c	Mon Jun 04 10:05:28 2007 +0100
    16.3 @@ -0,0 +1,2 @@
    16.4 +
    16.5 +#include "../../x86_64/kernel/early_printk-xen.c"
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/arch/i386/kernel/entry-xen.S	Mon Jun 04 10:05:28 2007 +0100
    17.3 @@ -0,0 +1,1216 @@
    17.4 +/*
    17.5 + *  linux/arch/i386/entry.S
    17.6 + *
    17.7 + *  Copyright (C) 1991, 1992  Linus Torvalds
    17.8 + */
    17.9 +
   17.10 +/*
   17.11 + * entry.S contains the system-call and fault low-level handling routines.
   17.12 + * This also contains the timer-interrupt handler, as well as all interrupts
   17.13 + * and faults that can result in a task-switch.
   17.14 + *
   17.15 + * NOTE: This code handles signal-recognition, which happens every time
   17.16 + * after a timer-interrupt and after each system call.
   17.17 + *
   17.18 + * I changed all the .align's to 4 (16 byte alignment), as that's faster
   17.19 + * on a 486.
   17.20 + *
   17.21 + * Stack layout in 'ret_from_system_call':
   17.22 + * 	ptrace needs to have all regs on the stack.
   17.23 + *	if the order here is changed, it needs to be
   17.24 + *	updated in fork.c:copy_process, signal.c:do_signal,
   17.25 + *	ptrace.c and ptrace.h
   17.26 + *
   17.27 + *	 0(%esp) - %ebx
   17.28 + *	 4(%esp) - %ecx
   17.29 + *	 8(%esp) - %edx
   17.30 + *       C(%esp) - %esi
   17.31 + *	10(%esp) - %edi
   17.32 + *	14(%esp) - %ebp
   17.33 + *	18(%esp) - %eax
   17.34 + *	1C(%esp) - %ds
   17.35 + *	20(%esp) - %es
   17.36 + *	24(%esp) - orig_eax
   17.37 + *	28(%esp) - %eip
   17.38 + *	2C(%esp) - %cs
   17.39 + *	30(%esp) - %eflags
   17.40 + *	34(%esp) - %oldesp
   17.41 + *	38(%esp) - %oldss
   17.42 + *
   17.43 + * "current" is in register %ebx during any slow entries.
   17.44 + */
   17.45 +
   17.46 +#include <linux/linkage.h>
   17.47 +#include <asm/thread_info.h>
   17.48 +#include <asm/irqflags.h>
   17.49 +#include <asm/errno.h>
   17.50 +#include <asm/segment.h>
   17.51 +#include <asm/smp.h>
   17.52 +#include <asm/page.h>
   17.53 +#include <asm/desc.h>
   17.54 +#include <asm/dwarf2.h>
   17.55 +#include "irq_vectors.h"
   17.56 +#include <xen/interface/xen.h>
   17.57 +
   17.58 +#define nr_syscalls ((syscall_table_size)/4)
   17.59 +
   17.60 +EBX		= 0x00
   17.61 +ECX		= 0x04
   17.62 +EDX		= 0x08
   17.63 +ESI		= 0x0C
   17.64 +EDI		= 0x10
   17.65 +EBP		= 0x14
   17.66 +EAX		= 0x18
   17.67 +DS		= 0x1C
   17.68 +ES		= 0x20
   17.69 +ORIG_EAX	= 0x24
   17.70 +EIP		= 0x28
   17.71 +CS		= 0x2C
   17.72 +EFLAGS		= 0x30
   17.73 +OLDESP		= 0x34
   17.74 +OLDSS		= 0x38
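For reference, these offset constants index the same frame that C code sees as struct pt_regs. A sketch of the matching layout (field names and types as in the 2.6.18 include/asm-i386/ptrace.h):

	/* C view of the frame the EBX..OLDSS constants above index into. */
	struct pt_regs {
		long ebx;		/* 0x00 */
		long ecx;		/* 0x04 */
		long edx;		/* 0x08 */
		long esi;		/* 0x0C */
		long edi;		/* 0x10 */
		long ebp;		/* 0x14 */
		long eax;		/* 0x18 */
		int  xds;		/* 0x1C */
		int  xes;		/* 0x20 */
		long orig_eax;		/* 0x24 */
		long eip;		/* 0x28 */
		int  xcs;		/* 0x2C */
		long eflags;		/* 0x30 */
		long esp;		/* 0x34 (OLDESP) */
		int  xss;		/* 0x38 (OLDSS) */
	};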
   17.75 +
   17.76 +CF_MASK		= 0x00000001
   17.77 +TF_MASK		= 0x00000100
   17.78 +IF_MASK		= 0x00000200
   17.79 +DF_MASK		= 0x00000400 
   17.80 +NT_MASK		= 0x00004000
   17.81 +VM_MASK		= 0x00020000
   17.82 +/* Pseudo-eflags. */
   17.83 +NMI_MASK	= 0x80000000
   17.84 +
   17.85 +#ifndef CONFIG_XEN
   17.86 +#define DISABLE_INTERRUPTS	cli
   17.87 +#define ENABLE_INTERRUPTS	sti
   17.88 +#else
   17.89 +/* Offsets into shared_info_t. */
   17.90 +#define evtchn_upcall_pending		/* 0 */
   17.91 +#define evtchn_upcall_mask		1
   17.92 +
   17.93 +#define sizeof_vcpu_shift		6
   17.94 +
   17.95 +#ifdef CONFIG_SMP
   17.96 +#define GET_VCPU_INFO		movl TI_cpu(%ebp),%esi			; \
   17.97 +				shl  $sizeof_vcpu_shift,%esi		; \
   17.98 +				addl HYPERVISOR_shared_info,%esi
   17.99 +#else
  17.100 +#define GET_VCPU_INFO		movl HYPERVISOR_shared_info,%esi
  17.101 +#endif
  17.102 +
  17.103 +#define __DISABLE_INTERRUPTS	movb $1,evtchn_upcall_mask(%esi)
  17.104 +#define __ENABLE_INTERRUPTS	movb $0,evtchn_upcall_mask(%esi)
  17.105 +#define DISABLE_INTERRUPTS	GET_VCPU_INFO				; \
  17.106 +				__DISABLE_INTERRUPTS
  17.107 +#define ENABLE_INTERRUPTS	GET_VCPU_INFO				; \
  17.108 +				__ENABLE_INTERRUPTS
  17.109 +#define __TEST_PENDING		testb $0xFF,evtchn_upcall_pending(%esi)
  17.110 +#endif
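In C terms, the Xen variants above replace cli/sti with stores to a per-VCPU mask byte in the shared-info page (sizeof_vcpu_shift is 6 because each vcpu_info slot is 64 bytes). A minimal sketch, assuming the vcpu_info layout from xen/interface/xen.h; force_evtchn_callback() is a hypothetical stand-in for re-entering the upcall path:

	/* Illustrative only: what DISABLE/ENABLE_INTERRUPTS amount to. */
	static inline void xen_irq_disable(void)
	{
		struct vcpu_info *v =
			&HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
		v->evtchn_upcall_mask = 1;	/* __DISABLE_INTERRUPTS */
		barrier();
	}

	static inline void xen_irq_enable(void)
	{
		struct vcpu_info *v =
			&HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
		v->evtchn_upcall_mask = 0;	/* __ENABLE_INTERRUPTS */
		barrier();
		if (v->evtchn_upcall_pending)	/* __TEST_PENDING */
			force_evtchn_callback();	/* hypothetical helper */
	}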
  17.111 +
  17.112 +#ifdef CONFIG_PREEMPT
  17.113 +#define preempt_stop		cli; TRACE_IRQS_OFF
  17.114 +#else
  17.115 +#define preempt_stop
  17.116 +#define resume_kernel		restore_nocheck
  17.117 +#endif
  17.118 +
  17.119 +.macro TRACE_IRQS_IRET
  17.120 +#ifdef CONFIG_TRACE_IRQFLAGS
  17.121 +	testl $IF_MASK,EFLAGS(%esp)     # interrupts off?
  17.122 +	jz 1f
  17.123 +	TRACE_IRQS_ON
  17.124 +1:
  17.125 +#endif
  17.126 +.endm
  17.127 +
  17.128 +#ifdef CONFIG_VM86
  17.129 +#define resume_userspace_sig	check_userspace
  17.130 +#else
  17.131 +#define resume_userspace_sig	resume_userspace
  17.132 +#endif
  17.133 +
  17.134 +#define SAVE_ALL \
  17.135 +	cld; \
  17.136 +	pushl %es; \
  17.137 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.138 +	/*CFI_REL_OFFSET es, 0;*/\
  17.139 +	pushl %ds; \
  17.140 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.141 +	/*CFI_REL_OFFSET ds, 0;*/\
  17.142 +	pushl %eax; \
  17.143 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.144 +	CFI_REL_OFFSET eax, 0;\
  17.145 +	pushl %ebp; \
  17.146 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.147 +	CFI_REL_OFFSET ebp, 0;\
  17.148 +	pushl %edi; \
  17.149 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.150 +	CFI_REL_OFFSET edi, 0;\
  17.151 +	pushl %esi; \
  17.152 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.153 +	CFI_REL_OFFSET esi, 0;\
  17.154 +	pushl %edx; \
  17.155 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.156 +	CFI_REL_OFFSET edx, 0;\
  17.157 +	pushl %ecx; \
  17.158 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.159 +	CFI_REL_OFFSET ecx, 0;\
  17.160 +	pushl %ebx; \
  17.161 +	CFI_ADJUST_CFA_OFFSET 4;\
  17.162 +	CFI_REL_OFFSET ebx, 0;\
  17.163 +	movl $(__USER_DS), %edx; \
  17.164 +	movl %edx, %ds; \
  17.165 +	movl %edx, %es;
  17.166 +
  17.167 +#define RESTORE_INT_REGS \
  17.168 +	popl %ebx;	\
  17.169 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.170 +	CFI_RESTORE ebx;\
  17.171 +	popl %ecx;	\
  17.172 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.173 +	CFI_RESTORE ecx;\
  17.174 +	popl %edx;	\
  17.175 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.176 +	CFI_RESTORE edx;\
  17.177 +	popl %esi;	\
  17.178 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.179 +	CFI_RESTORE esi;\
  17.180 +	popl %edi;	\
  17.181 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.182 +	CFI_RESTORE edi;\
  17.183 +	popl %ebp;	\
  17.184 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.185 +	CFI_RESTORE ebp;\
  17.186 +	popl %eax;	\
  17.187 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.188 +	CFI_RESTORE eax
  17.189 +
  17.190 +#define RESTORE_REGS	\
  17.191 +	RESTORE_INT_REGS; \
  17.192 +1:	popl %ds;	\
  17.193 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.194 +	/*CFI_RESTORE ds;*/\
  17.195 +2:	popl %es;	\
  17.196 +	CFI_ADJUST_CFA_OFFSET -4;\
  17.197 +	/*CFI_RESTORE es;*/\
  17.198 +.section .fixup,"ax";	\
  17.199 +3:	movl $0,(%esp);	\
  17.200 +	jmp 1b;		\
  17.201 +4:	movl $0,(%esp);	\
  17.202 +	jmp 2b;		\
  17.203 +.previous;		\
  17.204 +.section __ex_table,"a";\
  17.205 +	.align 4;	\
  17.206 +	.long 1b,3b;	\
  17.207 +	.long 2b,4b;	\
  17.208 +.previous
  17.209 +
  17.210 +#define RING0_INT_FRAME \
  17.211 +	CFI_STARTPROC simple;\
  17.212 +	CFI_DEF_CFA esp, 3*4;\
  17.213 +	/*CFI_OFFSET cs, -2*4;*/\
  17.214 +	CFI_OFFSET eip, -3*4
  17.215 +
  17.216 +#define RING0_EC_FRAME \
  17.217 +	CFI_STARTPROC simple;\
  17.218 +	CFI_DEF_CFA esp, 4*4;\
  17.219 +	/*CFI_OFFSET cs, -2*4;*/\
  17.220 +	CFI_OFFSET eip, -3*4
  17.221 +
  17.222 +#define RING0_PTREGS_FRAME \
  17.223 +	CFI_STARTPROC simple;\
  17.224 +	CFI_DEF_CFA esp, OLDESP-EBX;\
  17.225 +	/*CFI_OFFSET cs, CS-OLDESP;*/\
  17.226 +	CFI_OFFSET eip, EIP-OLDESP;\
  17.227 +	/*CFI_OFFSET es, ES-OLDESP;*/\
  17.228 +	/*CFI_OFFSET ds, DS-OLDESP;*/\
  17.229 +	CFI_OFFSET eax, EAX-OLDESP;\
  17.230 +	CFI_OFFSET ebp, EBP-OLDESP;\
  17.231 +	CFI_OFFSET edi, EDI-OLDESP;\
  17.232 +	CFI_OFFSET esi, ESI-OLDESP;\
  17.233 +	CFI_OFFSET edx, EDX-OLDESP;\
  17.234 +	CFI_OFFSET ecx, ECX-OLDESP;\
  17.235 +	CFI_OFFSET ebx, EBX-OLDESP
  17.236 +
  17.237 +ENTRY(ret_from_fork)
  17.238 +	CFI_STARTPROC
  17.239 +	pushl %eax
  17.240 +	CFI_ADJUST_CFA_OFFSET 4
  17.241 +	call schedule_tail
  17.242 +	GET_THREAD_INFO(%ebp)
  17.243 +	popl %eax
  17.244 +	CFI_ADJUST_CFA_OFFSET -4
  17.245 +	pushl $0x0202			# Reset kernel eflags
  17.246 +	CFI_ADJUST_CFA_OFFSET 4
  17.247 +	popfl
  17.248 +	CFI_ADJUST_CFA_OFFSET -4
  17.249 +	jmp syscall_exit
  17.250 +	CFI_ENDPROC
  17.251 +
  17.252 +/*
  17.253 + * Return to user mode is not as complex as all this looks,
  17.254 + * but we want the default path for a system call return to
  17.255 + * go as quickly as possible which is why some of this is
  17.256 + * less clear than it otherwise should be.
  17.257 + */
  17.258 +
  17.259 +	# userspace resumption stub bypassing syscall exit tracing
  17.260 +	ALIGN
  17.261 +	RING0_PTREGS_FRAME
  17.262 +ret_from_exception:
  17.263 +	preempt_stop
  17.264 +ret_from_intr:
  17.265 +	GET_THREAD_INFO(%ebp)
  17.266 +check_userspace:
  17.267 +	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
  17.268 +	movb CS(%esp), %al
  17.269 +	testl $(VM_MASK | 2), %eax
  17.270 +	jz resume_kernel
  17.271 +ENTRY(resume_userspace)
  17.272 +	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
  17.273 +					# setting need_resched or sigpending
  17.274 +					# between sampling and the iret
  17.275 +	movl TI_flags(%ebp), %ecx
  17.276 +	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
  17.277 +					# int/exception return?
  17.278 +	jne work_pending
  17.279 +	jmp restore_all
  17.280 +
  17.281 +#ifdef CONFIG_PREEMPT
  17.282 +ENTRY(resume_kernel)
  17.283 +	cli
  17.284 +	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
  17.285 +	jnz restore_nocheck
  17.286 +need_resched:
  17.287 +	movl TI_flags(%ebp), %ecx	# need_resched set ?
  17.288 +	testb $_TIF_NEED_RESCHED, %cl
  17.289 +	jz restore_all
  17.290 +	testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
  17.291 +	jz restore_all
  17.292 +	call preempt_schedule_irq
  17.293 +	jmp need_resched
  17.294 +#endif
  17.295 +	CFI_ENDPROC
  17.296 +
  17.297 +/* SYSENTER_RETURN points to after the "sysenter" instruction in
   17.298 +   the vsyscall page.  See vsyscall-sysenter.S, which defines the symbol.  */
  17.299 +
  17.300 +	# sysenter call handler stub
  17.301 +ENTRY(sysenter_entry)
  17.302 +	CFI_STARTPROC simple
  17.303 +	CFI_DEF_CFA esp, 0
  17.304 +	CFI_REGISTER esp, ebp
  17.305 +	movl SYSENTER_stack_esp0(%esp),%esp
  17.306 +sysenter_past_esp:
  17.307 +	/*
   17.308 +	 * No need to follow this irqs on/off section: sysenter
   17.309 +	 * disabled irqs, and here we enable them straight after entry:
  17.310 +	 */
  17.311 +	sti
  17.312 +	pushl $(__USER_DS)
  17.313 +	CFI_ADJUST_CFA_OFFSET 4
  17.314 +	/*CFI_REL_OFFSET ss, 0*/
  17.315 +	pushl %ebp
  17.316 +	CFI_ADJUST_CFA_OFFSET 4
  17.317 +	CFI_REL_OFFSET esp, 0
  17.318 +	pushfl
  17.319 +	CFI_ADJUST_CFA_OFFSET 4
  17.320 +	pushl $(__USER_CS)
  17.321 +	CFI_ADJUST_CFA_OFFSET 4
  17.322 +	/*CFI_REL_OFFSET cs, 0*/
  17.323 +	/*
  17.324 +	 * Push current_thread_info()->sysenter_return to the stack.
  17.325 +	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
  17.326 +	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
  17.327 +	 */
  17.328 +	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
  17.329 +	CFI_ADJUST_CFA_OFFSET 4
  17.330 +	CFI_REL_OFFSET eip, 0
  17.331 +
  17.332 +/*
  17.333 + * Load the potential sixth argument from user stack.
  17.334 + * Careful about security.
  17.335 + */
  17.336 +	cmpl $__PAGE_OFFSET-3,%ebp
  17.337 +	jae syscall_fault
  17.338 +1:	movl (%ebp),%ebp
  17.339 +.section __ex_table,"a"
  17.340 +	.align 4
  17.341 +	.long 1b,syscall_fault
  17.342 +.previous
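The fenced load above is the assembly form of a range check plus get_user() on the user's %ebp; the exception-table entry plays the role of the fault check. A sketch of the equivalent C (illustrative only; the real code runs before the pt_regs frame is built, so user_ebp is a hypothetical name for the value still in %ebp):

	unsigned long sixth;

	if (user_ebp >= __PAGE_OFFSET - 3 ||		/* cmpl/jae above */
	    get_user(sixth, (unsigned long __user *)user_ebp))
		return -EFAULT;				/* -> syscall_fault */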
  17.343 +
  17.344 +	pushl %eax
  17.345 +	CFI_ADJUST_CFA_OFFSET 4
  17.346 +	SAVE_ALL
  17.347 +	GET_THREAD_INFO(%ebp)
  17.348 +
  17.349 +	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
  17.350 +	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
  17.351 +	jnz syscall_trace_entry
  17.352 +	cmpl $(nr_syscalls), %eax
  17.353 +	jae syscall_badsys
  17.354 +	call *sys_call_table(,%eax,4)
  17.355 +	movl %eax,EAX(%esp)
  17.356 +	DISABLE_INTERRUPTS
  17.357 +	TRACE_IRQS_OFF
  17.358 +	movl TI_flags(%ebp), %ecx
  17.359 +	testw $_TIF_ALLWORK_MASK, %cx
  17.360 +	jne syscall_exit_work
  17.361 +/* if something modifies registers it must also disable sysexit */
  17.362 +	movl EIP(%esp), %edx
  17.363 +	movl OLDESP(%esp), %ecx
  17.364 +	xorl %ebp,%ebp
  17.365 +#ifdef CONFIG_XEN
  17.366 +	TRACE_IRQS_ON
  17.367 +	__ENABLE_INTERRUPTS
  17.368 +sysexit_scrit:	/**** START OF SYSEXIT CRITICAL REGION ****/
  17.369 +	__TEST_PENDING
  17.370 +	jnz  14f			# process more events if necessary...
  17.371 +	movl ESI(%esp), %esi
  17.372 +	sysexit
  17.373 +14:	__DISABLE_INTERRUPTS
  17.374 +	TRACE_IRQS_OFF
  17.375 +sysexit_ecrit:	/**** END OF SYSEXIT CRITICAL REGION ****/
  17.376 +	push %esp
  17.377 +	call evtchn_do_upcall
  17.378 +	add  $4,%esp
  17.379 +	jmp  ret_from_intr
  17.380 +#else
  17.381 +	TRACE_IRQS_ON
  17.382 +	sti
  17.383 +	sysexit
  17.384 +#endif /* !CONFIG_XEN */
  17.385 +	CFI_ENDPROC
  17.386 +
  17.387 +
  17.388 +	# system call handler stub
  17.389 +ENTRY(system_call)
  17.390 +	RING0_INT_FRAME			# can't unwind into user space anyway
  17.391 +	pushl %eax			# save orig_eax
  17.392 +	CFI_ADJUST_CFA_OFFSET 4
  17.393 +	SAVE_ALL
  17.394 +	GET_THREAD_INFO(%ebp)
  17.395 +	testl $TF_MASK,EFLAGS(%esp)
  17.396 +	jz no_singlestep
  17.397 +	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
  17.398 +no_singlestep:
  17.399 +					# system call tracing in operation / emulation
  17.400 +	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
  17.401 +	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
  17.402 +	jnz syscall_trace_entry
  17.403 +	cmpl $(nr_syscalls), %eax
  17.404 +	jae syscall_badsys
  17.405 +syscall_call:
  17.406 +	call *sys_call_table(,%eax,4)
  17.407 +	movl %eax,EAX(%esp)		# store the return value
  17.408 +syscall_exit:
  17.409 +	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
  17.410 +					# setting need_resched or sigpending
  17.411 +					# between sampling and the iret
  17.412 +	TRACE_IRQS_OFF
  17.413 +	movl TI_flags(%ebp), %ecx
  17.414 +	testw $_TIF_ALLWORK_MASK, %cx	# current->work
  17.415 +	jne syscall_exit_work
  17.416 +
  17.417 +restore_all:
  17.418 +#ifndef CONFIG_XEN
  17.419 +	movl EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
  17.420 +	# Warning: OLDSS(%esp) contains the wrong/random values if we
  17.421 +	# are returning to the kernel.
  17.422 +	# See comments in process.c:copy_thread() for details.
  17.423 +	movb OLDSS(%esp), %ah
  17.424 +	movb CS(%esp), %al
  17.425 +	andl $(VM_MASK | (4 << 8) | 3), %eax
  17.426 +	cmpl $((4 << 8) | 3), %eax
  17.427 +	CFI_REMEMBER_STATE
  17.428 +	je ldt_ss			# returning to user-space with LDT SS
  17.429 +restore_nocheck:
  17.430 +#else
  17.431 +restore_nocheck:
  17.432 +	movl EFLAGS(%esp), %eax
  17.433 +	testl $(VM_MASK|NMI_MASK), %eax
  17.434 +	CFI_REMEMBER_STATE
  17.435 +	jnz hypervisor_iret
  17.436 +	shr $9, %eax			# EAX[0] == IRET_EFLAGS.IF
  17.437 +	GET_VCPU_INFO
  17.438 +	andb evtchn_upcall_mask(%esi),%al
  17.439 +	andb $1,%al			# EAX[0] == IRET_EFLAGS.IF & event_mask
  17.440 +	CFI_REMEMBER_STATE
  17.441 +	jnz restore_all_enable_events	#        != 0 => enable event delivery
  17.442 +#endif
  17.443 +	TRACE_IRQS_IRET
  17.444 +restore_nocheck_notrace:
  17.445 +	RESTORE_REGS
  17.446 +	addl $4, %esp
  17.447 +	CFI_ADJUST_CFA_OFFSET -4
  17.448 +1:	iret
  17.449 +.section .fixup,"ax"
  17.450 +iret_exc:
  17.451 +#ifndef CONFIG_XEN
  17.452 +	TRACE_IRQS_ON
  17.453 +	sti
  17.454 +#endif
  17.455 +	pushl $0			# no error code
  17.456 +	pushl $do_iret_error
  17.457 +	jmp error_code
  17.458 +.previous
  17.459 +.section __ex_table,"a"
  17.460 +	.align 4
  17.461 +	.long 1b,iret_exc
  17.462 +.previous
  17.463 +
  17.464 +	CFI_RESTORE_STATE
  17.465 +#ifndef CONFIG_XEN
  17.466 +ldt_ss:
  17.467 +	larl OLDSS(%esp), %eax
  17.468 +	jnz restore_nocheck
  17.469 +	testl $0x00400000, %eax		# returning to 32bit stack?
   17.470 +	jnz restore_nocheck		# all right, normal return
  17.471 +	/* If returning to userspace with 16bit stack,
  17.472 +	 * try to fix the higher word of ESP, as the CPU
  17.473 +	 * won't restore it.
  17.474 +	 * This is an "official" bug of all the x86-compatible
  17.475 +	 * CPUs, which we can try to work around to make
  17.476 +	 * dosemu and wine happy. */
  17.477 +	subl $8, %esp		# reserve space for switch16 pointer
  17.478 +	CFI_ADJUST_CFA_OFFSET 8
  17.479 +	cli
  17.480 +	TRACE_IRQS_OFF
  17.481 +	movl %esp, %eax
  17.482 +	/* Set up the 16bit stack frame with switch32 pointer on top,
  17.483 +	 * and a switch16 pointer on top of the current frame. */
  17.484 +	call setup_x86_bogus_stack
  17.485 +	CFI_ADJUST_CFA_OFFSET -8	# frame has moved
  17.486 +	TRACE_IRQS_IRET
  17.487 +	RESTORE_REGS
  17.488 +	lss 20+4(%esp), %esp	# switch to 16bit stack
  17.489 +1:	iret
  17.490 +.section __ex_table,"a"
  17.491 +	.align 4
  17.492 +	.long 1b,iret_exc
  17.493 +.previous
  17.494 +#else
   17.495 +	ALIGN
  17.496 +restore_all_enable_events:
  17.497 +	TRACE_IRQS_ON
  17.498 +	__ENABLE_INTERRUPTS
  17.499 +scrit:	/**** START OF CRITICAL REGION ****/
  17.500 +	__TEST_PENDING
  17.501 +	jnz  14f			# process more events if necessary...
  17.502 +	RESTORE_REGS
  17.503 +	addl $4, %esp
  17.504 +	CFI_ADJUST_CFA_OFFSET -4
  17.505 +1:	iret
  17.506 +.section __ex_table,"a"
  17.507 +	.align 4
  17.508 +	.long 1b,iret_exc
  17.509 +.previous
  17.510 +14:	__DISABLE_INTERRUPTS
  17.511 +	TRACE_IRQS_OFF
  17.512 +	jmp  11f
  17.513 +ecrit:  /**** END OF CRITICAL REGION ****/
  17.514 +
  17.515 +	CFI_RESTORE_STATE
  17.516 +hypervisor_iret:
  17.517 +	andl $~NMI_MASK, EFLAGS(%esp)
  17.518 +	RESTORE_REGS
  17.519 +	addl $4, %esp
  17.520 +	CFI_ADJUST_CFA_OFFSET -4
  17.521 +	jmp  hypercall_page + (__HYPERVISOR_iret * 32)
  17.522 +#endif
  17.523 +	CFI_ENDPROC
  17.524 +
  17.525 +	# perform work that needs to be done immediately before resumption
  17.526 +	ALIGN
  17.527 +	RING0_PTREGS_FRAME		# can't unwind into user space anyway
  17.528 +work_pending:
  17.529 +	testb $_TIF_NEED_RESCHED, %cl
  17.530 +	jz work_notifysig
  17.531 +work_resched:
  17.532 +	call schedule
  17.533 +	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
  17.534 +					# setting need_resched or sigpending
  17.535 +					# between sampling and the iret
  17.536 +	TRACE_IRQS_OFF
  17.537 +	movl TI_flags(%ebp), %ecx
  17.538 +	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
  17.539 +					# than syscall tracing?
  17.540 +	jz restore_all
  17.541 +	testb $_TIF_NEED_RESCHED, %cl
  17.542 +	jnz work_resched
  17.543 +
  17.544 +work_notifysig:				# deal with pending signals and
  17.545 +					# notify-resume requests
  17.546 +	testl $VM_MASK, EFLAGS(%esp)
  17.547 +	movl %esp, %eax
  17.548 +	jne work_notifysig_v86		# returning to kernel-space or
  17.549 +					# vm86-space
  17.550 +	xorl %edx, %edx
  17.551 +	call do_notify_resume
  17.552 +	jmp resume_userspace_sig
  17.553 +
  17.554 +	ALIGN
  17.555 +work_notifysig_v86:
  17.556 +#ifdef CONFIG_VM86
  17.557 +	pushl %ecx			# save ti_flags for do_notify_resume
  17.558 +	CFI_ADJUST_CFA_OFFSET 4
  17.559 +	call save_v86_state		# %eax contains pt_regs pointer
  17.560 +	popl %ecx
  17.561 +	CFI_ADJUST_CFA_OFFSET -4
  17.562 +	movl %eax, %esp
  17.563 +	xorl %edx, %edx
  17.564 +	call do_notify_resume
  17.565 +	jmp resume_userspace_sig
  17.566 +#endif
  17.567 +
   17.568 +	# perform syscall entry tracing
  17.569 +	ALIGN
  17.570 +syscall_trace_entry:
  17.571 +	movl $-ENOSYS,EAX(%esp)
  17.572 +	movl %esp, %eax
  17.573 +	xorl %edx,%edx
  17.574 +	call do_syscall_trace
  17.575 +	cmpl $0, %eax
  17.576 +	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
  17.577 +					# so must skip actual syscall
  17.578 +	movl ORIG_EAX(%esp), %eax
  17.579 +	cmpl $(nr_syscalls), %eax
  17.580 +	jnae syscall_call
  17.581 +	jmp syscall_exit
  17.582 +
  17.583 +	# perform syscall exit tracing
  17.584 +	ALIGN
  17.585 +syscall_exit_work:
  17.586 +	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
  17.587 +	jz work_pending
  17.588 +	TRACE_IRQS_ON
  17.589 +	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
  17.590 +					# schedule() instead
  17.591 +	movl %esp, %eax
  17.592 +	movl $1, %edx
  17.593 +	call do_syscall_trace
  17.594 +	jmp resume_userspace
  17.595 +	CFI_ENDPROC
  17.596 +
  17.597 +	RING0_INT_FRAME			# can't unwind into user space anyway
  17.598 +syscall_fault:
  17.599 +	pushl %eax			# save orig_eax
  17.600 +	CFI_ADJUST_CFA_OFFSET 4
  17.601 +	SAVE_ALL
  17.602 +	GET_THREAD_INFO(%ebp)
  17.603 +	movl $-EFAULT,EAX(%esp)
  17.604 +	jmp resume_userspace
  17.605 +
  17.606 +syscall_badsys:
  17.607 +	movl $-ENOSYS,EAX(%esp)
  17.608 +	jmp resume_userspace
  17.609 +	CFI_ENDPROC
  17.610 +
  17.611 +#ifndef CONFIG_XEN
  17.612 +#define FIXUP_ESPFIX_STACK \
  17.613 +	movl %esp, %eax; \
  17.614 +	/* switch to 32bit stack using the pointer on top of 16bit stack */ \
  17.615 +	lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
  17.616 +	/* copy data from 16bit stack to 32bit stack */ \
  17.617 +	call fixup_x86_bogus_stack; \
  17.618 +	/* put ESP to the proper location */ \
  17.619 +	movl %eax, %esp;
  17.620 +#define UNWIND_ESPFIX_STACK \
  17.621 +	pushl %eax; \
  17.622 +	CFI_ADJUST_CFA_OFFSET 4; \
  17.623 +	movl %ss, %eax; \
  17.624 +	/* see if on 16bit stack */ \
  17.625 +	cmpw $__ESPFIX_SS, %ax; \
  17.626 +	je 28f; \
  17.627 +27:	popl %eax; \
  17.628 +	CFI_ADJUST_CFA_OFFSET -4; \
  17.629 +.section .fixup,"ax"; \
  17.630 +28:	movl $__KERNEL_DS, %eax; \
  17.631 +	movl %eax, %ds; \
  17.632 +	movl %eax, %es; \
  17.633 +	/* switch to 32bit stack */ \
  17.634 +	FIXUP_ESPFIX_STACK; \
  17.635 +	jmp 27b; \
  17.636 +.previous
  17.637 +
  17.638 +/*
  17.639 + * Build the entry stubs and pointer table with
  17.640 + * some assembler magic.
  17.641 + */
  17.642 +.data
  17.643 +ENTRY(interrupt)
  17.644 +.text
  17.645 +
  17.646 +vector=0
  17.647 +ENTRY(irq_entries_start)
  17.648 +	RING0_INT_FRAME
  17.649 +.rept NR_IRQS
  17.650 +	ALIGN
  17.651 + .if vector
  17.652 +	CFI_ADJUST_CFA_OFFSET -4
  17.653 + .endif
  17.654 +1:	pushl $~(vector)
  17.655 +	CFI_ADJUST_CFA_OFFSET 4
  17.656 +	jmp common_interrupt
  17.657 +.data
  17.658 +	.long 1b
  17.659 +.text
  17.660 +vector=vector+1
  17.661 +.endr
  17.662 +
  17.663 +/*
  17.664 + * the CPU automatically disables interrupts when executing an IRQ vector,
  17.665 + * so IRQ-flags tracing has to follow that:
  17.666 + */
  17.667 +	ALIGN
  17.668 +common_interrupt:
  17.669 +	SAVE_ALL
  17.670 +	TRACE_IRQS_OFF
  17.671 +	movl %esp,%eax
  17.672 +	call do_IRQ
  17.673 +	jmp ret_from_intr
  17.674 +	CFI_ENDPROC
  17.675 +
  17.676 +#define BUILD_INTERRUPT(name, nr)	\
  17.677 +ENTRY(name)				\
  17.678 +	RING0_INT_FRAME;		\
  17.679 +	pushl $~(nr);			\
  17.680 +	CFI_ADJUST_CFA_OFFSET 4;	\
  17.681 +	SAVE_ALL;			\
  17.682 +	TRACE_IRQS_OFF			\
  17.683 +	movl %esp,%eax;			\
  17.684 +	call smp_/**/name;		\
  17.685 +	jmp ret_from_intr;		\
  17.686 +	CFI_ENDPROC
  17.687 +
  17.688 +/* The include is where all of the SMP etc. interrupts come from */
  17.689 +#include "entry_arch.h"
  17.690 +#else
  17.691 +#define UNWIND_ESPFIX_STACK
  17.692 +#endif
  17.693 +
  17.694 +ENTRY(divide_error)
  17.695 +	RING0_INT_FRAME
  17.696 +	pushl $0			# no error code
  17.697 +	CFI_ADJUST_CFA_OFFSET 4
  17.698 +	pushl $do_divide_error
  17.699 +	CFI_ADJUST_CFA_OFFSET 4
  17.700 +	ALIGN
  17.701 +error_code:
  17.702 +	pushl %ds
  17.703 +	CFI_ADJUST_CFA_OFFSET 4
  17.704 +	/*CFI_REL_OFFSET ds, 0*/
  17.705 +	pushl %eax
  17.706 +	CFI_ADJUST_CFA_OFFSET 4
  17.707 +	CFI_REL_OFFSET eax, 0
  17.708 +	xorl %eax, %eax
  17.709 +	pushl %ebp
  17.710 +	CFI_ADJUST_CFA_OFFSET 4
  17.711 +	CFI_REL_OFFSET ebp, 0
  17.712 +	pushl %edi
  17.713 +	CFI_ADJUST_CFA_OFFSET 4
  17.714 +	CFI_REL_OFFSET edi, 0
  17.715 +	pushl %esi
  17.716 +	CFI_ADJUST_CFA_OFFSET 4
  17.717 +	CFI_REL_OFFSET esi, 0
  17.718 +	pushl %edx
  17.719 +	CFI_ADJUST_CFA_OFFSET 4
  17.720 +	CFI_REL_OFFSET edx, 0
  17.721 +	decl %eax			# eax = -1
  17.722 +	pushl %ecx
  17.723 +	CFI_ADJUST_CFA_OFFSET 4
  17.724 +	CFI_REL_OFFSET ecx, 0
  17.725 +	pushl %ebx
  17.726 +	CFI_ADJUST_CFA_OFFSET 4
  17.727 +	CFI_REL_OFFSET ebx, 0
  17.728 +	cld
  17.729 +	pushl %es
  17.730 +	CFI_ADJUST_CFA_OFFSET 4
  17.731 +	/*CFI_REL_OFFSET es, 0*/
  17.732 +	UNWIND_ESPFIX_STACK
  17.733 +	popl %ecx
  17.734 +	CFI_ADJUST_CFA_OFFSET -4
  17.735 +	/*CFI_REGISTER es, ecx*/
  17.736 +	movl ES(%esp), %edi		# get the function address
  17.737 +	movl ORIG_EAX(%esp), %edx	# get the error code
  17.738 +	movl %eax, ORIG_EAX(%esp)
  17.739 +	movl %ecx, ES(%esp)
  17.740 +	/*CFI_REL_OFFSET es, ES*/
  17.741 +	movl $(__USER_DS), %ecx
  17.742 +	movl %ecx, %ds
  17.743 +	movl %ecx, %es
  17.744 +	movl %esp,%eax			# pt_regs pointer
  17.745 +	call *%edi
  17.746 +	jmp ret_from_exception
  17.747 +	CFI_ENDPROC
  17.748 +
  17.749 +#ifdef CONFIG_XEN
  17.750 +# A note on the "critical region" in our callback handler.
  17.751 +# We want to avoid stacking callback handlers due to events occurring
  17.752 +# during handling of the last event. To do this, we keep events disabled
  17.753 +# until we've done all processing. HOWEVER, we must enable events before
  17.754 +# popping the stack frame (can't be done atomically) and so it would still
  17.755 +# be possible to get enough handler activations to overflow the stack.
  17.756 +# Although unlikely, bugs of that kind are hard to track down, so we'd
  17.757 +# like to avoid the possibility.
  17.758 +# So, on entry to the handler we detect whether we interrupted an
  17.759 +# existing activation in its critical region -- if so, we pop the current
  17.760 +# activation and restart the handler using the previous one.
  17.761 +#
  17.762 +# The sysexit critical region is slightly different. sysexit
  17.763 +# atomically removes the entire stack frame. If we interrupt in the
  17.764 +# critical region we know that the entire frame is present and correct
  17.765 +# so we can simply throw away the new one.
  17.766 +ENTRY(hypervisor_callback)
  17.767 +	RING0_INT_FRAME
  17.768 +	pushl %eax
  17.769 +	CFI_ADJUST_CFA_OFFSET 4
  17.770 +	SAVE_ALL
  17.771 +	movl EIP(%esp),%eax
  17.772 +	cmpl $scrit,%eax
  17.773 +	jb   11f
  17.774 +	cmpl $ecrit,%eax
  17.775 +	jb   critical_region_fixup
  17.776 +	cmpl $sysexit_scrit,%eax
  17.777 +	jb   11f
  17.778 +	cmpl $sysexit_ecrit,%eax
  17.779 +	ja   11f
  17.780 +	addl $OLDESP,%esp		# Remove eflags...ebx from stack frame.
  17.781 +11:	push %esp
  17.782 +	CFI_ADJUST_CFA_OFFSET 4
  17.783 +	call evtchn_do_upcall
  17.784 +	add  $4,%esp
  17.785 +	CFI_ADJUST_CFA_OFFSET -4
  17.786 +	jmp  ret_from_intr
  17.787 +	CFI_ENDPROC
  17.788 +
  17.789 +# [How we do the fixup]. We want to merge the current stack frame with the
  17.790 +# just-interrupted frame. How we do this depends on where in the critical
  17.791 +# region the interrupted handler was executing, and so how many saved
  17.792 +# registers are in each frame. We do this quickly using the lookup table
  17.793 +# 'critical_fixup_table'. For each byte offset in the critical region, it
  17.794 +# provides the number of bytes which have already been popped from the
  17.795 +# interrupted stack frame.
  17.796 +critical_region_fixup:
   17.797 +	movzbl critical_fixup_table-scrit(%eax),%ecx # %ecx = num bytes already popped
  17.798 +	cmpb $0xff,%cl                  # 0xff => vcpu_info critical region
  17.799 +	jne  15f
  17.800 +	xorl %ecx,%ecx
  17.801 +15:	leal (%esp,%ecx),%esi		# %esi points at end of src region
  17.802 +	leal OLDESP(%esp),%edi		# %edi points at end of dst region
   17.803 +	shrl $2,%ecx			# convert bytes to words
  17.804 +	je   17f			# skip loop if nothing to copy
  17.805 +16:	subl $4,%esi			# pre-decrementing copy loop
  17.806 +	subl $4,%edi
  17.807 +	movl (%esi),%eax
  17.808 +	movl %eax,(%edi)
  17.809 +	loop 16b
  17.810 +17:	movl %edi,%esp			# final %edi is top of merged stack
  17.811 +	jmp  11b
  17.812 +
  17.813 +.section .rodata,"a"
  17.814 +critical_fixup_table:
  17.815 +	.byte 0xff,0xff,0xff		# testb $0xff,(%esi) = __TEST_PENDING
  17.816 +	.byte 0xff,0xff			# jnz  14f
  17.817 +	.byte 0x00			# pop  %ebx
  17.818 +	.byte 0x04			# pop  %ecx
  17.819 +	.byte 0x08			# pop  %edx
  17.820 +	.byte 0x0c			# pop  %esi
  17.821 +	.byte 0x10			# pop  %edi
  17.822 +	.byte 0x14			# pop  %ebp
  17.823 +	.byte 0x18			# pop  %eax
  17.824 +	.byte 0x1c			# pop  %ds
  17.825 +	.byte 0x20			# pop  %es
  17.826 +	.byte 0x24,0x24,0x24		# add  $4,%esp
  17.827 +	.byte 0x28			# iret
  17.828 +	.byte 0xff,0xff,0xff,0xff	# movb $1,1(%esi)
  17.829 +	.byte 0x00,0x00			# jmp  11b
  17.830 +.previous
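Expressed in C, the fixup slides the freshly saved frame down over the words the interrupted exit path had already popped, leaving one coherent frame to restart from. A sketch with hypothetical names (scrit stands for the label's address; OLDESP is the 0x34 constant defined earlier):

	static unsigned long *merge_frames(unsigned long *esp, unsigned long eip)
	{
		unsigned int popped = critical_fixup_table[eip - scrit];
		unsigned long *src, *dst;

		if (popped == 0xff)	/* 0xff => vcpu_info critical region */
			popped = 0;

		src = (unsigned long *)((char *)esp + popped);	/* end of new frame */
		dst = (unsigned long *)((char *)esp + OLDESP);	/* end of old frame */

		while (src > esp)	/* pre-decrementing copy, as in 16: above */
			*--dst = *--src;

		return dst;		/* final %edi: top of the merged stack */
	}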
  17.831 +
  17.832 +# Hypervisor uses this for application faults while it executes.
  17.833 +# We get here for two reasons:
  17.834 +#  1. Fault while reloading DS, ES, FS or GS
  17.835 +#  2. Fault while executing IRET
  17.836 +# Category 1 we fix up by reattempting the load, and zeroing the segment
  17.837 +# register if the load fails.
  17.838 +# Category 2 we fix up by jumping to do_iret_error. We cannot use the
  17.839 +# normal Linux return path in this case because if we use the IRET hypercall
  17.840 +# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  17.841 +# We distinguish between categories by maintaining a status value in EAX.
  17.842 +ENTRY(failsafe_callback)
  17.843 +	pushl %eax
  17.844 +	movl $1,%eax
  17.845 +1:	mov 4(%esp),%ds
  17.846 +2:	mov 8(%esp),%es
  17.847 +3:	mov 12(%esp),%fs
  17.848 +4:	mov 16(%esp),%gs
  17.849 +	testl %eax,%eax
  17.850 +	popl %eax
  17.851 +	jz 5f
  17.852 +	addl $16,%esp		# EAX != 0 => Category 2 (Bad IRET)
  17.853 +	jmp iret_exc
  17.854 +5:	addl $16,%esp		# EAX == 0 => Category 1 (Bad segment)
  17.855 +	RING0_INT_FRAME
  17.856 +	pushl $0
  17.857 +	SAVE_ALL
  17.858 +	jmp ret_from_exception
  17.859 +.section .fixup,"ax";		\
  17.860 +6:	xorl %eax,%eax;		\
  17.861 +	movl %eax,4(%esp);	\
  17.862 +	jmp 1b;			\
  17.863 +7:	xorl %eax,%eax;		\
  17.864 +	movl %eax,8(%esp);	\
  17.865 +	jmp 2b;			\
  17.866 +8:	xorl %eax,%eax;		\
  17.867 +	movl %eax,12(%esp);	\
  17.868 +	jmp 3b;			\
  17.869 +9:	xorl %eax,%eax;		\
  17.870 +	movl %eax,16(%esp);	\
  17.871 +	jmp 4b;			\
  17.872 +.previous;			\
  17.873 +.section __ex_table,"a";	\
  17.874 +	.align 4;		\
  17.875 +	.long 1b,6b;		\
  17.876 +	.long 2b,7b;		\
  17.877 +	.long 3b,8b;		\
  17.878 +	.long 4b,9b;		\
  17.879 +.previous
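The classification logic reduces to a few lines of pseudo-C (illustrative; reload_or_zero() is a hypothetical stand-in for each numbered segment load together with its .fixup handler, which zeroes the selector and %eax when the load faults):

	enum failsafe_cat { BAD_SEGMENT, BAD_IRET };

	static enum failsafe_cat classify_failsafe(void)
	{
		int ok = 1;			/* %eax = 1 on entry */

		ok &= reload_or_zero(DS);	/* 1: mov 4(%esp),%ds  */
		ok &= reload_or_zero(ES);	/* 2: mov 8(%esp),%es  */
		ok &= reload_or_zero(FS);	/* 3: mov 12(%esp),%fs */
		ok &= reload_or_zero(GS);	/* 4: mov 16(%esp),%gs */

		/* If every reload succeeded, the IRET itself must have faulted. */
		return ok ? BAD_IRET : BAD_SEGMENT;
	}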
  17.880 +#endif
  17.881 +	CFI_ENDPROC
  17.882 +
  17.883 +ENTRY(coprocessor_error)
  17.884 +	RING0_INT_FRAME
  17.885 +	pushl $0
  17.886 +	CFI_ADJUST_CFA_OFFSET 4
  17.887 +	pushl $do_coprocessor_error
  17.888 +	CFI_ADJUST_CFA_OFFSET 4
  17.889 +	jmp error_code
  17.890 +	CFI_ENDPROC
  17.891 +
  17.892 +ENTRY(simd_coprocessor_error)
  17.893 +	RING0_INT_FRAME
  17.894 +	pushl $0
  17.895 +	CFI_ADJUST_CFA_OFFSET 4
  17.896 +	pushl $do_simd_coprocessor_error
  17.897 +	CFI_ADJUST_CFA_OFFSET 4
  17.898 +	jmp error_code
  17.899 +	CFI_ENDPROC
  17.900 +
  17.901 +ENTRY(device_not_available)
  17.902 +	RING0_INT_FRAME
  17.903 +	pushl $-1			# mark this as an int
  17.904 +	CFI_ADJUST_CFA_OFFSET 4
  17.905 +	SAVE_ALL
  17.906 +#ifndef CONFIG_XEN
  17.907 +	movl %cr0, %eax
  17.908 +	testl $0x4, %eax		# EM (math emulation bit)
  17.909 +	je device_available_emulate
  17.910 +	pushl $0			# temporary storage for ORIG_EIP
  17.911 +	CFI_ADJUST_CFA_OFFSET 4
  17.912 +	call math_emulate
  17.913 +	addl $4, %esp
  17.914 +	CFI_ADJUST_CFA_OFFSET -4
  17.915 +	jmp ret_from_exception
  17.916 +device_available_emulate:
  17.917 +#endif
  17.918 +	preempt_stop
  17.919 +	call math_state_restore
  17.920 +	jmp ret_from_exception
  17.921 +	CFI_ENDPROC
  17.922 +
  17.923 +#ifndef CONFIG_XEN
  17.924 +/*
  17.925 + * Debug traps and NMI can happen at the one SYSENTER instruction
  17.926 + * that sets up the real kernel stack. Check here, since we can't
  17.927 + * allow the wrong stack to be used.
  17.928 + *
  17.929 + * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
  17.930 + * already pushed 3 words if it hits on the sysenter instruction:
  17.931 + * eflags, cs and eip.
  17.932 + *
  17.933 + * We just load the right stack, and push the three (known) values
  17.934 + * by hand onto the new stack - while updating the return eip past
  17.935 + * the instruction that would have done it for sysenter.
  17.936 + */
  17.937 +#define FIX_STACK(offset, ok, label)		\
  17.938 +	cmpw $__KERNEL_CS,4(%esp);		\
  17.939 +	jne ok;					\
  17.940 +label:						\
  17.941 +	movl SYSENTER_stack_esp0+offset(%esp),%esp;	\
  17.942 +	pushfl;					\
  17.943 +	pushl $__KERNEL_CS;			\
  17.944 +	pushl $sysenter_past_esp
  17.945 +#endif /* CONFIG_XEN */
  17.946 +
  17.947 +KPROBE_ENTRY(debug)
  17.948 +	RING0_INT_FRAME
  17.949 +#ifndef CONFIG_XEN
  17.950 +	cmpl $sysenter_entry,(%esp)
  17.951 +	jne debug_stack_correct
  17.952 +	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
  17.953 +debug_stack_correct:
  17.954 +#endif /* !CONFIG_XEN */
  17.955 +	pushl $-1			# mark this as an int
  17.956 +	CFI_ADJUST_CFA_OFFSET 4
  17.957 +	SAVE_ALL
  17.958 +	xorl %edx,%edx			# error code 0
  17.959 +	movl %esp,%eax			# pt_regs pointer
  17.960 +	call do_debug
  17.961 +	jmp ret_from_exception
  17.962 +	CFI_ENDPROC
  17.963 +	.previous .text
  17.964 +#ifndef CONFIG_XEN
  17.965 +/*
  17.966 + * NMI is doubly nasty. It can happen _while_ we're handling
  17.967 + * a debug fault, and the debug fault hasn't yet been able to
   17.968 + * clear up the stack. So we first check whether we got an
  17.969 + * NMI on the sysenter entry path, but after that we need to
  17.970 + * check whether we got an NMI on the debug path where the debug
  17.971 + * fault happened on the sysenter path.
  17.972 + */
  17.973 +ENTRY(nmi)
  17.974 +	RING0_INT_FRAME
  17.975 +	pushl %eax
  17.976 +	CFI_ADJUST_CFA_OFFSET 4
  17.977 +	movl %ss, %eax
  17.978 +	cmpw $__ESPFIX_SS, %ax
  17.979 +	popl %eax
  17.980 +	CFI_ADJUST_CFA_OFFSET -4
  17.981 +	je nmi_16bit_stack
  17.982 +	cmpl $sysenter_entry,(%esp)
  17.983 +	je nmi_stack_fixup
  17.984 +	pushl %eax
  17.985 +	CFI_ADJUST_CFA_OFFSET 4
  17.986 +	movl %esp,%eax
  17.987 +	/* Do not access memory above the end of our stack page,
  17.988 +	 * it might not exist.
  17.989 +	 */
  17.990 +	andl $(THREAD_SIZE-1),%eax
  17.991 +	cmpl $(THREAD_SIZE-20),%eax
  17.992 +	popl %eax
  17.993 +	CFI_ADJUST_CFA_OFFSET -4
  17.994 +	jae nmi_stack_correct
  17.995 +	cmpl $sysenter_entry,12(%esp)
  17.996 +	je nmi_debug_stack_check
  17.997 +nmi_stack_correct:
  17.998 +	pushl %eax
  17.999 +	CFI_ADJUST_CFA_OFFSET 4
 17.1000 +	SAVE_ALL
 17.1001 +	xorl %edx,%edx		# zero error code
 17.1002 +	movl %esp,%eax		# pt_regs pointer
 17.1003 +	call do_nmi
 17.1004 +	jmp restore_nocheck_notrace
 17.1005 +	CFI_ENDPROC
 17.1006 +
 17.1007 +nmi_stack_fixup:
 17.1008 +	FIX_STACK(12,nmi_stack_correct, 1)
 17.1009 +	jmp nmi_stack_correct
 17.1010 +nmi_debug_stack_check:
 17.1011 +	cmpw $__KERNEL_CS,16(%esp)
 17.1012 +	jne nmi_stack_correct
 17.1013 +	cmpl $debug,(%esp)
 17.1014 +	jb nmi_stack_correct
 17.1015 +	cmpl $debug_esp_fix_insn,(%esp)
 17.1016 +	ja nmi_stack_correct
 17.1017 +	FIX_STACK(24,nmi_stack_correct, 1)
 17.1018 +	jmp nmi_stack_correct
 17.1019 +
 17.1020 +nmi_16bit_stack:
 17.1021 +	RING0_INT_FRAME
 17.1022 +	/* create the pointer to lss back */
 17.1023 +	pushl %ss
 17.1024 +	CFI_ADJUST_CFA_OFFSET 4
 17.1025 +	pushl %esp
 17.1026 +	CFI_ADJUST_CFA_OFFSET 4
 17.1027 +	movzwl %sp, %esp
 17.1028 +	addw $4, (%esp)
 17.1029 +	/* copy the iret frame of 12 bytes */
 17.1030 +	.rept 3
 17.1031 +	pushl 16(%esp)
 17.1032 +	CFI_ADJUST_CFA_OFFSET 4
 17.1033 +	.endr
 17.1034 +	pushl %eax
 17.1035 +	CFI_ADJUST_CFA_OFFSET 4
 17.1036 +	SAVE_ALL
 17.1037 +	FIXUP_ESPFIX_STACK		# %eax == %esp
 17.1038 +	CFI_ADJUST_CFA_OFFSET -20	# the frame has now moved
 17.1039 +	xorl %edx,%edx			# zero error code
 17.1040 +	call do_nmi
 17.1041 +	RESTORE_REGS
 17.1042 +	lss 12+4(%esp), %esp		# back to 16bit stack
 17.1043 +1:	iret
 17.1044 +	CFI_ENDPROC
 17.1045 +.section __ex_table,"a"
 17.1046 +	.align 4
 17.1047 +	.long 1b,iret_exc
 17.1048 +.previous
 17.1049 +#else
 17.1050 +ENTRY(nmi)
 17.1051 +	RING0_INT_FRAME
 17.1052 +	pushl %eax
 17.1053 +	CFI_ADJUST_CFA_OFFSET 4
 17.1054 +	SAVE_ALL
 17.1055 +	xorl %edx,%edx		# zero error code
 17.1056 +	movl %esp,%eax		# pt_regs pointer
 17.1057 +	call do_nmi
 17.1058 +	orl  $NMI_MASK, EFLAGS(%esp)
 17.1059 +	jmp restore_all
 17.1060 +	CFI_ENDPROC
 17.1061 +#endif
 17.1062 +
 17.1063 +KPROBE_ENTRY(int3)
 17.1064 +	RING0_INT_FRAME
 17.1065 +	pushl $-1			# mark this as an int
 17.1066 +	CFI_ADJUST_CFA_OFFSET 4
 17.1067 +	SAVE_ALL
 17.1068 +	xorl %edx,%edx		# zero error code
 17.1069 +	movl %esp,%eax		# pt_regs pointer
 17.1070 +	call do_int3
 17.1071 +	jmp ret_from_exception
 17.1072 +	CFI_ENDPROC
 17.1073 +	.previous .text
 17.1074 +
 17.1075 +ENTRY(overflow)
 17.1076 +	RING0_INT_FRAME
 17.1077 +	pushl $0
 17.1078 +	CFI_ADJUST_CFA_OFFSET 4
 17.1079 +	pushl $do_overflow
 17.1080 +	CFI_ADJUST_CFA_OFFSET 4
 17.1081 +	jmp error_code
 17.1082 +	CFI_ENDPROC
 17.1083 +
 17.1084 +ENTRY(bounds)
 17.1085 +	RING0_INT_FRAME
 17.1086 +	pushl $0
 17.1087 +	CFI_ADJUST_CFA_OFFSET 4
 17.1088 +	pushl $do_bounds
 17.1089 +	CFI_ADJUST_CFA_OFFSET 4
 17.1090 +	jmp error_code
 17.1091 +	CFI_ENDPROC
 17.1092 +
 17.1093 +ENTRY(invalid_op)
 17.1094 +	RING0_INT_FRAME
 17.1095 +	pushl $0
 17.1096 +	CFI_ADJUST_CFA_OFFSET 4
 17.1097 +	pushl $do_invalid_op
 17.1098 +	CFI_ADJUST_CFA_OFFSET 4
 17.1099 +	jmp error_code
 17.1100 +	CFI_ENDPROC
 17.1101 +
 17.1102 +ENTRY(coprocessor_segment_overrun)
 17.1103 +	RING0_INT_FRAME
 17.1104 +	pushl $0
 17.1105 +	CFI_ADJUST_CFA_OFFSET 4
 17.1106 +	pushl $do_coprocessor_segment_overrun
 17.1107 +	CFI_ADJUST_CFA_OFFSET 4
 17.1108 +	jmp error_code
 17.1109 +	CFI_ENDPROC
 17.1110 +
 17.1111 +ENTRY(invalid_TSS)
 17.1112 +	RING0_EC_FRAME
 17.1113 +	pushl $do_invalid_TSS
 17.1114 +	CFI_ADJUST_CFA_OFFSET 4
 17.1115 +	jmp error_code
 17.1116 +	CFI_ENDPROC
 17.1117 +
 17.1118 +ENTRY(segment_not_present)
 17.1119 +	RING0_EC_FRAME
 17.1120 +	pushl $do_segment_not_present
 17.1121 +	CFI_ADJUST_CFA_OFFSET 4
 17.1122 +	jmp error_code
 17.1123 +	CFI_ENDPROC
 17.1124 +
 17.1125 +ENTRY(stack_segment)
 17.1126 +	RING0_EC_FRAME
 17.1127 +	pushl $do_stack_segment
 17.1128 +	CFI_ADJUST_CFA_OFFSET 4
 17.1129 +	jmp error_code
 17.1130 +	CFI_ENDPROC
 17.1131 +
 17.1132 +KPROBE_ENTRY(general_protection)
 17.1133 +	RING0_EC_FRAME
 17.1134 +	pushl $do_general_protection
 17.1135 +	CFI_ADJUST_CFA_OFFSET 4
 17.1136 +	jmp error_code
 17.1137 +	CFI_ENDPROC
 17.1138 +	.previous .text
 17.1139 +
 17.1140 +ENTRY(alignment_check)
 17.1141 +	RING0_EC_FRAME
 17.1142 +	pushl $do_alignment_check
 17.1143 +	CFI_ADJUST_CFA_OFFSET 4
 17.1144 +	jmp error_code
 17.1145 +	CFI_ENDPROC
 17.1146 +
 17.1147 +KPROBE_ENTRY(page_fault)
 17.1148 +	RING0_EC_FRAME
 17.1149 +	pushl $do_page_fault
 17.1150 +	CFI_ADJUST_CFA_OFFSET 4
 17.1151 +	jmp error_code
 17.1152 +	CFI_ENDPROC
 17.1153 +	.previous .text
 17.1154 +
 17.1155 +#ifdef CONFIG_X86_MCE
 17.1156 +ENTRY(machine_check)
 17.1157 +	RING0_INT_FRAME
 17.1158 +	pushl $0
 17.1159 +	CFI_ADJUST_CFA_OFFSET 4
 17.1160 +	pushl machine_check_vector
 17.1161 +	CFI_ADJUST_CFA_OFFSET 4
 17.1162 +	jmp error_code
 17.1163 +	CFI_ENDPROC
 17.1164 +#endif
 17.1165 +
 17.1166 +#ifndef CONFIG_XEN
 17.1167 +ENTRY(spurious_interrupt_bug)
 17.1168 +	RING0_INT_FRAME
 17.1169 +	pushl $0
 17.1170 +	CFI_ADJUST_CFA_OFFSET 4
 17.1171 +	pushl $do_spurious_interrupt_bug
 17.1172 +	CFI_ADJUST_CFA_OFFSET 4
 17.1173 +	jmp error_code
 17.1174 +	CFI_ENDPROC
 17.1175 +#endif /* !CONFIG_XEN */
 17.1176 +
 17.1177 +#ifdef CONFIG_STACK_UNWIND
 17.1178 +ENTRY(arch_unwind_init_running)
 17.1179 +	CFI_STARTPROC
 17.1180 +	movl	4(%esp), %edx
 17.1181 +	movl	(%esp), %ecx
 17.1182 +	leal	4(%esp), %eax
 17.1183 +	movl	%ebx, EBX(%edx)
 17.1184 +	xorl	%ebx, %ebx
 17.1185 +	movl	%ebx, ECX(%edx)
 17.1186 +	movl	%ebx, EDX(%edx)
 17.1187 +	movl	%esi, ESI(%edx)
 17.1188 +	movl	%edi, EDI(%edx)
 17.1189 +	movl	%ebp, EBP(%edx)
 17.1190 +	movl	%ebx, EAX(%edx)
 17.1191 +	movl	$__USER_DS, DS(%edx)
 17.1192 +	movl	$__USER_DS, ES(%edx)
 17.1193 +	movl	%ebx, ORIG_EAX(%edx)
 17.1194 +	movl	%ecx, EIP(%edx)
 17.1195 +	movl	12(%esp), %ecx
 17.1196 +	movl	$__KERNEL_CS, CS(%edx)
 17.1197 +	movl	%ebx, EFLAGS(%edx)
 17.1198 +	movl	%eax, OLDESP(%edx)
 17.1199 +	movl	8(%esp), %eax
 17.1200 +	movl	%ecx, 8(%esp)
 17.1201 +	movl	EBX(%edx), %ebx
 17.1202 +	movl	$__KERNEL_DS, OLDSS(%edx)
 17.1203 +	jmpl	*%eax
 17.1204 +	CFI_ENDPROC
 17.1205 +ENDPROC(arch_unwind_init_running)
 17.1206 +#endif
 17.1207 +
 17.1208 +ENTRY(fixup_4gb_segment)
 17.1209 +	RING0_EC_FRAME
 17.1210 +	pushl $do_fixup_4gb_segment
 17.1211 +	CFI_ADJUST_CFA_OFFSET 4
 17.1212 +	jmp error_code
 17.1213 +	CFI_ENDPROC
 17.1214 +
 17.1215 +.section .rodata,"a"
 17.1216 +.align 4
 17.1217 +#include "syscall_table.S"
 17.1218 +
 17.1219 +syscall_table_size=(.-sys_call_table)
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/arch/i386/kernel/fixup.c	Mon Jun 04 10:05:28 2007 +0100
    18.3 @@ -0,0 +1,88 @@
    18.4 +/******************************************************************************
    18.5 + * fixup.c
    18.6 + * 
    18.7 + * Binary-rewriting of certain IA32 instructions, on notification by Xen.
    18.8 + * Used to avoid repeated slow emulation of common instructions used by the
    18.9 + * user-space TLS (Thread-Local Storage) libraries.
   18.10 + * 
   18.11 + * **** NOTE ****
   18.12 + *  Issues with the binary rewriting have caused it to be removed. Instead
   18.13 + *  we rely on Xen's emulator to boot the kernel, and then print a banner
    18.14 + *  message recommending that the user disable /lib/tls.
   18.15 + * 
   18.16 + * Copyright (c) 2004, K A Fraser
   18.17 + * 
   18.18 + * This program is free software; you can redistribute it and/or modify
   18.19 + * it under the terms of the GNU General Public License as published by
   18.20 + * the Free Software Foundation; either version 2 of the License, or
   18.21 + * (at your option) any later version.
   18.22 + * 
   18.23 + * This program is distributed in the hope that it will be useful,
   18.24 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   18.25 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18.26 + * GNU General Public License for more details.
   18.27 + * 
   18.28 + * You should have received a copy of the GNU General Public License
   18.29 + * along with this program; if not, write to the Free Software
   18.30 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   18.31 + */
   18.32 +
   18.33 +#include <linux/init.h>
   18.34 +#include <linux/sched.h>
   18.35 +#include <linux/slab.h>
   18.36 +#include <linux/kernel.h>
   18.37 +#include <linux/delay.h>
   18.38 +#include <linux/version.h>
   18.39 +
   18.40 +#define DP(_f, _args...) printk(KERN_ALERT "  " _f "\n" , ## _args )
   18.41 +
   18.42 +fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
   18.43 +{
   18.44 +	static unsigned long printed = 0;
   18.45 +	char info[100];
   18.46 +	int i;
   18.47 +
   18.48 +	/* Ignore statically-linked init. */
   18.49 +	if (current->tgid == 1)
   18.50 +		return;
   18.51 +            
   18.52 +	HYPERVISOR_vm_assist(
   18.53 +		VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify);
   18.54 +
   18.55 +	if (test_and_set_bit(0, &printed))
   18.56 +		return;
   18.57 +
   18.58 +	sprintf(info, "%s (pid=%d)", current->comm, current->tgid);
   18.59 +
   18.60 +	DP("");
   18.61 +	DP("***************************************************************");
   18.62 +	DP("***************************************************************");
   18.63 +	DP("** WARNING: Currently emulating unsupported memory accesses  **");
   18.64 +	DP("**          in /lib/tls glibc libraries. The emulation is    **");
   18.65 +	DP("**          slow. To ensure full performance you should      **");
   18.66 +	DP("**          install a 'xen-friendly' (nosegneg) version of   **");
   18.67 +	DP("**          the library, or disable tls support by executing **");
   18.68 +	DP("**          the following as root:                           **");
   18.69 +	DP("**          mv /lib/tls /lib/tls.disabled                    **");
   18.70 +	DP("** Offending process: %-38.38s **", info);
   18.71 +	DP("***************************************************************");
   18.72 +	DP("***************************************************************");
   18.73 +	DP("");
   18.74 +
   18.75 +	for (i = 5; i > 0; i--) {
   18.76 +		touch_softlockup_watchdog();
   18.77 +		printk("Pausing... %d", i);
   18.78 +		mdelay(1000);
   18.79 +		printk("\b\b\b\b\b\b\b\b\b\b\b\b");
   18.80 +	}
   18.81 +
   18.82 +	printk("Continuing...\n\n");
   18.83 +}
   18.84 +
   18.85 +static int __init fixup_init(void)
   18.86 +{
   18.87 +	HYPERVISOR_vm_assist(
   18.88 +		VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
   18.89 +	return 0;
   18.90 +}
   18.91 +__initcall(fixup_init);
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/arch/i386/kernel/head-xen.S	Mon Jun 04 10:05:28 2007 +0100
    19.3 @@ -0,0 +1,207 @@
    19.4 +
    19.5 +
    19.6 +.text
    19.7 +#include <linux/elfnote.h>
    19.8 +#include <linux/threads.h>
    19.9 +#include <linux/linkage.h>
   19.10 +#include <asm/segment.h>
   19.11 +#include <asm/page.h>
   19.12 +#include <asm/cache.h>
   19.13 +#include <asm/thread_info.h>
   19.14 +#include <asm/asm-offsets.h>
   19.15 +#include <asm/dwarf2.h>
   19.16 +#include <xen/interface/xen.h>
   19.17 +#include <xen/interface/elfnote.h>
   19.18 +
   19.19 +/*
   19.20 + * References to members of the new_cpu_data structure.
   19.21 + */
   19.22 +
   19.23 +#define X86		new_cpu_data+CPUINFO_x86
   19.24 +#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
   19.25 +#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
   19.26 +#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
   19.27 +#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
   19.28 +#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
   19.29 +#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
   19.30 +#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
   19.31 +
   19.32 +#define VIRT_ENTRY_OFFSET 0x0
   19.33 +.org VIRT_ENTRY_OFFSET
   19.34 +ENTRY(startup_32)
   19.35 +	movl %esi,xen_start_info
   19.36 +	cld
   19.37 +
   19.38 +	/* Set up the stack pointer */
   19.39 +	movl $(init_thread_union+THREAD_SIZE),%esp
   19.40 +
   19.41 +	/* get vendor info */
   19.42 +	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
   19.43 +	XEN_CPUID
   19.44 +	movl %eax,X86_CPUID		# save CPUID level
   19.45 +	movl %ebx,X86_VENDOR_ID		# lo 4 chars
   19.46 +	movl %edx,X86_VENDOR_ID+4	# next 4 chars
   19.47 +	movl %ecx,X86_VENDOR_ID+8	# last 4 chars
   19.48 +
   19.49 +	movl $1,%eax		# Use the CPUID instruction to get CPU type
   19.50 +	XEN_CPUID
   19.51 +	movb %al,%cl		# save reg for future use
   19.52 +	andb $0x0f,%ah		# mask processor family
   19.53 +	movb %ah,X86
   19.54 +	andb $0xf0,%al		# mask model
   19.55 +	shrb $4,%al
   19.56 +	movb %al,X86_MODEL
    19.57 +	andb $0x0f,%cl		# mask off the mask revision (stepping)
   19.58 +	movb %cl,X86_MASK
   19.59 +	movl %edx,X86_CAPABILITY
   19.60 +
   19.61 +	movb $1,X86_HARD_MATH
   19.62 +
   19.63 +	xorl %eax,%eax			# Clear FS/GS and LDT
   19.64 +	movl %eax,%fs
   19.65 +	movl %eax,%gs
   19.66 +	cld			# gcc2 wants the direction flag cleared at all times
   19.67 +
   19.68 +	pushl %eax		# fake return address
   19.69 +	jmp start_kernel
   19.70 +
   19.71 +#define HYPERCALL_PAGE_OFFSET 0x1000
   19.72 +.org HYPERCALL_PAGE_OFFSET
   19.73 +ENTRY(hypercall_page)
   19.74 +	CFI_STARTPROC
   19.75 +.skip 0x1000
   19.76 +	CFI_ENDPROC
   19.77 +
   19.78 +/*
   19.79 + * Real beginning of normal "text" segment
   19.80 + */
   19.81 +ENTRY(stext)
   19.82 +ENTRY(_stext)
   19.83 +
   19.84 +/*
   19.85 + * BSS section
   19.86 + */
   19.87 +.section ".bss.page_aligned","w"
   19.88 +ENTRY(empty_zero_page)
   19.89 +	.fill 4096,1,0
   19.90 +
   19.91 +/*
   19.92 + * This starts the data section.
   19.93 + */
   19.94 +.data
   19.95 +
   19.96 +/*
    19.97 + * The Global Descriptor Table contains 32 quadwords, per-CPU.
   19.98 + */
   19.99 +	.align L1_CACHE_BYTES
  19.100 +ENTRY(cpu_gdt_table)
  19.101 +	.quad 0x0000000000000000	/* NULL descriptor */
  19.102 +	.quad 0x0000000000000000	/* 0x0b reserved */
  19.103 +	.quad 0x0000000000000000	/* 0x13 reserved */
  19.104 +	.quad 0x0000000000000000	/* 0x1b reserved */
  19.105 +	.quad 0x0000000000000000	/* 0x20 unused */
  19.106 +	.quad 0x0000000000000000	/* 0x28 unused */
  19.107 +	.quad 0x0000000000000000	/* 0x33 TLS entry 1 */
  19.108 +	.quad 0x0000000000000000	/* 0x3b TLS entry 2 */
  19.109 +	.quad 0x0000000000000000	/* 0x43 TLS entry 3 */
  19.110 +	.quad 0x0000000000000000	/* 0x4b reserved */
  19.111 +	.quad 0x0000000000000000	/* 0x53 reserved */
  19.112 +	.quad 0x0000000000000000	/* 0x5b reserved */
  19.113 +
  19.114 +	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
  19.115 +	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
  19.116 +	.quad 0x00cffa000000ffff	/* 0x73 user 4GB code at 0x00000000 */
  19.117 +	.quad 0x00cff2000000ffff	/* 0x7b user 4GB data at 0x00000000 */
  19.118 +
  19.119 +	.quad 0x0000000000000000	/* 0x80 TSS descriptor */
  19.120 +	.quad 0x0000000000000000	/* 0x88 LDT descriptor */
  19.121 +
  19.122 +	/*
  19.123 +	 * Segments used for calling PnP BIOS have byte granularity.
   19.124 +	 * The code and data segments have fixed 64k limits,
  19.125 +	 * the transfer segment sizes are set at run time.
  19.126 +	 */
  19.127 +	.quad 0x0000000000000000	/* 0x90 32-bit code */
  19.128 +	.quad 0x0000000000000000	/* 0x98 16-bit code */
  19.129 +	.quad 0x0000000000000000	/* 0xa0 16-bit data */
  19.130 +	.quad 0x0000000000000000	/* 0xa8 16-bit data */
  19.131 +	.quad 0x0000000000000000	/* 0xb0 16-bit data */
  19.132 +
  19.133 +	/*
  19.134 +	 * The APM segments have byte granularity and their bases
  19.135 +	 * are set at run time.  All have 64k limits.
  19.136 +	 */
  19.137 +	.quad 0x0000000000000000	/* 0xb8 APM CS    code */
  19.138 +	.quad 0x0000000000000000	/* 0xc0 APM CS 16 code (16 bit) */
  19.139 +	.quad 0x0000000000000000	/* 0xc8 APM DS    data */
  19.140 +
  19.141 +	.quad 0x0000000000000000	/* 0xd0 - ESPFIX 16-bit SS */
  19.142 +	.quad 0x0000000000000000	/* 0xd8 - unused */
  19.143 +	.quad 0x0000000000000000	/* 0xe0 - unused */
  19.144 +	.quad 0x0000000000000000	/* 0xe8 - unused */
  19.145 +	.quad 0x0000000000000000	/* 0xf0 - unused */
  19.146 +	.quad 0x0000000000000000	/* 0xf8 - GDT entry 31: double-fault TSS */
  19.147 +
  19.148 +#if CONFIG_XEN_COMPAT <= 0x030002
  19.149 +/*
  19.150 + * __xen_guest information
  19.151 + */
  19.152 +.macro utoa value
  19.153 + .if (\value) < 0 || (\value) >= 0x10
  19.154 +	utoa (((\value)>>4)&0x0fffffff)
  19.155 + .endif
  19.156 + .if ((\value) & 0xf) < 10
  19.157 +  .byte '0' + ((\value) & 0xf)
  19.158 + .else
  19.159 +  .byte 'A' + ((\value) & 0xf) - 10
  19.160 + .endif
  19.161 +.endm
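The recursive macro emits the hex digits of an assemble-time constant, most significant digit first and without leading zeros (the "< 0" test copes with values whose top bit is set). The same logic in C, for illustration:

	/* Illustrative C twin of the utoa assembler macro. */
	static void utoa(unsigned long v, void (*emit_byte)(char))
	{
		if (v >= 0x10)			/* high-order digits first */
			utoa(v >> 4, emit_byte);
		if ((v & 0xf) < 10)
			emit_byte('0' + (v & 0xf));
		else
			emit_byte('A' + (v & 0xf) - 10);
	}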
  19.162 +
  19.163 +.section __xen_guest
  19.164 +	.ascii	"GUEST_OS=linux,GUEST_VER=2.6"
  19.165 +	.ascii	",XEN_VER=xen-3.0"
  19.166 +	.ascii	",VIRT_BASE=0x"
  19.167 +		utoa __PAGE_OFFSET
  19.168 +	.ascii	",ELF_PADDR_OFFSET=0x"
  19.169 +		utoa __PAGE_OFFSET
  19.170 +	.ascii	",VIRT_ENTRY=0x"
  19.171 +		utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
  19.172 +	.ascii	",HYPERCALL_PAGE=0x"
  19.173 +		utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
  19.174 +	.ascii  ",FEATURES=writable_page_tables"
  19.175 +	.ascii	         "|writable_descriptor_tables"
  19.176 +	.ascii	         "|auto_translated_physmap"
  19.177 +	.ascii	         "|pae_pgdir_above_4gb"
  19.178 +	.ascii	         "|supervisor_mode_kernel"
  19.179 +#ifdef CONFIG_X86_PAE
  19.180 +	.ascii	",PAE=yes[extended-cr3]"
  19.181 +#else
  19.182 +	.ascii	",PAE=no"
  19.183 +#endif
  19.184 +	.ascii	",LOADER=generic"
  19.185 +	.byte	0
  19.186 +#endif /* CONFIG_XEN_COMPAT <= 0x030002 */
  19.187 +
  19.188 +
  19.189 +	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "linux")	
  19.190 +	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "2.6")
  19.191 +	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
  19.192 +	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long,  __PAGE_OFFSET)
  19.193 +#if CONFIG_XEN_COMPAT <= 0x030002
  19.194 +	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  __PAGE_OFFSET)
  19.195 +#else
  19.196 +	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  0)
  19.197 +#endif
  19.198 +	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long,  startup_32)
  19.199 +	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long,  hypercall_page)
  19.200 +	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long,  HYPERVISOR_VIRT_START)
  19.201 +	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
  19.202 +#ifdef CONFIG_X86_PAE
  19.203 +	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
  19.204 +	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .quad,  _PAGE_PRESENT,_PAGE_PRESENT)
  19.205 +#else
  19.206 +	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "no")
  19.207 +	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .long,  _PAGE_PRESENT,_PAGE_PRESENT)
  19.208 +#endif
  19.209 +	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
  19.210 +	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  1)
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/arch/i386/kernel/init_task-xen.c	Mon Jun 04 10:05:28 2007 +0100
    20.3 @@ -0,0 +1,51 @@
    20.4 +#include <linux/mm.h>
    20.5 +#include <linux/module.h>
    20.6 +#include <linux/sched.h>
    20.7 +#include <linux/init.h>
    20.8 +#include <linux/init_task.h>
    20.9 +#include <linux/fs.h>
   20.10 +#include <linux/mqueue.h>
   20.11 +
   20.12 +#include <asm/uaccess.h>
   20.13 +#include <asm/pgtable.h>
   20.14 +#include <asm/desc.h>
   20.15 +
   20.16 +static struct fs_struct init_fs = INIT_FS;
   20.17 +static struct files_struct init_files = INIT_FILES;
   20.18 +static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
   20.19 +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
   20.20 +
   20.21 +#define swapper_pg_dir ((pgd_t *)NULL)
   20.22 +struct mm_struct init_mm = INIT_MM(init_mm);
   20.23 +#undef swapper_pg_dir
   20.24 +
   20.25 +EXPORT_SYMBOL(init_mm);
   20.26 +
   20.27 +/*
   20.28 + * Initial thread structure.
   20.29 + *
   20.30 + * We need to make sure that this is THREAD_SIZE aligned due to the
   20.31 + * way process stacks are handled. This is done by having a special
   20.32 + * "init_task" linker map entry..
   20.33 + */
   20.34 +union thread_union init_thread_union 
   20.35 +	__attribute__((__section__(".data.init_task"))) =
   20.36 +		{ INIT_THREAD_INFO(init_task) };
   20.37 +
   20.38 +/*
   20.39 + * Initial task structure.
   20.40 + *
   20.41 + * All other task structs will be allocated on slabs in fork.c
   20.42 + */
   20.43 +struct task_struct init_task = INIT_TASK(init_task);
   20.44 +
   20.45 +EXPORT_SYMBOL(init_task);
   20.46 +
   20.47 +#ifndef CONFIG_X86_NO_TSS
   20.48 +/*
   20.49 + * per-CPU TSS segments. Threads are completely 'soft' on Linux,
   20.50 + * no more per-task TSS's.
   20.51 + */ 
   20.52 +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
   20.53 +#endif
   20.54 +
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/arch/i386/kernel/io_apic-xen.c	Mon Jun 04 10:05:28 2007 +0100
    21.3 @@ -0,0 +1,2777 @@
    21.4 +/*
    21.5 + *	Intel IO-APIC support for multi-Pentium hosts.
    21.6 + *
    21.7 + *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
    21.8 + *
    21.9 + *	Many thanks to Stig Venaas for trying out countless experimental
   21.10 + *	patches and reporting/debugging problems patiently!
   21.11 + *
   21.12 + *	(c) 1999, Multiple IO-APIC support, developed by
   21.13 + *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
   21.14 + *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
   21.15 + *	further tested and cleaned up by Zach Brown <zab@redhat.com>
   21.16 + *	and Ingo Molnar <mingo@redhat.com>
   21.17 + *
   21.18 + *	Fixes
   21.19 + *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
   21.20 + *					thanks to Eric Gilmore
   21.21 + *					and Rolf G. Tews
   21.22 + *					for testing these extensively
   21.23 + *	Paul Diefenbaugh	:	Added full ACPI support
   21.24 + */
   21.25 +
   21.26 +#include <linux/mm.h>
   21.27 +#include <linux/interrupt.h>
   21.28 +#include <linux/init.h>
   21.29 +#include <linux/delay.h>
   21.30 +#include <linux/sched.h>
   21.31 +#include <linux/smp_lock.h>
   21.32 +#include <linux/mc146818rtc.h>
   21.33 +#include <linux/compiler.h>
   21.34 +#include <linux/acpi.h>
   21.35 +#include <linux/module.h>
   21.36 +#include <linux/sysdev.h>
   21.37 +
   21.38 +#include <asm/io.h>
   21.39 +#include <asm/smp.h>
   21.40 +#include <asm/desc.h>
   21.41 +#include <asm/timer.h>
   21.42 +#include <asm/i8259.h>
   21.43 +#include <asm/nmi.h>
   21.44 +
   21.45 +#include <mach_apic.h>
   21.46 +
   21.47 +#include "io_ports.h"
   21.48 +
   21.49 +#ifdef CONFIG_XEN
   21.50 +
   21.51 +#include <xen/interface/xen.h>
   21.52 +#include <xen/interface/physdev.h>
   21.53 +
   21.54 +/* Fake i8259 */
   21.55 +#define make_8259A_irq(_irq)     (io_apic_irqs &= ~(1UL<<(_irq)))
   21.56 +#define disable_8259A_irq(_irq)  ((void)0)
   21.57 +#define i8259A_irq_pending(_irq) (0)
   21.58 +
   21.59 +unsigned long io_apic_irqs;
   21.60 +
   21.61 +static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
   21.62 +{
   21.63 +	struct physdev_apic apic_op;
   21.64 +	int ret;
   21.65 +
   21.66 +	apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
   21.67 +	apic_op.reg = reg;
   21.68 +	ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
   21.69 +	if (ret)
   21.70 +		return ret;
   21.71 +	return apic_op.value;
   21.72 +}
   21.73 +
   21.74 +static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
   21.75 +{
   21.76 +	struct physdev_apic apic_op;
   21.77 +
   21.78 +	apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
   21.79 +	apic_op.reg = reg;
   21.80 +	apic_op.value = value;
   21.81 +	HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
   21.82 +}
   21.83 +
   21.84 +#define io_apic_read(a,r)    xen_io_apic_read(a,r)
   21.85 +#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
   21.86 +
   21.87 +#endif /* CONFIG_XEN */
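/*
 * Editor's note -- illustrative sketch only, not part of the imported
 * changeset. With the wrappers above, every IO-APIC register access in
 * this file becomes a PHYSDEVOP_apic_read/write hypercall instead of a
 * direct MMIO access, so a read-modify-write of a redirection table
 * entry looks like this (bit 16 of the low RTE dword is the mask bit,
 * matching its use throughout this file):
 */
#if 0
static void example_mask_rte(unsigned int apic, unsigned int pin)
{
	unsigned int low = io_apic_read(apic, 0x10 + 2 * pin);

	io_apic_write(apic, 0x10 + 2 * pin, low | 0x00010000);
}
#endif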
   21.88 +
   21.89 +int (*ioapic_renumber_irq)(int ioapic, int irq);
   21.90 +atomic_t irq_mis_count;
   21.91 +
    21.92 +/* Where, if anywhere, the i8259 is connected in external int mode */
   21.93 +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
   21.94 +
   21.95 +static DEFINE_SPINLOCK(ioapic_lock);
   21.96 +static DEFINE_SPINLOCK(vector_lock);
   21.97 +
   21.98 +int timer_over_8254 __initdata = 1;
   21.99 +
  21.100 +/*
   21.101 + *	Is the SiS APIC rmw bug present?
  21.102 + *	-1 = don't know, 0 = no, 1 = yes
  21.103 + */
  21.104 +int sis_apic_bug = -1;
  21.105 +
  21.106 +/*
  21.107 + * # of IRQ routing registers
  21.108 + */
  21.109 +int nr_ioapic_registers[MAX_IO_APICS];
  21.110 +
  21.111 +int disable_timer_pin_1 __initdata;
  21.112 +
  21.113 +/*
   21.114 + * Rough estimate of how many shared IRQs there are; this can
   21.115 + * be changed anytime.
  21.116 + */
  21.117 +#define MAX_PLUS_SHARED_IRQS NR_IRQS
  21.118 +#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
  21.119 +
  21.120 +/*
  21.121 + * This is performance-critical, we want to do it O(1)
  21.122 + *
  21.123 + * the indexing order of this array favors 1:1 mappings
  21.124 + * between pins and IRQs.
  21.125 + */
  21.126 +
  21.127 +static struct irq_pin_list {
  21.128 +	int apic, pin, next;
  21.129 +} irq_2_pin[PIN_MAP_SIZE];
  21.130 +
  21.131 +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
  21.132 +#ifdef CONFIG_PCI_MSI
  21.133 +#define vector_to_irq(vector) 	\
  21.134 +	(platform_legacy_irq(vector) ? vector : vector_irq[vector])
  21.135 +#else
  21.136 +#define vector_to_irq(vector)	(vector)
  21.137 +#endif
  21.138 +
  21.139 +/*
  21.140 + * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  21.141 + * shared ISA-space IRQs, so we have to support them. We are super
  21.142 + * fast in the common case, and fast for shared ISA-space IRQs.
  21.143 + */
  21.144 +static void add_pin_to_irq(unsigned int irq, int apic, int pin)
  21.145 +{
  21.146 +	static int first_free_entry = NR_IRQS;
  21.147 +	struct irq_pin_list *entry = irq_2_pin + irq;
  21.148 +
  21.149 +	while (entry->next)
  21.150 +		entry = irq_2_pin + entry->next;
  21.151 +
  21.152 +	if (entry->pin != -1) {
  21.153 +		entry->next = first_free_entry;
  21.154 +		entry = irq_2_pin + entry->next;
  21.155 +		if (++first_free_entry >= PIN_MAP_SIZE)
  21.156 +			panic("io_apic.c: whoops");
  21.157 +	}
  21.158 +	entry->apic = apic;
  21.159 +	entry->pin = pin;
  21.160 +}
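/*
 * Editor's note -- illustrative sketch only, not part of the imported
 * changeset. irq_2_pin[] is a linked list embedded in an array: slot
 * 'irq' is the head, and 'next' indexes overflow slots handed out from
 * first_free_entry in add_pin_to_irq() above. Walking all pins of an
 * IRQ therefore looks like:
 */
#if 0
static void example_for_each_pin(unsigned int irq)
{
	struct irq_pin_list *entry = irq_2_pin + irq;

	for (;;) {
		if (entry->pin != -1)
			;	/* visit (entry->apic, entry->pin) here */
		if (!entry->next)
			break;
		entry = irq_2_pin + entry->next;
	}
}
#endif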
  21.161 +
  21.162 +#ifdef CONFIG_XEN
  21.163 +#define clear_IO_APIC() ((void)0)
  21.164 +#else
  21.165 +/*
  21.166 + * Reroute an IRQ to a different pin.
  21.167 + */
  21.168 +static void __init replace_pin_at_irq(unsigned int irq,
  21.169 +				      int oldapic, int oldpin,
  21.170 +				      int newapic, int newpin)
  21.171 +{
  21.172 +	struct irq_pin_list *entry = irq_2_pin + irq;
  21.173 +
  21.174 +	while (1) {
  21.175 +		if (entry->apic == oldapic && entry->pin == oldpin) {
  21.176 +			entry->apic = newapic;
  21.177 +			entry->pin = newpin;
  21.178 +		}
  21.179 +		if (!entry->next)
  21.180 +			break;
  21.181 +		entry = irq_2_pin + entry->next;
  21.182 +	}
  21.183 +}
  21.184 +
  21.185 +static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
  21.186 +{
  21.187 +	struct irq_pin_list *entry = irq_2_pin + irq;
  21.188 +	unsigned int pin, reg;
  21.189 +
  21.190 +	for (;;) {
  21.191 +		pin = entry->pin;
  21.192 +		if (pin == -1)
  21.193 +			break;
  21.194 +		reg = io_apic_read(entry->apic, 0x10 + pin*2);
  21.195 +		reg &= ~disable;
  21.196 +		reg |= enable;
  21.197 +		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
  21.198 +		if (!entry->next)
  21.199 +			break;
  21.200 +		entry = irq_2_pin + entry->next;
  21.201 +	}
  21.202 +}
  21.203 +
  21.204 +/* mask = 1 */
  21.205 +static void __mask_IO_APIC_irq (unsigned int irq)
  21.206 +{
  21.207 +	__modify_IO_APIC_irq(irq, 0x00010000, 0);
  21.208 +}
  21.209 +
  21.210 +/* mask = 0 */
  21.211 +static void __unmask_IO_APIC_irq (unsigned int irq)
  21.212 +{
  21.213 +	__modify_IO_APIC_irq(irq, 0, 0x00010000);
  21.214 +}
  21.215 +
  21.216 +/* mask = 1, trigger = 0 */
  21.217 +static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
  21.218 +{
  21.219 +	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
  21.220 +}
  21.221 +
  21.222 +/* mask = 0, trigger = 1 */
  21.223 +static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
  21.224 +{
  21.225 +	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
  21.226 +}
  21.227 +
  21.228 +static void mask_IO_APIC_irq (unsigned int irq)
  21.229 +{
  21.230 +	unsigned long flags;
  21.231 +
  21.232 +	spin_lock_irqsave(&ioapic_lock, flags);
  21.233 +	__mask_IO_APIC_irq(irq);
  21.234 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  21.235 +}
  21.236 +
  21.237 +static void unmask_IO_APIC_irq (unsigned int irq)
  21.238 +{
  21.239 +	unsigned long flags;
  21.240 +
  21.241 +	spin_lock_irqsave(&ioapic_lock, flags);
  21.242 +	__unmask_IO_APIC_irq(irq);
  21.243 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  21.244 +}
  21.245 +
  21.246 +static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
  21.247 +{
  21.248 +	struct IO_APIC_route_entry entry;
  21.249 +	unsigned long flags;
  21.250 +	
  21.251 +	/* Check delivery_mode to be sure we're not clearing an SMI pin */
  21.252 +	spin_lock_irqsave(&ioapic_lock, flags);
  21.253 +	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
  21.254 +	*(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
  21.255 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  21.256 +	if (entry.delivery_mode == dest_SMI)
  21.257 +		return;
  21.258 +
  21.259 +	/*
  21.260 +	 * Disable it in the IO-APIC irq-routing table:
  21.261 +	 */
  21.262 +	memset(&entry, 0, sizeof(entry));
  21.263 +	entry.mask = 1;
  21.264 +	spin_lock_irqsave(&ioapic_lock, flags);
  21.265 +	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
  21.266 +	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
  21.267 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  21.268 +}
  21.269 +
  21.270 +static void clear_IO_APIC (void)
  21.271 +{
  21.272 +	int apic, pin;
  21.273 +
  21.274 +	for (apic = 0; apic < nr_ioapics; apic++)
  21.275 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
  21.276 +			clear_IO_APIC_pin(apic, pin);
  21.277 +}
  21.278 +
  21.279 +#ifdef CONFIG_SMP
  21.280 +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
  21.281 +{
  21.282 +	unsigned long flags;
  21.283 +	int pin;
  21.284 +	struct irq_pin_list *entry = irq_2_pin + irq;
  21.285 +	unsigned int apicid_value;
  21.286 +	cpumask_t tmp;
  21.287 +	
  21.288 +	cpus_and(tmp, cpumask, cpu_online_map);
  21.289 +	if (cpus_empty(tmp))
  21.290 +		tmp = TARGET_CPUS;
  21.291 +
  21.292 +	cpus_and(cpumask, tmp, CPU_MASK_ALL);
  21.293 +
  21.294 +	apicid_value = cpu_mask_to_apicid(cpumask);
  21.295 +	/* Prepare to do the io_apic_write */
  21.296 +	apicid_value = apicid_value << 24;
  21.297 +	spin_lock_irqsave(&ioapic_lock, flags);
  21.298 +	for (;;) {
  21.299 +		pin = entry->pin;
  21.300 +		if (pin == -1)
  21.301 +			break;
  21.302 +		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
  21.303 +		if (!entry->next)
  21.304 +			break;
  21.305 +		entry = irq_2_pin + entry->next;
  21.306 +	}
  21.307 +	set_irq_info(irq, cpumask);
  21.308 +	spin_unlock_irqrestore(&ioapic_lock, flags);
  21.309 +}
  21.310 +
  21.311 +#if defined(CONFIG_IRQBALANCE)
  21.312 +# include <asm/processor.h>	/* kernel_thread() */
  21.313 +# include <linux/kernel_stat.h>	/* kstat */
  21.314 +# include <linux/slab.h>		/* kmalloc() */
  21.315 +# include <linux/timer.h>	/* time_after() */
  21.316 + 
  21.317 +#ifdef CONFIG_BALANCED_IRQ_DEBUG
  21.318 +#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
  21.319 +#  define Dprintk(x...) do { TDprintk(x); } while (0)
  21.320 +# else
  21.321 +#  define TDprintk(x...) 
  21.322 +#  define Dprintk(x...) 
  21.323 +# endif
  21.324 +
  21.325 +#define IRQBALANCE_CHECK_ARCH -999
  21.326 +#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
  21.327 +#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
  21.328 +#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
  21.329 +#define BALANCED_IRQ_LESS_DELTA		(HZ)
  21.330 +
  21.331 +static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
  21.332 +static int physical_balance __read_mostly;
  21.333 +static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
  21.334 +
  21.335 +static struct irq_cpu_info {
  21.336 +	unsigned long * last_irq;
  21.337 +	unsigned long * irq_delta;
  21.338 +	unsigned long irq;
  21.339 +} irq_cpu_data[NR_CPUS];
  21.340 +
  21.341 +#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
  21.342 +#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
  21.343 +#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
  21.344 +
  21.345 +#define IDLE_ENOUGH(cpu,now) \
  21.346 +	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
  21.347 +
  21.348 +#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
  21.349 +
  21.350 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
  21.351 +
  21.352 +static cpumask_t balance_irq_affinity[NR_IRQS] = {
  21.353 +	[0 ... NR_IRQS-1] = CPU_MASK_ALL
  21.354 +};
  21.355 +
  21.356 +void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
  21.357 +{
  21.358 +	balance_irq_affinity[irq] = mask;
  21.359 +}
  21.360 +
  21.361 +static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
  21.362 +			unsigned long now, int direction)
  21.363 +{
  21.364 +	int search_idle = 1;
  21.365 +	int cpu = curr_cpu;
  21.366 +
  21.367 +	goto inside;
  21.368 +
  21.369 +	do {
  21.370 +		if (unlikely(cpu == curr_cpu))
  21.371 +			search_idle = 0;
  21.372 +inside:
  21.373 +		if (direction == 1) {
  21.374 +			cpu++;
  21.375 +			if (cpu >= NR_CPUS)
  21.376 +				cpu = 0;
  21.377 +		} else {
  21.378 +			cpu--;
  21.379 +			if (cpu == -1)
  21.380 +				cpu = NR_CPUS-1;
  21.381 +		}
  21.382 +	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
  21.383 +			(search_idle && !IDLE_ENOUGH(cpu,now)));
  21.384 +
  21.385 +	return cpu;
  21.386 +}
  21.387 +
  21.388 +static inline void balance_irq(int cpu, int irq)
  21.389 +{
  21.390 +	unsigned long now = jiffies;
  21.391 +	cpumask_t allowed_mask;
  21.392 +	unsigned int new_cpu;
  21.393 +		
  21.394 +	if (irqbalance_disabled)
  21.395 +		return; 
  21.396 +
  21.397 +	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
  21.398 +	new_cpu = move(cpu, allowed_mask, now, 1);
  21.399 +	if (cpu != new_cpu) {
  21.400 +		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
  21.401 +	}
  21.402 +}
  21.403 +
  21.404 +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
  21.405 +{
  21.406 +	int i, j;
  21.407 +	Dprintk("Rotating IRQs among CPUs.\n");
  21.408 +	for_each_online_cpu(i) {
  21.409 +		for (j = 0; j < NR_IRQS; j++) {
  21.410 +			if (!irq_desc[j].action)
  21.411 +				continue;
  21.412 +			/* Is it a significant load ?  */
  21.413 +			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
  21.414 +						useful_load_threshold)
  21.415 +				continue;
  21.416 +			balance_irq(i, j);
  21.417 +		}
  21.418 +	}
  21.419 +	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  21.420 +		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  21.421 +	return;
  21.422 +}
  21.423 +
  21.424 +static void do_irq_balance(void)
  21.425 +{
  21.426 +	int i, j;
  21.427 +	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
  21.428 +	unsigned long move_this_load = 0;
  21.429 +	int max_loaded = 0, min_loaded = 0;
  21.430 +	int load;
  21.431 +	unsigned long useful_load_threshold = balanced_irq_interval + 10;
  21.432 +	int selected_irq;
  21.433 +	int tmp_loaded, first_attempt = 1;
  21.434 +	unsigned long tmp_cpu_irq;
  21.435 +	unsigned long imbalance = 0;
  21.436 +	cpumask_t allowed_mask, target_cpu_mask, tmp;
  21.437 +
  21.438 +	for_each_possible_cpu(i) {
  21.439 +		int package_index;
  21.440 +		CPU_IRQ(i) = 0;
  21.441 +		if (!cpu_online(i))
  21.442 +			continue;
  21.443 +		package_index = CPU_TO_PACKAGEINDEX(i);
  21.444 +		for (j = 0; j < NR_IRQS; j++) {
  21.445 +			unsigned long value_now, delta;
  21.446 +			/* Is this an active IRQ? */
  21.447 +			if (!irq_desc[j].action)
  21.448 +				continue;
  21.449 +			if ( package_index == i )
  21.450 +				IRQ_DELTA(package_index,j) = 0;
  21.451 +			/* Determine the total count per processor per IRQ */
  21.452 +			value_now = (unsigned long) kstat_cpu(i).irqs[j];
  21.453 +
  21.454 +			/* Determine the activity per processor per IRQ */
  21.455 +			delta = value_now - LAST_CPU_IRQ(i,j);
  21.456 +
  21.457 +			/* Update last_cpu_irq[][] for the next time */
  21.458 +			LAST_CPU_IRQ(i,j) = value_now;
  21.459 +
  21.460 +			/* Ignore IRQs whose rate is less than the clock */
  21.461 +			if (delta < useful_load_threshold)
  21.462 +				continue;
  21.463 +			/* update the load for the processor or package total */
  21.464 +			IRQ_DELTA(package_index,j) += delta;
  21.465 +
  21.466 +			/* Keep track of the higher numbered sibling as well */
  21.467 +			if (i != package_index)
  21.468 +				CPU_IRQ(i) += delta;
  21.469 +			/*
  21.470 +			 * We have sibling A and sibling B in the package
  21.471 +			 *
  21.472 +			 * cpu_irq[A] = load for cpu A + load for cpu B
  21.473 +			 * cpu_irq[B] = load for cpu B
  21.474 +			 */
  21.475 +			CPU_IRQ(package_index) += delta;
  21.476 +		}
  21.477 +	}
  21.478 +	/* Find the least loaded processor package */
  21.479 +	for_each_online_cpu(i) {
  21.480 +		if (i != CPU_TO_PACKAGEINDEX(i))
  21.481 +			continue;
  21.482 +		if (min_cpu_irq > CPU_IRQ(i)) {
  21.483 +			min_cpu_irq = CPU_IRQ(i);
  21.484 +			min_loaded = i;
  21.485 +		}
  21.486 +	}
  21.487 +	max_cpu_irq = ULONG_MAX;
  21.488 +
  21.489 +tryanothercpu:
  21.490 +	/* Look for heaviest loaded processor.
  21.491 +	 * We may come back to get the next heaviest loaded processor.
  21.492 +	 * Skip processors with trivial loads.
  21.493 +	 */
  21.494 +	tmp_cpu_irq = 0;
  21.495 +	tmp_loaded = -1;
  21.496 +	for_each_online_cpu(i) {
  21.497 +		if (i != CPU_TO_PACKAGEINDEX(i))
  21.498 +			continue;
  21.499 +		if (max_cpu_irq <= CPU_IRQ(i)) 
  21.500 +			continue;
  21.501 +		if (tmp_cpu_irq < CPU_IRQ(i)) {
  21.502 +			tmp_cpu_irq = CPU_IRQ(i);
  21.503 +			tmp_loaded = i;
  21.504 +		}
  21.505 +	}
  21.506 +
  21.507 +	if (tmp_loaded == -1) {
   21.508 +	/* In the case of a small number of heavy interrupt
   21.509 +	 * sources, some CPUs get loaded too much. We use Ingo's
   21.510 +	 * original approach to rotate them around.
   21.511 +	 */
  21.512 +		if (!first_attempt && imbalance >= useful_load_threshold) {
  21.513 +			rotate_irqs_among_cpus(useful_load_threshold);
  21.514 +			return;
  21.515 +		}
  21.516 +		goto not_worth_the_effort;
  21.517 +	}
  21.518 +	
  21.519 +	first_attempt = 0;		/* heaviest search */
  21.520 +	max_cpu_irq = tmp_cpu_irq;	/* load */
  21.521 +	max_loaded = tmp_loaded;	/* processor */
  21.522 +	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
  21.523 +	
  21.524 +	Dprintk("max_loaded cpu = %d\n", max_loaded);
  21.525 +	Dprintk("min_loaded cpu = %d\n", min_loaded);
  21.526 +	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
  21.527 +	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
  21.528 +	Dprintk("load imbalance = %lu\n", imbalance);
  21.529 +
  21.530 +	/* if imbalance is less than approx 10% of max load, then
  21.531 +	 * observe diminishing returns action. - quit
  21.532 +	 */
  21.533 +	if (imbalance < (max_cpu_irq >> 3)) {
  21.534 +		Dprintk("Imbalance too trivial\n");
  21.535 +		goto not_worth_the_effort;
  21.536 +	}
  21.537 +
  21.538 +tryanotherirq:
  21.539 +	/* if we select an IRQ to move that can't go where we want, then
  21.540 +	 * see if there is another one to try.
  21.541 +	 */
  21.542 +	move_this_load = 0;
  21.543 +	selected_irq = -1;
  21.544 +	for (j = 0; j < NR_IRQS; j++) {
  21.545 +		/* Is this an active IRQ? */
  21.546 +		if (!irq_desc[j].action)
  21.547 +			continue;
  21.548 +		if (imbalance <= IRQ_DELTA(max_loaded,j))
  21.549 +			continue;
  21.550 +		/* Try to find the IRQ that is closest to the imbalance
  21.551 +		 * without going over.
  21.552 +		 */
  21.553 +		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
  21.554 +			move_this_load = IRQ_DELTA(max_loaded,j);
  21.555 +			selected_irq = j;
  21.556 +		}
  21.557 +	}
  21.558 +	if (selected_irq == -1) {
  21.559 +		goto tryanothercpu;
  21.560 +	}
  21.561 +
  21.562 +	imbalance = move_this_load;
  21.563 +	
   21.564 +	/* For the physical_balance case, we accumulated both load
   21.565 +	 * values in one of the siblings' cpu_irq[], to use the same
   21.566 +	 * code for physical and logical processors as much as
   21.567 +	 * possible.
  21.568 +	 *
  21.569 +	 * NOTE: the cpu_irq[] array holds the sum of the load for
  21.570 +	 * sibling A and sibling B in the slot for the lowest numbered
  21.571 +	 * sibling (A), _AND_ the load for sibling B in the slot for
  21.572 +	 * the higher numbered sibling.
  21.573 +	 *
  21.574 +	 * We seek the least loaded sibling by making the comparison
  21.575 +	 * (A+B)/2 vs B
  21.576 +	 */
  21.577 +	load = CPU_IRQ(min_loaded) >> 1;
  21.578 +	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
  21.579 +		if (load > CPU_IRQ(j)) {
  21.580 +			/* This won't change cpu_sibling_map[min_loaded] */
  21.581 +			load = CPU_IRQ(j);
  21.582 +			min_loaded = j;
  21.583 +		}
  21.584 +	}
  21.585 +
  21.586 +	cpus_and(allowed_mask,
  21.587 +		cpu_online_map,
  21.588 +		balance_irq_affinity[selected_irq]);
  21.589 +	target_cpu_mask = cpumask_of_cpu(min_loaded);
  21.590 +	cpus_and(tmp, target_cpu_mask, allowed_mask);
  21.591 +
  21.592 +	if (!cpus_empty(tmp)) {
  21.593 +
  21.594 +		Dprintk("irq = %d moved to cpu = %d\n",
  21.595 +				selected_irq, min_loaded);
  21.596 +		/* mark for change destination */
  21.597 +		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
  21.598 +
  21.599 +		/* Since we made a change, come back sooner to 
  21.600 +		 * check for more variation.
  21.601 +		 */
  21.602 +		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
  21.603 +			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
  21.604 +		return;
  21.605 +	}
  21.606 +	goto tryanotherirq;
  21.607 +
  21.608 +not_worth_the_effort:
  21.609 +	/*
  21.610 +	 * if we did not find an IRQ to move, then adjust the time interval
  21.611 +	 * upward
  21.612 +	 */
  21.613 +	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
  21.614 +		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
  21.615 +	Dprintk("IRQ worth rotating not found\n");
  21.616 +	return;
  21.617 +}
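/*
 * Editor's worked example, not part of the imported changeset: if
 * sibling A handles 600 units of load of its own and its HT sibling B
 * handles 200, the accounting in do_irq_balance() gives
 * cpu_irq[A] = 800 (A + B) and cpu_irq[B] = 200. Comparing
 * (A + B) / 2 = 400 against B = 200 then correctly selects B as the
 * less loaded sibling.
 */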
  21.618 +
  21.619 +static int balanced_irq(void *unused)
  21.620 +{
  21.621 +	int i;
  21.622 +	unsigned long prev_balance_time = jiffies;
  21.623 +	long time_remaining = balanced_irq_interval;
  21.624 +
  21.625 +	daemonize("kirqd");
  21.626 +	
  21.627 +	/* push everything to CPU 0 to give us a starting point.  */
  21.628 +	for (i = 0 ; i < NR_IRQS ; i++) {
  21.629 +		irq_desc[i].pending_mask = cpumask_of_cpu(0);
  21.630 +		set_pending_irq(i, cpumask_of_cpu(0));
  21.631 +	}
  21.632 +
  21.633 +	for ( ; ; ) {
  21.634 +		time_remaining = schedule_timeout_interruptible(time_remaining);
  21.635 +		try_to_freeze();
  21.636 +		if (time_after(jiffies,
  21.637 +				prev_balance_time+balanced_irq_interval)) {
  21.638 +			preempt_disable();
  21.639 +			do_irq_balance();
  21.640 +			prev_balance_time = jiffies;
  21.641 +			time_remaining = balanced_irq_interval;
  21.642 +			preempt_enable();
  21.643 +		}
  21.644 +	}
  21.645 +	return 0;
  21.646 +}
  21.647 +
  21.648 +static int __init balanced_irq_init(void)
  21.649 +{
  21.650 +	int i;
  21.651 +	struct cpuinfo_x86 *c;
  21.652 +	cpumask_t tmp;
  21.653 +
  21.654 +	cpus_shift_right(tmp, cpu_online_map, 2);
   21.655 +	c = &boot_cpu_data;
   21.656 +	/* When not overridden by the command line, ask the subarchitecture. */
  21.657 +	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
  21.658 +		irqbalance_disabled = NO_BALANCE_IRQ;
  21.659 +	if (irqbalance_disabled)
  21.660 +		return 0;
  21.661 +	
   21.662 +	/* Disable irqbalance completely if there is only one processor online */
  21.663 +	if (num_online_cpus() < 2) {
  21.664 +		irqbalance_disabled = 1;
  21.665 +		return 0;
  21.666 +	}
  21.667 +	/*
  21.668 +	 * Enable physical balance only if more than 1 physical processor
  21.669 +	 * is present
  21.670 +	 */
  21.671 +	if (smp_num_siblings > 1 && !cpus_empty(tmp))
  21.672 +		physical_balance = 1;
  21.673 +
  21.674 +	for_each_online_cpu(i) {
  21.675 +		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  21.676 +		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
  21.677 +		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
  21.678 +			printk(KERN_ERR "balanced_irq_init: out of memory");
  21.679 +			goto failed;
  21.680 +		}
  21.681 +		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
  21.682 +		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
  21.683 +	}
  21.684 +	
  21.685 +	printk(KERN_INFO "Starting balanced_irq\n");
  21.686 +	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
  21.687 +		return 0;
  21.688 +	else 
  21.689 +		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
  21.690 +failed:
  21.691 +	for_each_possible_cpu(i) {
  21.692 +		kfree(irq_cpu_data[i].irq_delta);
  21.693 +		irq_cpu_data[i].irq_delta = NULL;
  21.694 +		kfree(irq_cpu_data[i].last_irq);
  21.695 +		irq_cpu_data[i].last_irq = NULL;
  21.696 +	}
  21.697 +	return 0;
  21.698 +}
  21.699 +
  21.700 +int __init irqbalance_disable(char *str)
  21.701 +{
  21.702 +	irqbalance_disabled = 1;
  21.703 +	return 1;
  21.704 +}
  21.705 +
  21.706 +__setup("noirqbalance", irqbalance_disable);
  21.707 +
  21.708 +late_initcall(balanced_irq_init);
  21.709 +#endif /* CONFIG_IRQBALANCE */
  21.710 +#endif /* CONFIG_SMP */
  21.711 +#endif
  21.712 +
  21.713 +#ifndef CONFIG_SMP
  21.714 +void fastcall send_IPI_self(int vector)
  21.715 +{
  21.716 +#ifndef CONFIG_XEN
  21.717 +	unsigned int cfg;
  21.718 +
  21.719 +	/*
  21.720 +	 * Wait for idle.
  21.721 +	 */
  21.722 +	apic_wait_icr_idle();
  21.723 +	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
  21.724 +	/*
  21.725 +	 * Send the IPI. The write to APIC_ICR fires this off.
  21.726 +	 */
  21.727 +	apic_write_around(APIC_ICR, cfg);
  21.728 +#endif
  21.729 +}
  21.730 +#endif /* !CONFIG_SMP */
  21.731 +
  21.732 +
  21.733 +/*
  21.734 + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  21.735 + * specific CPU-side IRQs.
  21.736 + */
  21.737 +
  21.738 +#define MAX_PIRQS 8
  21.739 +static int pirq_entries [MAX_PIRQS];
  21.740 +static int pirqs_enabled;
  21.741 +int skip_ioapic_setup;
  21.742 +
  21.743 +static int __init ioapic_setup(char *str)
  21.744 +{
  21.745 +	skip_ioapic_setup = 1;
  21.746 +	return 1;
  21.747 +}
  21.748 +
  21.749 +__setup("noapic", ioapic_setup);
  21.750 +
  21.751 +static int __init ioapic_pirq_setup(char *str)
  21.752 +{
  21.753 +	int i, max;
  21.754 +	int ints[MAX_PIRQS+1];
  21.755 +
  21.756 +	get_options(str, ARRAY_SIZE(ints), ints);
  21.757 +
  21.758 +	for (i = 0; i < MAX_PIRQS; i++)
  21.759 +		pirq_entries[i] = -1;
  21.760 +
  21.761 +	pirqs_enabled = 1;
  21.762 +	apic_printk(APIC_VERBOSE, KERN_INFO
  21.763 +			"PIRQ redirection, working around broken MP-BIOS.\n");
  21.764 +	max = MAX_PIRQS;
  21.765 +	if (ints[0] < MAX_PIRQS)
  21.766 +		max = ints[0];
  21.767 +
  21.768 +	for (i = 0; i < max; i++) {
  21.769 +		apic_printk(APIC_VERBOSE, KERN_DEBUG
  21.770 +				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
  21.771 +		/*
  21.772 +		 * PIRQs are mapped upside down, usually.
  21.773 +		 */
  21.774 +		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
  21.775 +	}
  21.776 +	return 1;
  21.777 +}
  21.778 +
  21.779 +__setup("pirq=", ioapic_pirq_setup);
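/*
 * Editor's worked example, not part of the imported changeset: booting
 * with "pirq=5,11" yields ints[] = { 2, 5, 11 }, so the loop above
 * stores pirq_entries[7] = 5 and pirq_entries[6] = 11. pin_2_irq()
 * below indexes pirq_entries[pin - 16], so the first value on the
 * command line lands on the highest PIRQ pin -- the "upside down"
 * mapping mentioned above.
 */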
  21.780 +
  21.781 +/*
  21.782 + * Find the IRQ entry number of a certain pin.
  21.783 + */
  21.784 +static int find_irq_entry(int apic, int pin, int type)
  21.785 +{
  21.786 +	int i;
  21.787 +
  21.788 +	for (i = 0; i < mp_irq_entries; i++)
  21.789 +		if (mp_irqs[i].mpc_irqtype == type &&
  21.790 +		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
  21.791 +		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
  21.792 +		    mp_irqs[i].mpc_dstirq == pin)
  21.793 +			return i;
  21.794 +
  21.795 +	return -1;
  21.796 +}
  21.797 +
  21.798 +/*
  21.799 + * Find the pin to which IRQ[irq] (ISA) is connected
  21.800 + */
  21.801 +static int __init find_isa_irq_pin(int irq, int type)
  21.802 +{
  21.803 +	int i;
  21.804 +
  21.805 +	for (i = 0; i < mp_irq_entries; i++) {
  21.806 +		int lbus = mp_irqs[i].mpc_srcbus;
  21.807 +
  21.808 +		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
  21.809 +		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
  21.810 +		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
  21.811 +		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
  21.812 +		    ) &&
  21.813 +		    (mp_irqs[i].mpc_irqtype == type) &&
  21.814 +		    (mp_irqs[i].mpc_srcbusirq == irq))
  21.815 +
  21.816 +			return mp_irqs[i].mpc_dstirq;
  21.817 +	}
  21.818 +	return -1;
  21.819 +}
  21.820 +
  21.821 +static int __init find_isa_irq_apic(int irq, int type)
  21.822 +{
  21.823 +	int i;
  21.824 +
  21.825 +	for (i = 0; i < mp_irq_entries; i++) {
  21.826 +		int lbus = mp_irqs[i].mpc_srcbus;
  21.827 +
  21.828 +		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
  21.829 +		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
  21.830 +		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
  21.831 +		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
  21.832 +		    ) &&
  21.833 +		    (mp_irqs[i].mpc_irqtype == type) &&
  21.834 +		    (mp_irqs[i].mpc_srcbusirq == irq))
  21.835 +			break;
  21.836 +	}
  21.837 +	if (i < mp_irq_entries) {
  21.838 +		int apic;
  21.839 +		for(apic = 0; apic < nr_ioapics; apic++) {
  21.840 +			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
  21.841 +				return apic;
  21.842 +		}
  21.843 +	}
  21.844 +
  21.845 +	return -1;
  21.846 +}
  21.847 +
  21.848 +/*
  21.849 + * Find a specific PCI IRQ entry.
  21.850 + * Not an __init, possibly needed by modules
  21.851 + */
  21.852 +static int pin_2_irq(int idx, int apic, int pin);
  21.853 +
  21.854 +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
  21.855 +{
  21.856 +	int apic, i, best_guess = -1;
  21.857 +
  21.858 +	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
  21.859 +		"slot:%d, pin:%d.\n", bus, slot, pin);
  21.860 +	if (mp_bus_id_to_pci_bus[bus] == -1) {
  21.861 +		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
  21.862 +		return -1;
  21.863 +	}
  21.864 +	for (i = 0; i < mp_irq_entries; i++) {
  21.865 +		int lbus = mp_irqs[i].mpc_srcbus;
  21.866 +
  21.867 +		for (apic = 0; apic < nr_ioapics; apic++)
  21.868 +			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
  21.869 +			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
  21.870 +				break;
  21.871 +
  21.872 +		if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
  21.873 +		    !mp_irqs[i].mpc_irqtype &&
  21.874 +		    (bus == lbus) &&
  21.875 +		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
  21.876 +			int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
  21.877 +
  21.878 +			if (!(apic || IO_APIC_IRQ(irq)))
  21.879 +				continue;
  21.880 +
  21.881 +			if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
  21.882 +				return irq;
  21.883 +			/*
  21.884 +			 * Use the first all-but-pin matching entry as a
  21.885 +			 * best-guess fuzzy result for broken mptables.
  21.886 +			 */
  21.887 +			if (best_guess < 0)
  21.888 +				best_guess = irq;
  21.889 +		}
  21.890 +	}
  21.891 +	return best_guess;
  21.892 +}
  21.893 +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
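/*
 * Editor's worked example, not part of the imported changeset: for a
 * PCI source, mpc_srcbusirq encodes (slot << 2) | pin, which is what
 * the masks above unpack. A value of 0x1a therefore means slot 6
 * (0x1a >> 2) and pin 2 (0x1a & 3), i.e. INTC with INTA counted as
 * pin 0.
 */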
  21.894 +
  21.895 +/*
  21.896 + * This function currently is only a helper for the i386 smp boot process where 
  21.897 + * we need to reprogram the ioredtbls to cater for the cpus which have come online
  21.898 + * so mask in all cases should simply be TARGET_CPUS
  21.899 + */
  21.900 +#ifdef CONFIG_SMP
  21.901 +#ifndef CONFIG_XEN
  21.902 +void __init setup_ioapic_dest(void)
  21.903 +{
  21.904 +	int pin, ioapic, irq, irq_entry;
  21.905 +
  21.906 +	if (skip_ioapic_setup == 1)
  21.907 +		return;
  21.908 +
  21.909 +	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
  21.910 +		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
  21.911 +			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
  21.912 +			if (irq_entry == -1)
  21.913 +				continue;
  21.914 +			irq = pin_2_irq(irq_entry, ioapic, pin);
  21.915 +			set_ioapic_affinity_irq(irq, TARGET_CPUS);
  21.916 +		}
  21.917 +
  21.918 +	}
  21.919 +}
  21.920 +#endif /* !CONFIG_XEN */
  21.921 +#endif
  21.922 +
  21.923 +/*
  21.924 + * EISA Edge/Level control register, ELCR
  21.925 + */
  21.926 +static int EISA_ELCR(unsigned int irq)
  21.927 +{
  21.928 +	if (irq < 16) {
  21.929 +		unsigned int port = 0x4d0 + (irq >> 3);
  21.930 +		return (inb(port) >> (irq & 7)) & 1;
  21.931 +	}
  21.932 +	apic_printk(APIC_VERBOSE, KERN_INFO
  21.933 +			"Broken MPtable reports ISA irq %d\n", irq);
  21.934 +	return 0;
  21.935 +}
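/*
 * Editor's worked example, not part of the imported changeset: for
 * irq 9, port = 0x4d0 + (9 >> 3) = 0x4d1 and the result is
 * (inb(0x4d1) >> 1) & 1; a set bit means the line is level triggered.
 */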
  21.936 +
   21.937 +/* EISA interrupts are always polarity zero and can be edge or level
   21.938 + * triggered depending on the ELCR value.  If an interrupt is listed as
   21.939 + * EISA conforming in the MP table, that means its trigger type must
   21.940 + * be read in from the ELCR. */
  21.941 +
  21.942 +#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
  21.943 +#define default_EISA_polarity(idx)	(0)
  21.944 +
  21.945 +/* ISA interrupts are always polarity zero edge triggered,
  21.946 + * when listed as conforming in the MP table. */
  21.947 +
  21.948 +#define default_ISA_trigger(idx)	(0)
  21.949 +#define default_ISA_polarity(idx)	(0)
  21.950 +
  21.951 +/* PCI interrupts are always polarity one level triggered,
  21.952 + * when listed as conforming in the MP table. */
  21.953 +
  21.954 +#define default_PCI_trigger(idx)	(1)
  21.955 +#define default_PCI_polarity(idx)	(1)
  21.956 +
  21.957 +/* MCA interrupts are always polarity zero level triggered,
  21.958 + * when listed as conforming in the MP table. */
  21.959 +
  21.960 +#define default_MCA_trigger(idx)	(1)
  21.961 +#define default_MCA_polarity(idx)	(0)
  21.962 +
  21.963 +/* NEC98 interrupts are always polarity zero edge triggered,
  21.964 + * when listed as conforming in the MP table. */
  21.965 +
  21.966 +#define default_NEC98_trigger(idx)     (0)
  21.967 +#define default_NEC98_polarity(idx)    (0)
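/*
 * Editor's summary, not part of the imported changeset -- the
 * "conforming" defaults above in one place (polarity 0 = high active,
 * 1 = low active; trigger 0 = edge, 1 = level):
 *
 *	bus	polarity	trigger
 *	ISA	high		edge
 *	EISA	high		from ELCR
 *	PCI	low		level
 *	MCA	high		level
 *	NEC98	high		edge
 */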
  21.968 +
  21.969 +static int __init MPBIOS_polarity(int idx)
  21.970 +{
  21.971 +	int bus = mp_irqs[idx].mpc_srcbus;
  21.972 +	int polarity;
  21.973 +
  21.974 +	/*
  21.975 +	 * Determine IRQ line polarity (high active or low active):
  21.976 +	 */
  21.977 +	switch (mp_irqs[idx].mpc_irqflag & 3)
  21.978 +	{
  21.979 +		case 0: /* conforms, ie. bus-type dependent polarity */
  21.980 +		{
  21.981 +			switch (mp_bus_id_to_type[bus])
  21.982 +			{
  21.983 +				case MP_BUS_ISA: /* ISA pin */
  21.984 +				{
  21.985 +					polarity = default_ISA_polarity(idx);
  21.986 +					break;
  21.987 +				}
  21.988 +				case MP_BUS_EISA: /* EISA pin */
  21.989 +				{
  21.990 +					polarity = default_EISA_polarity(idx);
  21.991 +					break;
  21.992 +				}
  21.993 +				case MP_BUS_PCI: /* PCI pin */
  21.994 +				{
  21.995 +					polarity = default_PCI_polarity(idx);
  21.996 +					break;
  21.997 +				}
  21.998 +				case MP_BUS_MCA: /* MCA pin */
  21.999 +				{
 21.1000 +					polarity = default_MCA_polarity(idx);
 21.1001 +					break;
 21.1002 +				}
 21.1003 +				case MP_BUS_NEC98: /* NEC 98 pin */
 21.1004 +				{
 21.1005 +					polarity = default_NEC98_polarity(idx);
 21.1006 +					break;
 21.1007 +				}
 21.1008 +				default:
 21.1009 +				{
 21.1010 +					printk(KERN_WARNING "broken BIOS!!\n");
 21.1011 +					polarity = 1;
 21.1012 +					break;
 21.1013 +				}
 21.1014 +			}
 21.1015 +			break;
 21.1016 +		}
 21.1017 +		case 1: /* high active */
 21.1018 +		{
 21.1019 +			polarity = 0;
 21.1020 +			break;
 21.1021 +		}
 21.1022 +		case 2: /* reserved */
 21.1023 +		{
 21.1024 +			printk(KERN_WARNING "broken BIOS!!\n");
 21.1025 +			polarity = 1;
 21.1026 +			break;
 21.1027 +		}
 21.1028 +		case 3: /* low active */
 21.1029 +		{
 21.1030 +			polarity = 1;
 21.1031 +			break;
 21.1032 +		}
 21.1033 +		default: /* invalid */
 21.1034 +		{
 21.1035 +			printk(KERN_WARNING "broken BIOS!!\n");
 21.1036 +			polarity = 1;
 21.1037 +			break;
 21.1038 +		}
 21.1039 +	}
 21.1040 +	return polarity;
 21.1041 +}
 21.1042 +
 21.1043 +static int MPBIOS_trigger(int idx)
 21.1044 +{
 21.1045 +	int bus = mp_irqs[idx].mpc_srcbus;
 21.1046 +	int trigger;
 21.1047 +
 21.1048 +	/*
 21.1049 +	 * Determine IRQ trigger mode (edge or level sensitive):
 21.1050 +	 */
 21.1051 +	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
 21.1052 +	{
 21.1053 +		case 0: /* conforms, ie. bus-type dependent */
 21.1054 +		{
 21.1055 +			switch (mp_bus_id_to_type[bus])
 21.1056 +			{
 21.1057 +				case MP_BUS_ISA: /* ISA pin */
 21.1058 +				{
 21.1059 +					trigger = default_ISA_trigger(idx);
 21.1060 +					break;
 21.1061 +				}
 21.1062 +				case MP_BUS_EISA: /* EISA pin */
 21.1063 +				{
 21.1064 +					trigger = default_EISA_trigger(idx);
 21.1065 +					break;
 21.1066 +				}
 21.1067 +				case MP_BUS_PCI: /* PCI pin */
 21.1068 +				{
 21.1069 +					trigger = default_PCI_trigger(idx);
 21.1070 +					break;
 21.1071 +				}
 21.1072 +				case MP_BUS_MCA: /* MCA pin */
 21.1073 +				{
 21.1074 +					trigger = default_MCA_trigger(idx);
 21.1075 +					break;
 21.1076 +				}
 21.1077 +				case MP_BUS_NEC98: /* NEC 98 pin */
 21.1078 +				{
 21.1079 +					trigger = default_NEC98_trigger(idx);
 21.1080 +					break;
 21.1081 +				}
 21.1082 +				default:
 21.1083 +				{
 21.1084 +					printk(KERN_WARNING "broken BIOS!!\n");
 21.1085 +					trigger = 1;
 21.1086 +					break;
 21.1087 +				}
 21.1088 +			}
 21.1089 +			break;
 21.1090 +		}
 21.1091 +		case 1: /* edge */
 21.1092 +		{
 21.1093 +			trigger = 0;
 21.1094 +			break;
 21.1095 +		}
 21.1096 +		case 2: /* reserved */
 21.1097 +		{
 21.1098 +			printk(KERN_WARNING "broken BIOS!!\n");
 21.1099 +			trigger = 1;
 21.1100 +			break;
 21.1101 +		}
 21.1102 +		case 3: /* level */
 21.1103 +		{
 21.1104 +			trigger = 1;
 21.1105 +			break;
 21.1106 +		}
 21.1107 +		default: /* invalid */
 21.1108 +		{
 21.1109 +			printk(KERN_WARNING "broken BIOS!!\n");
 21.1110 +			trigger = 0;
 21.1111 +			break;
 21.1112 +		}
 21.1113 +	}
 21.1114 +	return trigger;
 21.1115 +}
 21.1116 +
 21.1117 +static inline int irq_polarity(int idx)
 21.1118 +{
 21.1119 +	return MPBIOS_polarity(idx);
 21.1120 +}
 21.1121 +
 21.1122 +static inline int irq_trigger(int idx)
 21.1123 +{
 21.1124 +	return MPBIOS_trigger(idx);
 21.1125 +}
 21.1126 +
 21.1127 +static int pin_2_irq(int idx, int apic, int pin)
 21.1128 +{
 21.1129 +	int irq, i;
 21.1130 +	int bus = mp_irqs[idx].mpc_srcbus;
 21.1131 +
 21.1132 +	/*
 21.1133 +	 * Debugging check, we are in big trouble if this message pops up!
 21.1134 +	 */
 21.1135 +	if (mp_irqs[idx].mpc_dstirq != pin)
 21.1136 +		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 21.1137 +
 21.1138 +	switch (mp_bus_id_to_type[bus])
 21.1139 +	{
 21.1140 +		case MP_BUS_ISA: /* ISA pin */
 21.1141 +		case MP_BUS_EISA:
 21.1142 +		case MP_BUS_MCA:
 21.1143 +		case MP_BUS_NEC98:
 21.1144 +		{
 21.1145 +			irq = mp_irqs[idx].mpc_srcbusirq;
 21.1146 +			break;
 21.1147 +		}
 21.1148 +		case MP_BUS_PCI: /* PCI pin */
 21.1149 +		{
 21.1150 +			/*
 21.1151 +			 * PCI IRQs are mapped in order
 21.1152 +			 */
 21.1153 +			i = irq = 0;
 21.1154 +			while (i < apic)
 21.1155 +				irq += nr_ioapic_registers[i++];
 21.1156 +			irq += pin;
 21.1157 +
 21.1158 +			/*
 21.1159 +			 * For MPS mode, so far only needed by ES7000 platform
 21.1160 +			 */
 21.1161 +			if (ioapic_renumber_irq)
 21.1162 +				irq = ioapic_renumber_irq(apic, irq);
 21.1163 +
 21.1164 +			break;
 21.1165 +		}
 21.1166 +		default:
 21.1167 +		{
  21.1168 +			printk(KERN_ERR "unknown bus type %d.\n", bus);
 21.1169 +			irq = 0;
 21.1170 +			break;
 21.1171 +		}
 21.1172 +	}
 21.1173 +
 21.1174 +	/*
 21.1175 +	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
 21.1176 +	 */
 21.1177 +	if ((pin >= 16) && (pin <= 23)) {
 21.1178 +		if (pirq_entries[pin-16] != -1) {
 21.1179 +			if (!pirq_entries[pin-16]) {
 21.1180 +				apic_printk(APIC_VERBOSE, KERN_DEBUG
 21.1181 +						"disabling PIRQ%d\n", pin-16);
 21.1182 +			} else {
 21.1183 +				irq = pirq_entries[pin-16];
 21.1184 +				apic_printk(APIC_VERBOSE, KERN_DEBUG
 21.1185 +						"using PIRQ%d -> IRQ %d\n",
 21.1186 +						pin-16, irq);
 21.1187 +			}
 21.1188 +		}
 21.1189 +	}
 21.1190 +	return irq;
 21.1191 +}
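/*
 * Editor's worked example, not part of the imported changeset: with
 * two IO-APICs and nr_ioapic_registers[0] == 24, a PCI interrupt on
 * apic 1, pin 3 yields irq = 24 + 3 = 27 -- before any ES7000
 * renumbering or "pirq=" command-line redirection above is applied.
 */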
 21.1192 +
 21.1193 +static inline int IO_APIC_irq_trigger(int irq)
 21.1194 +{
 21.1195 +	int apic, idx, pin;
 21.1196 +
 21.1197 +	for (apic = 0; apic < nr_ioapics; apic++) {
 21.1198 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 21.1199 +			idx = find_irq_entry(apic,pin,mp_INT);
 21.1200 +			if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
 21.1201 +				return irq_trigger(idx);
 21.1202 +		}
 21.1203 +	}
 21.1204 +	/*
 21.1205 +	 * nonexistent IRQs are edge default
 21.1206 +	 */
 21.1207 +	return 0;
 21.1208 +}
 21.1209 +
 21.1210 +/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 21.1211 +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
 21.1212 +
 21.1213 +int assign_irq_vector(int irq)
 21.1214 +{
 21.1215 +	unsigned long flags;
 21.1216 +	int vector;
 21.1217 +	struct physdev_irq irq_op;
 21.1218 +
 21.1219 +	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
 21.1220 +
 21.1221 +	spin_lock_irqsave(&vector_lock, flags);
 21.1222 +
 21.1223 +	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
 21.1224 +		spin_unlock_irqrestore(&vector_lock, flags);
 21.1225 +		return IO_APIC_VECTOR(irq);
 21.1226 +	}
 21.1227 +
 21.1228 +	irq_op.irq = irq;
 21.1229 +	if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
 21.1230 +		spin_unlock_irqrestore(&vector_lock, flags);
 21.1231 +		return -ENOSPC;
 21.1232 +	}
 21.1233 +
 21.1234 +	vector = irq_op.vector;
 21.1235 +	vector_irq[vector] = irq;
 21.1236 +	if (irq != AUTO_ASSIGN)
 21.1237 +		IO_APIC_VECTOR(irq) = vector;
 21.1238 +
 21.1239 +	spin_unlock_irqrestore(&vector_lock, flags);
 21.1240 +
 21.1241 +	return vector;
 21.1242 +}
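/*
 * Editor's note -- illustrative sketch only, not part of the imported
 * changeset. Unlike native Linux, the vector here is allocated by the
 * hypervisor via PHYSDEVOP_alloc_irq_vector, so a typical call site
 * only has to handle the -ENOSPC case:
 */
#if 0
static int example_assign(int irq)
{
	int vector = assign_irq_vector(irq);

	if (vector < 0)
		return vector;	/* hypervisor ran out of vectors */
	/* program 'vector' into the RTEs of this irq's pins ... */
	return 0;
}
#endif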
 21.1243 +
 21.1244 +#ifndef CONFIG_XEN
 21.1245 +static struct hw_interrupt_type ioapic_level_type;
 21.1246 +static struct hw_interrupt_type ioapic_edge_type;
 21.1247 +
 21.1248 +#define IOAPIC_AUTO	-1
 21.1249 +#define IOAPIC_EDGE	0
 21.1250 +#define IOAPIC_LEVEL	1
 21.1251 +
 21.1252 +static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 21.1253 +{
 21.1254 +	unsigned idx;
 21.1255 +
 21.1256 +	idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
 21.1257 +
 21.1258 +	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 21.1259 +			trigger == IOAPIC_LEVEL)
 21.1260 +		irq_desc[idx].chip = &ioapic_level_type;
 21.1261 +	else
 21.1262 +		irq_desc[idx].chip = &ioapic_edge_type;
 21.1263 +	set_intr_gate(vector, interrupt[idx]);
 21.1264 +}
 21.1265 +#else
 21.1266 +#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
 21.1267 +#endif
 21.1268 +
 21.1269 +static void __init setup_IO_APIC_irqs(void)
 21.1270 +{
 21.1271 +	struct IO_APIC_route_entry entry;
 21.1272 +	int apic, pin, idx, irq, first_notcon = 1, vector;
 21.1273 +	unsigned long flags;
 21.1274 +
 21.1275 +	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 21.1276 +
 21.1277 +	for (apic = 0; apic < nr_ioapics; apic++) {
 21.1278 +	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 21.1279 +
 21.1280 +		/*
 21.1281 +		 * add it to the IO-APIC irq-routing table:
 21.1282 +		 */
 21.1283 +		memset(&entry,0,sizeof(entry));
 21.1284 +
 21.1285 +		entry.delivery_mode = INT_DELIVERY_MODE;
 21.1286 +		entry.dest_mode = INT_DEST_MODE;
 21.1287 +		entry.mask = 0;				/* enable IRQ */
 21.1288 +		entry.dest.logical.logical_dest = 
 21.1289 +					cpu_mask_to_apicid(TARGET_CPUS);
 21.1290 +
 21.1291 +		idx = find_irq_entry(apic,pin,mp_INT);
 21.1292 +		if (idx == -1) {
 21.1293 +			if (first_notcon) {
 21.1294 +				apic_printk(APIC_VERBOSE, KERN_DEBUG
 21.1295 +						" IO-APIC (apicid-pin) %d-%d",
 21.1296 +						mp_ioapics[apic].mpc_apicid,
 21.1297 +						pin);
 21.1298 +				first_notcon = 0;
 21.1299 +			} else
 21.1300 +				apic_printk(APIC_VERBOSE, ", %d-%d",
 21.1301 +					mp_ioapics[apic].mpc_apicid, pin);
 21.1302 +			continue;
 21.1303 +		}
 21.1304 +
 21.1305 +		entry.trigger = irq_trigger(idx);
 21.1306 +		entry.polarity = irq_polarity(idx);
 21.1307 +
 21.1308 +		if (irq_trigger(idx)) {
 21.1309 +			entry.trigger = 1;
 21.1310 +			entry.mask = 1;
 21.1311 +		}
 21.1312 +
 21.1313 +		irq = pin_2_irq(idx, apic, pin);
 21.1314 +		/*
 21.1315 +		 * skip adding the timer int on secondary nodes, which causes
 21.1316 +		 * a small but painful rift in the time-space continuum
 21.1317 +		 */
 21.1318 +		if (multi_timer_check(apic, irq))
 21.1319 +			continue;
 21.1320 +		else
 21.1321 +			add_pin_to_irq(irq, apic, pin);
 21.1322 +
 21.1323 +		if (/*!apic &&*/ !IO_APIC_IRQ(irq))
 21.1324 +			continue;
 21.1325 +
 21.1326 +		if (IO_APIC_IRQ(irq)) {
 21.1327 +			vector = assign_irq_vector(irq);
 21.1328 +			entry.vector = vector;
 21.1329 +			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
 21.1330 +		
 21.1331 +			if (!apic && (irq < 16))
 21.1332 +				disable_8259A_irq(irq);
 21.1333 +		}
 21.1334 +		spin_lock_irqsave(&ioapic_lock, flags);
 21.1335 +		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
 21.1336 +		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
 21.1337 +		set_native_irq_info(irq, TARGET_CPUS);
 21.1338 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1339 +	}
 21.1340 +	}
 21.1341 +
 21.1342 +	if (!first_notcon)
 21.1343 +		apic_printk(APIC_VERBOSE, " not connected.\n");
 21.1344 +}
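/*
 * Editor's note, not part of the imported changeset: the two
 * io_apic_write() calls above write the high RTE dword (0x11,
 * destination) before the low dword (0x10, which holds the mask bit
 * and vector) -- the likely reason for the ordering is that an entry
 * is never unmasked while its destination is still half-written.
 */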
 21.1345 +
 21.1346 +/*
 21.1347 + * Set up the 8259A-master output pin:
 21.1348 + */
 21.1349 +#ifndef CONFIG_XEN
 21.1350 +static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 21.1351 +{
 21.1352 +	struct IO_APIC_route_entry entry;
 21.1353 +	unsigned long flags;
 21.1354 +
 21.1355 +	memset(&entry,0,sizeof(entry));
 21.1356 +
 21.1357 +	disable_8259A_irq(0);
 21.1358 +
 21.1359 +	/* mask LVT0 */
 21.1360 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 21.1361 +
 21.1362 +	/*
 21.1363 +	 * We use logical delivery to get the timer IRQ
 21.1364 +	 * to the first CPU.
 21.1365 +	 */
 21.1366 +	entry.dest_mode = INT_DEST_MODE;
 21.1367 +	entry.mask = 0;					/* unmask IRQ now */
 21.1368 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 21.1369 +	entry.delivery_mode = INT_DELIVERY_MODE;
 21.1370 +	entry.polarity = 0;
 21.1371 +	entry.trigger = 0;
 21.1372 +	entry.vector = vector;
 21.1373 +
 21.1374 +	/*
 21.1375 +	 * The timer IRQ doesn't have to know that behind the
 21.1376 +	 * scene we have a 8259A-master in AEOI mode ...
 21.1377 +	 */
 21.1378 +	irq_desc[0].chip = &ioapic_edge_type;
 21.1379 +
 21.1380 +	/*
 21.1381 +	 * Add it to the IO-APIC irq-routing table:
 21.1382 +	 */
 21.1383 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.1384 +	io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
 21.1385 +	io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
 21.1386 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1387 +
 21.1388 +	enable_8259A_irq(0);
 21.1389 +}
 21.1390 +
 21.1391 +static inline void UNEXPECTED_IO_APIC(void)
 21.1392 +{
 21.1393 +}
 21.1394 +
 21.1395 +void __init print_IO_APIC(void)
 21.1396 +{
 21.1397 +	int apic, i;
 21.1398 +	union IO_APIC_reg_00 reg_00;
 21.1399 +	union IO_APIC_reg_01 reg_01;
 21.1400 +	union IO_APIC_reg_02 reg_02;
 21.1401 +	union IO_APIC_reg_03 reg_03;
 21.1402 +	unsigned long flags;
 21.1403 +
 21.1404 +	if (apic_verbosity == APIC_QUIET)
 21.1405 +		return;
 21.1406 +
 21.1407 + 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 21.1408 +	for (i = 0; i < nr_ioapics; i++)
 21.1409 +		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
 21.1410 +		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
 21.1411 +
 21.1412 +	/*
 21.1413 +	 * We are a bit conservative about what we expect.  We have to
 21.1414 +	 * know about every hardware change ASAP.
 21.1415 +	 */
 21.1416 +	printk(KERN_INFO "testing the IO APIC.......................\n");
 21.1417 +
 21.1418 +	for (apic = 0; apic < nr_ioapics; apic++) {
 21.1419 +
 21.1420 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.1421 +	reg_00.raw = io_apic_read(apic, 0);
 21.1422 +	reg_01.raw = io_apic_read(apic, 1);
 21.1423 +	if (reg_01.bits.version >= 0x10)
 21.1424 +		reg_02.raw = io_apic_read(apic, 2);
 21.1425 +	if (reg_01.bits.version >= 0x20)
 21.1426 +		reg_03.raw = io_apic_read(apic, 3);
 21.1427 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1428 +
 21.1429 +	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
 21.1430 +	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 21.1431 +	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 21.1432 +	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 21.1433 +	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 21.1434 +	if (reg_00.bits.ID >= get_physical_broadcast())
 21.1435 +		UNEXPECTED_IO_APIC();
 21.1436 +	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
 21.1437 +		UNEXPECTED_IO_APIC();
 21.1438 +
 21.1439 +	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
 21.1440 +	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
 21.1441 +	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
 21.1442 +		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
 21.1443 +		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
 21.1444 +		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
 21.1445 +		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
 21.1446 +		(reg_01.bits.entries != 0x2E) &&
 21.1447 +		(reg_01.bits.entries != 0x3F)
 21.1448 +	)
 21.1449 +		UNEXPECTED_IO_APIC();
 21.1450 +
 21.1451 +	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 21.1452 +	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 21.1453 +	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
 21.1454 +		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
 21.1455 +		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
 21.1456 +		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
 21.1457 +		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
 21.1458 +	)
 21.1459 +		UNEXPECTED_IO_APIC();
 21.1460 +	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
 21.1461 +		UNEXPECTED_IO_APIC();
 21.1462 +
 21.1463 +	/*
 21.1464 +	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
 21.1465 +	 * but the value of reg_02 is read as the previous read register
 21.1466 +	 * value, so ignore it if reg_02 == reg_01.
 21.1467 +	 */
 21.1468 +	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 21.1469 +		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 21.1470 +		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 21.1471 +		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
 21.1472 +			UNEXPECTED_IO_APIC();
 21.1473 +	}
 21.1474 +
 21.1475 +	/*
 21.1476 +	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
 21.1477 +	 * or reg_03, but the value of reg_0[23] is read as the previous read
 21.1478 +	 * register value, so ignore it if reg_03 == reg_0[12].
 21.1479 +	 */
 21.1480 +	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
 21.1481 +	    reg_03.raw != reg_01.raw) {
 21.1482 +		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
 21.1483 +		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
 21.1484 +		if (reg_03.bits.__reserved_1)
 21.1485 +			UNEXPECTED_IO_APIC();
 21.1486 +	}
 21.1487 +
 21.1488 +	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 21.1489 +
 21.1490 +	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
 21.1491 +			  " Stat Dest Deli Vect:   \n");
 21.1492 +
 21.1493 +	for (i = 0; i <= reg_01.bits.entries; i++) {
 21.1494 +		struct IO_APIC_route_entry entry;
 21.1495 +
 21.1496 +		spin_lock_irqsave(&ioapic_lock, flags);
 21.1497 +		*(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
 21.1498 +		*(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
 21.1499 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1500 +
 21.1501 +		printk(KERN_DEBUG " %02x %03X %02X  ",
 21.1502 +			i,
 21.1503 +			entry.dest.logical.logical_dest,
 21.1504 +			entry.dest.physical.physical_dest
 21.1505 +		);
 21.1506 +
 21.1507 +		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
 21.1508 +			entry.mask,
 21.1509 +			entry.trigger,
 21.1510 +			entry.irr,
 21.1511 +			entry.polarity,
 21.1512 +			entry.delivery_status,
 21.1513 +			entry.dest_mode,
 21.1514 +			entry.delivery_mode,
 21.1515 +			entry.vector
 21.1516 +		);
 21.1517 +	}
 21.1518 +	}
 21.1519 +	if (use_pci_vector())
 21.1520 +		printk(KERN_INFO "Using vector-based indexing\n");
 21.1521 +	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 21.1522 +	for (i = 0; i < NR_IRQS; i++) {
 21.1523 +		struct irq_pin_list *entry = irq_2_pin + i;
 21.1524 +		if (entry->pin < 0)
 21.1525 +			continue;
 21.1526 + 		if (use_pci_vector() && !platform_legacy_irq(i))
 21.1527 +			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
 21.1528 +		else
 21.1529 +			printk(KERN_DEBUG "IRQ%d ", i);
 21.1530 +		for (;;) {
 21.1531 +			printk("-> %d:%d", entry->apic, entry->pin);
 21.1532 +			if (!entry->next)
 21.1533 +				break;
 21.1534 +			entry = irq_2_pin + entry->next;
 21.1535 +		}
 21.1536 +		printk("\n");
 21.1537 +	}
 21.1538 +
 21.1539 +	printk(KERN_INFO ".................................... done.\n");
 21.1540 +
 21.1541 +	return;
 21.1542 +}
 21.1543 +
 21.1544 +#if 0
 21.1545 +
 21.1546 +static void print_APIC_bitfield (int base)
 21.1547 +{
 21.1548 +	unsigned int v;
 21.1549 +	int i, j;
 21.1550 +
 21.1551 +	if (apic_verbosity == APIC_QUIET)
 21.1552 +		return;
 21.1553 +
 21.1554 +	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
 21.1555 +	for (i = 0; i < 8; i++) {
 21.1556 +		v = apic_read(base + i*0x10);
 21.1557 +		for (j = 0; j < 32; j++) {
 21.1558 +			if (v & (1<<j))
 21.1559 +				printk("1");
 21.1560 +			else
 21.1561 +				printk("0");
 21.1562 +		}
 21.1563 +		printk("\n");
 21.1564 +	}
 21.1565 +}
 21.1566 +
 21.1567 +void /*__init*/ print_local_APIC(void * dummy)
 21.1568 +{
 21.1569 +	unsigned int v, ver, maxlvt;
 21.1570 +
 21.1571 +	if (apic_verbosity == APIC_QUIET)
 21.1572 +		return;
 21.1573 +
 21.1574 +	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 21.1575 +		smp_processor_id(), hard_smp_processor_id());
 21.1576 +	v = apic_read(APIC_ID);
 21.1577 +	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
 21.1578 +	v = apic_read(APIC_LVR);
 21.1579 +	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 21.1580 +	ver = GET_APIC_VERSION(v);
 21.1581 +	maxlvt = get_maxlvt();
 21.1582 +
 21.1583 +	v = apic_read(APIC_TASKPRI);
 21.1584 +	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 21.1585 +
 21.1586 +	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
 21.1587 +		v = apic_read(APIC_ARBPRI);
 21.1588 +		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
 21.1589 +			v & APIC_ARBPRI_MASK);
 21.1590 +		v = apic_read(APIC_PROCPRI);
 21.1591 +		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 21.1592 +	}
 21.1593 +
 21.1594 +	v = apic_read(APIC_EOI);
 21.1595 +	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
 21.1596 +	v = apic_read(APIC_RRR);
 21.1597 +	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
 21.1598 +	v = apic_read(APIC_LDR);
 21.1599 +	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
 21.1600 +	v = apic_read(APIC_DFR);
 21.1601 +	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
 21.1602 +	v = apic_read(APIC_SPIV);
 21.1603 +	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 21.1604 +
 21.1605 +	printk(KERN_DEBUG "... APIC ISR field:\n");
 21.1606 +	print_APIC_bitfield(APIC_ISR);
 21.1607 +	printk(KERN_DEBUG "... APIC TMR field:\n");
 21.1608 +	print_APIC_bitfield(APIC_TMR);
 21.1609 +	printk(KERN_DEBUG "... APIC IRR field:\n");
 21.1610 +	print_APIC_bitfield(APIC_IRR);
 21.1611 +
 21.1612 +	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
 21.1613 +		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 21.1614 +			apic_write(APIC_ESR, 0);
 21.1615 +		v = apic_read(APIC_ESR);
 21.1616 +		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 21.1617 +	}
 21.1618 +
 21.1619 +	v = apic_read(APIC_ICR);
 21.1620 +	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 21.1621 +	v = apic_read(APIC_ICR2);
 21.1622 +	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 21.1623 +
 21.1624 +	v = apic_read(APIC_LVTT);
 21.1625 +	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
 21.1626 +
 21.1627 +	if (maxlvt > 3) {                       /* PC is LVT#4. */
 21.1628 +		v = apic_read(APIC_LVTPC);
 21.1629 +		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
 21.1630 +	}
 21.1631 +	v = apic_read(APIC_LVT0);
 21.1632 +	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
 21.1633 +	v = apic_read(APIC_LVT1);
 21.1634 +	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
 21.1635 +
 21.1636 +	if (maxlvt > 2) {			/* ERR is LVT#3. */
 21.1637 +		v = apic_read(APIC_LVTERR);
 21.1638 +		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
 21.1639 +	}
 21.1640 +
 21.1641 +	v = apic_read(APIC_TMICT);
 21.1642 +	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
 21.1643 +	v = apic_read(APIC_TMCCT);
 21.1644 +	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 21.1645 +	v = apic_read(APIC_TDCR);
 21.1646 +	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
 21.1647 +	printk("\n");
 21.1648 +}
 21.1649 +
 21.1650 +void print_all_local_APICs (void)
 21.1651 +{
 21.1652 +	on_each_cpu(print_local_APIC, NULL, 1, 1);
 21.1653 +}
 21.1654 +
 21.1655 +void /*__init*/ print_PIC(void)
 21.1656 +{
 21.1657 +	unsigned int v;
 21.1658 +	unsigned long flags;
 21.1659 +
 21.1660 +	if (apic_verbosity == APIC_QUIET)
 21.1661 +		return;
 21.1662 +
 21.1663 +	printk(KERN_DEBUG "\nprinting PIC contents\n");
 21.1664 +
 21.1665 +	spin_lock_irqsave(&i8259A_lock, flags);
 21.1666 +
 21.1667 +	v = inb(0xa1) << 8 | inb(0x21);
 21.1668 +	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
 21.1669 +
 21.1670 +	v = inb(0xa0) << 8 | inb(0x20);
 21.1671 +	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
 21.1672 +
 21.1673 +	outb(0x0b,0xa0);
 21.1674 +	outb(0x0b,0x20);
 21.1675 +	v = inb(0xa0) << 8 | inb(0x20);
 21.1676 +	outb(0x0a,0xa0);
 21.1677 +	outb(0x0a,0x20);
 21.1678 +
 21.1679 +	spin_unlock_irqrestore(&i8259A_lock, flags);
 21.1680 +
 21.1681 +	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
 21.1682 +
 21.1683 +	v = inb(0x4d1) << 8 | inb(0x4d0);
 21.1684 +	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 21.1685 +}
 21.1686 +
 21.1687 +#endif  /*  0  */
 21.1688 +
 21.1689 +#else
 21.1690 +void __init print_IO_APIC(void) { }
 21.1691 +#endif /* !CONFIG_XEN */
 21.1692 +
 21.1693 +static void __init enable_IO_APIC(void)
 21.1694 +{
 21.1695 +	union IO_APIC_reg_01 reg_01;
 21.1696 +	int i8259_apic, i8259_pin;
 21.1697 +	int i, apic;
 21.1698 +	unsigned long flags;
 21.1699 +
 21.1700 +	for (i = 0; i < PIN_MAP_SIZE; i++) {
 21.1701 +		irq_2_pin[i].pin = -1;
 21.1702 +		irq_2_pin[i].next = 0;
 21.1703 +	}
 21.1704 +	if (!pirqs_enabled)
 21.1705 +		for (i = 0; i < MAX_PIRQS; i++)
 21.1706 +			pirq_entries[i] = -1;
 21.1707 +
 21.1708 +	/*
 21.1709 +	 * The number of IO-APIC IRQ registers (== #pins):
 21.1710 +	 */
 21.1711 +	for (apic = 0; apic < nr_ioapics; apic++) {
 21.1712 +		spin_lock_irqsave(&ioapic_lock, flags);
 21.1713 +		reg_01.raw = io_apic_read(apic, 1);
 21.1714 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1715 +		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 21.1716 +	}
 21.1717 +	for (apic = 0; apic < nr_ioapics; apic++) {
 21.1718 +		int pin;
 21.1719 +		/* See if any of the pins is in ExtINT mode */
 21.1720 +		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 21.1721 +			struct IO_APIC_route_entry entry;
 21.1722 +			spin_lock_irqsave(&ioapic_lock, flags);
 21.1723 +			*(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
 21.1724 +			*(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
 21.1725 +			spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1726 +
 21.1727 +
 21.1728 +			/* If the interrupt line is enabled and in ExtInt mode
 21.1729 +			 * I have found the pin where the i8259 is connected.
 21.1730 +			 */
 21.1731 +			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
 21.1732 +				ioapic_i8259.apic = apic;
 21.1733 +				ioapic_i8259.pin  = pin;
 21.1734 +				goto found_i8259;
 21.1735 +			}
 21.1736 +		}
 21.1737 +	}
 21.1738 + found_i8259:
 21.1739 +	/* Look to see if the MP table has reported the ExtINT */
 21.1740 +	/* If we could not find the appropriate pin by looking at the ioapic,
 21.1741 +	 * the i8259 probably is not connected to the ioapic, but give the
 21.1742 +	 * mptable a chance anyway.
 21.1743 +	 */
 21.1744 +	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
 21.1745 +	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
 21.1746 +	/* Trust the MP table if nothing is setup in the hardware */
 21.1747 +	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
 21.1748 +		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
 21.1749 +		ioapic_i8259.pin  = i8259_pin;
 21.1750 +		ioapic_i8259.apic = i8259_apic;
 21.1751 +	}
 21.1752 +	/* Complain if the MP table and the hardware disagree */
 21.1753 +	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
 21.1754 +		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
 21.1755 +	{
 21.1756 +		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
 21.1757 +	}
 21.1758 +
 21.1759 +	/*
 21.1760 +	 * Do not trust the IO-APIC being empty at bootup
 21.1761 +	 */
 21.1762 +	clear_IO_APIC();
 21.1763 +}
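
/*
 * The reads above open-code the fact that an IO-APIC redirection entry
 * is a pair of 32-bit registers.  A minimal sketch of a helper capturing
 * the pattern (ioapic_read_entry() is hypothetical here, not part of
 * this file, though later kernels grew an equivalent):
 */
static inline struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
	union {
		struct IO_APIC_route_entry entry;
		u32 w[2];
	} u;
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	u.w[0] = io_apic_read(apic, 0x10 + 2 * pin);	/* low dword */
	u.w[1] = io_apic_read(apic, 0x11 + 2 * pin);	/* high dword */
	spin_unlock_irqrestore(&ioapic_lock, flags);
	return u.entry;
}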
 21.1764 +
 21.1765 +/*
 21.1766 + * Not an __init, needed by the reboot code
 21.1767 + */
 21.1768 +void disable_IO_APIC(void)
 21.1769 +{
 21.1770 +	/*
 21.1771 +	 * Clear the IO-APIC before rebooting:
 21.1772 +	 */
 21.1773 +	clear_IO_APIC();
 21.1774 +
 21.1775 +#ifndef CONFIG_XEN
 21.1776 +	/*
 21.1777 +	 * If the i8259 is routed through an IOAPIC
 21.1778 +	 * Put that IOAPIC in virtual wire mode
 21.1779 +	 * so legacy interrupts can be delivered.
 21.1780 +	 */
 21.1781 +	if (ioapic_i8259.pin != -1) {
 21.1782 +		struct IO_APIC_route_entry entry;
 21.1783 +		unsigned long flags;
 21.1784 +
 21.1785 +		memset(&entry, 0, sizeof(entry));
 21.1786 +		entry.mask            = 0; /* Enabled */
 21.1787 +		entry.trigger         = 0; /* Edge */
 21.1788 +		entry.irr             = 0;
 21.1789 +		entry.polarity        = 0; /* High */
 21.1790 +		entry.delivery_status = 0;
 21.1791 +		entry.dest_mode       = 0; /* Physical */
 21.1792 +		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 21.1793 +		entry.vector          = 0;
 21.1794 +		entry.dest.physical.physical_dest =
 21.1795 +					GET_APIC_ID(apic_read(APIC_ID));
 21.1796 +
 21.1797 +		/*
 21.1798 +		 * Add it to the IO-APIC irq-routing table:
 21.1799 +		 */
 21.1800 +		spin_lock_irqsave(&ioapic_lock, flags);
 21.1801 +		io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
 21.1802 +			*(((int *)&entry)+1));
 21.1803 +		io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
 21.1804 +			*(((int *)&entry)+0));
 21.1805 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1806 +	}
 21.1807 +	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 21.1808 +#endif
 21.1809 +}
 21.1810 +
 21.1811 +/*
 21.1812 + * function to set the IO-APIC physical IDs based on the
 21.1813 + * values stored in the MPC table.
 21.1814 + *
 21.1815 + * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
 21.1816 + */
 21.1817 +
 21.1818 +#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
 21.1819 +static void __init setup_ioapic_ids_from_mpc(void)
 21.1820 +{
 21.1821 +	union IO_APIC_reg_00 reg_00;
 21.1822 +	physid_mask_t phys_id_present_map;
 21.1823 +	int apic;
 21.1824 +	int i;
 21.1825 +	unsigned char old_id;
 21.1826 +	unsigned long flags;
 21.1827 +
 21.1828 +	/*
 21.1829 +	 * Don't check I/O APIC IDs for xAPIC systems.  They have
 21.1830 +	 * no meaning without the serial APIC bus.
 21.1831 +	 */
 21.1832 +	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
 21.1833 +		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
 21.1834 +		return;
 21.1835 +	/*
 21.1836 +	 * This is broken; anything with a real cpu count has to
 21.1837 +	 * circumvent this idiocy regardless.
 21.1838 +	 */
 21.1839 +	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
 21.1840 +
 21.1841 +	/*
 21.1842 +	 * Set the IOAPIC ID to the value stored in the MPC table.
 21.1843 +	 */
 21.1844 +	for (apic = 0; apic < nr_ioapics; apic++) {
 21.1845 +
 21.1846 +		/* Read the register 0 value */
 21.1847 +		spin_lock_irqsave(&ioapic_lock, flags);
 21.1848 +		reg_00.raw = io_apic_read(apic, 0);
 21.1849 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1850 +		
 21.1851 +		old_id = mp_ioapics[apic].mpc_apicid;
 21.1852 +
 21.1853 +		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
 21.1854 +			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
 21.1855 +				apic, mp_ioapics[apic].mpc_apicid);
 21.1856 +			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 21.1857 +				reg_00.bits.ID);
 21.1858 +			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
 21.1859 +		}
 21.1860 +
 21.1861 +		/*
 21.1862 +		 * Sanity check, is the ID really free? Every APIC in a
 21.1863 +		 * system must have a unique ID or we get lots of nice
 21.1864 +		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 21.1865 +		 */
 21.1866 +		if (check_apicid_used(phys_id_present_map,
 21.1867 +					mp_ioapics[apic].mpc_apicid)) {
 21.1868 +			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
 21.1869 +				apic, mp_ioapics[apic].mpc_apicid);
 21.1870 +			for (i = 0; i < get_physical_broadcast(); i++)
 21.1871 +				if (!physid_isset(i, phys_id_present_map))
 21.1872 +					break;
 21.1873 +			if (i >= get_physical_broadcast())
 21.1874 +				panic("Max APIC ID exceeded!\n");
 21.1875 +			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 21.1876 +				i);
 21.1877 +			physid_set(i, phys_id_present_map);
 21.1878 +			mp_ioapics[apic].mpc_apicid = i;
 21.1879 +		} else {
 21.1880 +			physid_mask_t tmp;
 21.1881 +			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
 21.1882 +			apic_printk(APIC_VERBOSE, "Setting %d in the "
 21.1883 +					"phys_id_present_map\n",
 21.1884 +					mp_ioapics[apic].mpc_apicid);
 21.1885 +			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 21.1886 +		}
 21.1887 +
 21.1888 +
 21.1889 +		/*
 21.1890 +		 * We need to adjust the IRQ routing table
 21.1891 +		 * if the ID changed.
 21.1892 +		 */
 21.1893 +		if (old_id != mp_ioapics[apic].mpc_apicid)
 21.1894 +			for (i = 0; i < mp_irq_entries; i++)
 21.1895 +				if (mp_irqs[i].mpc_dstapic == old_id)
 21.1896 +					mp_irqs[i].mpc_dstapic
 21.1897 +						= mp_ioapics[apic].mpc_apicid;
 21.1898 +
 21.1899 +		/*
 21.1900 +		 * Read the right value from the MPC table and
 21.1901 +		 * write it into the ID register.
 21.1902 +	 	 */
 21.1903 +		apic_printk(APIC_VERBOSE, KERN_INFO
 21.1904 +			"...changing IO-APIC physical APIC ID to %d ...",
 21.1905 +			mp_ioapics[apic].mpc_apicid);
 21.1906 +
 21.1907 +		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
 21.1908 +		spin_lock_irqsave(&ioapic_lock, flags);
 21.1909 +		io_apic_write(apic, 0, reg_00.raw);
 21.1910 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1911 +
 21.1912 +		/*
 21.1913 +		 * Sanity check
 21.1914 +		 */
 21.1915 +		spin_lock_irqsave(&ioapic_lock, flags);
 21.1916 +		reg_00.raw = io_apic_read(apic, 0);
 21.1917 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1918 +		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
 21.1919 +			printk("could not set ID!\n");
 21.1920 +		else
 21.1921 +			apic_printk(APIC_VERBOSE, " ok.\n");
 21.1922 +	}
 21.1923 +}
 21.1924 +#else
 21.1925 +static void __init setup_ioapic_ids_from_mpc(void) { }
 21.1926 +#endif
 21.1927 +
 21.1928 +#ifndef CONFIG_XEN
 21.1929 +/*
 21.1930 + * There is a nasty bug in some older SMP boards, their mptable lies
 21.1931 + * about the timer IRQ. We do the following to work around the situation:
 21.1932 + *
 21.1933 + *	- timer IRQ defaults to IO-APIC IRQ
 21.1934 + *	- if this function detects that timer IRQs are defunct, then we fall
 21.1935 + *	  back to ISA timer IRQs
 21.1936 + */
 21.1937 +static int __init timer_irq_works(void)
 21.1938 +{
 21.1939 +	unsigned long t1 = jiffies;
 21.1940 +
 21.1941 +	local_irq_enable();
 21.1942 +	/* Let ten ticks pass... */
 21.1943 +	mdelay((10 * 1000) / HZ);
 21.1944 +
 21.1945 +	/*
 21.1946 +	 * Expect a few ticks at least, to be sure some possible
 21.1947 +	 * glue logic does not lock up after one or two first
 21.1948 +	 * ticks in a non-ExtINT mode.  Also the local APIC
 21.1949 +	 * might have cached one ExtINT interrupt.  Finally, at
 21.1950 +	 * least one tick may be lost due to delays.
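	 *
	 * With HZ=100, for instance, the mdelay() above spins for 100ms,
	 * i.e. roughly ten ticks, so requiring more than 4 elapsed
	 * jiffies leaves generous slack.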
 21.1951 +	 */
 21.1952 +	if (jiffies - t1 > 4)
 21.1953 +		return 1;
 21.1954 +
 21.1955 +	return 0;
 21.1956 +}
 21.1957 +
 21.1958 +/*
 21.1959 + * In the SMP+IOAPIC case it might happen that there are an unspecified
 21.1960 + * number of pending IRQ events unhandled. These cases are very rare,
 21.1961 + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
 21.1962 + * better to do it this way as thus we do not have to be aware of
 21.1963 + * 'pending' interrupts in the IRQ path, except at this point.
 21.1964 + */
 21.1965 +/*
 21.1966 + * Edge triggered needs to resend any interrupt
 21.1967 + * that was delayed but this is now handled in the device
 21.1968 + * independent code.
 21.1969 + */
 21.1970 +
 21.1971 +/*
 21.1972 + * Starting up an edge-triggered IO-APIC interrupt is
 21.1973 + * nasty - we need to make sure that we get the edge.
 21.1974 + * If it is already asserted for some reason, we need to
 21.1975 + * return 1 to indicate that it was pending.
 21.1976 + *
 21.1977 + * This is not complete - we should be able to fake
 21.1978 + * an edge even if it isn't on the 8259A...
 21.1979 + */
 21.1980 +static unsigned int startup_edge_ioapic_irq(unsigned int irq)
 21.1981 +{
 21.1982 +	int was_pending = 0;
 21.1983 +	unsigned long flags;
 21.1984 +
 21.1985 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.1986 +	if (irq < 16) {
 21.1987 +		disable_8259A_irq(irq);
 21.1988 +		if (i8259A_irq_pending(irq))
 21.1989 +			was_pending = 1;
 21.1990 +	}
 21.1991 +	__unmask_IO_APIC_irq(irq);
 21.1992 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.1993 +
 21.1994 +	return was_pending;
 21.1995 +}
 21.1996 +
 21.1997 +/*
 21.1998 + * Once we have recorded IRQ_PENDING already, we can mask the
 21.1999 + * interrupt for real. This prevents IRQ storms from unhandled
 21.2000 + * devices.
 21.2001 + */
 21.2002 +static void ack_edge_ioapic_irq(unsigned int irq)
 21.2003 +{
 21.2004 +	move_irq(irq);
 21.2005 +	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
 21.2006 +					== (IRQ_PENDING | IRQ_DISABLED))
 21.2007 +		mask_IO_APIC_irq(irq);
 21.2008 +	ack_APIC_irq();
 21.2009 +}
 21.2010 +
 21.2011 +/*
 21.2012 + * Level triggered interrupts can just be masked,
 21.2013 + * and shutting down and starting up the interrupt
 21.2014 + * is the same as enabling and disabling them -- except
 21.2015 + * with a startup need to return a "was pending" value.
 21.2016 + *
 21.2017 + * Level triggered interrupts are special because we
 21.2018 + * do not touch any IO-APIC register while handling
 21.2019 + * them. We ack the APIC in the end-IRQ handler, not
 21.2020 + * in the start-IRQ-handler. Protection against reentrance
 21.2021 + * from the same interrupt is still provided, both by the
 21.2022 + * generic IRQ layer and by the fact that an unacked local
 21.2023 + * APIC does not accept IRQs.
 21.2024 + */
 21.2025 +static unsigned int startup_level_ioapic_irq (unsigned int irq)
 21.2026 +{
 21.2027 +	unmask_IO_APIC_irq(irq);
 21.2028 +
 21.2029 +	return 0; /* don't check for pending */
 21.2030 +}
 21.2031 +
 21.2032 +static void end_level_ioapic_irq (unsigned int irq)
 21.2033 +{
 21.2034 +	unsigned long v;
 21.2035 +	int i;
 21.2036 +
 21.2037 +	move_irq(irq);
 21.2038 +/*
 21.2039 + * It appears there is an erratum which affects at least version 0x11
 21.2040 + * of I/O APIC (that's the 82093AA and cores integrated into various
 21.2041 + * chipsets).  Under certain conditions a level-triggered interrupt is
 21.2042 + * erroneously delivered as edge-triggered one but the respective IRR
 21.2043 + * bit gets set nevertheless.  As a result the I/O unit expects an EOI
 21.2044 + * message but it will never arrive and further interrupts are blocked
 21.2045 + * from the source.  The exact reason is so far unknown, but the
 21.2046 + * phenomenon was observed when two consecutive interrupt requests
 21.2047 + * from a given source get delivered to the same CPU and the source is
 21.2048 + * temporarily disabled in between.
 21.2049 + *
 21.2050 + * A workaround is to simulate an EOI message manually.  We achieve it
 21.2051 + * by setting the trigger mode to edge and then to level when the edge
 21.2052 + * trigger mode gets detected in the TMR of a local APIC for a
 21.2053 + * level-triggered interrupt.  We mask the source for the time of the
 21.2054 + * operation to prevent an edge-triggered interrupt escaping meanwhile.
 21.2055 + * The idea is from Manfred Spraul.  --macro
 21.2056 + */
 21.2057 +	i = IO_APIC_VECTOR(irq);
 21.2058 +
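	/* The TMR is a bank of 32-bit registers spaced 0x10 apart, each
	 * covering 32 vectors: (i & ~0x1f) >> 1 == (i / 32) * 0x10 picks
	 * the register, and i & 0x1f the bit within it. */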
 21.2059 +	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 21.2060 +
 21.2061 +	ack_APIC_irq();
 21.2062 +
 21.2063 +	if (!(v & (1 << (i & 0x1f)))) {
 21.2064 +		atomic_inc(&irq_mis_count);
 21.2065 +		spin_lock(&ioapic_lock);
 21.2066 +		__mask_and_edge_IO_APIC_irq(irq);
 21.2067 +		__unmask_and_level_IO_APIC_irq(irq);
 21.2068 +		spin_unlock(&ioapic_lock);
 21.2069 +	}
 21.2070 +}
 21.2071 +
 21.2072 +#ifdef CONFIG_PCI_MSI
 21.2073 +static unsigned int startup_edge_ioapic_vector(unsigned int vector)
 21.2074 +{
 21.2075 +	int irq = vector_to_irq(vector);
 21.2076 +
 21.2077 +	return startup_edge_ioapic_irq(irq);
 21.2078 +}
 21.2079 +
 21.2080 +static void ack_edge_ioapic_vector(unsigned int vector)
 21.2081 +{
 21.2082 +	int irq = vector_to_irq(vector);
 21.2083 +
 21.2084 +	move_native_irq(vector);
 21.2085 +	ack_edge_ioapic_irq(irq);
 21.2086 +}
 21.2087 +
 21.2088 +static unsigned int startup_level_ioapic_vector (unsigned int vector)
 21.2089 +{
 21.2090 +	int irq = vector_to_irq(vector);
 21.2091 +
 21.2092 +	return startup_level_ioapic_irq (irq);
 21.2093 +}
 21.2094 +
 21.2095 +static void end_level_ioapic_vector (unsigned int vector)
 21.2096 +{
 21.2097 +	int irq = vector_to_irq(vector);
 21.2098 +
 21.2099 +	move_native_irq(vector);
 21.2100 +	end_level_ioapic_irq(irq);
 21.2101 +}
 21.2102 +
 21.2103 +static void mask_IO_APIC_vector (unsigned int vector)
 21.2104 +{
 21.2105 +	int irq = vector_to_irq(vector);
 21.2106 +
 21.2107 +	mask_IO_APIC_irq(irq);
 21.2108 +}
 21.2109 +
 21.2110 +static void unmask_IO_APIC_vector (unsigned int vector)
 21.2111 +{
 21.2112 +	int irq = vector_to_irq(vector);
 21.2113 +
 21.2114 +	unmask_IO_APIC_irq(irq);
 21.2115 +}
 21.2116 +
 21.2117 +#ifdef CONFIG_SMP
 21.2118 +static void set_ioapic_affinity_vector (unsigned int vector,
 21.2119 +					cpumask_t cpu_mask)
 21.2120 +{
 21.2121 +	int irq = vector_to_irq(vector);
 21.2122 +
 21.2123 +	set_native_irq_info(vector, cpu_mask);
 21.2124 +	set_ioapic_affinity_irq(irq, cpu_mask);
 21.2125 +}
 21.2126 +#endif
 21.2127 +#endif
 21.2128 +
 21.2129 +static int ioapic_retrigger(unsigned int irq)
 21.2130 +{
 21.2131 +	send_IPI_self(IO_APIC_VECTOR(irq));
 21.2132 +
 21.2133 +	return 1;
 21.2134 +}
 21.2135 +
 21.2136 +/*
 21.2137 + * Level and edge triggered IO-APIC interrupts need different handling,
 21.2138 + * so we use two separate IRQ descriptors. Edge triggered IRQs can be
 21.2139 + * handled with the level-triggered descriptor, but that one has slightly
 21.2140 + * more overhead. Level-triggered interrupts cannot be handled with the
 21.2141 + * edge-triggered handler, without risking IRQ storms and other ugly
 21.2142 + * races.
 21.2143 + */
 21.2144 +static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
 21.2145 +	.typename 	= "IO-APIC-edge",
 21.2146 +	.startup 	= startup_edge_ioapic,
 21.2147 +	.shutdown 	= shutdown_edge_ioapic,
 21.2148 +	.enable 	= enable_edge_ioapic,
 21.2149 +	.disable 	= disable_edge_ioapic,
 21.2150 +	.ack 		= ack_edge_ioapic,
 21.2151 +	.end 		= end_edge_ioapic,
 21.2152 +#ifdef CONFIG_SMP
 21.2153 +	.set_affinity 	= set_ioapic_affinity,
 21.2154 +#endif
 21.2155 +	.retrigger	= ioapic_retrigger,
 21.2156 +};
 21.2157 +
 21.2158 +static struct hw_interrupt_type ioapic_level_type __read_mostly = {
 21.2159 +	.typename 	= "IO-APIC-level",
 21.2160 +	.startup 	= startup_level_ioapic,
 21.2161 +	.shutdown 	= shutdown_level_ioapic,
 21.2162 +	.enable 	= enable_level_ioapic,
 21.2163 +	.disable 	= disable_level_ioapic,
 21.2164 +	.ack 		= mask_and_ack_level_ioapic,
 21.2165 +	.end 		= end_level_ioapic,
 21.2166 +#ifdef CONFIG_SMP
 21.2167 +	.set_affinity 	= set_ioapic_affinity,
 21.2168 +#endif
 21.2169 +	.retrigger	= ioapic_retrigger,
 21.2170 +};
 21.2171 +#endif /* !CONFIG_XEN */
 21.2172 +
 21.2173 +static inline void init_IO_APIC_traps(void)
 21.2174 +{
 21.2175 +	int irq;
 21.2176 +
 21.2177 +	/*
 21.2178 +	 * NOTE! The local APIC isn't very good at handling
 21.2179 +	 * multiple interrupts at the same interrupt level.
 21.2180 +	 * As the interrupt level is determined by taking the
 21.2181 +	 * vector number and shifting that right by 4, we
 21.2182 +	 * want to spread these out a bit so that they don't
 21.2183 +	 * all fall in the same interrupt level.
 21.2184 +	 *
 21.2185 +	 * Also, we've got to be careful not to trash gate
 21.2186 +	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 21.2187 +	 */
 21.2188 +	for (irq = 0; irq < NR_IRQS ; irq++) {
 21.2189 +		int tmp = irq;
 21.2190 +		if (use_pci_vector()) {
 21.2191 +			if (!platform_legacy_irq(tmp))
 21.2192 +				if ((tmp = vector_to_irq(tmp)) == -1)
 21.2193 +					continue;
 21.2194 +		}
 21.2195 +		if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
 21.2196 +			/*
 21.2197 +			 * Hmm.. We don't have an entry for this,
 21.2198 +			 * so default to an old-fashioned 8259
 21.2199 +			 * interrupt if we can..
 21.2200 +			 */
 21.2201 +			if (irq < 16)
 21.2202 +				make_8259A_irq(irq);
 21.2203 +#ifndef CONFIG_XEN
 21.2204 +			else
 21.2205 +				/* Strange. Oh, well.. */
 21.2206 +				irq_desc[irq].chip = &no_irq_type;
 21.2207 +#endif
 21.2208 +		}
 21.2209 +	}
 21.2210 +}
 21.2211 +
 21.2212 +#ifndef CONFIG_XEN
 21.2213 +static void enable_lapic_irq (unsigned int irq)
 21.2214 +{
 21.2215 +	unsigned long v;
 21.2216 +
 21.2217 +	v = apic_read(APIC_LVT0);
 21.2218 +	apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
 21.2219 +}
 21.2220 +
 21.2221 +static void disable_lapic_irq (unsigned int irq)
 21.2222 +{
 21.2223 +	unsigned long v;
 21.2224 +
 21.2225 +	v = apic_read(APIC_LVT0);
 21.2226 +	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
 21.2227 +}
 21.2228 +
 21.2229 +static void ack_lapic_irq (unsigned int irq)
 21.2230 +{
 21.2231 +	ack_APIC_irq();
 21.2232 +}
 21.2233 +
 21.2234 +static void end_lapic_irq (unsigned int i) { /* nothing */ }
 21.2235 +
 21.2236 +static struct hw_interrupt_type lapic_irq_type __read_mostly = {
 21.2237 +	.typename 	= "local-APIC-edge",
 21.2238 +	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
 21.2239 +	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
 21.2240 +	.enable 	= enable_lapic_irq,
 21.2241 +	.disable 	= disable_lapic_irq,
 21.2242 +	.ack 		= ack_lapic_irq,
 21.2243 +	.end 		= end_lapic_irq
 21.2244 +};
 21.2245 +
 21.2246 +static void setup_nmi (void)
 21.2247 +{
 21.2248 +	/*
 21.2249 + 	 * Dirty trick to enable the NMI watchdog ...
 21.2250 +	 * We put the 8259A master into AEOI mode and
 21.2251 +	 * unmask LVT0 on all local APICs as NMI.
 21.2252 +	 *
 21.2253 +	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
 21.2254 +	 * is from Maciej W. Rozycki - so we do not have to EOI from
 21.2255 +	 * the NMI handler or the timer interrupt.
 21.2256 +	 */ 
 21.2257 +	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 21.2258 +
 21.2259 +	on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
 21.2260 +
 21.2261 +	apic_printk(APIC_VERBOSE, " done.\n");
 21.2262 +}
 21.2263 +
 21.2264 +/*
 21.2265 + * This looks a bit hackish but it's about the only way of sending
 21.2266 + * a few INTA cycles to 8259As and any associated glue logic.  ICR does
 21.2267 + * not support the ExtINT mode, unfortunately.  We need to send these
 21.2268 + * cycles as some i82489DX-based boards have glue logic that keeps the
 21.2269 + * 8259A interrupt line asserted until INTA.  --macro
 21.2270 + */
 21.2271 +static inline void unlock_ExtINT_logic(void)
 21.2272 +{
 21.2273 +	int apic, pin, i;
 21.2274 +	struct IO_APIC_route_entry entry0, entry1;
 21.2275 +	unsigned char save_control, save_freq_select;
 21.2276 +	unsigned long flags;
 21.2277 +
 21.2278 +	pin  = find_isa_irq_pin(8, mp_INT);
 21.2279 +	apic = find_isa_irq_apic(8, mp_INT);
 21.2280 +	if (pin == -1)
 21.2281 +		return;
 21.2282 +
 21.2283 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2284 +	*(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
 21.2285 +	*(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
 21.2286 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2287 +	clear_IO_APIC_pin(apic, pin);
 21.2288 +
 21.2289 +	memset(&entry1, 0, sizeof(entry1));
 21.2290 +
 21.2291 +	entry1.dest_mode = 0;			/* physical delivery */
 21.2292 +	entry1.mask = 0;			/* unmask IRQ now */
 21.2293 +	entry1.dest.physical.physical_dest = hard_smp_processor_id();
 21.2294 +	entry1.delivery_mode = dest_ExtINT;
 21.2295 +	entry1.polarity = entry0.polarity;
 21.2296 +	entry1.trigger = 0;
 21.2297 +	entry1.vector = 0;
 21.2298 +
 21.2299 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2300 +	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
 21.2301 +	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
 21.2302 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2303 +
 21.2304 +	save_control = CMOS_READ(RTC_CONTROL);
 21.2305 +	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
 21.2306 +	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
 21.2307 +		   RTC_FREQ_SELECT);
 21.2308 +	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
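	/* Rate 6 in RTC_FREQ_SELECT programs the MC146818 periodic
	 * interrupt at 1024Hz, and RTC_PIE enables it, so the loop below
	 * should see RTC_PF set roughly once a millisecond. */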
 21.2309 +
 21.2310 +	i = 100;
 21.2311 +	while (i-- > 0) {
 21.2312 +		mdelay(10);
 21.2313 +		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
 21.2314 +			i -= 10;
 21.2315 +	}
 21.2316 +
 21.2317 +	CMOS_WRITE(save_control, RTC_CONTROL);
 21.2318 +	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
 21.2319 +	clear_IO_APIC_pin(apic, pin);
 21.2320 +
 21.2321 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2322 +	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
 21.2323 +	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
 21.2324 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2325 +}
 21.2326 +
 21.2327 +int timer_uses_ioapic_pin_0;
 21.2328 +
 21.2329 +/*
 21.2330 + * This code may look a bit paranoid, but it's supposed to cooperate with
 21.2331 + * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
 21.2332 + * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
 21.2333 + * fanatically on his truly buggy board.
 21.2334 + */
 21.2335 +static inline void check_timer(void)
 21.2336 +{
 21.2337 +	int apic1, pin1, apic2, pin2;
 21.2338 +	int vector;
 21.2339 +
 21.2340 +	/*
 21.2341 +	 * get/set the timer IRQ vector:
 21.2342 +	 */
 21.2343 +	disable_8259A_irq(0);
 21.2344 +	vector = assign_irq_vector(0);
 21.2345 +	set_intr_gate(vector, interrupt[0]);
 21.2346 +
 21.2347 +	/*
 21.2348 +	 * Subtle, code in do_timer_interrupt() expects an AEOI
 21.2349 +	 * mode for the 8259A whenever interrupts are routed
 21.2350 +	 * through I/O APICs.  Also IRQ0 has to be enabled in
 21.2351 +	 * the 8259A which implies the virtual wire has to be
 21.2352 +	 * disabled in the local APIC.
 21.2353 +	 */
 21.2354 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 21.2355 +	init_8259A(1);
 21.2356 +	timer_ack = 1;
 21.2357 +	if (timer_over_8254 > 0)
 21.2358 +		enable_8259A_irq(0);
 21.2359 +
 21.2360 +	pin1  = find_isa_irq_pin(0, mp_INT);
 21.2361 +	apic1 = find_isa_irq_apic(0, mp_INT);
 21.2362 +	pin2  = ioapic_i8259.pin;
 21.2363 +	apic2 = ioapic_i8259.apic;
 21.2364 +
 21.2365 +	if (pin1 == 0)
 21.2366 +		timer_uses_ioapic_pin_0 = 1;
 21.2367 +
 21.2368 +	printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
 21.2369 +		vector, apic1, pin1, apic2, pin2);
 21.2370 +
 21.2371 +	if (pin1 != -1) {
 21.2372 +		/*
 21.2373 +		 * Ok, does IRQ0 through the IOAPIC work?
 21.2374 +		 */
 21.2375 +		unmask_IO_APIC_irq(0);
 21.2376 +		if (timer_irq_works()) {
 21.2377 +			if (nmi_watchdog == NMI_IO_APIC) {
 21.2378 +				disable_8259A_irq(0);
 21.2379 +				setup_nmi();
 21.2380 +				enable_8259A_irq(0);
 21.2381 +			}
 21.2382 +			if (disable_timer_pin_1 > 0)
 21.2383 +				clear_IO_APIC_pin(0, pin1);
 21.2384 +			return;
 21.2385 +		}
 21.2386 +		clear_IO_APIC_pin(apic1, pin1);
 21.2387 +		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
 21.2388 +				"IO-APIC\n");
 21.2389 +	}
 21.2390 +
 21.2391 +	printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
 21.2392 +	if (pin2 != -1) {
 21.2393 +		printk("\n..... (found pin %d) ...", pin2);
 21.2394 +		/*
 21.2395 +		 * legacy devices should be connected to IO APIC #0
 21.2396 +		 */
 21.2397 +		setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
 21.2398 +		if (timer_irq_works()) {
 21.2399 +			printk("works.\n");
 21.2400 +			if (pin1 != -1)
 21.2401 +				replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
 21.2402 +			else
 21.2403 +				add_pin_to_irq(0, apic2, pin2);
 21.2404 +			if (nmi_watchdog == NMI_IO_APIC) {
 21.2405 +				setup_nmi();
 21.2406 +			}
 21.2407 +			return;
 21.2408 +		}
 21.2409 +		/*
 21.2410 +		 * Cleanup, just in case ...
 21.2411 +		 */
 21.2412 +		clear_IO_APIC_pin(apic2, pin2);
 21.2413 +	}
 21.2414 +	printk(" failed.\n");
 21.2415 +
 21.2416 +	if (nmi_watchdog == NMI_IO_APIC) {
 21.2417 +		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
 21.2418 +		nmi_watchdog = 0;
 21.2419 +	}
 21.2420 +
 21.2421 +	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 21.2422 +
 21.2423 +	disable_8259A_irq(0);
 21.2424 +	irq_desc[0].chip = &lapic_irq_type;
 21.2425 +	apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
 21.2426 +	enable_8259A_irq(0);
 21.2427 +
 21.2428 +	if (timer_irq_works()) {
 21.2429 +		printk(" works.\n");
 21.2430 +		return;
 21.2431 +	}
 21.2432 +	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
 21.2433 +	printk(" failed.\n");
 21.2434 +
 21.2435 +	printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
 21.2436 +
 21.2437 +	timer_ack = 0;
 21.2438 +	init_8259A(0);
 21.2439 +	make_8259A_irq(0);
 21.2440 +	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
 21.2441 +
 21.2442 +	unlock_ExtINT_logic();
 21.2443 +
 21.2444 +	if (timer_irq_works()) {
 21.2445 +		printk(" works.\n");
 21.2446 +		return;
 21.2447 +	}
 21.2448 +	printk(" failed :(.\n");
 21.2449 +	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 21.2450 +		"report.  Then try booting with the 'noapic' option");
 21.2451 +}
 21.2452 +#else
 21.2453 +int timer_uses_ioapic_pin_0 = 0;
 21.2454 +#define check_timer() ((void)0)
 21.2455 +#endif
 21.2456 +
 21.2457 +/*
 21.2458 + *
 21.2459 + * IRQs that are handled by the PIC in the MPS IOAPIC case.
 21.2460 + * - IRQ2 is the cascade IRQ, and cannot be an io-apic IRQ.
 21.2461 + *   Linux doesn't really care, as it's not actually used
 21.2462 + *   for any interrupt handling anyway.
 21.2463 + */
 21.2464 +#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 21.2465 +
 21.2466 +void __init setup_IO_APIC(void)
 21.2467 +{
 21.2468 +	enable_IO_APIC();
 21.2469 +
 21.2470 +	if (acpi_ioapic)
 21.2471 +		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
 21.2472 +	else
 21.2473 +		io_apic_irqs = ~PIC_IRQS;
 21.2474 +
 21.2475 +	printk("ENABLING IO-APIC IRQs\n");
 21.2476 +
 21.2477 +	/*
 21.2478 +	 * Set up IO-APIC IRQ routing.
 21.2479 +	 */
 21.2480 +	if (!acpi_ioapic)
 21.2481 +		setup_ioapic_ids_from_mpc();
 21.2482 +#ifndef CONFIG_XEN
 21.2483 +	sync_Arb_IDs();
 21.2484 +#endif
 21.2485 +	setup_IO_APIC_irqs();
 21.2486 +	init_IO_APIC_traps();
 21.2487 +	check_timer();
 21.2488 +	if (!acpi_ioapic)
 21.2489 +		print_IO_APIC();
 21.2490 +}
 21.2491 +
 21.2492 +static int __init setup_disable_8254_timer(char *s)
 21.2493 +{
 21.2494 +	timer_over_8254 = -1;
 21.2495 +	return 1;
 21.2496 +}
 21.2497 +static int __init setup_enable_8254_timer(char *s)
 21.2498 +{
 21.2499 +	timer_over_8254 = 2;
 21.2500 +	return 1;
 21.2501 +}
 21.2502 +
 21.2503 +__setup("disable_8254_timer", setup_disable_8254_timer);
 21.2504 +__setup("enable_8254_timer", setup_enable_8254_timer);
 21.2505 +
 21.2506 +/*
 21.2507 + *	Called after all the initialization is done. If we didn't find any
 21.2508 + *	APIC bugs then we can allow the modify fast path
 21.2509 + */
 21.2510 + 
 21.2511 +static int __init io_apic_bug_finalize(void)
 21.2512 +{
 21.2513 +	if (sis_apic_bug == -1)
 21.2514 +		sis_apic_bug = 0;
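	/* In the initial domain, report the detected quirk to Xen so that
	 * the hypervisor's own IO-APIC accesses apply (or skip) the same
	 * register-select workaround for the buggy SiS chipsets. */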
 21.2515 +	if (is_initial_xendomain()) {
 21.2516 +		struct xen_platform_op op = { .cmd = XENPF_platform_quirk };
 21.2517 +		op.u.platform_quirk.quirk_id = sis_apic_bug ?
 21.2518 +			QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL;
 21.2519 +		HYPERVISOR_platform_op(&op);
 21.2520 +	}
 21.2521 +	return 0;
 21.2522 +}
 21.2523 +
 21.2524 +late_initcall(io_apic_bug_finalize);
 21.2525 +
 21.2526 +struct sysfs_ioapic_data {
 21.2527 +	struct sys_device dev;
 21.2528 +	struct IO_APIC_route_entry entry[0];
 21.2529 +};
 21.2530 +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
 21.2531 +
 21.2532 +static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
 21.2533 +{
 21.2534 +	struct IO_APIC_route_entry *entry;
 21.2535 +	struct sysfs_ioapic_data *data;
 21.2536 +	unsigned long flags;
 21.2537 +	int i;
 21.2538 +	
 21.2539 +	data = container_of(dev, struct sysfs_ioapic_data, dev);
 21.2540 +	entry = data->entry;
 21.2541 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2542 +	for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++) {
 21.2543 +		*(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
 21.2544 +		*(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
 21.2545 +	}
 21.2546 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2547 +
 21.2548 +	return 0;
 21.2549 +}
 21.2550 +
 21.2551 +static int ioapic_resume(struct sys_device *dev)
 21.2552 +{
 21.2553 +	struct IO_APIC_route_entry *entry;
 21.2554 +	struct sysfs_ioapic_data *data;
 21.2555 +	unsigned long flags;
 21.2556 +	union IO_APIC_reg_00 reg_00;
 21.2557 +	int i;
 21.2558 +	
 21.2559 +	data = container_of(dev, struct sysfs_ioapic_data, dev);
 21.2560 +	entry = data->entry;
 21.2561 +
 21.2562 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2563 +	reg_00.raw = io_apic_read(dev->id, 0);
 21.2564 +	if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
 21.2565 +		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
 21.2566 +		io_apic_write(dev->id, 0, reg_00.raw);
 21.2567 +	}
 21.2568 +	for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++) {
 21.2569 +		io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
 21.2570 +		io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
 21.2571 +	}
 21.2572 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2573 +
 21.2574 +	return 0;
 21.2575 +}
 21.2576 +
 21.2577 +static struct sysdev_class ioapic_sysdev_class = {
 21.2578 +	set_kset_name("ioapic"),
 21.2579 +	.suspend = ioapic_suspend,
 21.2580 +	.resume = ioapic_resume,
 21.2581 +};
 21.2582 +
 21.2583 +static int __init ioapic_init_sysfs(void)
 21.2584 +{
 21.2585 +	struct sys_device * dev;
 21.2586 +	int i, size, error = 0;
 21.2587 +
 21.2588 +	error = sysdev_class_register(&ioapic_sysdev_class);
 21.2589 +	if (error)
 21.2590 +		return error;
 21.2591 +
 21.2592 +	for (i = 0; i < nr_ioapics; i++) {
 21.2593 +		size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
 21.2594 +			* sizeof(struct IO_APIC_route_entry);
 21.2595 +		mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
 21.2596 +		if (!mp_ioapic_data[i]) {
 21.2597 +			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
 21.2598 +			continue;
 21.2599 +		}
 21.2600 +		memset(mp_ioapic_data[i], 0, size);
 21.2601 +		dev = &mp_ioapic_data[i]->dev;
 21.2602 +		dev->id = i; 
 21.2603 +		dev->cls = &ioapic_sysdev_class;
 21.2604 +		error = sysdev_register(dev);
 21.2605 +		if (error) {
 21.2606 +			kfree(mp_ioapic_data[i]);
 21.2607 +			mp_ioapic_data[i] = NULL;
 21.2608 +			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
 21.2609 +			continue;
 21.2610 +		}
 21.2611 +	}
 21.2612 +
 21.2613 +	return 0;
 21.2614 +}
 21.2615 +
 21.2616 +device_initcall(ioapic_init_sysfs);
 21.2617 +
 21.2618 +/* --------------------------------------------------------------------------
 21.2619 +                          ACPI-based IOAPIC Configuration
 21.2620 +   -------------------------------------------------------------------------- */
 21.2621 +
 21.2622 +#ifdef CONFIG_ACPI
 21.2623 +
 21.2624 +int __init io_apic_get_unique_id (int ioapic, int apic_id)
 21.2625 +{
 21.2626 +#ifndef CONFIG_XEN
 21.2627 +	union IO_APIC_reg_00 reg_00;
 21.2628 +	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
 21.2629 +	physid_mask_t tmp;
 21.2630 +	unsigned long flags;
 21.2631 +	int i = 0;
 21.2632 +
 21.2633 +	/*
 21.2634 +	 * The P4 platform supports up to 256 APIC IDs on two separate APIC 
 21.2635 +	 * buses (one for LAPICs, one for IOAPICs), where predecessors only 
 21.2636 + * support up to 16 on one shared APIC bus.
 21.2637 +	 * 
 21.2638 +	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
 21.2639 +	 *      advantage of new APIC bus architecture.
 21.2640 +	 */
 21.2641 +
 21.2642 +	if (physids_empty(apic_id_map))
 21.2643 +		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
 21.2644 +
 21.2645 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2646 +	reg_00.raw = io_apic_read(ioapic, 0);
 21.2647 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2648 +
 21.2649 +	if (apic_id >= get_physical_broadcast()) {
 21.2650 +		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
 21.2651 +			"%d\n", ioapic, apic_id, reg_00.bits.ID);
 21.2652 +		apic_id = reg_00.bits.ID;
 21.2653 +	}
 21.2654 +
 21.2655 +	/*
 21.2656 +	 * Every APIC in a system must have a unique ID or we get lots of nice 
 21.2657 +	 * 'stuck on smp_invalidate_needed IPI wait' messages.
 21.2658 +	 */
 21.2659 +	if (check_apicid_used(apic_id_map, apic_id)) {
 21.2660 +
 21.2661 +		for (i = 0; i < get_physical_broadcast(); i++) {
 21.2662 +			if (!check_apicid_used(apic_id_map, i))
 21.2663 +				break;
 21.2664 +		}
 21.2665 +
 21.2666 +		if (i == get_physical_broadcast())
 21.2667 +			panic("Max apic_id exceeded!\n");
 21.2668 +
 21.2669 +		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
 21.2670 +			"trying %d\n", ioapic, apic_id, i);
 21.2671 +
 21.2672 +		apic_id = i;
 21.2673 +	} 
 21.2674 +
 21.2675 +	tmp = apicid_to_cpu_present(apic_id);
 21.2676 +	physids_or(apic_id_map, apic_id_map, tmp);
 21.2677 +
 21.2678 +	if (reg_00.bits.ID != apic_id) {
 21.2679 +		reg_00.bits.ID = apic_id;
 21.2680 +
 21.2681 +		spin_lock_irqsave(&ioapic_lock, flags);
 21.2682 +		io_apic_write(ioapic, 0, reg_00.raw);
 21.2683 +		reg_00.raw = io_apic_read(ioapic, 0);
 21.2684 +		spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2685 +
 21.2686 +		/* Sanity check */
 21.2687 +		if (reg_00.bits.ID != apic_id) {
 21.2688 +			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
 21.2689 +			return -1;
 21.2690 +		}
 21.2691 +	}
 21.2692 +
 21.2693 +	apic_printk(APIC_VERBOSE, KERN_INFO
 21.2694 +			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 21.2695 +#endif /* !CONFIG_XEN */
 21.2696 +
 21.2697 +	return apic_id;
 21.2698 +}
 21.2699 +
 21.2700 +
 21.2701 +int __init io_apic_get_version (int ioapic)
 21.2702 +{
 21.2703 +	union IO_APIC_reg_01	reg_01;
 21.2704 +	unsigned long flags;
 21.2705 +
 21.2706 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2707 +	reg_01.raw = io_apic_read(ioapic, 1);
 21.2708 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2709 +
 21.2710 +	return reg_01.bits.version;
 21.2711 +}
 21.2712 +
 21.2713 +
 21.2714 +int __init io_apic_get_redir_entries (int ioapic)
 21.2715 +{
 21.2716 +	union IO_APIC_reg_01	reg_01;
 21.2717 +	unsigned long flags;
 21.2718 +
 21.2719 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2720 +	reg_01.raw = io_apic_read(ioapic, 1);
 21.2721 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2722 +
 21.2723 +	return reg_01.bits.entries;
 21.2724 +}
 21.2725 +
 21.2726 +
 21.2727 +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
 21.2728 +{
 21.2729 +	struct IO_APIC_route_entry entry;
 21.2730 +	unsigned long flags;
 21.2731 +
 21.2732 +	if (!IO_APIC_IRQ(irq)) {
 21.2733 +		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
 21.2734 +			ioapic, irq);
 21.2735 +		return -EINVAL;
 21.2736 +	}
 21.2737 +
 21.2738 +	/*
 21.2739 +	 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
 21.2740 +	 * Note that we mask (disable) IRQs now -- these get enabled when the
 21.2741 +	 * corresponding device driver registers for this IRQ.
 21.2742 +	 */
 21.2743 +
 21.2744 +	memset(&entry,0,sizeof(entry));
 21.2745 +
 21.2746 +	entry.delivery_mode = INT_DELIVERY_MODE;
 21.2747 +	entry.dest_mode = INT_DEST_MODE;
 21.2748 +	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 21.2749 +	entry.trigger = edge_level;
 21.2750 +	entry.polarity = active_high_low;
 21.2751 +	entry.mask  = 1;
 21.2752 +
 21.2753 +	/*
 21.2754 +	 * IRQs < 16 are already in the irq_2_pin[] map
 21.2755 +	 */
 21.2756 +	if (irq >= 16)
 21.2757 +		add_pin_to_irq(irq, ioapic, pin);
 21.2758 +
 21.2759 +	entry.vector = assign_irq_vector(irq);
 21.2760 +
 21.2761 +	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
 21.2762 +		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
 21.2763 +		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
 21.2764 +		edge_level, active_high_low);
 21.2765 +
 21.2766 +	ioapic_register_intr(irq, entry.vector, edge_level);
 21.2767 +
 21.2768 +	if (!ioapic && (irq < 16))
 21.2769 +		disable_8259A_irq(irq);
 21.2770 +
 21.2771 +	spin_lock_irqsave(&ioapic_lock, flags);
 21.2772 +	io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
 21.2773 +	io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
 21.2774 +	set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
 21.2775 +	spin_unlock_irqrestore(&ioapic_lock, flags);
 21.2776 +
 21.2777 +	return 0;
 21.2778 +}
 21.2779 +
 21.2780 +#endif /* CONFIG_ACPI */
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/arch/i386/kernel/ioport-xen.c	Mon Jun 04 10:05:28 2007 +0100
    22.3 @@ -0,0 +1,122 @@
    22.4 +/*
    22.5 + *	linux/arch/i386/kernel/ioport.c
    22.6 + *
    22.7 + * This contains the io-permission bitmap code - written by obz, with changes
    22.8 + * by Linus.
    22.9 + */
   22.10 +
   22.11 +#include <linux/sched.h>
   22.12 +#include <linux/kernel.h>
   22.13 +#include <linux/capability.h>
   22.14 +#include <linux/errno.h>
   22.15 +#include <linux/types.h>
   22.16 +#include <linux/ioport.h>
   22.17 +#include <linux/smp.h>
   22.18 +#include <linux/smp_lock.h>
   22.19 +#include <linux/stddef.h>
   22.20 +#include <linux/slab.h>
   22.21 +#include <linux/thread_info.h>
   22.22 +#include <xen/interface/physdev.h>
   22.23 +
   22.24 +/* Set EXTENT bits starting at BASE in BITMAP to value NEW_VALUE. */
   22.25 +static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
   22.26 +{
   22.27 +	unsigned long mask;
   22.28 +	unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
   22.29 +	unsigned int low_index = base & (BITS_PER_LONG-1);
   22.30 +	int length = low_index + extent;
   22.31 +
   22.32 +	if (low_index != 0) {
   22.33 +		mask = (~0UL << low_index);
   22.34 +		if (length < BITS_PER_LONG)
   22.35 +			mask &= ~(~0UL << length);
   22.36 +		if (new_value)
   22.37 +			*bitmap_base++ |= mask;
   22.38 +		else
   22.39 +			*bitmap_base++ &= ~mask;
   22.40 +		length -= BITS_PER_LONG;
   22.41 +	}
   22.42 +
   22.43 +	mask = (new_value ? ~0UL : 0UL);
   22.44 +	while (length >= BITS_PER_LONG) {
   22.45 +		*bitmap_base++ = mask;
   22.46 +		length -= BITS_PER_LONG;
   22.47 +	}
   22.48 +
   22.49 +	if (length > 0) {
   22.50 +		mask = ~(~0UL << length);
   22.51 +		if (new_value)
   22.52 +			*bitmap_base++ |= mask;
   22.53 +		else
   22.54 +			*bitmap_base++ &= ~mask;
   22.55 +	}
   22.56 +}
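
/*
 * A minimal sketch of set_bitmap()'s effect across a word boundary
 * (hypothetical standalone example, assuming BITS_PER_LONG == 64):
 *
 *	unsigned long bits[2] = { 0, 0 };
 *
 *	set_bitmap(bits, 60, 8, 1);
 *	// bits[0] now has bits 60..63 set, bits[1] has bits 0..3 set
 *	set_bitmap(bits, 60, 8, 0);
 *	// both words are zero again
 */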
   22.57 +
   22.58 +
   22.59 +/*
   22.60 + * this changes the io permissions bitmap in the current task.
   22.61 + */
   22.62 +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
   22.63 +{
   22.64 +	struct thread_struct * t = &current->thread;
   22.65 +	unsigned long *bitmap;
   22.66 +	struct physdev_set_iobitmap set_iobitmap;
   22.67 +
   22.68 +	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
   22.69 +		return -EINVAL;
   22.70 +	if (turn_on && !capable(CAP_SYS_RAWIO))
   22.71 +		return -EPERM;
   22.72 +
   22.73 +	/*
   22.74 +	 * If it's the first ioperm() call in this thread's lifetime, set the
   22.75 +	 * IO bitmap up. ioperm() is much less timing critical than clone(),
   22.76 +	 * which is why we delay this operation until now:
   22.77 +	 */
   22.78 +	if (!t->io_bitmap_ptr) {
   22.79 +		bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
   22.80 +		if (!bitmap)
   22.81 +			return -ENOMEM;
   22.82 +
   22.83 +		memset(bitmap, 0xff, IO_BITMAP_BYTES);
   22.84 +		t->io_bitmap_ptr = bitmap;
   22.85 +		set_thread_flag(TIF_IO_BITMAP);
   22.86 +
   22.87 +		set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
   22.88 +		set_iobitmap.nr_ports = IO_BITMAP_BITS;
   22.89 +		HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &set_iobitmap);
   22.90 +	}
   22.91 +
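	/* Note the inversion: in the I/O permission bitmap a set bit
	 * means "access denied", so turning ports on clears their bits. */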
   22.92 +	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
   22.93 +
   22.94 +	return 0;
   22.95 +}
   22.96 +
   22.97 +/*
   22.98 + * sys_iopl has to be used when you want to access the IO ports
   22.99 + * beyond the 0x3ff range: to get the full 65536 ports bitmapped
  22.100 + * you'd need 8kB of bitmaps/process, which is a bit excessive.
  22.101 + *
  22.102 + * Here we just change the eflags value on the stack: we allow
  22.103 + * only the super-user to do it. This depends on the stack-layout
  22.104 + * on system-call entry - see also fork() and the signal handling
  22.105 + * code.
  22.106 + */
  22.107 +
  22.108 +asmlinkage long sys_iopl(unsigned long unused)
  22.109 +{
  22.110 +	volatile struct pt_regs * regs = (struct pt_regs *) &unused;
  22.111 +	unsigned int level = regs->ebx;
  22.112 +	struct thread_struct *t = &current->thread;
  22.113 +	unsigned int old = (t->iopl >> 12) & 3;
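	/* t->iopl caches the EFLAGS IOPL field, which occupies bits 12-13;
	 * shifting right by 12 and masking with 3 recovers the 0-3
	 * privilege level, and `level << 12` below stores it back. */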
  22.114 +
  22.115 +	if (level > 3)
  22.116 +		return -EINVAL;
  22.117 +	/* Trying to gain more privileges? */
  22.118 +	if (level > old) {
  22.119 +		if (!capable(CAP_SYS_RAWIO))
  22.120 +			return -EPERM;
  22.121 +	}
  22.122 +	t->iopl = level << 12;
  22.123 +	set_iopl_mask(t->iopl);
  22.124 +	return 0;
  22.125 +}
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/arch/i386/kernel/irq-xen.c	Mon Jun 04 10:05:28 2007 +0100
    23.3 @@ -0,0 +1,324 @@
    23.4 +/*
    23.5 + *	linux/arch/i386/kernel/irq.c
    23.6 + *
    23.7 + *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
    23.8 + *
    23.9 + * This file contains the lowest level x86-specific interrupt
   23.10 + * entry, irq-stacks and irq statistics code. All the remaining
   23.11 + * irq logic is done by the generic kernel/irq/ code and
   23.12 + * by the x86-specific irq controller code. (e.g. i8259.c and
   23.13 + * io_apic.c.)
   23.14 + */
   23.15 +
   23.16 +#include <asm/uaccess.h>
   23.17 +#include <linux/module.h>
   23.18 +#include <linux/seq_file.h>
   23.19 +#include <linux/interrupt.h>
   23.20 +#include <linux/kernel_stat.h>
   23.21 +#include <linux/notifier.h>
   23.22 +#include <linux/cpu.h>
   23.23 +#include <linux/delay.h>
   23.24 +
   23.25 +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
   23.26 +EXPORT_PER_CPU_SYMBOL(irq_stat);
   23.27 +
   23.28 +#ifndef CONFIG_X86_LOCAL_APIC
   23.29 +/*
   23.30 + * 'what should we do if we get a hw irq event on an illegal vector'.
   23.31 + * Each architecture has to answer this itself.
   23.32 + */
   23.33 +void ack_bad_irq(unsigned int irq)
   23.34 +{
   23.35 +	printk("unexpected IRQ trap at vector %02x\n", irq);
   23.36 +}
   23.37 +#endif
   23.38 +
   23.39 +#ifdef CONFIG_4KSTACKS
   23.40 +/*
   23.41 + * per-CPU IRQ handling contexts (thread information and stack)
   23.42 + */
   23.43 +union irq_ctx {
   23.44 +	struct thread_info      tinfo;
   23.45 +	u32                     stack[THREAD_SIZE/sizeof(u32)];
   23.46 +};
   23.47 +
   23.48 +static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
   23.49 +static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
   23.50 +#endif
   23.51 +
   23.52 +/*
   23.53 + * do_IRQ handles all normal device IRQ's (the special
   23.54 + * SMP cross-CPU interrupts have their own specific
   23.55 + * handlers).
   23.56 + */
   23.57 +fastcall unsigned int do_IRQ(struct pt_regs *regs)
   23.58 +{	
   23.59 +	/* high bit used in ret_from_ code */
   23.60 +	int irq = ~regs->orig_eax;
   23.61 +#ifdef CONFIG_4KSTACKS
   23.62 +	union irq_ctx *curctx, *irqctx;
   23.63 +	u32 *isp;
   23.64 +#endif
   23.65 +
   23.66 +	if (unlikely((unsigned)irq >= NR_IRQS)) {
   23.67 +		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
   23.68 +					__FUNCTION__, irq);
   23.69 +		BUG();
   23.70 +	}
   23.71 +
   23.72 +	irq_enter();
   23.73 +#ifdef CONFIG_DEBUG_STACKOVERFLOW
   23.74 +	/* Debugging check for stack overflow: is there less than 1KB free? */
   23.75 +	{
   23.76 +		long esp;
   23.77 +
   23.78 +		__asm__ __volatile__("andl %%esp,%0" :
   23.79 +					"=r" (esp) : "0" (THREAD_SIZE - 1));
   23.80 +		if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
   23.81 +			printk("do_IRQ: stack overflow: %ld\n",
   23.82 +				esp - sizeof(struct thread_info));
   23.83 +			dump_stack();
   23.84 +		}
   23.85 +	}
   23.86 +#endif
   23.87 +
   23.88 +#ifdef CONFIG_4KSTACKS
   23.89 +
   23.90 +	curctx = (union irq_ctx *) current_thread_info();
   23.91 +	irqctx = hardirq_ctx[smp_processor_id()];
   23.92 +
   23.93 +	/*
   23.94 +	 * this is where we switch to the IRQ stack. However, if we are
   23.95 +	 * already using the IRQ stack (because we interrupted a hardirq
   23.96 +	 * handler) we can't do that and just have to keep using the
   23.97 +	 * current stack (which is the irq stack already after all)
   23.98 +	 */
   23.99 +	if (curctx != irqctx) {
  23.100 +		int arg1, arg2, ebx;
  23.101 +
  23.102 +		/* build the stack frame on the IRQ stack */
  23.103 +		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
  23.104 +		irqctx->tinfo.task = curctx->tinfo.task;
  23.105 +		irqctx->tinfo.previous_esp = current_stack_pointer;
  23.106 +
  23.107 +		/*
  23.108 +		 * Copy the softirq bits in preempt_count so that the
  23.109 +		 * softirq checks work in the hardirq context.
  23.110 +		 */
  23.111 +		irqctx->tinfo.preempt_count =
  23.112 +			(irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
  23.113 +			(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
  23.114 +
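		/* The xchgl swaps %esp with isp, the top of the per-cpu
		 * IRQ stack, so __do_IRQ() runs on that stack; the movl
		 * afterwards restores the original stack.  irq and regs
		 * travel in %eax and %edx per the fastcall convention. */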
  23.115 +		asm volatile(
  23.116 +			"       xchgl   %%ebx,%%esp      \n"
  23.117 +			"       call    __do_IRQ         \n"
  23.118 +			"       movl   %%ebx,%%esp      \n"
  23.119 +			: "=a" (arg1), "=d" (arg2), "=b" (ebx)
  23.120 +			:  "0" (irq),   "1" (regs),  "2" (isp)
  23.121 +			: "memory", "cc", "ecx"
  23.122 +		);
  23.123 +	} else
  23.124 +#endif
  23.125 +		__do_IRQ(irq, regs);
  23.126 +
  23.127 +	irq_exit();
  23.128 +
  23.129 +	return 1;
  23.130 +}
  23.131 +
  23.132 +#ifdef CONFIG_4KSTACKS
  23.133 +
  23.134 +/*
  23.135 + * These should really be __section__(".bss.page_aligned") as well, but
  23.136 + * gcc 3.0 and earlier don't handle that correctly.
  23.137 + */
  23.138 +static char softirq_stack[NR_CPUS * THREAD_SIZE]
  23.139 +		__attribute__((__aligned__(THREAD_SIZE)));
  23.140 +
  23.141 +static char hardirq_stack[NR_CPUS * THREAD_SIZE]
  23.142 +		__attribute__((__aligned__(THREAD_SIZE)));
  23.143 +
  23.144 +/*
  23.145 + * allocate per-cpu stacks for hardirq and for softirq processing
  23.146 + */
  23.147 +void irq_ctx_init(int cpu)
  23.148 +{
  23.149 +	union irq_ctx *irqctx;
  23.150 +
  23.151 +	if (hardirq_ctx[cpu])
  23.152 +		return;
  23.153 +
  23.154 +	irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
  23.155 +	irqctx->tinfo.task              = NULL;
  23.156 +	irqctx->tinfo.exec_domain       = NULL;
  23.157 +	irqctx->tinfo.cpu               = cpu;
  23.158 +	irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
  23.159 +	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
  23.160 +
  23.161 +	hardirq_ctx[cpu] = irqctx;
  23.162 +
  23.163 +	irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
  23.164 +	irqctx->tinfo.task              = NULL;
  23.165 +	irqctx->tinfo.exec_domain       = NULL;
  23.166 +	irqctx->tinfo.cpu               = cpu;
  23.167 +	irqctx->tinfo.preempt_count     = 0;
  23.168 +	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
  23.169 +
  23.170 +	softirq_ctx[cpu] = irqctx;
  23.171 +
  23.172 +	printk("CPU %u irqstacks, hard=%p soft=%p\n",
  23.173 +		cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
  23.174 +}
  23.175 +
  23.176 +void irq_ctx_exit(int cpu)
  23.177 +{
  23.178 +	hardirq_ctx[cpu] = NULL;
  23.179 +}
  23.180 +
  23.181 +extern asmlinkage void __do_softirq(void);
  23.182 +
  23.183 +asmlinkage void do_softirq(void)
  23.184 +{
  23.185 +	unsigned long flags;
  23.186 +	struct thread_info *curctx;
  23.187 +	union irq_ctx *irqctx;
  23.188 +	u32 *isp;
  23.189 +
  23.190 +	if (in_interrupt())
  23.191 +		return;
  23.192 +
  23.193 +	local_irq_save(flags);
  23.194 +
  23.195 +	if (local_softirq_pending()) {
  23.196 +		curctx = current_thread_info();
  23.197 +		irqctx = softirq_ctx[smp_processor_id()];
  23.198 +		irqctx->tinfo.task = curctx->task;
  23.199 +		irqctx->tinfo.previous_esp = current_stack_pointer;
  23.200 +
  23.201 +		/* build the stack frame on the softirq stack */
  23.202 +		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
  23.203 +
  23.204 +		asm volatile(
  23.205 +			"       xchgl   %%ebx,%%esp     \n"
  23.206 +			"       call    __do_softirq    \n"
  23.207 +			"       movl    %%ebx,%%esp     \n"
  23.208 +			: "=b"(isp)
  23.209 +			: "0"(isp)
  23.210 +			: "memory", "cc", "edx", "ecx", "eax"
  23.211 +		);
  23.212 +		/*
  23.213 +		 * Shouldn't happen; we returned above if in_interrupt():
  23.214 +		 */
  23.215 +		WARN_ON_ONCE(softirq_count());
  23.216 +	}
  23.217 +
  23.218 +	local_irq_restore(flags);
  23.219 +}
  23.220 +
  23.221 +EXPORT_SYMBOL(do_softirq);
  23.222 +#endif
  23.223 +
  23.224 +/*
  23.225 + * Interrupt statistics:
  23.226 + */
  23.227 +
  23.228 +atomic_t irq_err_count;
  23.229 +
  23.230 +/*
  23.231 + * /proc/interrupts printing:
  23.232 + */
  23.233 +
  23.234 +int show_interrupts(struct seq_file *p, void *v)
  23.235 +{
  23.236 +	int i = *(loff_t *) v, j;
  23.237 +	struct irqaction * action;
  23.238 +	unsigned long flags;
  23.239 +
  23.240 +	if (i == 0) {
  23.241 +		seq_printf(p, "           ");
  23.242 +		for_each_online_cpu(j)
  23.243 +			seq_printf(p, "CPU%-8d",j);
  23.244 +		seq_putc(p, '\n');
  23.245 +	}
  23.246 +
  23.247 +	if (i < NR_IRQS) {
  23.248 +		spin_lock_irqsave(&irq_desc[i].lock, flags);
  23.249 +		action = irq_desc[i].action;
  23.250 +		if (!action)
  23.251 +			goto skip;
  23.252 +		seq_printf(p, "%3d: ",i);
  23.253 +#ifndef CONFIG_SMP
  23.254 +		seq_printf(p, "%10u ", kstat_irqs(i));
  23.255 +#else
  23.256 +		for_each_online_cpu(j)
  23.257 +			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
  23.258 +#endif
  23.259 +		seq_printf(p, " %14s", irq_desc[i].chip->typename);
  23.260 +		seq_printf(p, "  %s", action->name);
  23.261 +
  23.262 +		for (action=action->next; action; action = action->next)
  23.263 +			seq_printf(p, ", %s", action->name);
  23.264 +
  23.265 +		seq_putc(p, '\n');
  23.266 +skip:
  23.267 +		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
  23.268 +	} else if (i == NR_IRQS) {
  23.269 +		seq_printf(p, "NMI: ");
  23.270 +		for_each_online_cpu(j)
  23.271 +			seq_printf(p, "%10u ", nmi_count(j));
  23.272 +		seq_putc(p, '\n');
  23.273 +#ifdef CONFIG_X86_LOCAL_APIC
  23.274 +		seq_printf(p, "LOC: ");
  23.275 +		for_each_online_cpu(j)
  23.276 +			seq_printf(p, "%10u ",
  23.277 +				per_cpu(irq_stat,j).apic_timer_irqs);
  23.278 +		seq_putc(p, '\n');
  23.279 +#endif
  23.280 +		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
  23.281 +#if defined(CONFIG_X86_IO_APIC)
  23.282 +		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
  23.283 +#endif
  23.284 +	}
  23.285 +	return 0;
  23.286 +}
  23.287 +
  23.288 +#ifdef CONFIG_HOTPLUG_CPU
  23.289 +
  23.290 +void fixup_irqs(cpumask_t map)
  23.291 +{
  23.292 +	unsigned int irq;
  23.293 +	static int warned;
  23.294 +
  23.295 +	for (irq = 0; irq < NR_IRQS; irq++) {
  23.296 +		cpumask_t mask;
  23.297 +		if (irq == 2)
  23.298 +			continue;
  23.299 +
  23.300 +		cpus_and(mask, irq_desc[irq].affinity, map);
  23.301 +		if (any_online_cpu(mask) == NR_CPUS) {
  23.302 +			/*printk("Breaking affinity for irq %i\n", irq);*/
  23.303 +			mask = map;
  23.304 +		}
  23.305 +		if (irq_desc[irq].chip->set_affinity)
  23.306 +			irq_desc[irq].chip->set_affinity(irq, mask);
  23.307 +		else if (irq_desc[irq].action && !(warned++))
  23.308 +			printk("Cannot set affinity for irq %i\n", irq);
  23.309 +	}
  23.310 +
  23.311 +#if 0
  23.312 +	barrier();
  23.313 +	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
  23.314 +	   [note the nop - the interrupt-enable boundary on x86 is two
  23.315 +	   instructions from sti] - to flush out pending hardirqs and
  23.316 +	   IPIs. After this point nothing is supposed to reach this CPU." */
  23.317 +	__asm__ __volatile__("sti; nop; cli");
  23.318 +	barrier();
  23.319 +#else
  23.320 +	/* That doesn't seem sufficient.  Give it 1ms. */
  23.321 +	local_irq_enable();
  23.322 +	mdelay(1);
  23.323 +	local_irq_disable();
  23.324 +#endif
  23.325 +}
  23.326 +#endif
  23.327 +
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/arch/i386/kernel/ldt-xen.c	Mon Jun 04 10:05:28 2007 +0100
    24.3 @@ -0,0 +1,270 @@
    24.4 +/*
    24.5 + * linux/kernel/ldt.c
    24.6 + *
    24.7 + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
    24.8 + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
    24.9 + */
   24.10 +
   24.11 +#include <linux/errno.h>
   24.12 +#include <linux/sched.h>
   24.13 +#include <linux/string.h>
   24.14 +#include <linux/mm.h>
   24.15 +#include <linux/smp.h>
   24.16 +#include <linux/smp_lock.h>
   24.17 +#include <linux/vmalloc.h>
   24.18 +#include <linux/slab.h>
   24.19 +
   24.20 +#include <asm/uaccess.h>
   24.21 +#include <asm/system.h>
   24.22 +#include <asm/ldt.h>
   24.23 +#include <asm/desc.h>
   24.24 +#include <asm/mmu_context.h>
   24.25 +
   24.26 +#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
   24.27 +static void flush_ldt(void *null)
   24.28 +{
   24.29 +	if (current->active_mm)
   24.30 +		load_LDT(&current->active_mm->context);
   24.31 +}
   24.32 +#endif
   24.33 +
   24.34 +static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
   24.35 +{
   24.36 +	void *oldldt;
   24.37 +	void *newldt;
   24.38 +	int oldsize;
   24.39 +
   24.40 +	if (mincount <= pc->size)
   24.41 +		return 0;
   24.42 +	oldsize = pc->size;
   24.43 +	mincount = (mincount+511)&(~511);
   24.44 +	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
   24.45 +		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
   24.46 +	else
   24.47 +		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
   24.48 +
   24.49 +	if (!newldt)
   24.50 +		return -ENOMEM;
   24.51 +
   24.52 +	if (oldsize)
   24.53 +		memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
   24.54 +	oldldt = pc->ldt;
   24.55 +	memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
   24.56 +	pc->ldt = newldt;
   24.57 +	wmb();
   24.58 +	pc->size = mincount;
   24.59 +	wmb();
   24.60 +
   24.61 +	if (reload) {
   24.62 +#ifdef CONFIG_SMP
   24.63 +		cpumask_t mask;
   24.64 +		preempt_disable();
   24.65 +#endif
   24.66 +		make_pages_readonly(
   24.67 +			pc->ldt,
   24.68 +			(pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
   24.69 +			XENFEAT_writable_descriptor_tables);
   24.70 +		load_LDT(pc);
   24.71 +#ifdef CONFIG_SMP
   24.72 +		mask = cpumask_of_cpu(smp_processor_id());
   24.73 +		if (!cpus_equal(current->mm->cpu_vm_mask, mask))
   24.74 +			smp_call_function(flush_ldt, NULL, 1, 1);
   24.75 +		preempt_enable();
   24.76 +#endif
   24.77 +	}
   24.78 +	if (oldsize) {
   24.79 +		make_pages_writable(
   24.80 +			oldldt,
   24.81 +			(oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE,
   24.82 +			XENFEAT_writable_descriptor_tables);
   24.83 +		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
   24.84 +			vfree(oldldt);
   24.85 +		else
   24.86 +			kfree(oldldt);
   24.87 +	}
   24.88 +	return 0;
   24.89 +}
   24.90 +
   24.91 +static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
   24.92 +{
   24.93 +	int err = alloc_ldt(new, old->size, 0);
   24.94 +	if (err < 0)
   24.95 +		return err;
   24.96 +	memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
   24.97 +	make_pages_readonly(
   24.98 +		new->ldt,
   24.99 +		(new->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
  24.100 +		XENFEAT_writable_descriptor_tables);
  24.101 +	return 0;
  24.102 +}
  24.103 +
  24.104 +/*
  24.105 + * we do not have to muck with descriptors here, that is
  24.106 + * done in switch_mm() as needed.
  24.107 + */
  24.108 +int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  24.109 +{
  24.110 +	struct mm_struct * old_mm;
  24.111 +	int retval = 0;
  24.112 +
  24.113 +	init_MUTEX(&mm->context.sem);
  24.114 +	mm->context.size = 0;
  24.115 +	mm->context.has_foreign_mappings = 0;
  24.116 +	old_mm = current->mm;
  24.117 +	if (old_mm && old_mm->context.size > 0) {
  24.118 +		down(&old_mm->context.sem);
  24.119 +		retval = copy_ldt(&mm->context, &old_mm->context);
  24.120 +		up(&old_mm->context.sem);
  24.121 +	}
  24.122 +	return retval;
  24.123 +}
  24.124 +
  24.125 +/*
  24.126 + * No need to lock the MM as we are the last user
  24.127 + */
  24.128 +void destroy_context(struct mm_struct *mm)
  24.129 +{
  24.130 +	if (mm->context.size) {
  24.131 +		if (mm == current->active_mm)
  24.132 +			clear_LDT();
  24.133 +		make_pages_writable(
  24.134 +			mm->context.ldt,
  24.135 +			(mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE,
  24.136 +			XENFEAT_writable_descriptor_tables);
  24.137 +		if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
  24.138 +			vfree(mm->context.ldt);
  24.139 +		else
  24.140 +			kfree(mm->context.ldt);
  24.141 +		mm->context.size = 0;
  24.142 +	}
  24.143 +}
  24.144 +
  24.145 +static int read_ldt(void __user * ptr, unsigned long bytecount)
  24.146 +{
  24.147 +	int err;
  24.148 +	unsigned long size;
  24.149 +	struct mm_struct * mm = current->mm;
  24.150 +
  24.151 +	if (!mm->context.size)
  24.152 +		return 0;
  24.153 +	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
  24.154 +		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
  24.155 +
  24.156 +	down(&mm->context.sem);
  24.157 +	size = mm->context.size*LDT_ENTRY_SIZE;
  24.158 +	if (size > bytecount)
  24.159 +		size = bytecount;
  24.160 +
  24.161 +	err = 0;
  24.162 +	if (copy_to_user(ptr, mm->context.ldt, size))
  24.163 +		err = -EFAULT;
  24.164 +	up(&mm->context.sem);
  24.165 +	if (err < 0)
  24.166 +		goto error_return;
  24.167 +	if (size != bytecount) {
  24.168 +		/* zero-fill the rest */
  24.169 +		if (clear_user(ptr+size, bytecount-size) != 0) {
  24.170 +			err = -EFAULT;
  24.171 +			goto error_return;
  24.172 +		}
  24.173 +	}
  24.174 +	return bytecount;
  24.175 +error_return:
  24.176 +	return err;
  24.177 +}
  24.178 +
  24.179 +static int read_default_ldt(void __user * ptr, unsigned long bytecount)
  24.180 +{
  24.181 +	int err;
  24.182 +	unsigned long size;
  24.183 +	void *address;
  24.184 +
  24.185 +	err = 0;
  24.186 +	address = &default_ldt[0];
  24.187 +	size = 5*sizeof(struct desc_struct);
  24.188 +	if (size > bytecount)
  24.189 +		size = bytecount;
  24.190 +
  24.191 +	err = size;
  24.192 +	if (copy_to_user(ptr, address, size))
  24.193 +		err = -EFAULT;
  24.194 +
  24.195 +	return err;
  24.196 +}
  24.197 +
  24.198 +static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
  24.199 +{
  24.200 +	struct mm_struct * mm = current->mm;
  24.201 +	__u32 entry_1, entry_2;
  24.202 +	int error;
  24.203 +	struct user_desc ldt_info;
  24.204 +
  24.205 +	error = -EINVAL;
  24.206 +	if (bytecount != sizeof(ldt_info))
  24.207 +		goto out;
  24.208 +	error = -EFAULT; 	
  24.209 +	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
  24.210 +		goto out;
  24.211 +
  24.212 +	error = -EINVAL;
  24.213 +	if (ldt_info.entry_number >= LDT_ENTRIES)
  24.214 +		goto out;
  24.215 +	if (ldt_info.contents == 3) {
  24.216 +		if (oldmode)
  24.217 +			goto out;
  24.218 +		if (ldt_info.seg_not_present == 0)
  24.219 +			goto out;
  24.220 +	}
  24.221 +
  24.222 +	down(&mm->context.sem);
  24.223 +	if (ldt_info.entry_number >= mm->context.size) {
  24.224 +		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
  24.225 +		if (error < 0)
  24.226 +			goto out_unlock;
  24.227 +	}
  24.228 +
  24.229 +   	/* Allow LDTs to be cleared by the user. */
  24.230 +   	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
  24.231 +		if (oldmode || LDT_empty(&ldt_info)) {
  24.232 +			entry_1 = 0;
  24.233 +			entry_2 = 0;
  24.234 +			goto install;
  24.235 +		}
  24.236 +	}
  24.237 +
  24.238 +	entry_1 = LDT_entry_a(&ldt_info);
  24.239 +	entry_2 = LDT_entry_b(&ldt_info);
  24.240 +	if (oldmode)
  24.241 +		entry_2 &= ~(1 << 20);
  24.242 +
  24.243 +	/* Install the new entry ...  */
  24.244 +install:
  24.245 +	error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number,
  24.246 +				entry_1, entry_2);
  24.247 +
  24.248 +out_unlock:
  24.249 +	up(&mm->context.sem);
  24.250 +out:
  24.251 +	return error;
  24.252 +}
  24.253 +
  24.254 +asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
  24.255 +{
  24.256 +	int ret = -ENOSYS;
  24.257 +
  24.258 +	switch (func) {
  24.259 +	case 0:
  24.260 +		ret = read_ldt(ptr, bytecount);
  24.261 +		break;
  24.262 +	case 1:
  24.263 +		ret = write_ldt(ptr, bytecount, 1);
  24.264 +		break;
  24.265 +	case 2:
  24.266 +		ret = read_default_ldt(ptr, bytecount);
  24.267 +		break;
  24.268 +	case 0x11:
  24.269 +		ret = write_ldt(ptr, bytecount, 0);
  24.270 +		break;
  24.271 +	}
  24.272 +	return ret;
  24.273 +}
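
For reference, sys_modify_ldt() above dispatches on func: 0 reads the current LDT, 1 writes an entry in the legacy format, 2 reads the default LDT, and 0x11 writes an entry in the modern format (write_ldt() with oldmode = 0). A minimal userspace sketch that installs one entry through the raw syscall; the slot number and segment base are hypothetical values chosen for illustration:

#include <asm/ldt.h>		/* struct user_desc */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number = 0;		/* hypothetical LDT slot */
	desc.base_addr = 0x1000;	/* hypothetical segment base */
	desc.limit = 0xfff;		/* 4KB limit, byte granular */
	desc.seg_32bit = 1;

	/* func 0x11 selects write_ldt() in new mode, per the dispatch above */
	if (syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc)) != 0) {
		perror("modify_ldt");
		return 1;
	}
	return 0;
}
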
    25.1 --- a/arch/i386/kernel/machine_kexec.c	Mon Jun 04 10:05:24 2007 +0100
    25.2 +++ b/arch/i386/kernel/machine_kexec.c	Mon Jun 04 10:05:28 2007 +0100
    25.3 @@ -19,6 +19,10 @@
    25.4  #include <asm/desc.h>
    25.5  #include <asm/system.h>
    25.6  
    25.7 +#ifdef CONFIG_XEN
    25.8 +#include <xen/interface/kexec.h>
    25.9 +#endif
   25.10 +
   25.11  #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
   25.12  static u32 kexec_pgd[1024] PAGE_ALIGNED;
   25.13  #ifdef CONFIG_X86_PAE
   25.14 @@ -28,6 +32,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
   25.15  static u32 kexec_pte0[1024] PAGE_ALIGNED;
   25.16  static u32 kexec_pte1[1024] PAGE_ALIGNED;
   25.17  
   25.18 +#ifdef CONFIG_XEN
   25.19 +
   25.20 +#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
   25.21 +
   25.22 +#if PAGES_NR > KEXEC_XEN_NO_PAGES
   25.23 +#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
   25.24 +#endif
   25.25 +
   25.26 +#if PA_CONTROL_PAGE != 0
   25.27 +#error PA_CONTROL_PAGE is non zero - Xen support will break
   25.28 +#endif
   25.29 +
   25.30 +void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
   25.31 +{
   25.32 +	void *control_page;
   25.33 +
   25.34 +	memset(xki->page_list, 0, sizeof(xki->page_list));
   25.35 +
   25.36 +	control_page = page_address(image->control_code_page);
   25.37 +	memcpy(control_page, relocate_kernel, PAGE_SIZE);
   25.38 +
   25.39 +	xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
   25.40 +	xki->page_list[PA_PGD] = __ma(kexec_pgd);
   25.41 +#ifdef CONFIG_X86_PAE
   25.42 +	xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
   25.43 +	xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
   25.44 +#endif
   25.45 +	xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
   25.46 +	xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
   25.47 +
   25.48 +}
   25.49 +
   25.50 +#endif /* CONFIG_XEN */
   25.51 +
   25.52  /*
   25.53   * A architecture hook called to validate the
   25.54   * proposed image and prepare the control pages
   25.55 @@ -54,6 +92,7 @@ void machine_kexec_cleanup(struct kimage
   25.56  {
   25.57  }
   25.58  
   25.59 +#ifndef CONFIG_XEN
   25.60  /*
   25.61   * Do not allocate memory (or fail in any way) in machine_kexec().
   25.62   * We are past the point of no return, committed to rebooting now.
   25.63 @@ -87,3 +126,4 @@ NORET_TYPE void machine_kexec(struct kim
   25.64  	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
   25.65  			image->start, cpu_has_pae);
   25.66  }
   25.67 +#endif
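
The __ma() helper introduced above turns a kernel virtual address into a machine address: __pa() yields the pseudo-physical address, shifting right by PAGE_SHIFT yields the PFN, pfn_to_mfn() maps that to the machine frame number through Xen's P2M table, and shifting left again yields the machine address. The in-page offset is dropped, which is safe here because every caller passes page-aligned buffers. A freestanding sketch of the arithmetic, starting from the physical address (i.e. after __pa()); the identity pfn_to_mfn() stub is an assumption for illustration only, the real lookup consults the P2M table:

#include <stdio.h>

#define PAGE_SHIFT	12

/* Stand-in for Xen's P2M lookup; the identity map here is illustrative only. */
static unsigned long pfn_to_mfn(unsigned long pfn)
{
	return pfn;
}

/* Mirror of __ma(): note the in-page offset is deliberately discarded. */
static unsigned long ma(unsigned long pa)
{
	return pfn_to_mfn(pa >> PAGE_SHIFT) << PAGE_SHIFT;
}

int main(void)
{
	unsigned long pa = 0x1234000UL;	/* hypothetical page-aligned address */
	printf("pa %#lx -> ma %#lx\n", pa, ma(pa));
	return 0;
}
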
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/arch/i386/kernel/microcode-xen.c	Mon Jun 04 10:05:28 2007 +0100
    26.3 @@ -0,0 +1,144 @@
    26.4 +/*
    26.5 + *	Intel CPU Microcode Update Driver for Linux
    26.6 + *
    26.7 + *	Copyright (C) 2000-2004 Tigran Aivazian
    26.8 + *
    26.9 + *	This driver allows updating the microcode on Intel processors
   26.10 + *	belonging to the IA-32 family - PentiumPro, Pentium II, 
   26.11 + *	Pentium III, Xeon, Pentium 4, etc.
   26.12 + *
   26.13 + *	Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, 
   26.14 + *	Order Number 245472 or free download from:
   26.15 + *		
   26.16 + *	http://developer.intel.com/design/pentium4/manuals/245472.htm
   26.17 + *
   26.18 + *	For more information, go to http://www.urbanmyth.org/microcode
   26.19 + *
   26.20 + *	This program is free software; you can redistribute it and/or
   26.21 + *	modify it under the terms of the GNU General Public License
   26.22 + *	as published by the Free Software Foundation; either version
   26.23 + *	2 of the License, or (at your option) any later version.
   26.24 + */
   26.25 +
   26.26 +//#define DEBUG /* pr_debug */
   26.27 +#include <linux/capability.h>
   26.28 +#include <linux/kernel.h>
   26.29 +#include <linux/init.h>
   26.30 +#include <linux/sched.h>
   26.31 +#include <linux/cpumask.h>
   26.32 +#include <linux/module.h>
   26.33 +#include <linux/slab.h>
   26.34 +#include <linux/vmalloc.h>
   26.35 +#include <linux/miscdevice.h>
   26.36 +#include <linux/spinlock.h>
   26.37 +#include <linux/mm.h>
   26.38 +#include <linux/mutex.h>
   26.39 +#include <linux/syscalls.h>
   26.40 +
   26.41 +#include <asm/msr.h>
   26.42 +#include <asm/uaccess.h>
   26.43 +#include <asm/processor.h>
   26.44 +
   26.45 +MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
   26.46 +MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
   26.47 +MODULE_LICENSE("GPL");
   26.48 +
   26.49 +static int verbose;
   26.50 +module_param(verbose, int, 0644);
   26.51 +
   26.52 +#define MICROCODE_VERSION 	"1.14a-xen"
   26.53 +
   26.54 +#define DEFAULT_UCODE_DATASIZE 	(2000) 	  /* 2000 bytes */
   26.55 +#define MC_HEADER_SIZE		(sizeof (microcode_header_t))  	  /* 48 bytes */
   26.56 +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
   26.57 +
   26.58 +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
   26.59 +static DEFINE_MUTEX(microcode_mutex);
   26.60 +				
   26.61 +static int microcode_open (struct inode *unused1, struct file *unused2)
   26.62 +{
   26.63 +	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
   26.64 +}
   26.65 +
   26.66 +
   26.67 +static int do_microcode_update (const void __user *ubuf, size_t len)
   26.68 +{
   26.69 +	int err;
   26.70 +	void *kbuf;
   26.71 +
   26.72 +	kbuf = vmalloc(len);
   26.73 +	if (!kbuf)
   26.74 +		return -ENOMEM;
   26.75 +
   26.76 +	if (copy_from_user(kbuf, ubuf, len) == 0) {
   26.77 +		struct xen_platform_op op;
   26.78 +
   26.79 +		op.cmd = XENPF_microcode_update;
   26.80 +		set_xen_guest_handle(op.u.microcode.data, kbuf);
   26.81 +		op.u.microcode.length = len;
   26.82 +		err = HYPERVISOR_platform_op(&op);
   26.83 +	} else
   26.84 +		err = -EFAULT;
   26.85 +
   26.86 +	vfree(kbuf);
   26.87 +
   26.88 +	return err;
   26.89 +}
   26.90 +
   26.91 +static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
   26.92 +{
   26.93 +	ssize_t ret;
   26.94 +
   26.95 +	if (len < MC_HEADER_SIZE) {
   26.96 +		printk(KERN_ERR "microcode: not enough data\n"); 
   26.97 +		return -EINVAL;
   26.98 +	}
   26.99 +
  26.100 +	mutex_lock(&microcode_mutex);
  26.101 +
  26.102 +	ret = do_microcode_update(buf, len);
  26.103 +	if (!ret)
  26.104 +		ret = (ssize_t)len;
  26.105 +
  26.106 +	mutex_unlock(&microcode_mutex);
  26.107 +
  26.108 +	return ret;
  26.109 +}
  26.110 +
  26.111 +static struct file_operations microcode_fops = {
  26.112 +	.owner		= THIS_MODULE,
  26.113 +	.write		= microcode_write,
  26.114 +	.open		= microcode_open,
  26.115 +};
  26.116 +
  26.117 +static struct miscdevice microcode_dev = {
  26.118 +	.minor		= MICROCODE_MINOR,
  26.119 +	.name		= "microcode",
  26.120 +	.fops		= &microcode_fops,
  26.121 +};
  26.122 +
  26.123 +static int __init microcode_init (void)
  26.124 +{
  26.125 +	int error;
  26.126 +
  26.127 +	error = misc_register(&microcode_dev);
  26.128 +	if (error) {
  26.129 +		printk(KERN_ERR
  26.130 +			"microcode: can't misc_register on minor=%d\n",
  26.131 +			MICROCODE_MINOR);
  26.132 +		return error;
  26.133 +	}
  26.134 +
  26.135 +	printk(KERN_INFO 
  26.136 +		"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
  26.137 +	return 0;
  26.138 +}
  26.139 +
  26.140 +static void __exit microcode_exit (void)
  26.141 +{
  26.142 +	misc_deregister(&microcode_dev);
  26.143 +}
  26.144 +
  26.145 +module_init(microcode_init)
  26.146 +module_exit(microcode_exit)
  26.147 +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
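
In use, the driver above is driven entirely through write(2): microcode_write() takes the whole update image in a single write, and do_microcode_update() hands it to the hypervisor via XENPF_microcode_update instead of poking MSRs directly. A minimal userspace sketch; the image filename is hypothetical, while /dev/cpu/microcode is the node named in the driver's own mutex comment:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	static char buf[65536];
	ssize_t n;
	int in = open("microcode.bin", O_RDONLY);	/* hypothetical image */
	int dev = open("/dev/cpu/microcode", O_WRONLY);

	if (in < 0 || dev < 0) {
		perror("open");
		return 1;
	}
	n = read(in, buf, sizeof(buf));
	/* the whole image must go in one write(); the driver checks the size */
	if (n <= 0 || write(dev, buf, n) != n) {
		perror("microcode update");
		return 1;
	}
	close(dev);
	close(in);
	return 0;
}
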
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/arch/i386/kernel/mpparse-xen.c	Mon Jun 04 10:05:28 2007 +0100
    27.3 @@ -0,0 +1,1185 @@
    27.4 +/*
    27.5 + *	Intel Multiprocessor Specification 1.1 and 1.4
    27.6 + *	compliant MP-table parsing routines.
    27.7 + *
    27.8 + *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
    27.9 + *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
   27.10 + *
   27.11 + *	Fixes
   27.12 + *		Erich Boleyn	:	MP v1.4 and additional changes.
   27.13 + *		Alan Cox	:	Added EBDA scanning
   27.14 + *		Ingo Molnar	:	various cleanups and rewrites
   27.15 + *		Maciej W. Rozycki:	Bits for default MP configurations
   27.16 + *		Paul Diefenbaugh:	Added full ACPI support
   27.17 + */
   27.18 +
   27.19 +#include <linux/mm.h>
   27.20 +#include <linux/init.h>
   27.21 +#include <linux/acpi.h>
   27.22 +#include <linux/delay.h>
   27.23 +#include <linux/bootmem.h>
   27.24 +#include <linux/smp_lock.h>
   27.25 +#include <linux/kernel_stat.h>
   27.26 +#include <linux/mc146818rtc.h>
   27.27 +#include <linux/bitops.h>
   27.28 +
   27.29 +#include <asm/smp.h>
   27.30 +#include <asm/acpi.h>
   27.31 +#include <asm/mtrr.h>
   27.32 +#include <asm/mpspec.h>
   27.33 +#include <asm/io_apic.h>
   27.34 +
   27.35 +#include <mach_apic.h>
   27.36 +#include <mach_mpparse.h>
   27.37 +#include <bios_ebda.h>
   27.38 +
   27.39 +/* Have we found an MP table */
   27.40 +int smp_found_config;
   27.41 +unsigned int __initdata maxcpus = NR_CPUS;
   27.42 +
   27.43 +/*
   27.44 + * Various Linux-internal data structures created from the
   27.45 + * MP-table.
   27.46 + */
   27.47 +int apic_version [MAX_APICS];
   27.48 +int mp_bus_id_to_type [MAX_MP_BUSSES];
   27.49 +int mp_bus_id_to_node [MAX_MP_BUSSES];
   27.50 +int mp_bus_id_to_local [MAX_MP_BUSSES];
   27.51 +int quad_local_to_mp_bus_id [NR_CPUS/4][4];
   27.52 +int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
   27.53 +static int mp_current_pci_id;
   27.54 +
   27.55 +/* I/O APIC entries */
   27.56 +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
   27.57 +
   27.58 +/* # of MP IRQ source entries */
   27.59 +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
   27.60 +
   27.61 +/* MP IRQ source entries */
   27.62 +int mp_irq_entries;
   27.63 +
   27.64 +int nr_ioapics;
   27.65 +
   27.66 +int pic_mode;
   27.67 +unsigned long mp_lapic_addr;
   27.68 +
   27.69 +unsigned int def_to_bigsmp = 0;
   27.70 +
   27.71 +/* Processor that is doing the boot up */
   27.72 +unsigned int boot_cpu_physical_apicid = -1U;
   27.73 +/* Internal processor count */
   27.74 +static unsigned int __devinitdata num_processors;
   27.75 +
   27.76 +/* Bitmask of physically existing CPUs */
   27.77 +physid_mask_t phys_cpu_present_map;
   27.78 +
   27.79 +u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
   27.80 +
   27.81 +/*
   27.82 + * Intel MP BIOS table parsing routines:
   27.83 + */
   27.84 +
   27.85 +
   27.86 +/*
   27.87 + * Checksum an MP configuration block.
   27.88 + */
   27.89 +
   27.90 +static int __init mpf_checksum(unsigned char *mp, int len)
   27.91 +{
   27.92 +	int sum = 0;
   27.93 +
   27.94 +	while (len--)
   27.95 +		sum += *mp++;
   27.96 +
   27.97 +	return sum & 0xFF;
   27.98 +}
   27.99 +
  27.100 +/*
  27.101 + * Have to match translation table entries to main table entries by counter
  27.102 + * hence the mpc_record variable .... can't see a less disgusting way of
  27.103 + * doing this ....
  27.104 + */
  27.105 +
  27.106 +static int mpc_record; 
  27.107 +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
  27.108 +
  27.109 +#ifndef CONFIG_XEN
  27.110 +static void __devinit MP_processor_info (struct mpc_config_processor *m)
  27.111 +{
  27.112 + 	int ver, apicid;
  27.113 +	physid_mask_t phys_cpu;
  27.114 + 	
  27.115 +	if (!(m->mpc_cpuflag & CPU_ENABLED))
  27.116 +		return;
  27.117 +
  27.118 +	apicid = mpc_apic_id(m, translation_table[mpc_record]);
  27.119 +
  27.120 +	if (m->mpc_featureflag&(1<<0))
  27.121 +		Dprintk("    Floating point unit present.\n");
  27.122 +	if (m->mpc_featureflag&(1<<7))
  27.123 +		Dprintk("    Machine Exception supported.\n");
  27.124 +	if (m->mpc_featureflag&(1<<8))
  27.125 +		Dprintk("    64 bit compare & exchange supported.\n");
  27.126 +	if (m->mpc_featureflag&(1<<9))
  27.127 +		Dprintk("    Internal APIC present.\n");
  27.128 +	if (m->mpc_featureflag&(1<<11))
  27.129 +		Dprintk("    SEP present.\n");
  27.130 +	if (m->mpc_featureflag&(1<<12))
  27.131 +		Dprintk("    MTRR  present.\n");
  27.132 +	if (m->mpc_featureflag&(1<<13))
  27.133 +		Dprintk("    PGE  present.\n");
  27.134 +	if (m->mpc_featureflag&(1<<14))
  27.135 +		Dprintk("    MCA  present.\n");
  27.136 +	if (m->mpc_featureflag&(1<<15))
  27.137 +		Dprintk("    CMOV  present.\n");
  27.138 +	if (m->mpc_featureflag&(1<<16))
  27.139 +		Dprintk("    PAT  present.\n");
  27.140 +	if (m->mpc_featureflag&(1<<17))
  27.141 +		Dprintk("    PSE  present.\n");
  27.142 +	if (m->mpc_featureflag&(1<<18))
  27.143 +		Dprintk("    PSN  present.\n");
  27.144 +	if (m->mpc_featureflag&(1<<19))
  27.145 +		Dprintk("    Cache Line Flush Instruction present.\n");
  27.146 +	/* 20 Reserved */
  27.147 +	if (m->mpc_featureflag&(1<<21))
  27.148 +		Dprintk("    Debug Trace and EMON Store present.\n");
  27.149 +	if (m->mpc_featureflag&(1<<22))
  27.150 +		Dprintk("    ACPI Thermal Throttle Registers  present.\n");
  27.151 +	if (m->mpc_featureflag&(1<<23))
  27.152 +		Dprintk("    MMX  present.\n");
  27.153 +	if (m->mpc_featureflag&(1<<24))
  27.154 +		Dprintk("    FXSR  present.\n");
  27.155 +	if (m->mpc_featureflag&(1<<25))
  27.156 +		Dprintk("    XMM  present.\n");
  27.157 +	if (m->mpc_featureflag&(1<<26))
  27.158 +		Dprintk("    Willamette New Instructions  present.\n");
  27.159 +	if (m->mpc_featureflag&(1<<27))
  27.160 +		Dprintk("    Self Snoop  present.\n");
  27.161 +	if (m->mpc_featureflag&(1<<28))
  27.162 +		Dprintk("    HT  present.\n");
  27.163 +	if (m->mpc_featureflag&(1<<29))
  27.164 +		Dprintk("    Thermal Monitor present.\n");
  27.165 +	/* 30, 31 Reserved */
  27.166 +
  27.167 +
  27.168 +	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
  27.169 +		Dprintk("    Bootup CPU\n");
  27.170 +		boot_cpu_physical_apicid = m->mpc_apicid;
  27.171 +	}
  27.172 +
  27.173 +	ver = m->mpc_apicver;
  27.174 +
  27.175 +	/*
  27.176 +	 * Validate version
  27.177 +	 */
  27.178 +	if (ver == 0x0) {
  27.179 +		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
  27.180 +				"fixing up to 0x10. (tell your hw vendor)\n",
  27.181 +				m->mpc_apicid);
  27.182 +		ver = 0x10;
  27.183 +	}
  27.184 +	apic_version[m->mpc_apicid] = ver;
  27.185 +
  27.186 +	phys_cpu = apicid_to_cpu_present(apicid);
  27.187 +	physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
  27.188 +
  27.189 +	if (num_processors >= NR_CPUS) {
  27.190 +		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
  27.191 +			"  Processor ignored.\n", NR_CPUS);
  27.192 +		return;
  27.193 +	}
  27.194 +
  27.195 +	if (num_processors >= maxcpus) {
  27.196 +		printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
  27.197 +			" Processor ignored.\n", maxcpus);
  27.198 +		return;
  27.199 +	}
  27.200 +
  27.201 +	cpu_set(num_processors, cpu_possible_map);
  27.202 +	num_processors++;
  27.203 +
  27.204 +	/*
  27.205 +	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
  27.206 +	 * but we need to work other dependencies like SMP_SUSPEND etc
  27.207 +	 * before this can be done without some confusion.
  27.208 +	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
  27.209 +	 *       - Ashok Raj <ashok.raj@intel.com>
  27.210 +	 */
  27.211 +	if (num_processors > 8) {
  27.212 +		switch (boot_cpu_data.x86_vendor) {
  27.213 +		case X86_VENDOR_INTEL:
  27.214 +			if (!APIC_XAPIC(ver)) {
  27.215 +				def_to_bigsmp = 0;
  27.216 +				break;
  27.217 +			}
  27.218 +			/* If P4 and above fall through */
  27.219 +		case X86_VENDOR_AMD:
  27.220 +			def_to_bigsmp = 1;
  27.221 +		}
  27.222 +	}
  27.223 +	bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
  27.224 +}
  27.225 +#else
  27.226 +void __init MP_processor_info (struct mpc_config_processor *m)
  27.227 +{
  27.228 +	num_processors++;
  27.229 +}
  27.230 +#endif /* CONFIG_XEN */
  27.231 +
  27.232 +static void __init MP_bus_info (struct mpc_config_bus *m)
  27.233 +{
  27.234 +	char str[7];
  27.235 +
  27.236 +	memcpy(str, m->mpc_bustype, 6);
  27.237 +	str[6] = 0;
  27.238 +
  27.239 +	mpc_oem_bus_info(m, str, translation_table[mpc_record]);
  27.240 +
  27.241 +	if (m->mpc_busid >= MAX_MP_BUSSES) {
  27.242 +		printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
  27.243 +			"is too large, max. supported is %d\n",
  27.244 +			m->mpc_busid, str, MAX_MP_BUSSES - 1);
  27.245 +		return;
  27.246 +	}
  27.247 +
  27.248 +	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
  27.249 +		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
  27.250 +	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
  27.251 +		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
  27.252 +	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
  27.253 +		mpc_oem_pci_bus(m, translation_table[mpc_record]);
  27.254 +		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
  27.255 +		mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
  27.256 +		mp_current_pci_id++;
  27.257 +	} else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
  27.258 +		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
  27.259 +	} else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
  27.260 +		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
  27.261 +	} else {
  27.262 +		printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
  27.263 +	}
  27.264 +}
  27.265 +
  27.266 +static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
  27.267 +{
  27.268 +	if (!(m->mpc_flags & MPC_APIC_USABLE))
  27.269 +		return;
  27.270 +
  27.271 +	printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
  27.272 +		m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
  27.273 +	if (nr_ioapics >= MAX_IO_APICS) {
  27.274 +		printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
  27.275 +			MAX_IO_APICS, nr_ioapics);
  27.276 +		panic("Recompile kernel with bigger MAX_IO_APICS!\n");
  27.277 +	}
  27.278 +	if (!m->mpc_apicaddr) {
  27.279 +		printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
  27.280 +			" found in MP table, skipping!\n");
  27.281 +		return;
  27.282 +	}
  27.283 +	mp_ioapics[nr_ioapics] = *m;
  27.284 +	nr_ioapics++;
  27.285 +}
  27.286 +
  27.287 +static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
  27.288 +{
  27.289 +	mp_irqs [mp_irq_entries] = *m;
  27.290 +	Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
  27.291 +		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
  27.292 +			m->mpc_irqtype, m->mpc_irqflag & 3,
  27.293 +			(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
  27.294 +			m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
  27.295 +	if (++mp_irq_entries == MAX_IRQ_SOURCES)
  27.296 +		panic("Max # of irq sources exceeded!!\n");
  27.297 +}
  27.298 +
  27.299 +static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
  27.300 +{
  27.301 +	Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
  27.302 +		" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
  27.303 +			m->mpc_irqtype, m->mpc_irqflag & 3,
  27.304 +			(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
  27.305 +			m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
  27.306 +	/*
  27.307 +	 * Well it seems all SMP boards in existence
  27.308 +	 * use ExtINT/LVT1 == LINT0 and
  27.309 +	 * NMI/LVT2 == LINT1 - the following check
  27.310 +	 * will show us if this assumption is false.
  27.311 +	 * Until then we do not have to add baggage.
  27.312 +	 */
  27.313 +	if ((m->mpc_irqtype == mp_ExtINT) &&
  27.314 +		(m->mpc_destapiclint != 0))
  27.315 +			BUG();
  27.316 +	if ((m->mpc_irqtype == mp_NMI) &&
  27.317 +		(m->mpc_destapiclint != 1))
  27.318 +			BUG();
  27.319 +}
  27.320 +
  27.321 +#ifdef CONFIG_X86_NUMAQ
  27.322 +static void __init MP_translation_info (struct mpc_config_translation *m)
  27.323 +{
  27.324 +	printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
  27.325 +
  27.326 +	if (mpc_record >= MAX_MPC_ENTRY) 
  27.327 +		printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
  27.328 +	else
  27.329 +		translation_table[mpc_record] = m; /* stash this for later */
  27.330 +	if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
  27.331 +		node_set_online(m->trans_quad);
  27.332 +}
  27.333 +
  27.334 +/*
  27.335 + * Read/parse the MPC oem tables
  27.336 + */
  27.337 +
  27.338 +static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
  27.339 +	unsigned short oemsize)
  27.340 +{
  27.341 +	int count = sizeof (*oemtable); /* the header size */
  27.342 +	unsigned char *oemptr = ((unsigned char *)oemtable)+count;
  27.343 +	
  27.344 +	mpc_record = 0;
  27.345 +	printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
  27.346 +	if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
  27.347 +	{
  27.348 +		printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
  27.349 +			oemtable->oem_signature[0],
  27.350 +			oemtable->oem_signature[1],
  27.351 +			oemtable->oem_signature[2],
  27.352 +			oemtable->oem_signature[3]);
  27.353 +		return;
  27.354 +	}
  27.355 +	if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
  27.356 +	{
  27.357 +		printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
  27.358 +		return;
  27.359 +	}
  27.360 +	while (count < oemtable->oem_length) {
  27.361 +		switch (*oemptr) {
  27.362 +			case MP_TRANSLATION:
  27.363 +			{
  27.364 +				struct mpc_config_translation *m=
  27.365 +					(struct mpc_config_translation *)oemptr;
  27.366 +				MP_translation_info(m);
  27.367 +				oemptr += sizeof(*m);
  27.368 +				count += sizeof(*m);
  27.369 +				++mpc_record;
  27.370 +				break;
  27.371 +			}
  27.372 +			default:
  27.373 +			{
  27.374 +				printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
  27.375 +				return;
  27.376 +			}
  27.377 +		}
  27.378 +       }
  27.379 +}
  27.380 +
  27.381 +static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
  27.382 +		char *productid)
  27.383 +{
  27.384 +	if (strncmp(oem, "IBM NUMA", 8))
  27.385 +		printk("Warning!  May not be a NUMA-Q system!\n");
  27.386 +	if (mpc->mpc_oemptr)
  27.387 +		smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
  27.388 +				mpc->mpc_oemsize);
  27.389 +}
  27.390 +#endif	/* CONFIG_X86_NUMAQ */
  27.391 +
  27.392 +/*
  27.393 + * Read/parse the MPC
  27.394 + */
  27.395 +
  27.396 +static int __init smp_read_mpc(struct mp_config_table *mpc)
  27.397 +{
  27.398 +	char str[16];
  27.399 +	char oem[10];
  27.400 +	int count=sizeof(*mpc);
  27.401 +	unsigned char *mpt=((unsigned char *)mpc)+count;
  27.402 +
  27.403 +	if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
  27.404 +		printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
  27.405 +			*(u32 *)mpc->mpc_signature);
  27.406 +		return 0;
  27.407 +	}
  27.408 +	if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
  27.409 +		printk(KERN_ERR "SMP mptable: checksum error!\n");
  27.410 +		return 0;
  27.411 +	}
  27.412 +	if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
  27.413 +		printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
  27.414 +			mpc->mpc_spec);
  27.415 +		return 0;
  27.416 +	}
  27.417 +	if (!mpc->mpc_lapic) {
  27.418 +		printk(KERN_ERR "SMP mptable: null local APIC address!\n");
  27.419 +		return 0;
  27.420 +	}
  27.421 +	memcpy(oem,mpc->mpc_oem,8);
  27.422 +	oem[8]=0;
  27.423 +	printk(KERN_INFO "OEM ID: %s ",oem);
  27.424 +
  27.425 +	memcpy(str,mpc->mpc_productid,12);
  27.426 +	str[12]=0;
  27.427 +	printk("Product ID: %s ",str);
  27.428 +
  27.429 +	mps_oem_check(mpc, oem, str);
  27.430 +
  27.431 +	printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
  27.432 +
  27.433 +	/* 
  27.434 +	 * Save the local APIC address (it might be non-default) -- but only
  27.435 +	 * if we're not using ACPI.
  27.436 +	 */
  27.437 +	if (!acpi_lapic)
  27.438 +		mp_lapic_addr = mpc->mpc_lapic;
  27.439 +
  27.440 +	/*
  27.441 +	 *	Now process the configuration blocks.
  27.442 +	 */
  27.443 +	mpc_record = 0;
  27.444 +	while (count < mpc->mpc_length) {
  27.445 +		switch(*mpt) {
  27.446 +			case MP_PROCESSOR:
  27.447 +			{
  27.448 +				struct mpc_config_processor *m=
  27.449 +					(struct mpc_config_processor *)mpt;
  27.450 +				/* ACPI may have already provided this data */
  27.451 +				if (!acpi_lapic)
  27.452 +					MP_processor_info(m);
  27.453 +				mpt += sizeof(*m);
  27.454 +				count += sizeof(*m);
  27.455 +				break;
  27.456 +			}
  27.457 +			case MP_BUS:
  27.458 +			{
  27.459 +				struct mpc_config_bus *m=
  27.460 +					(struct mpc_config_bus *)mpt;
  27.461 +				MP_bus_info(m);
  27.462 +				mpt += sizeof(*m);
  27.463 +				count += sizeof(*m);
  27.464 +				break;
  27.465 +			}
  27.466 +			case MP_IOAPIC:
  27.467 +			{
  27.468 +				struct mpc_config_ioapic *m=
  27.469 +					(struct mpc_config_ioapic *)mpt;
  27.470 +				MP_ioapic_info(m);
  27.471 +				mpt+=sizeof(*m);
  27.472 +				count+=sizeof(*m);
  27.473 +				break;
  27.474 +			}
  27.475 +			case MP_INTSRC:
  27.476 +			{
  27.477 +				struct mpc_config_intsrc *m=
  27.478 +					(struct mpc_config_intsrc *)mpt;
  27.479 +
  27.480 +				MP_intsrc_info(m);
  27.481 +				mpt+=sizeof(*m);
  27.482 +				count+=sizeof(*m);
  27.483 +				break;
  27.484 +			}
  27.485 +			case MP_LINTSRC:
  27.486 +			{
  27.487 +				struct mpc_config_lintsrc *m=
  27.488 +					(struct mpc_config_lintsrc *)mpt;
  27.489 +				MP_lintsrc_info(m);
  27.490 +				mpt+=sizeof(*m);
  27.491 +				count+=sizeof(*m);
  27.492 +				break;
  27.493 +			}
  27.494 +			default:
  27.495 +			{
  27.496 +				count = mpc->mpc_length;
  27.497 +				break;
  27.498 +			}
  27.499 +		}
  27.500 +		++mpc_record;
  27.501 +	}
  27.502 +	clustered_apic_check();
  27.503 +	if (!num_processors)
  27.504 +		printk(KERN_ERR "SMP mptable: no processors registered!\n");
  27.505 +	return num_processors;
  27.506 +}
  27.507 +
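/*
 * Note: the ELCR (Edge/Level Control Register) is a 16-bit bitmap split
 * across I/O ports 0x4d0 (IRQs 0-7) and 0x4d1 (IRQs 8-15); bit N of the
 * relevant port is set when IRQ N is level-triggered. ELCR_trigger()
 * below computes the port as 0x4d0 + irq/8 and tests bit irq%8, so for
 * example IRQ 9 reads bit 1 of port 0x4d1.
 */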
  27.508 +static int __init ELCR_trigger(unsigned int irq)
  27.509 +{
  27.510 +	unsigned int port;
  27.511 +
  27.512 +	port = 0x4d0 + (irq >> 3);
  27.513 +	return (inb(port) >> (irq & 7)) & 1;
  27.514 +}
  27.515 +
  27.516 +static void __init construct_default_ioirq_mptable(int mpc_default_type)
  27.517 +{
  27.518 +	struct mpc_config_intsrc intsrc;
  27.519 +	int i;
  27.520 +	int ELCR_fallback = 0;
  27.521 +
  27.522 +	intsrc.mpc_type = MP_INTSRC;
  27.523 +	intsrc.mpc_irqflag = 0;			/* conforming */
  27.524 +	intsrc.mpc_srcbus = 0;
  27.525 +	intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
  27.526 +
  27.527 +	intsrc.mpc_irqtype = mp_INT;
  27.528 +
  27.529 +	/*
  27.530 +	 *  If true, we have an ISA/PCI system with no IRQ entries
  27.531 +	 *  in the MP table. To prevent the PCI interrupts from being set up
  27.532 +	 *  incorrectly, we try to use the ELCR. The sanity check to see if
  27.533 +	 *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
  27.534 +	 *  never be level sensitive, so we simply see if the ELCR agrees.
  27.535 +	 *  If it does, we assume it's valid.
  27.536 +	 */
  27.537 +	if (mpc_default_type == 5) {
  27.538 +		printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
  27.539 +
  27.540 +		if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
  27.541 +			printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
  27.542 +		else {
  27.543 +			printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
  27.544 +			ELCR_fallback = 1;
  27.545 +		}
  27.546 +	}
  27.547 +
  27.548 +	for (i = 0; i < 16; i++) {
  27.549 +		switch (mpc_default_type) {
  27.550 +		case 2:
  27.551 +			if (i == 0 || i == 13)
  27.552 +				continue;	/* IRQ0 & IRQ13 not connected */
  27.553 +			/* fall through */
  27.554 +		default:
  27.555 +			if (i == 2)
  27.556 +				continue;	/* IRQ2 is never connected */
  27.557 +		}
  27.558 +
  27.559 +		if (ELCR_fallback) {
  27.560 +			/*
  27.561 +			 *  If the ELCR indicates a level-sensitive interrupt, we
  27.562 +			 *  copy that information over to the MP table in the
  27.563 +			 *  irqflag field (level sensitive, active high polarity).
  27.564 +			 */
  27.565 +			if (ELCR_trigger(i))
  27.566 +				intsrc.mpc_irqflag = 13;
  27.567 +			else
  27.568 +				intsrc.mpc_irqflag = 0;
  27.569 +		}
  27.570 +
  27.571 +		intsrc.mpc_srcbusirq = i;
  27.572 +		intsrc.mpc_dstirq = i ? i : 2;		/* IRQ0 to INTIN2 */
  27.573 +		MP_intsrc_info(&intsrc);
  27.574 +	}
  27.575 +
  27.576 +	intsrc.mpc_irqtype = mp_ExtINT;
  27.577 +	intsrc.mpc_srcbusirq = 0;
  27.578 +	intsrc.mpc_dstirq = 0;				/* 8259A to INTIN0 */
  27.579 +	MP_intsrc_info(&intsrc);
  27.580 +}
  27.581 +
  27.582 +static inline void __init construct_default_ISA_mptable(int mpc_default_type)
  27.583 +{
  27.584 +	struct mpc_config_processor processor;
  27.585 +	struct mpc_config_bus bus;
  27.586 +	struct mpc_config_ioapic ioapic;
  27.587 +	struct mpc_config_lintsrc lintsrc;
  27.588 +	int linttypes[2] = { mp_ExtINT, mp_NMI };
  27.589 +	int i;
  27.590 +
  27.591 +	/*
  27.592 +	 * local APIC has default address
  27.593 +	 */
  27.594 +	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
  27.595 +
  27.596 +	/*
  27.597 +	 * 2 CPUs, numbered 0 & 1.
  27.598 +	 */
  27.599 +	processor.mpc_type = MP_PROCESSOR;
  27.600 +	/* Either an integrated APIC or a discrete 82489DX. */
  27.601 +	processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
  27.602 +	processor.mpc_cpuflag = CPU_ENABLED;
  27.603 +	processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
  27.604 +				   (boot_cpu_data.x86_model << 4) |
  27.605 +				   boot_cpu_data.x86_mask;
  27.606 +	processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
  27.607 +	processor.mpc_reserved[0] = 0;
  27.608 +	processor.mpc_reserved[1] = 0;
  27.609 +	for (i = 0; i < 2; i++) {
  27.610 +		processor.mpc_apicid = i;
  27.611 +		MP_processor_info(&processor);
  27.612 +	}
  27.613 +
  27.614 +	bus.mpc_type = MP_BUS;
  27.615 +	bus.mpc_busid = 0;
  27.616 +	switch (mpc_default_type) {
  27.617 +		default:
  27.618 +			printk("???\n");
  27.619 +			printk(KERN_ERR "Unknown standard configuration %d\n",
  27.620 +				mpc_default_type);
  27.621 +			/* fall through */
  27.622 +		case 1:
  27.623 +		case 5:
  27.624 +			memcpy(bus.mpc_bustype, "ISA   ", 6);
  27.625 +			break;
  27.626 +		case 2:
  27.627 +		case 6:
  27.628 +		case 3:
  27.629 +			memcpy(bus.mpc_bustype, "EISA  ", 6);
  27.630 +			break;
  27.631 +		case 4:
  27.632 +		case 7:
  27.633 +			memcpy(bus.mpc_bustype, "MCA   ", 6);
  27.634 +	}
  27.635 +	MP_bus_info(&bus);
  27.636 +	if (mpc_default_type > 4) {
  27.637 +		bus.mpc_busid = 1;
  27.638 +		memcpy(bus.mpc_bustype, "PCI   ", 6);
  27.639 +		MP_bus_info(&bus);
  27.640 +	}
  27.641 +
  27.642 +	ioapic.mpc_type = MP_IOAPIC;
  27.643 +	ioapic.mpc_apicid = 2;
  27.644 +	ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
  27.645 +	ioapic.mpc_flags = MPC_APIC_USABLE;
  27.646 +	ioapic.mpc_apicaddr = 0xFEC00000;
  27.647 +	MP_ioapic_info(&ioapic);
  27.648 +
  27.649 +	/*
  27.650 +	 * We set up most of the low 16 IO-APIC pins according to MPS rules.
  27.651 +	 */
  27.652 +	construct_default_ioirq_mptable(mpc_default_type);
  27.653 +
  27.654 +	lintsrc.mpc_type = MP_LINTSRC;
  27.655 +	lintsrc.mpc_irqflag = 0;		/* conforming */
  27.656 +	lintsrc.mpc_srcbusid = 0;
  27.657 +	lintsrc.mpc_srcbusirq = 0;
  27.658 +	lintsrc.mpc_destapic = MP_APIC_ALL;
  27.659 +	for (i = 0; i < 2; i++) {
  27.660 +		lintsrc.mpc_irqtype = linttypes[i];
  27.661 +		lintsrc.mpc_destapiclint = i;
  27.662 +		MP_lintsrc_info(&lintsrc);
  27.663 +	}
  27.664 +}
  27.665 +
  27.666 +static struct intel_mp_floating *mpf_found;
  27.667 +
  27.668 +/*
  27.669 + * Scan the memory blocks for an SMP configuration block.
  27.670 + */
  27.671 +void __init get_smp_config (void)
  27.672 +{
  27.673 +	struct intel_mp_floating *mpf = mpf_found;
  27.674 +
  27.675 +	/*
  27.676 +	 * ACPI supports both logical (e.g. Hyper-Threading) and physical 
  27.677 +	 * processors, where MPS only supports physical.
  27.678 +	 */
  27.679 +	if (acpi_lapic && acpi_ioapic) {
  27.680 +		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
  27.681 +		return;
  27.682 +	}
  27.683 +	else if (acpi_lapic)
  27.684 +		printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
  27.685 +
  27.686 +	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
  27.687 +	if (mpf->mpf_feature2 & (1<<7)) {
  27.688 +		printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
  27.689 +		pic_mode = 1;
  27.690 +	} else {
  27.691 +		printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
  27.692 +		pic_mode = 0;
  27.693 +	}
  27.694 +
  27.695 +	/*
  27.696 +	 * Now see if we need to read further.
  27.697 +	 */
  27.698 +	if (mpf->mpf_feature1 != 0) {
  27.699 +
  27.700 +		printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
  27.701 +		construct_default_ISA_mptable(mpf->mpf_feature1);
  27.702 +
  27.703 +	} else if (mpf->mpf_physptr) {
  27.704 +
  27.705 +		/*
  27.706 +		 * Read the physical hardware table.  Anything here will
  27.707 +		 * override the defaults.
  27.708 +		 */
  27.709 +		if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
  27.710 +			smp_found_config = 0;
  27.711 +			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
  27.712 +			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
  27.713 +			return;
  27.714 +		}
  27.715 +		/*
  27.716 +		 * If there are no explicit MP IRQ entries, then we are
  27.717 +		 * broken.  We set up most of the low 16 IO-APIC pins to
  27.718 +		 * ISA defaults and hope it will work.
  27.719 +		 */
  27.720 +		if (!mp_irq_entries) {
  27.721 +			struct mpc_config_bus bus;
  27.722 +
  27.723 +			printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
  27.724 +
  27.725 +			bus.mpc_type = MP_BUS;
  27.726 +			bus.mpc_busid = 0;
  27.727 +			memcpy(bus.mpc_bustype, "ISA   ", 6);
  27.728 +			MP_bus_info(&bus);
  27.729 +
  27.730 +			construct_default_ioirq_mptable(0);
  27.731 +		}
  27.732 +
  27.733 +	} else
  27.734 +		BUG();
  27.735 +
  27.736 +	printk(KERN_INFO "Processors: %d\n", num_processors);
  27.737 +	/*
  27.738 +	 * Only use the first configuration found.
  27.739 +	 */
  27.740 +}
  27.741 +
  27.742 +static int __init smp_scan_config (unsigned long base, unsigned long length)
  27.743 +{
  27.744 +	unsigned long *bp = isa_bus_to_virt(base);
  27.745 +	struct intel_mp_floating *mpf;
  27.746 +
  27.747 +	Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
  27.748 +	if (sizeof(*mpf) != 16)
  27.749 +		printk("Error: MPF size\n");
  27.750 +
  27.751 +	while (length > 0) {
  27.752 +		mpf = (struct intel_mp_floating *)bp;
  27.753 +		if ((*bp == SMP_MAGIC_IDENT) &&
  27.754 +			(mpf->mpf_length == 1) &&
  27.755 +			!mpf_checksum((unsigned char *)bp, 16) &&
  27.756 +			((mpf->mpf_specification == 1)
  27.757 +				|| (mpf->mpf_specification == 4)) ) {
  27.758 +
  27.759 +			smp_found_config = 1;
  27.760 +#ifndef CONFIG_XEN
  27.761 +			printk(KERN_INFO "found SMP MP-table at %08lx\n",
  27.762 +						virt_to_phys(mpf));
  27.763 +			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
  27.764 +			if (mpf->mpf_physptr) {
  27.765 +				/*
  27.766 +				 * We cannot access the MPC table to compute
  27.767 +				 * its size yet, as only a few megabytes from
  27.768 +				 * the bottom are mapped at this point.
  27.769 +				 * The PC-9800 places its MPC table at the very
  27.770 +				 * end of physical memory, so blindly reserving
  27.771 +				 * PAGE_SIZE from mpf->mpf_physptr would BUG()
  27.772 +				 * in reserve_bootmem.
  27.773 +				 */
  27.774 +				unsigned long size = PAGE_SIZE;
  27.775 +				unsigned long end = max_low_pfn * PAGE_SIZE;
  27.776 +				if (mpf->mpf_physptr + size > end)
  27.777 +					size = end - mpf->mpf_physptr;
  27.778 +				reserve_bootmem(mpf->mpf_physptr, size);
  27.779 +			}
  27.780 +#else
  27.781 +			printk(KERN_INFO "found SMP MP-table at %08lx\n",
  27.782 +				((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base);
  27.783 +#endif
  27.784 +
  27.785 +			mpf_found = mpf;
  27.786 +			return 1;
  27.787 +		}
  27.788 +		bp += 4;
  27.789 +		length -= 16;
  27.790 +	}
  27.791 +	return 0;
  27.792 +}
  27.793 +
  27.794 +void __init find_smp_config (void)
  27.795 +{
  27.796 +#ifndef CONFIG_XEN
  27.797 +	unsigned int address;
  27.798 +#endif
  27.799 +
  27.800 +	/*
  27.801 +	 * FIXME: Linux assumes you have 640K of base ram..
  27.802 +	 * this continues the error...
  27.803 +	 *
  27.804 +	 * 1) Scan the bottom 1K for a signature
  27.805 +	 * 2) Scan the top 1K of base RAM
  27.806 +	 * 3) Scan the 64K of bios
  27.807 +	 */
  27.808 +	if (smp_scan_config(0x0,0x400) ||
  27.809 +		smp_scan_config(639*0x400,0x400) ||
  27.810 +			smp_scan_config(0xF0000,0x10000))
  27.811 +		return;
  27.812 +	/*
  27.813 +	 * If it is an SMP machine we should know now, unless the
  27.814 +	 * configuration is in an EISA/MCA bus machine with an
  27.815 +	 * extended bios data area.
  27.816 +	 *
  27.817 +	 * there is a real-mode segmented pointer pointing to the
  27.818 +	 * 4K EBDA area at 0x40E, calculate and scan it here.
  27.819 +	 *
  27.820 +	 * NOTE! There are Linux loaders that will corrupt the EBDA
  27.821 +	 * area, and as such this kind of SMP config may be less
  27.822 +	 * trustworthy, simply because the SMP table may have been
  27.823 +	 * stomped on during early boot. These loaders are buggy and
  27.824 +	 * should be fixed.
  27.825 +	 *
  27.826 +	 * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
  27.827 +	 */
  27.828 +
  27.829 +#ifndef CONFIG_XEN
  27.830 +	address = get_bios_ebda();
  27.831 +	if (address)
  27.832 +		smp_scan_config(address, 0x400);
  27.833 +#endif
  27.834 +}
  27.835 +
  27.836 +int es7000_plat;
  27.837 +
  27.838 +/* --------------------------------------------------------------------------
  27.839 +                            ACPI-based MP Configuration
  27.840 +   -------------------------------------------------------------------------- */
  27.841 +
  27.842 +#ifdef CONFIG_ACPI
  27.843 +
  27.844 +void __init mp_register_lapic_address (
  27.845 +	u64			address)
  27.846 +{
  27.847 +#ifndef CONFIG_XEN
  27.848 +	mp_lapic_addr = (unsigned long) address;
  27.849 +
  27.850 +	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
  27.851 +
  27.852 +	if (boot_cpu_physical_apicid == -1U)
  27.853 +		boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
  27.854 +
  27.855 +	Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
  27.856 +#endif
  27.857 +}
  27.858 +
  27.859 +
  27.860 +void __devinit mp_register_lapic (
  27.861 +	u8			id, 
  27.862 +	u8			enabled)
  27.863 +{
  27.864 +	struct mpc_config_processor processor;
  27.865 +	int			boot_cpu = 0;
  27.866 +	
  27.867 +	if (MAX_APICS - id <= 0) {
  27.868 +		printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
  27.869 +			id, MAX_APICS);
  27.870 +		return;
  27.871 +	}
  27.872 +
  27.873 +	if (id == boot_cpu_physical_apicid)
  27.874 +		boot_cpu = 1;
  27.875 +
  27.876 +#ifndef CONFIG_XEN
  27.877 +	processor.mpc_type = MP_PROCESSOR;
  27.878 +	processor.mpc_apicid = id;
  27.879 +	processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
  27.880 +	processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
  27.881 +	processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
  27.882 +	processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
  27.883 +		(boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
  27.884 +	processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
  27.885 +	processor.mpc_reserved[0] = 0;
  27.886 +	processor.mpc_reserved[1] = 0;
  27.887 +#endif
  27.888 +
  27.889 +	MP_processor_info(&processor);
  27.890 +}
  27.891 +
  27.892 +#ifdef	CONFIG_X86_IO_APIC
  27.893 +
  27.894 +#define MP_ISA_BUS		0
  27.895 +#define MP_MAX_IOAPIC_PIN	127
  27.896 +
  27.897 +static struct mp_ioapic_routing {
  27.898 +	int			apic_id;
  27.899 +	int			gsi_base;
  27.900 +	int			gsi_end;
  27.901 +	u32			pin_programmed[4];
  27.902 +} mp_ioapic_routing[MAX_IO_APICS];
  27.903 +
  27.904 +
  27.905 +static int mp_find_ioapic (
  27.906 +	int			gsi)
  27.907 +{
  27.908 +	int			i = 0;
  27.909 +
  27.910 +	/* Find the IOAPIC that manages this GSI. */
  27.911 +	for (i = 0; i < nr_ioapics; i++) {
  27.912 +		if ((gsi >= mp_ioapic_routing[i].gsi_base)
  27.913 +			&& (gsi <= mp_ioapic_routing[i].gsi_end))
  27.914 +			return i;
  27.915 +	}
  27.916 +
  27.917 +	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
  27.918 +
  27.919 +	return -1;
  27.920 +}
  27.921 +	
  27.922 +
  27.923 +void __init mp_register_ioapic (
  27.924 +	u8			id, 
  27.925 +	u32			address,
  27.926 +	u32			gsi_base)
  27.927 +{
  27.928 +	int			idx = 0;
  27.929 +	int			tmpid;
  27.930 +
  27.931 +	if (nr_ioapics >= MAX_IO_APICS) {
  27.932 +		printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
  27.933 +			"(found %d)\n", MAX_IO_APICS, nr_ioapics);
  27.934 +		panic("Recompile kernel with bigger MAX_IO_APICS!\n");
  27.935 +	}
  27.936 +	if (!address) {
  27.937 +		printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
  27.938 +			" found in MADT table, skipping!\n");
  27.939 +		return;
  27.940 +	}
  27.941 +
  27.942 +	idx = nr_ioapics++;
  27.943 +
  27.944 +	mp_ioapics[idx].mpc_type = MP_IOAPIC;
  27.945 +	mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
  27.946 +	mp_ioapics[idx].mpc_apicaddr = address;
  27.947 +
  27.948 +#ifndef CONFIG_XEN
  27.949 +	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
  27.950 +#endif
  27.951 +	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
  27.952 +		&& !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
  27.953 +		tmpid = io_apic_get_unique_id(idx, id);
  27.954 +	else
  27.955 +		tmpid = id;
  27.956 +	if (tmpid == -1) {
  27.957 +		nr_ioapics--;
  27.958 +		return;
  27.959 +	}
  27.960 +	mp_ioapics[idx].mpc_apicid = tmpid;
  27.961 +	mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
  27.962 +	
  27.963 +	/* 
  27.964 +	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
  27.965 +	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
  27.966 +	 */
  27.967 +	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
  27.968 +	mp_ioapic_routing[idx].gsi_base = gsi_base;
  27.969 +	mp_ioapic_routing[idx].gsi_end = gsi_base + 
  27.970 +		io_apic_get_redir_entries(idx);
  27.971 +
  27.972 +	printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
  27.973 +		"GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
  27.974 +		mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
  27.975 +		mp_ioapic_routing[idx].gsi_base,
  27.976 +		mp_ioapic_routing[idx].gsi_end);
  27.977 +
  27.978 +	return;
  27.979 +}
  27.980 +
  27.981 +
  27.982 +void __init mp_override_legacy_irq (
  27.983 +	u8			bus_irq,
  27.984 +	u8			polarity, 
  27.985 +	u8			trigger, 
  27.986 +	u32			gsi)
  27.987 +{
  27.988 +	struct mpc_config_intsrc intsrc;
  27.989 +	int			ioapic = -1;
  27.990 +	int			pin = -1;
  27.991 +
  27.992 +	/* 
  27.993 +	 * Convert 'gsi' to 'ioapic.pin'.
  27.994 +	 */
  27.995 +	ioapic = mp_find_ioapic(gsi);
  27.996 +	if (ioapic < 0)
  27.997 +		return;
  27.998 +	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
  27.999 +
 27.1000 +	/*
 27.1001 +	 * TBD: This check is for faulty timer entries, where the override
 27.1002 +	 *      erroneously sets the trigger to level, resulting in a HUGE 
 27.1003 +	 *      increase of timer interrupts!
 27.1004 +	 */
 27.1005 +	if ((bus_irq == 0) && (trigger == 3))
 27.1006 +		trigger = 1;
 27.1007 +
 27.1008 +	intsrc.mpc_type = MP_INTSRC;
 27.1009 +	intsrc.mpc_irqtype = mp_INT;
 27.1010 +	intsrc.mpc_irqflag = (trigger << 2) | polarity;
 27.1011 +	intsrc.mpc_srcbus = MP_ISA_BUS;
 27.1012 +	intsrc.mpc_srcbusirq = bus_irq;				       /* IRQ */
 27.1013 +	intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;	   /* APIC ID */
 27.1014 +	intsrc.mpc_dstirq = pin;				    /* INTIN# */
 27.1015 +
 27.1016 +	Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
 27.1017 +		intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
 27.1018 +		(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
 27.1019 +		intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
 27.1020 +
 27.1021 +	mp_irqs[mp_irq_entries] = intsrc;
 27.1022 +	if (++mp_irq_entries == MAX_IRQ_SOURCES)
 27.1023 +		panic("Max # of irq sources exceeded!\n");
 27.1024 +
 27.1025 +	return;
 27.1026 +}
 27.1027 +
 27.1028 +void __init mp_config_acpi_legacy_irqs (void)
 27.1029 +{
 27.1030 +	struct mpc_config_intsrc intsrc;
 27.1031 +	int			i = 0;
 27.1032 +	int			ioapic = -1;
 27.1033 +
 27.1034 +	/* 
 27.1035 +	 * Fabricate the legacy ISA bus (bus #31).
 27.1036 +	 */
 27.1037 +	mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
 27.1038 +	Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
 27.1039 +
 27.1040 +	/*
 27.1041 +	 * Older generations of ES7000 have no legacy identity mappings
 27.1042 +	 */
 27.1043 +	if (es7000_plat == 1)
 27.1044 +		return;
 27.1045 +
 27.1046 +	/* 
 27.1047 +	 * Locate the IOAPIC that manages the ISA IRQs (0-15). 
 27.1048 +	 */
 27.1049 +	ioapic = mp_find_ioapic(0);
 27.1050 +	if (ioapic < 0)
 27.1051 +		return;
 27.1052 +
 27.1053 +	intsrc.mpc_type = MP_INTSRC;
 27.1054 +	intsrc.mpc_irqflag = 0;					/* Conforming */
 27.1055 +	intsrc.mpc_srcbus = MP_ISA_BUS;
 27.1056 +	intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
 27.1057 +
 27.1058 +	/* 
 27.1059 +	 * Use the default configuration for IRQs 0-15, unless
 27.1060 +	 * overridden by (MADT) interrupt source override entries.
 27.1061 +	 */
 27.1062 +	for (i = 0; i < 16; i++) {
 27.1063 +		int idx;
 27.1064 +
 27.1065 +		for (idx = 0; idx < mp_irq_entries; idx++) {
 27.1066 +			struct mpc_config_intsrc *irq = mp_irqs + idx;
 27.1067 +
 27.1068 +			/* Do we already have a mapping for this ISA IRQ? */
 27.1069 +			if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
 27.1070 +				break;
 27.1071 +
 27.1072 +			/* Do we already have a mapping for this IOAPIC pin */
 27.1073 +			if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
 27.1074 +				(irq->mpc_dstirq == i))
 27.1075 +				break;
 27.1076 +		}
 27.1077 +
 27.1078 +		if (idx != mp_irq_entries) {
 27.1079 +			printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
 27.1080 +			continue;			/* IRQ already used */
 27.1081 +		}
 27.1082 +
 27.1083 +		intsrc.mpc_irqtype = mp_INT;
 27.1084 +		intsrc.mpc_srcbusirq = i;		   /* Identity mapped */
 27.1085 +		intsrc.mpc_dstirq = i;
 27.1086 +
 27.1087 +		Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
 27.1088 +			"%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
 27.1089 +			(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
 27.1090 +			intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
 27.1091 +			intsrc.mpc_dstirq);
 27.1092 +
 27.1093 +		mp_irqs[mp_irq_entries] = intsrc;
 27.1094 +		if (++mp_irq_entries == MAX_IRQ_SOURCES)
 27.1095 +			panic("Max # of irq sources exceeded!\n");
 27.1096 +	}
 27.1097 +}
 27.1098 +
 27.1099 +#define MAX_GSI_NUM	4096
 27.1100 +
 27.1101 +int mp_register_gsi (u32 gsi, int triggering, int polarity)
 27.1102 +{
 27.1103 +	int			ioapic = -1;
 27.1104 +	int			ioapic_pin = 0;
 27.1105 +	int			idx, bit = 0;
 27.1106 +	static int		pci_irq = 16;
 27.1107 +	/*
 27.1108 +	 * Mapping between Global System Interrupts, which
 27.1109 +	 * represent all possible interrupts, and IRQs
 27.1110 +	 * assigned to actual devices.
 27.1111 +	 */
 27.1112 +	static int		gsi_to_irq[MAX_GSI_NUM];
 27.1113 +
 27.1114 +	/* Don't set up the ACPI SCI because it's already set up */
 27.1115 +	if (acpi_fadt.sci_int == gsi)
 27.1116 +		return gsi;
 27.1117 +
 27.1118 +	ioapic = mp_find_ioapic(gsi);
 27.1119 +	if (ioapic < 0) {
 27.1120 +		printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
 27.1121 +		return gsi;
 27.1122 +	}
 27.1123 +
 27.1124 +	ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
 27.1125 +
 27.1126 +	if (ioapic_renumber_irq)
 27.1127 +		gsi = ioapic_renumber_irq(ioapic, gsi);
 27.1128 +
 27.1129 +	/* 
 27.1130 +	 * Avoid pin reprogramming.  PRTs typically include entries  
 27.1131 +	 * with redundant pin->gsi mappings (but unique PCI devices);
 27.1132 +	 * we only program the IOAPIC on the first.
 27.1133 +	 */
 27.1134 +	bit = ioapic_pin % 32;
 27.1135 +	idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
 27.1136 +	if (idx > 3) {
 27.1137 +		printk(KERN_ERR "Invalid reference to IOAPIC pin "
 27.1138 +			"%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
 27.1139 +			ioapic_pin);
 27.1140 +		return gsi;
 27.1141 +	}
 27.1142 +	if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
 27.1143 +		Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
 27.1144 +			mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
 27.1145 +		return gsi_to_irq[gsi];
 27.1146 +	}
 27.1147 +
 27.1148 +	mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
 27.1149 +
 27.1150 +	if (triggering == ACPI_LEVEL_SENSITIVE) {
 27.1151 +		/*
 27.1152 +		 * For PCI devices assign IRQs in order, avoiding gaps
 27.1153 +		 * due to unused I/O APIC pins.
 27.1154 +		 */
 27.1155 +		int irq = gsi;
 27.1156 +		if (gsi < MAX_GSI_NUM) {
 27.1157 +			/*
 27.1158 +			 * Retain the VIA chipset work-around (gsi > 15), but
 27.1159 +			 * avoid a problem where the 8254 timer (IRQ0) is set up
 27.1160 +			 * via an override (so it's not on pin 0 of the ioapic),
 27.1161 +			 * and at the same time, the pin 0 interrupt is a PCI
 27.1162 +			 * type.  The gsi > 15 test could cause these two pins
 27.1163 +			 * to be shared as IRQ0, and they are not shareable.
 27.1164 +			 * So test for this condition, and if necessary, avoid
 27.1165 +			 * the pin collision.
 27.1166 +			 */
 27.1167 +			if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
 27.1168 +				gsi = pci_irq++;
 27.1169 +			/*
 27.1170 +			 * Don't assign IRQ used by ACPI SCI
 27.1171 +			 */
 27.1172 +			if (gsi == acpi_fadt.sci_int)
 27.1173 +				gsi = pci_irq++;
 27.1174 +			gsi_to_irq[irq] = gsi;
 27.1175 +		} else {
 27.1176 +			printk(KERN_ERR "GSI %u is too high\n", gsi);
 27.1177 +			return gsi;
 27.1178 +		}
 27.1179 +	}
 27.1180 +
 27.1181 +	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
 27.1182 +		    triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
 27.1183 +		    polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
 27.1184 +	return gsi;
 27.1185 +}
 27.1186 +
 27.1187 +#endif /* CONFIG_X86_IO_APIC */
 27.1188 +#endif /* CONFIG_ACPI */
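
The pin_programmed bookkeeping in mp_register_gsi above packs one "already
programmed" bit per IOAPIC pin into four 32-bit words, so redundant PRT
entries return the cached IRQ instead of reprogramming the pin. Below is a
minimal standalone sketch of that test-and-set logic; the function name,
array, and main() harness are illustrative only, not part of this tree, and
note that the kernel's "(ioapic_pin < 32) ? 0 : (ioapic_pin / 32)" is simply
"ioapic_pin / 32".

	/*
	 * Standalone sketch of the per-pin "already programmed" test used
	 * by mp_register_gsi.  One bit per pin, packed into 32-bit words;
	 * four words cover pins 0-127.  All names are illustrative.
	 */
	#include <stdio.h>

	static unsigned int pin_programmed[4];

	/* Returns 1 if the pin was already programmed, 0 on first use,
	 * -1 for an out-of-range pin (mirrors the idx > 3 check above). */
	static int test_and_mark_pin(int pin)
	{
		int idx = pin / 32;	/* which 32-bit word */
		int bit = pin % 32;	/* which bit within that word */

		if (idx > 3)
			return -1;
		if (pin_programmed[idx] & (1u << bit))
			return 1;	/* redundant PRT entry: skip */
		pin_programmed[idx] |= 1u << bit;
		return 0;
	}

	int main(void)
	{
		printf("%d\n", test_and_mark_pin(5));	/* 0: first use */
		printf("%d\n", test_and_mark_pin(5));	/* 1: already set */
		return 0;
	}
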
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/arch/i386/kernel/pci-dma-xen.c	Mon Jun 04 10:05:28 2007 +0100
    28.3 @@ -0,0 +1,378 @@
    28.4 +/*
    28.5 + * Dynamic DMA mapping support.
    28.6 + *
    28.7 + * On i386 there is no hardware dynamic DMA address translation,
    28.8 + * so consistent alloc/free are merely page allocation/freeing.
    28.9 + * The rest of the dynamic DMA mapping interface is implemented
   28.10 + * in asm/pci.h.
   28.11 + */
   28.12 +
   28.13 +#include <linux/types.h>
   28.14 +#include <linux/mm.h>
   28.15 +#include <linux/string.h>
   28.16 +#include <linux/pci.h>
   28.17 +#include <linux/module.h>
   28.18 +#include <linux/version.h>
   28.19 +#include <asm/io.h>
   28.20 +#include <xen/balloon.h>
   28.21 +#include <xen/gnttab.h>
   28.22 +#include <asm/swiotlb.h>
   28.23 +#include <asm/tlbflush.h>
   28.24 +#include <asm-i386/mach-xen/asm/swiotlb.h>
   28.25 +#include <asm/bug.h>
   28.26 +
   28.27 +#ifdef __x86_64__
   28.28 +#include <asm/proto.h>
   28.29 +
   28.30 +int iommu_merge __read_mostly = 0;
   28.31 +EXPORT_SYMBOL(iommu_merge);
   28.32 +
   28.33 +dma_addr_t bad_dma_address __read_mostly;
   28.34 +EXPORT_SYMBOL(bad_dma_address);
   28.35 +
   28.36 +/* This tells the BIO block layer to assume merging. Default to off
   28.37 +   because we cannot guarantee merging later. */
   28.38 +int iommu_bio_merge __read_mostly = 0;
   28.39 +EXPORT_SYMBOL(iommu_bio_merge);
   28.40 +
   28.41 +int force_iommu __read_mostly = 0;
   28.42 +
   28.43 +__init int iommu_setup(char *p)
   28.44 +{
   28.45 +    return 1;
   28.46 +}
   28.47 +
   28.48 +void __init pci_iommu_alloc(void)
   28.49 +{
   28.50 +#ifdef CONFIG_SWIOTLB
   28.51 +	pci_swiotlb_init();
   28.52 +#endif
   28.53 +}
   28.54 +
   28.55 +static int __init pci_iommu_init(void)
   28.56 +{
   28.57 +	no_iommu_init();
   28.58 +	return 0;
   28.59 +}
   28.60 +
   28.61 +/* Must execute after PCI subsystem */
   28.62 +fs_initcall(pci_iommu_init);
   28.63 +#endif
   28.64 +
   28.65 +struct dma_coherent_mem {
   28.66 +	void		*virt_base;
   28.67 +	u32		device_base;
   28.68 +	int		size;
   28.69 +	int		flags;
   28.70 +	unsigned long	*bitmap;
   28.71 +};
   28.72 +
   28.73 +#define IOMMU_BUG_ON(test)				\
   28.74 +do {							\
   28.75 +	if (unlikely(test)) {				\
   28.76 +		printk(KERN_ALERT "Fatal DMA error! "	\
   28.77 +		       "Please use 'swiotlb=force'\n");	\
   28.78 +		BUG();					\
   28.79 +	}						\
   28.80 +} while (0)
   28.81 +
   28.82 +int
   28.83 +dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
   28.84 +	   enum dma_data_direction direction)
   28.85 +{
   28.86 +	int i, rc;
   28.87 +
   28.88 +	if (direction == DMA_NONE)
   28.89 +		BUG();
   28.90 +	WARN_ON(nents == 0 || sg[0].length == 0);
   28.91 +
   28.92 +	if (swiotlb) {
   28.93 +		rc = swiotlb_map_sg(hwdev, sg, nents, direction);
   28.94 +	} else {
   28.95 +		for (i = 0; i < nents; i++) {
   28.96 +			sg[i].dma_address =
   28.97 +				gnttab_dma_map_page(sg[i].page) + sg[i].offset;
   28.98 +			sg[i].dma_length  = sg[i].length;
   28.99 +			BUG_ON(!sg[i].page);
  28.100 +			IOMMU_BUG_ON(address_needs_mapping(
  28.101 +				hwdev, sg[i].dma_address));
  28.102 +		}
  28.103 +		rc = nents;
  28.104 +	}
  28.105 +
  28.106 +	flush_write_buffers();
  28.107 +	return rc;
  28.108 +}
  28.109 +EXPORT_SYMBOL(dma_map_sg);
  28.110 +
  28.111 +void
  28.112 +dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
  28.113 +	     enum dma_data_direction direction)
  28.114 +{
  28.115 +	int i;
  28.116 +
  28.117 +	BUG_ON(direction == DMA_NONE);
  28.118 +	if (swiotlb)
  28.119 +		swiotlb_unmap_sg(hwdev, sg, nents, direction);
  28.120 +	else {
  28.121 +		for (i = 0; i < nents; i++)
  28.122 +			gnttab_dma_unmap_page(sg[i].dma_address);
  28.123 +	}
  28.124 +}
  28.125 +EXPORT_SYMBOL(dma_unmap_sg);
  28.126 +
  28.127 +#ifdef CONFIG_HIGHMEM
  28.128 +dma_addr_t
  28.129 +dma_map_page(struct device *dev, struct page *page, unsigned long offset,
  28.130 +	     size_t size, enum dma_data_direction direction)
  28.131 +{
  28.132 +	dma_addr_t dma_addr;
  28.133 +
  28.134 +	BUG_ON(direction == DMA_NONE);
  28.135 +
  28.136 +	if (swiotlb) {
  28.137 +		dma_addr = swiotlb_map_page(
  28.138 +			dev, page, offset, size, direction);
  28.139 +	} else {
  28.140 +		dma_addr = gnttab_dma_map_page(page) + offset;
  28.141 +		IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
  28.142 +	}
  28.143 +
  28.144 +	return dma_addr;
  28.145 +}
  28.146 +EXPORT_SYMBOL(dma_map_page);
  28.147 +
  28.148 +void
  28.149 +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
  28.150 +	       enum dma_data_direction direction)
  28.151 +{
  28.152 +	BUG_ON(direction == DMA_NONE);
  28.153 +	if (swiotlb)
  28.154 +		swiotlb_unmap_page(dev, dma_address, size, direction);
  28.155 +	else
  28.156 +		gnttab_dma_unmap_page(dma_address);
  28.157 +}
  28.158 +EXPORT_SYMBOL(dma_unmap_page);
  28.159 +#endif /* CONFIG_HIGHMEM */
  28.160 +
  28.161 +int
  28.162 +dma_mapping_error(dma_addr_t dma_addr)
  28.163 +{
  28.164 +	if (swiotlb)
  28.165 +		return swiotlb_dma_mapping_error(dma_addr);
  28.166 +	return 0;
  28.167 +}
  28.168 +EXPORT_SYMBOL(dma_mapping_error);
  28.169 +
  28.170 +int
  28.171 +dma_supported(struct device *dev, u64 mask)
  28.172 +{
  28.173 +	if (swiotlb)
  28.174 +		return swiotlb_dma_supported(dev, mask);
  28.175 +	/*
  28.176 +	 * By default we BUG() when an infeasible DMA is requested, and
  28.177 +	 * ask the user to boot with swiotlb=force (see IOMMU_BUG_ON).
  28.178 +	 */
  28.179 +	return 1;
  28.180 +}
  28.181 +EXPORT_SYMBOL(dma_supported);
  28.182 +
  28.183 +void *dma_alloc_coherent(struct device *dev, size_t size,
  28.184 +			   dma_addr_t *dma_handle, gfp_t gfp)
  28.185 +{
  28.186 +	void *ret;
  28.187 +	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
  28.188 +	unsigned int order = get_order(size);
  28.189 +	unsigned long vstart;
  28.190 +	u64 mask;
  28.191 +
  28.192 +	/* ignore region specifiers */
  28.193 +	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
  28.194 +
  28.195 +	if (mem) {
  28.196 +		int page = bitmap_find_free_region(mem->bitmap, mem->size,
  28.197 +						     order);
  28.198 +		if (page >= 0) {
  28.199 +			*dma_handle = mem->device_base + (page << PAGE_SHIFT);
  28.200 +			ret = mem->virt_base + (page << PAGE_SHIFT);
  28.201 +			memset(ret, 0, size);
  28.202 +			return ret;
  28.203 +		}
  28.204 +		if (mem->flags & DMA_MEMORY_EXCLUSIVE)
  28.205 +			return NULL;
  28.206 +	}
  28.207 +
  28.208 +	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
  28.209 +		gfp |= GFP_DMA;
  28.210 +
  28.211 +	vstart = __get_free_pages(gfp, order);
  28.212 +	ret = (void *)vstart;
  28.213 +
  28.214 +	if (dev != NULL && dev->coherent_dma_mask)
  28.215 +		mask = dev->coherent_dma_mask;
  28.216 +	else
  28.217 +		mask = 0xffffffff;
  28.218 +
  28.219 +	if (ret != NULL) {
  28.220 +		if (xen_create_contiguous_region(vstart, order,
  28.221 +						 fls64(mask)) != 0) {
  28.222 +			free_pages(vstart, order);
  28.223 +			return NULL;
  28.224 +		}
  28.225 +		memset(ret, 0, size);
  28.226 +		*dma_handle = virt_to_bus(ret);
  28.227 +	}
  28.228 +	return ret;
  28.229 +}
  28.230 +EXPORT_SYMBOL(dma_alloc_coherent);
  28.231 +
  28.232 +void dma_free_coherent(struct device *dev, size_t size,
  28.233 +			 void *vaddr, dma_addr_t dma_handle)
  28.234 +{
  28.235 +	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
  28.236 +	int order = get_order(size);
  28.237 +	
  28.238 +	if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
  28.239 +		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
  28.240 +
  28.241 +		bitmap_release_region(mem->bitmap, page, order);
  28.242 +	} else {
  28.243 +		xen_destroy_contiguous_region((unsigned long)vaddr, order);
  28.244 +		free_pages((unsigned long)vaddr, order);
  28.245 +	}
  28.246 +}
  28.247 +EXPORT_SYMBOL(dma_free_coherent);
  28.248 +
  28.249 +#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
  28.250 +int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
  28.251 +				dma_addr_t device_addr, size_t size, int flags)
  28.252 +{
  28.253 +	void __iomem *mem_base;
  28.254 +	int pages = size >> PAGE_SHIFT;
  28.255 +	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
  28.256 +
  28.257 +	if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
  28.258 +		goto out;
  28.259 +	if (!size)
  28.260 +		goto out;
  28.261 +	if (dev->dma_mem)
  28.262 +		goto out;
  28.263 +
  28.264 +	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
  28.265 +
  28.266 +	mem_base = ioremap(bus_addr, size);
  28.267 +	if (!mem_base)
  28.268 +		goto out;
  28.269 +
  28.270 +	dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
  28.271 +	if (!dev->dma_mem)
  28.272 +		goto out;
  28.273 +	memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
  28.274 +	dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
  28.275 +	if (!dev->dma_mem->bitmap)
  28.276 +		goto free1_out;
  28.277 +	memset(dev->dma_mem->bitmap, 0, bitmap_size);
  28.278 +
  28.279 +	dev->dma_mem->virt_base = mem_base;
  28.280 +	dev->dma_mem->device_base = device_addr;
  28.281 +	dev->dma_mem->size = pages;
  28.282 +	dev->dma_mem->flags = flags;
  28.283 +
  28.284 +	if (flags & DMA_MEMORY_MAP)
  28.285 +		return DMA_MEMORY_MAP;
  28.286 +
  28.287 +	return DMA_MEMORY_IO;
  28.288 +
  28.289 + free1_out:
  28.290 +	kfree(dev->dma_mem->bitmap);
  28.291 + out:
  28.292 +	return 0;
  28.293 +}
  28.294 +EXPORT_SYMBOL(dma_declare_coherent_memory);
  28.295 +
  28.296 +void dma_release_declared_memory(struct device *dev)
  28.297 +{
  28.298 +	struct dma_coherent_mem *mem = dev->dma_mem;
  28.299 +	
  28.300 +	if (!mem)
  28.301 +		return;
  28.302 +	dev->dma_mem = NULL;
  28.303 +	iounmap(mem->virt_base);
  28.304 +	kfree(mem->bitmap);
  28.305 +	kfree(mem);
  28.306 +}
  28.307 +EXPORT_SYMBOL(dma_release_declared_memory);
  28.308 +
  28.309 +void *dma_mark_declared_memory_occupied(struct device *dev,
  28.310 +					dma_addr_t device_addr, size_t size)
  28.311 +{
  28.312 +	struct dma_coherent_mem *mem = dev->dma_mem;
  28.313 +	int pos, err;
  28.314 +
  28.315 +	size += device_addr & ~PAGE_MASK;
  28.316 +	if (!mem)
  28.317 +		return ERR_PTR(-EINVAL);
  28.318 +
  28.319 +	pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
  28.320 +	err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
  28.321 +	if (err != 0)
  28.322 +		return ERR_PTR(err);
  28.323 +	return mem->virt_base + (pos << PAGE_SHIFT);
  28.324 +}
  28.325 +EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
  28.326 +#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
  28.327 +
  28.328 +dma_addr_t
  28.329 +dma_map_single(struct device *dev, void *ptr, size_t size,
  28.330 +	       enum dma_data_direction direction)
  28.331 +{
  28.332 +	dma_addr_t dma;
  28.333 +
  28.334 +	if (direction == DMA_NONE)
  28.335 +		BUG();
  28.336 +	WARN_ON(size == 0);
  28.337 +
  28.338 +	if (swiotlb) {
  28.339 +		dma = swiotlb_map_single(dev, ptr, size, direction);
  28.340 +	} else {
  28.341 +		dma = gnttab_dma_map_page(virt_to_page(ptr)) +
  28.342 +		      offset_in_page(ptr);
  28.343 +		IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
  28.344 +		IOMMU_BUG_ON(address_needs_mapping(dev, dma));
  28.345 +	}
  28.346 +
  28.347 +	flush_write_buffers();
  28.348 +	return dma;
  28.349 +}
  28.350 +EXPORT_SYMBOL(dma_map_single);
  28.351 +
  28.352 +void
  28.353 +dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
  28.354 +		 enum dma_data_direction direction)
  28.355 +{
  28.356 +	if (direction == DMA_NONE)
  28.357 +		BUG();
  28.358 +	if (swiotlb)
  28.359 +		swiotlb_unmap_single(dev, dma_addr, size, direction);
  28.360 +	else
  28.361 +		gnttab_dma_unmap_page(dma_addr);
  28.362 +}
  28.363 +EXPORT_SYMBOL(dma_unmap_single);
  28.364 +
  28.365 +void
  28.366 +dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
  28.367 +			enum dma_data_direction direction)
  28.368 +{
  28.369 +	if (swiotlb)
  28.370 +		swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
  28.371 +}
  28.372 +EXPORT_SYMBOL(dma_sync_single_for_cpu);
  28.373 +
  28.374 +void
  28.375 +dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
  28.376 +                           enum dma_data_direction direction)
  28.377 +{
  28.378 +	if (swiotlb)
  28.379 +		swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
  28.380 +}
  28.381 +EXPORT_SYMBOL(dma_sync_single_for_device);
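
Everything in pci-dma-xen.c above dispatches to swiotlb when a bounce buffer
is active and to the grant-table helpers otherwise, but a consumer sees only
the standard streaming DMA API. A hypothetical driver fragment showing the
intended call sequence follows; the function and its 'dev', 'buf', and 'len'
parameters are assumptions for illustration, and only dma_map_single,
dma_mapping_error (single-argument in this tree), and dma_unmap_single come
from the code above.

	#include <linux/device.h>
	#include <linux/dma-mapping.h>
	#include <linux/errno.h>

	/*
	 * Hypothetical driver fragment: map a kernel buffer for a
	 * device-to-memory transfer, bail out if the mapping failed
	 * (e.g. swiotlb could not bounce it), and unmap when done.
	 */
	static int example_start_rx(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t handle;

		handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
		if (dma_mapping_error(handle))
			return -EIO;

		/* ... program 'handle' into the device's DMA engine and
		 * wait for the transfer to complete ... */

		dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
		return 0;
	}

For coherent allocations the same file routes dma_alloc_coherent through
xen_create_contiguous_region, so the returned pages are machine-contiguous
and virt_to_bus yields a handle the device can actually use.
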
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/arch/i386/kernel/process-xen.c	Mon Jun 04 10:05:28 2007 +0100
    29.3 @@ -0,0 +1,853 @@
    29.4 +/*
    29.5 + *  linux/arch/i386/kernel/process.c
    29.6 + *
    29.7 + *  Copyright (C) 1995  Linus Torvalds
    29.8 + *
    29.9 + *  Pentium III FXSR, SSE support
   29.10 + *	Gareth Hughes <gareth@valinux.com>, May 2000
   29.11 + */
   29.12 +
   29.13 +/*
   29.14 + * This file handles the architecture-dependent parts of process handling.
   29.15 + */
   29.16 +
   29.17 +#include <stdarg.h>
   29.18 +
   29.19 +#include <linux/cpu.h>
   29.20 +#include <linux/errno.h>
   29.21 +#include <linux/sched.h>
   29.22 +#include <linux/fs.h>
   29.23 +#include <linux/kernel.h>
   29.24 +#include <linux/mm.h>
   29.25 +#include <linux/elfcore.h>
   29.26 +#include <linux/smp.h>
   29.27 +#include <linux/smp_lock.h>
   29.28 +#include <linux/stddef.h>
   29.29 +#include <linux/slab.h>
   29.30 +#include <linux/vmalloc.h>
   29.31 +#include <linux/user.h>
   29.32 +#include <linux/a.out.h>
   29.33 +#include <linux/interrupt.h>
   29.34 +#include <linux/utsname.h>
   29.35 +#include <linux/delay.h>
   29.36 +#include <linux/reboot.h>
   29.37 +#include <linux/init.h>
   29.38 +#include <linux/mc146818rtc.h>
   29.39 +#include <linux/module.h>
   29.40 +#include <linux/kallsyms.h>
   29.41 +#include <linux/ptrace.h>
   29.42 +#include <linux/random.h>
   29.43 +
   29.44 +#include <asm/uaccess.h>
   29.45 +#include <asm/pgtable.h>
   29.46 +#include <asm/system.h>
   29.47 +#include <asm/io.h>
   29.48 +#include <asm/ldt.h>
   29.49 +#include <asm/processor.h>
   29.50 +#include <asm/i387.h>
   29.51 +#include <asm/desc.h>
   29.52 +#include <asm/vm86.h>
   29.53 +#ifdef CONFIG_MATH_EMULATION
   29.54 +#include <asm/math_emu.h>
   29.55 +#endif
   29.56 +
   29.57 +#include <xen/interface/physdev.h>
   29.58 +#include <xen/interface/vcpu.h>
   29.59 +#include <xen/cpu_hotplug.h>
   29.60 +
   29.61 +#include <linux/err.h>
   29.62 +
   29.63 +#include <asm/tlbflush.h>
   29.64 +#include <asm/cpu.h>
   29.65 +
   29.66 +asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
   29.67 +
   29.68 +static int hlt_counter;
   29.69 +
   29.70 +unsigned long boot_option_idle_override = 0;
   29.71 +EXPORT_SYMBOL(boot_option_idle_override);
   29.72 +
   29.73 +/*
   29.74 + * Return saved PC of a blocked thread.
   29.75 + */
   29.76 +unsigned long thread_saved_pc(struct task_struct *tsk)
   29.77 +{
   29.78 +	return ((unsigned long *)tsk->thread.esp)[3];
   29.79 +}
   29.80 +
   29.81 +/*
   29.82 + * Power management idle function, if any.
   29.83 + */
   29.84 +void (*pm_idle)(void);
   29.85 +EXPORT_SYMBOL(pm_idle);
   29.86 +static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
   29.87 +
   29.88 +void disable_hlt(void)
   29.89 +{
   29.90 +	hlt_counter++;