direct-io.hg

changeset 6512:e3d811cca4e1

merge.
author kevin@localhost.localdomain
date Tue Aug 16 12:15:23 2005 +0800 (2005-08-16)
parents b5196e075602 a6bb47919161
children 1ae656509f02
files .hgignore Config.mk Makefile buildconfigs/Rules.mk buildconfigs/mk.linux-2.6-xen buildconfigs/mk.linux-2.6-xen0 buildconfigs/mk.linux-2.6-xenU docs/misc/shype4xen_readme.txt docs/src/user.tex linux-2.4-xen-sparse/arch/xen/Makefile linux-2.4-xen-sparse/arch/xen/config.in linux-2.4-xen-sparse/arch/xen/kernel/time.c linux-2.4-xen-sparse/include/asm-xen/bugs.h linux-2.4-xen-sparse/include/asm-xen/fixmap.h linux-2.4-xen-sparse/include/asm-xen/highmem.h linux-2.4-xen-sparse/include/asm-xen/hw_irq.h linux-2.4-xen-sparse/include/asm-xen/io.h linux-2.4-xen-sparse/include/asm-xen/irq.h linux-2.4-xen-sparse/include/asm-xen/mmu_context.h linux-2.4-xen-sparse/include/asm-xen/page.h linux-2.4-xen-sparse/include/asm-xen/pci.h linux-2.4-xen-sparse/include/asm-xen/pgalloc.h linux-2.4-xen-sparse/include/asm-xen/pgtable.h linux-2.4-xen-sparse/include/asm-xen/processor.h linux-2.4-xen-sparse/include/asm-xen/segment.h linux-2.4-xen-sparse/include/asm-xen/smp.h linux-2.4-xen-sparse/include/asm-xen/system.h linux-2.4-xen-sparse/include/asm-xen/vga.h linux-2.4-xen-sparse/include/linux/blk.h linux-2.4-xen-sparse/include/linux/highmem.h linux-2.4-xen-sparse/include/linux/irq.h linux-2.4-xen-sparse/include/linux/mm.h linux-2.4-xen-sparse/include/linux/sched.h linux-2.4-xen-sparse/include/linux/skbuff.h linux-2.4-xen-sparse/include/linux/timer.h linux-2.4-xen-sparse/mkbuildtree linux-2.6-xen-sparse/arch/xen/Kconfig linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c linux-2.6-xen-sparse/arch/xen/kernel/reboot.c linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig linux-2.6-xen-sparse/arch/xen/x86_64/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c linux-2.6-xen-sparse/drivers/xen/Makefile linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c linux-2.6-xen-sparse/drivers/xen/blkback/Makefile linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6-xen-sparse/drivers/xen/blkback/common.h linux-2.6-xen-sparse/drivers/xen/blkback/control.c linux-2.6-xen-sparse/drivers/xen/blkback/interface.c linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6-xen-sparse/drivers/xen/blkfront/block.h linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c linux-2.6-xen-sparse/drivers/xen/netback/common.h linux-2.6-xen-sparse/drivers/xen/netback/interface.c linux-2.6-xen-sparse/drivers/xen/netback/netback.c linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6-xen-sparse/drivers/xen/usbback/common.h linux-2.6-xen-sparse/drivers/xen/usbback/interface.c linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c linux-2.6-xen-sparse/include/asm-generic/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h linux-2.6-xen-sparse/include/asm-xen/balloon.h linux-2.6-xen-sparse/include/asm-xen/ctrl_if.h linux-2.6-xen-sparse/include/asm-xen/evtchn.h linux-2.6-xen-sparse/include/asm-xen/gnttab.h linux-2.6-xen-sparse/include/asm-xen/hypervisor.h linux-2.6-xen-sparse/include/asm-xen/xenbus.h linux-2.6-xen-sparse/include/linux/highmem.h linux-2.6-xen-sparse/include/linux/mm.h linux-2.6-xen-sparse/kernel/irq/manage.c linux-2.6-xen-sparse/mkbuildtree linux-2.6-xen-sparse/mm/highmem.c linux-2.6-xen-sparse/mm/memory.c patches/linux-2.6.12/smp-alts.patch tools/Makefile tools/blktap/blktaplib.c tools/debugger/gdb/gdbbuild tools/debugger/libxendebug/xendebug.c tools/debugger/pdb/Makefile tools/debugger/pdb/PDB.ml tools/debugger/pdb/Process.ml tools/debugger/pdb/Process.mli tools/debugger/pdb/Util.ml tools/debugger/pdb/Xen_domain.ml tools/debugger/pdb/Xen_domain.mli tools/debugger/pdb/debugger.ml tools/debugger/pdb/linux-2.6-module/Makefile 
tools/debugger/pdb/linux-2.6-module/debug.c tools/debugger/pdb/linux-2.6-module/module.c tools/debugger/pdb/linux-2.6-module/pdb_debug.h tools/debugger/pdb/linux-2.6-module/pdb_module.h tools/debugger/pdb/linux-2.6-patches/Makefile tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch tools/debugger/pdb/linux-2.6-patches/kdebug.patch tools/debugger/pdb/linux-2.6-patches/makefile.patch tools/debugger/pdb/linux-2.6-patches/ptrace.patch tools/debugger/pdb/linux-2.6-patches/traps.patch tools/debugger/pdb/pdb_caml_process.c tools/debugger/pdb/pdb_caml_xcs.c tools/debugger/pdb/pdb_caml_xen.h tools/debugger/pdb/readme tools/debugger/pdb/server.ml tools/examples/Makefile tools/examples/xend-config.sxp tools/examples/xmexample.vmx tools/firmware/rombios/rombios.c tools/ioemu/hw/ide.c tools/ioemu/hw/pckbd.c tools/ioemu/monitor.c tools/ioemu/target-i386-dm/Makefile tools/ioemu/target-i386-dm/helper2.c tools/ioemu/target-i386-dm/qemu-dm.debug tools/ioemu/vl.c tools/ioemu/vl.h tools/ioemu/vnc.c tools/libxc/Makefile tools/libxc/linux_boot_params.h tools/libxc/xc.h tools/libxc/xc_core.c tools/libxc/xc_domain.c tools/libxc/xc_gnttab.c tools/libxc/xc_linux_build.c tools/libxc/xc_linux_restore.c tools/libxc/xc_linux_save.c tools/libxc/xc_load_elf.c tools/libxc/xc_private.c tools/libxc/xc_private.h tools/libxc/xc_ptrace.c tools/libxc/xc_vmx_build.c tools/misc/Makefile tools/misc/policyprocessor/XmlToBinInterface.java tools/misc/xend tools/python/setup.py tools/python/xen/lowlevel/xc/xc.c tools/python/xen/lowlevel/xs/xs.c tools/python/xen/lowlevel/xu/xu.c tools/python/xen/sv/CreateDomain.py tools/python/xen/sv/DomInfo.py tools/python/xen/sv/GenTabbed.py tools/python/xen/sv/HTMLBase.py tools/python/xen/sv/Main.py tools/python/xen/sv/NodeInfo.py tools/python/xen/sv/RestoreDomain.py tools/python/xen/sv/Wizard.py tools/python/xen/sv/__init__.py tools/python/xen/sv/util.py tools/python/xen/web/SrvBase.py tools/python/xen/web/SrvDir.py tools/python/xen/web/__init__.py tools/python/xen/web/connection.py tools/python/xen/web/httpserver.py tools/python/xen/web/protocol.py tools/python/xen/web/reactor.py tools/python/xen/web/resource.py tools/python/xen/web/static.py tools/python/xen/web/tcp.py tools/python/xen/web/unix.py tools/python/xen/xend/Args.py tools/python/xen/xend/EventServer.py tools/python/xen/xend/PrettyPrint.py tools/python/xen/xend/Vifctl.py tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendClient.py tools/python/xen/xend/XendDB.py tools/python/xen/xend/XendDmesg.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/XendError.py tools/python/xen/xend/XendLogging.py tools/python/xen/xend/XendNode.py tools/python/xen/xend/XendProtocol.py tools/python/xen/xend/XendRoot.py tools/python/xen/xend/XendVnet.py tools/python/xen/xend/encode.py tools/python/xen/xend/image.py tools/python/xen/xend/scheduler.py tools/python/xen/xend/server/SrvDaemon.py tools/python/xen/xend/server/SrvDmesg.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xend/server/SrvDomainDir.py tools/python/xen/xend/server/SrvNode.py tools/python/xen/xend/server/SrvRoot.py tools/python/xen/xend/server/SrvServer.py tools/python/xen/xend/server/SrvVnetDir.py tools/python/xen/xend/server/SrvXendLog.py tools/python/xen/xend/server/blkif.py tools/python/xen/xend/server/channel.py tools/python/xen/xend/server/controller.py tools/python/xen/xend/server/event.py tools/python/xen/xend/server/messages.py tools/python/xen/xend/server/netif.py tools/python/xen/xend/server/params.py 
tools/python/xen/xend/server/pciif.py tools/python/xen/xend/server/relocate.py tools/python/xen/xend/sxp.py tools/python/xen/xend/uuid.py tools/python/xen/xend/xenstore/__init__.py tools/python/xen/xend/xenstore/xsnode.py tools/python/xen/xend/xenstore/xsobj.py tools/python/xen/xend/xenstore/xsresource.py tools/python/xen/xm/create.py tools/python/xen/xm/destroy.py tools/python/xen/xm/help.py tools/python/xen/xm/main.py tools/python/xen/xm/migrate.py tools/python/xen/xm/opts.py tools/python/xen/xm/shutdown.py tools/python/xen/xm/sysrq.py tools/security/secpol_tool.c tools/sv/Makefile tools/sv/images/destroy.png tools/sv/images/finish.png tools/sv/images/next.png tools/sv/images/pause.png tools/sv/images/previous.png tools/sv/images/reboot.png tools/sv/images/shutdown.png tools/sv/images/small-destroy.png tools/sv/images/small-pause.png tools/sv/images/small-unpause.png tools/sv/images/unpause.png tools/sv/images/xen.png tools/sv/inc/script.js tools/sv/inc/style.css tools/sv/index.psp tools/xcs/xcs.h tools/xcutils/xc_restore.c tools/xenstore/Makefile tools/xenstore/TODO tools/xenstore/testsuite/test.sh tools/xenstore/utils.c tools/xenstore/utils.h tools/xenstore/xenstored_core.c tools/xenstore/xenstored_core.h tools/xenstore/xenstored_domain.c tools/xenstore/xenstored_domain.h tools/xenstore/xenstored_transaction.c tools/xenstore/xenstored_transaction.h tools/xenstore/xenstored_watch.c tools/xenstore/xenstored_watch.h tools/xenstore/xs.c tools/xenstore/xs.h tools/xenstore/xs_lib.c tools/xenstore/xs_lib.h tools/xenstore/xs_random.c tools/xenstore/xs_test.c tools/xentrace/xentrace.c xen/Makefile xen/Rules.mk xen/acm/acm_core.c xen/acm/acm_policy.c xen/arch/ia64/Makefile xen/arch/ia64/Rules.mk xen/arch/ia64/asm-offsets.c xen/arch/ia64/asm-xsi-offsets.c xen/arch/ia64/dom_fw.c xen/arch/ia64/domain.c xen/arch/ia64/grant_table.c xen/arch/ia64/hypercall.c xen/arch/ia64/hyperprivop.S xen/arch/ia64/linux-xen/efi.c xen/arch/ia64/linux-xen/entry.S xen/arch/ia64/linux-xen/entry.h xen/arch/ia64/linux-xen/head.S xen/arch/ia64/linux-xen/irq_ia64.c xen/arch/ia64/linux-xen/mm_contig.c xen/arch/ia64/linux-xen/pal.S xen/arch/ia64/linux-xen/setup.c xen/arch/ia64/linux-xen/time.c xen/arch/ia64/linux-xen/tlb.c xen/arch/ia64/linux-xen/unaligned.c xen/arch/ia64/linux/cmdline.c xen/arch/ia64/linux/efi_stub.S xen/arch/ia64/linux/extable.c xen/arch/ia64/linux/hpsim.S xen/arch/ia64/linux/ia64_ksyms.c xen/arch/ia64/linux/irq_lsapic.c xen/arch/ia64/linux/lib/Makefile xen/arch/ia64/linux/lib/bitop.c xen/arch/ia64/linux/lib/carta_random.S xen/arch/ia64/linux/lib/checksum.c xen/arch/ia64/linux/lib/clear_page.S xen/arch/ia64/linux/lib/clear_user.S xen/arch/ia64/linux/lib/copy_page.S xen/arch/ia64/linux/lib/copy_page_mck.S xen/arch/ia64/linux/lib/copy_user.S xen/arch/ia64/linux/lib/csum_partial_copy.c xen/arch/ia64/linux/lib/dec_and_lock.c xen/arch/ia64/linux/lib/do_csum.S xen/arch/ia64/linux/lib/flush.S xen/arch/ia64/linux/lib/idiv32.S xen/arch/ia64/linux/lib/idiv64.S xen/arch/ia64/linux/lib/io.c xen/arch/ia64/linux/lib/ip_fast_csum.S xen/arch/ia64/linux/lib/memcpy.S xen/arch/ia64/linux/lib/memcpy_mck.S xen/arch/ia64/linux/lib/memset.S xen/arch/ia64/linux/lib/strlen.S xen/arch/ia64/linux/lib/strlen_user.S xen/arch/ia64/linux/lib/strncpy_from_user.S xen/arch/ia64/linux/lib/strnlen_user.S xen/arch/ia64/linux/lib/xor.S xen/arch/ia64/linux/linuxextable.c xen/arch/ia64/linux/machvec.c xen/arch/ia64/linux/minstate.h xen/arch/ia64/linux/patch.c xen/arch/ia64/linux/pcdp.h xen/arch/ia64/linux/sal.c xen/arch/ia64/mmio.c 
xen/arch/ia64/pal_emul.c xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c xen/arch/ia64/patch/linux-2.6.11/kregs.h xen/arch/ia64/pcdp.c xen/arch/ia64/process.c xen/arch/ia64/regionreg.c xen/arch/ia64/tools/mkbuildtree xen/arch/ia64/vcpu.c xen/arch/ia64/vlsapic.c xen/arch/ia64/vmmu.c xen/arch/ia64/vmx_hypercall.c xen/arch/ia64/vmx_ivt.S xen/arch/ia64/vmx_support.c xen/arch/ia64/vmx_vcpu.c xen/arch/ia64/vmx_virt.c xen/arch/ia64/vtlb.c xen/arch/ia64/xen.lds.S xen/arch/ia64/xenasm.S xen/arch/ia64/xenmem.c xen/arch/ia64/xenmisc.c xen/arch/ia64/xensetup.c xen/arch/ia64/xentime.c xen/arch/x86/Makefile xen/arch/x86/acpi/boot.c xen/arch/x86/apic.c xen/arch/x86/audit.c xen/arch/x86/cpu/common.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/genapic/es7000plat.c xen/arch/x86/i8259.c xen/arch/x86/io_apic.c xen/arch/x86/mm.c xen/arch/x86/mpparse.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/shadow32.c xen/arch/x86/shadow_public.c xen/arch/x86/smpboot.c xen/arch/x86/time.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/vmx_intercept.c xen/arch/x86/vmx_io.c xen/arch/x86/vmx_platform.c xen/arch/x86/vmx_vmcs.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c xen/common/ac_timer.c xen/common/dom0_ops.c xen/common/dom_mem_ops.c xen/common/domain.c xen/common/grant_table.c xen/common/page_alloc.c xen/common/perfc.c xen/common/sched_sedf.c xen/common/symbols.c xen/common/xmalloc.c xen/drivers/char/console.c xen/drivers/char/ns16550.c xen/include/acm/acm_core.h xen/include/acm/acm_hooks.h xen/include/asm-ia64/config.h xen/include/asm-ia64/domain.h xen/include/asm-ia64/event.h xen/include/asm-ia64/ia64_int.h xen/include/asm-ia64/linux-xen/asm/gcc_intrin.h xen/include/asm-ia64/linux-xen/asm/hpsim_ssc.h xen/include/asm-ia64/linux-xen/asm/ia64regs.h xen/include/asm-ia64/linux-xen/asm/io.h xen/include/asm-ia64/linux-xen/asm/kregs.h xen/include/asm-ia64/linux-xen/asm/mca_asm.h xen/include/asm-ia64/linux-xen/asm/page.h xen/include/asm-ia64/linux-xen/asm/pal.h xen/include/asm-ia64/linux-xen/asm/pgalloc.h xen/include/asm-ia64/linux-xen/asm/processor.h xen/include/asm-ia64/linux-xen/asm/ptrace.h xen/include/asm-ia64/linux-xen/asm/sn/sn_sal.h xen/include/asm-ia64/linux-xen/asm/system.h xen/include/asm-ia64/linux-xen/asm/types.h xen/include/asm-ia64/linux-xen/asm/uaccess.h xen/include/asm-ia64/linux-xen/linux/cpumask.h xen/include/asm-ia64/linux-xen/linux/hardirq.h xen/include/asm-ia64/linux-xen/linux/interrupt.h xen/include/asm-ia64/linux/asm-generic/bug.h xen/include/asm-ia64/linux/asm-generic/div64.h xen/include/asm-ia64/linux/asm-generic/errno-base.h xen/include/asm-ia64/linux/asm-generic/errno.h xen/include/asm-ia64/linux/asm-generic/ide_iops.h xen/include/asm-ia64/linux/asm-generic/iomap.h xen/include/asm-ia64/linux/asm-generic/pci-dma-compat.h xen/include/asm-ia64/linux/asm-generic/pci.h xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h xen/include/asm-ia64/linux/asm-generic/pgtable.h xen/include/asm-ia64/linux/asm-generic/sections.h xen/include/asm-ia64/linux/asm-generic/topology.h xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h xen/include/asm-ia64/linux/asm/acpi.h xen/include/asm-ia64/linux/asm/asmmacro.h xen/include/asm-ia64/linux/asm/atomic.h xen/include/asm-ia64/linux/asm/bitops.h xen/include/asm-ia64/linux/asm/break.h xen/include/asm-ia64/linux/asm/bug.h xen/include/asm-ia64/linux/asm/byteorder.h xen/include/asm-ia64/linux/asm/cache.h 
xen/include/asm-ia64/linux/asm/cacheflush.h xen/include/asm-ia64/linux/asm/checksum.h xen/include/asm-ia64/linux/asm/current.h xen/include/asm-ia64/linux/asm/delay.h xen/include/asm-ia64/linux/asm/desc.h xen/include/asm-ia64/linux/asm/div64.h xen/include/asm-ia64/linux/asm/dma-mapping.h xen/include/asm-ia64/linux/asm/dma.h xen/include/asm-ia64/linux/asm/errno.h xen/include/asm-ia64/linux/asm/fpu.h xen/include/asm-ia64/linux/asm/hardirq.h xen/include/asm-ia64/linux/asm/hdreg.h xen/include/asm-ia64/linux/asm/hw_irq.h xen/include/asm-ia64/linux/asm/ia32.h xen/include/asm-ia64/linux/asm/intrinsics.h xen/include/asm-ia64/linux/asm/ioctl.h xen/include/asm-ia64/linux/asm/irq.h xen/include/asm-ia64/linux/asm/linkage.h xen/include/asm-ia64/linux/asm/machvec.h xen/include/asm-ia64/linux/asm/machvec_hpsim.h xen/include/asm-ia64/linux/asm/mca.h xen/include/asm-ia64/linux/asm/meminit.h xen/include/asm-ia64/linux/asm/mman.h xen/include/asm-ia64/linux/asm/module.h xen/include/asm-ia64/linux/asm/numa.h xen/include/asm-ia64/linux/asm/param.h xen/include/asm-ia64/linux/asm/patch.h xen/include/asm-ia64/linux/asm/pci.h xen/include/asm-ia64/linux/asm/pdb.h xen/include/asm-ia64/linux/asm/percpu.h xen/include/asm-ia64/linux/asm/pgtable.h xen/include/asm-ia64/linux/asm/ptrace_offsets.h xen/include/asm-ia64/linux/asm/rse.h xen/include/asm-ia64/linux/asm/rwsem.h xen/include/asm-ia64/linux/asm/sal.h xen/include/asm-ia64/linux/asm/scatterlist.h xen/include/asm-ia64/linux/asm/sections.h xen/include/asm-ia64/linux/asm/semaphore.h xen/include/asm-ia64/linux/asm/setup.h xen/include/asm-ia64/linux/asm/sigcontext.h xen/include/asm-ia64/linux/asm/signal.h xen/include/asm-ia64/linux/asm/smp.h xen/include/asm-ia64/linux/asm/sn/arch.h xen/include/asm-ia64/linux/asm/sn/geo.h xen/include/asm-ia64/linux/asm/sn/nodepda.h xen/include/asm-ia64/linux/asm/sn/sn_cpuid.h xen/include/asm-ia64/linux/asm/spinlock.h xen/include/asm-ia64/linux/asm/string.h xen/include/asm-ia64/linux/asm/thread_info.h xen/include/asm-ia64/linux/asm/timex.h xen/include/asm-ia64/linux/asm/tlbflush.h xen/include/asm-ia64/linux/asm/topology.h xen/include/asm-ia64/linux/asm/unaligned.h xen/include/asm-ia64/linux/asm/unistd.h xen/include/asm-ia64/linux/asm/unwind.h xen/include/asm-ia64/linux/asm/ustack.h xen/include/asm-ia64/linux/bcd.h xen/include/asm-ia64/linux/bitmap.h xen/include/asm-ia64/linux/bitops.h xen/include/asm-ia64/linux/bootmem.h xen/include/asm-ia64/linux/byteorder/generic.h xen/include/asm-ia64/linux/byteorder/little_endian.h xen/include/asm-ia64/linux/byteorder/swab.h xen/include/asm-ia64/linux/cpu.h xen/include/asm-ia64/linux/device.h xen/include/asm-ia64/linux/dma-mapping.h xen/include/asm-ia64/linux/efi.h xen/include/asm-ia64/linux/err.h xen/include/asm-ia64/linux/file.h xen/include/asm-ia64/linux/gfp.h xen/include/asm-ia64/linux/initrd.h xen/include/asm-ia64/linux/ioport.h xen/include/asm-ia64/linux/jiffies.h xen/include/asm-ia64/linux/kernel_stat.h xen/include/asm-ia64/linux/kmalloc_sizes.h xen/include/asm-ia64/linux/linkage.h xen/include/asm-ia64/linux/linuxtime.h xen/include/asm-ia64/linux/mmzone.h xen/include/asm-ia64/linux/module.h xen/include/asm-ia64/linux/numa.h xen/include/asm-ia64/linux/page-flags.h xen/include/asm-ia64/linux/percpu.h xen/include/asm-ia64/linux/preempt.h xen/include/asm-ia64/linux/proc_fs.h xen/include/asm-ia64/linux/profile.h xen/include/asm-ia64/linux/ptrace.h xen/include/asm-ia64/linux/random.h xen/include/asm-ia64/linux/rbtree.h xen/include/asm-ia64/linux/rtc.h xen/include/asm-ia64/linux/rwsem.h 
xen/include/asm-ia64/linux/seq_file.h xen/include/asm-ia64/linux/seqlock.h xen/include/asm-ia64/linux/serial.h xen/include/asm-ia64/linux/serial_core.h xen/include/asm-ia64/linux/signal.h xen/include/asm-ia64/linux/slab.h xen/include/asm-ia64/linux/smp_lock.h xen/include/asm-ia64/linux/stddef.h xen/include/asm-ia64/linux/swap.h xen/include/asm-ia64/linux/thread_info.h xen/include/asm-ia64/linux/threads.h xen/include/asm-ia64/linux/timex.h xen/include/asm-ia64/linux/topology.h xen/include/asm-ia64/linux/tty.h xen/include/asm-ia64/linux/wait.h xen/include/asm-ia64/privop.h xen/include/asm-ia64/regionreg.h xen/include/asm-ia64/regs.h xen/include/asm-ia64/vcpu.h xen/include/asm-ia64/vmx.h xen/include/asm-ia64/vmx_uaccess.h xen/include/asm-ia64/vmx_vcpu.h xen/include/asm-ia64/vmx_vpd.h xen/include/asm-ia64/xensystem.h xen/include/asm-x86/apicdef.h xen/include/asm-x86/config.h xen/include/asm-x86/event.h xen/include/asm-x86/fixmap.h xen/include/asm-x86/genapic.h xen/include/asm-x86/hpet.h xen/include/asm-x86/io.h xen/include/asm-x86/mach-bigsmp/mach_apic.h xen/include/asm-x86/mach-default/mach_apic.h xen/include/asm-x86/mach-es7000/mach_apic.h xen/include/asm-x86/mach-generic/mach_apic.h xen/include/asm-x86/mach-summit/mach_apic.h xen/include/asm-x86/mach-summit/mach_mpparse.h xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/shadow.h xen/include/asm-x86/shadow_64.h xen/include/asm-x86/shadow_public.h xen/include/asm-x86/time.h xen/include/asm-x86/types.h xen/include/asm-x86/vmx.h xen/include/asm-x86/vmx_virpit.h xen/include/asm-x86/vmx_vmcs.h xen/include/asm-x86/x86_32/uaccess.h xen/include/asm-x86/x86_64/page.h xen/include/public/arch-ia64.h xen/include/public/dom0_ops.h xen/include/public/grant_table.h xen/include/public/io/blkif.h xen/include/public/io/domain_controller.h xen/include/public/io/netif.h xen/include/public/trace.h xen/include/public/xen.h xen/include/xen/ac_timer.h xen/include/xen/domain.h xen/include/xen/event.h xen/include/xen/grant_table.h xen/include/xen/mm.h xen/include/xen/perfc_defn.h xen/include/xen/sched.h xen/include/xen/serial.h xen/include/xen/symbols.h xen/include/xen/time.h xen/include/xen/trace.h xen/tools/Makefile xen/tools/symbols.c
line diff
   317.1 --- a/xen/arch/ia64/Makefile	Mon Aug 15 12:28:07 2005 -0800
   317.2 +++ b/xen/arch/ia64/Makefile	Tue Aug 16 12:15:23 2005 +0800
   317.3 @@ -1,5 +1,7 @@
   317.4  include $(BASEDIR)/Rules.mk
   317.5  
   317.6 +VPATH = linux linux-xen
   317.7 +
   317.8  # libs-y	+= arch/ia64/lib/lib.a
   317.9  
  317.10  OBJS = xensetup.o setup.o time.o irq.o ia64_ksyms.o process.o smp.o \
  317.11 @@ -75,7 +77,7 @@ xen.lds.s: xen.lds.S
  317.12  		-o xen.lds.s xen.lds.S
  317.13  
  317.14  ia64lib.o:
  317.15 -	$(MAKE) -C lib && cp lib/ia64lib.o .
  317.16 +	$(MAKE) -C linux/lib && cp linux/lib/ia64lib.o .
  317.17  
  317.18  clean:
  317.19  	rm -f *.o *~ core  xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s
   318.1 --- a/xen/arch/ia64/Rules.mk	Mon Aug 15 12:28:07 2005 -0800
   318.2 +++ b/xen/arch/ia64/Rules.mk	Tue Aug 16 12:15:23 2005 +0800
   318.3 @@ -6,14 +6,21 @@ ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
   318.4  CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
   318.5  endif
   318.6  AFLAGS  += -D__ASSEMBLY__
   318.7 -CPPFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64
   318.8 +CPPFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64 	\
   318.9 +             -I$(BASEDIR)/include/asm-ia64/linux 			\
  318.10 +	     -I$(BASEDIR)/include/asm-ia64/linux-xen 			\
  318.11 +             -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
  318.12 +
  318.13  CFLAGS  := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
  318.14  #CFLAGS  += -O3		# -O3 over-inlines making debugging tough!
  318.15  CFLAGS  += -O2		# but no optimization causes compile errors!
  318.16  #CFLAGS  += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE)
  318.17  CFLAGS  += -iwithprefix include -Wall
  318.18  CFLAGS  += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
  318.19 -CFLAGS  += -I$(BASEDIR)/include/asm-ia64
  318.20 +CFLAGS  += -I$(BASEDIR)/include/asm-ia64 -I$(BASEDIR)/include/asm-ia64/linux \
  318.21 +           -I$(BASEDIR)/include/asm-ia64/linux 				\
  318.22 +           -I$(BASEDIR)/include/asm-ia64/linux-xen 			\
  318.23 +           -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
  318.24  CFLAGS  += -Wno-pointer-arith -Wredundant-decls
  318.25  CFLAGS  += -DIA64 -DXEN -DLINUX_2_6
  318.26  CFLAGS	+= -ffixed-r13 -mfixed-range=f12-f15,f32-f127
   319.1 --- a/xen/arch/ia64/asm-offsets.c	Mon Aug 15 12:28:07 2005 -0800
   319.2 +++ b/xen/arch/ia64/asm-offsets.c	Tue Aug 16 12:15:23 2005 +0800
   319.3 @@ -139,14 +139,14 @@ void foo(void)
   319.4  	DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
   319.5  	DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
   319.6  #ifdef CONFIG_VTI
   319.7 -	DEFINE(IA64_PT_REGS_R4_OFFSET, offsetof (struct xen_regs, r4));
   319.8 -	DEFINE(IA64_PT_REGS_R5_OFFSET, offsetof (struct xen_regs, r5));
   319.9 -	DEFINE(IA64_PT_REGS_R6_OFFSET, offsetof (struct xen_regs, r6));
  319.10 -	DEFINE(IA64_PT_REGS_R7_OFFSET, offsetof (struct xen_regs, r7));
  319.11 -	DEFINE(IA64_PT_REGS_CR_IIPA_OFFSET, offsetof (struct xen_regs, cr_iipa));
  319.12 -	DEFINE(IA64_PT_REGS_CR_ISR_OFFSET, offsetof (struct xen_regs, cr_isr));
  319.13 -	DEFINE(IA64_PT_REGS_EML_UNAT_OFFSET, offsetof (struct xen_regs, eml_unat));
  319.14 -	DEFINE(IA64_PT_REGS_RFI_PFS_OFFSET, offsetof (struct xen_regs, rfi_pfs));
  319.15 +	DEFINE(IA64_PT_REGS_R4_OFFSET, offsetof (struct pt_regs, r4));
  319.16 +	DEFINE(IA64_PT_REGS_R5_OFFSET, offsetof (struct pt_regs, r5));
  319.17 +	DEFINE(IA64_PT_REGS_R6_OFFSET, offsetof (struct pt_regs, r6));
  319.18 +	DEFINE(IA64_PT_REGS_R7_OFFSET, offsetof (struct pt_regs, r7));
  319.19 +	DEFINE(IA64_PT_REGS_CR_IIPA_OFFSET, offsetof (struct pt_regs, cr_iipa));
  319.20 +	DEFINE(IA64_PT_REGS_CR_ISR_OFFSET, offsetof (struct pt_regs, cr_isr));
  319.21 +	DEFINE(IA64_PT_REGS_EML_UNAT_OFFSET, offsetof (struct pt_regs, eml_unat));
  319.22 +	DEFINE(IA64_PT_REGS_RFI_PFS_OFFSET, offsetof (struct pt_regs, rfi_pfs));
  319.23  	DEFINE(RFI_IIP_OFFSET, offsetof(struct vcpu, arch.arch_vmx.rfi_iip));
  319.24  	DEFINE(RFI_IPSR_OFFSET, offsetof(struct vcpu, arch.arch_vmx.rfi_ipsr));
  319.25  	DEFINE(RFI_IFS_OFFSET,offsetof(struct vcpu ,arch.arch_vmx.rfi_ifs));
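
For readers of the hunk above: the DEFINE() lines follow the usual asm-offsets idiom, in which offsetof() constants are emitted as marker lines in compiler-generated assembly and later turned into #defines that assembly sources such as entry.S can include. A minimal, self-contained sketch of that idiom is below; struct pt_regs_demo and the EX_* symbol names are invented for illustration and are not part of this tree.

/* Sketch of the asm-offsets pattern: build with "gcc -S" and post-process
 * the "->" marker lines into #define statements for assembly code. */
#include <stddef.h>

struct pt_regs_demo {
        unsigned long r4;
        unsigned long r5;
};

#define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))

void foo(void)
{
        DEFINE(EX_PT_REGS_R4_OFFSET, offsetof(struct pt_regs_demo, r4));
        DEFINE(EX_PT_REGS_R5_OFFSET, offsetof(struct pt_regs_demo, r5));
}
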
   322.1 --- a/xen/arch/ia64/domain.c	Mon Aug 15 12:28:07 2005 -0800
   322.2 +++ b/xen/arch/ia64/domain.c	Tue Aug 16 12:15:23 2005 +0800
   322.3 @@ -450,7 +450,7 @@ void new_thread(struct vcpu *v,
   322.4                  unsigned long start_info)
   322.5  {
   322.6  	struct domain *d = v->domain;
   322.7 -	struct xen_regs *regs;
   322.8 +	struct pt_regs *regs;
   322.9  	struct ia64_boot_param *bp;
  322.10  	extern char saved_command_line[];
  322.11  	//char *dom0_cmdline = "BOOT_IMAGE=scsi0:\EFI\redhat\xenlinux nomca root=/dev/sdb1 ro";
   326.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   326.2 +++ b/xen/arch/ia64/linux-xen/efi.c	Tue Aug 16 12:15:23 2005 +0800
   326.3 @@ -0,0 +1,866 @@
   326.4 +/*
   326.5 + * Extensible Firmware Interface
   326.6 + *
   326.7 + * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
   326.8 + *
   326.9 + * Copyright (C) 1999 VA Linux Systems
  326.10 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  326.11 + * Copyright (C) 1999-2003 Hewlett-Packard Co.
  326.12 + *	David Mosberger-Tang <davidm@hpl.hp.com>
  326.13 + *	Stephane Eranian <eranian@hpl.hp.com>
  326.14 + *
  326.15 + * All EFI Runtime Services are not implemented yet as EFI only
  326.16 + * supports physical mode addressing on SoftSDV. This is to be fixed
  326.17 + * in a future version.  --drummond 1999-07-20
  326.18 + *
  326.19 + * Implemented EFI runtime services and virtual mode calls.  --davidm
  326.20 + *
  326.21 + * Goutham Rao: <goutham.rao@intel.com>
  326.22 + *	Skip non-WB memory and ignore empty memory ranges.
  326.23 + */
  326.24 +#include <linux/config.h>
  326.25 +#include <linux/module.h>
  326.26 +#include <linux/kernel.h>
  326.27 +#include <linux/init.h>
  326.28 +#include <linux/types.h>
  326.29 +#include <linux/time.h>
  326.30 +#include <linux/efi.h>
  326.31 +
  326.32 +#include <asm/io.h>
  326.33 +#include <asm/kregs.h>
  326.34 +#include <asm/meminit.h>
  326.35 +#include <asm/pgtable.h>
  326.36 +#include <asm/processor.h>
  326.37 +#include <asm/mca.h>
  326.38 +
  326.39 +#define EFI_DEBUG	0
  326.40 +
  326.41 +extern efi_status_t efi_call_phys (void *, ...);
  326.42 +
  326.43 +struct efi efi;
  326.44 +EXPORT_SYMBOL(efi);
  326.45 +static efi_runtime_services_t *runtime;
  326.46 +static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
  326.47 +
  326.48 +#define efi_call_virt(f, args...)	(*(f))(args)
  326.49 +
  326.50 +#define STUB_GET_TIME(prefix, adjust_arg)							  \
  326.51 +static efi_status_t										  \
  326.52 +prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc)						  \
  326.53 +{												  \
  326.54 +	struct ia64_fpreg fr[6];								  \
  326.55 +	efi_time_cap_t *atc = NULL;								  \
  326.56 +	efi_status_t ret;									  \
  326.57 +												  \
  326.58 +	if (tc)											  \
  326.59 +		atc = adjust_arg(tc);								  \
  326.60 +	ia64_save_scratch_fpregs(fr);								  \
  326.61 +	ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \
  326.62 +	ia64_load_scratch_fpregs(fr);								  \
  326.63 +	return ret;										  \
  326.64 +}
  326.65 +
  326.66 +#define STUB_SET_TIME(prefix, adjust_arg)							\
  326.67 +static efi_status_t										\
  326.68 +prefix##_set_time (efi_time_t *tm)								\
  326.69 +{												\
  326.70 +	struct ia64_fpreg fr[6];								\
  326.71 +	efi_status_t ret;									\
  326.72 +												\
  326.73 +	ia64_save_scratch_fpregs(fr);								\
  326.74 +	ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), adjust_arg(tm));	\
  326.75 +	ia64_load_scratch_fpregs(fr);								\
  326.76 +	return ret;										\
  326.77 +}
  326.78 +
  326.79 +#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg)						\
  326.80 +static efi_status_t										\
  326.81 +prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm)		\
  326.82 +{												\
  326.83 +	struct ia64_fpreg fr[6];								\
  326.84 +	efi_status_t ret;									\
  326.85 +												\
  326.86 +	ia64_save_scratch_fpregs(fr);								\
  326.87 +	ret = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time),	\
  326.88 +				adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm));	\
  326.89 +	ia64_load_scratch_fpregs(fr);								\
  326.90 +	return ret;										\
  326.91 +}
  326.92 +
  326.93 +#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg)						\
  326.94 +static efi_status_t										\
  326.95 +prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm)					\
  326.96 +{												\
  326.97 +	struct ia64_fpreg fr[6];								\
  326.98 +	efi_time_t *atm = NULL;									\
  326.99 +	efi_status_t ret;									\
 326.100 +												\
 326.101 +	if (tm)											\
 326.102 +		atm = adjust_arg(tm);								\
 326.103 +	ia64_save_scratch_fpregs(fr);								\
 326.104 +	ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time),	\
 326.105 +				enabled, atm);							\
 326.106 +	ia64_load_scratch_fpregs(fr);								\
 326.107 +	return ret;										\
 326.108 +}
 326.109 +
 326.110 +#define STUB_GET_VARIABLE(prefix, adjust_arg)						\
 326.111 +static efi_status_t									\
 326.112 +prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr,		\
 326.113 +		       unsigned long *data_size, void *data)				\
 326.114 +{											\
 326.115 +	struct ia64_fpreg fr[6];							\
 326.116 +	u32 *aattr = NULL;									\
 326.117 +	efi_status_t ret;								\
 326.118 +											\
 326.119 +	if (attr)									\
 326.120 +		aattr = adjust_arg(attr);						\
 326.121 +	ia64_save_scratch_fpregs(fr);							\
 326.122 +	ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable),	\
 326.123 +				adjust_arg(name), adjust_arg(vendor), aattr,		\
 326.124 +				adjust_arg(data_size), adjust_arg(data));		\
 326.125 +	ia64_load_scratch_fpregs(fr);							\
 326.126 +	return ret;									\
 326.127 +}
 326.128 +
 326.129 +#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg)						\
 326.130 +static efi_status_t										\
 326.131 +prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor)	\
 326.132 +{												\
 326.133 +	struct ia64_fpreg fr[6];								\
 326.134 +	efi_status_t ret;									\
 326.135 +												\
 326.136 +	ia64_save_scratch_fpregs(fr);								\
 326.137 +	ret = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable),	\
 326.138 +				adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor));	\
 326.139 +	ia64_load_scratch_fpregs(fr);								\
 326.140 +	return ret;										\
 326.141 +}
 326.142 +
 326.143 +#define STUB_SET_VARIABLE(prefix, adjust_arg)						\
 326.144 +static efi_status_t									\
 326.145 +prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr,	\
 326.146 +		       unsigned long data_size, void *data)				\
 326.147 +{											\
 326.148 +	struct ia64_fpreg fr[6];							\
 326.149 +	efi_status_t ret;								\
 326.150 +											\
 326.151 +	ia64_save_scratch_fpregs(fr);							\
 326.152 +	ret = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable),	\
 326.153 +				adjust_arg(name), adjust_arg(vendor), attr, data_size,	\
 326.154 +				adjust_arg(data));					\
 326.155 +	ia64_load_scratch_fpregs(fr);							\
 326.156 +	return ret;									\
 326.157 +}
 326.158 +
 326.159 +#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg)					\
 326.160 +static efi_status_t										\
 326.161 +prefix##_get_next_high_mono_count (u32 *count)							\
 326.162 +{												\
 326.163 +	struct ia64_fpreg fr[6];								\
 326.164 +	efi_status_t ret;									\
 326.165 +												\
 326.166 +	ia64_save_scratch_fpregs(fr);								\
 326.167 +	ret = efi_call_##prefix((efi_get_next_high_mono_count_t *)				\
 326.168 +				__va(runtime->get_next_high_mono_count), adjust_arg(count));	\
 326.169 +	ia64_load_scratch_fpregs(fr);								\
 326.170 +	return ret;										\
 326.171 +}
 326.172 +
 326.173 +#define STUB_RESET_SYSTEM(prefix, adjust_arg)					\
 326.174 +static void									\
 326.175 +prefix##_reset_system (int reset_type, efi_status_t status,			\
 326.176 +		       unsigned long data_size, efi_char16_t *data)		\
 326.177 +{										\
 326.178 +	struct ia64_fpreg fr[6];						\
 326.179 +	efi_char16_t *adata = NULL;						\
 326.180 +										\
 326.181 +	if (data)								\
 326.182 +		adata = adjust_arg(data);					\
 326.183 +										\
 326.184 +	ia64_save_scratch_fpregs(fr);						\
 326.185 +	efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system),	\
 326.186 +			  reset_type, status, data_size, adata);		\
 326.187 +	/* should not return, but just in case... */				\
 326.188 +	ia64_load_scratch_fpregs(fr);						\
 326.189 +}
 326.190 +
 326.191 +#define phys_ptr(arg)	((__typeof__(arg)) ia64_tpa(arg))
 326.192 +
 326.193 +STUB_GET_TIME(phys, phys_ptr)
 326.194 +STUB_SET_TIME(phys, phys_ptr)
 326.195 +STUB_GET_WAKEUP_TIME(phys, phys_ptr)
 326.196 +STUB_SET_WAKEUP_TIME(phys, phys_ptr)
 326.197 +STUB_GET_VARIABLE(phys, phys_ptr)
 326.198 +STUB_GET_NEXT_VARIABLE(phys, phys_ptr)
 326.199 +STUB_SET_VARIABLE(phys, phys_ptr)
 326.200 +STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr)
 326.201 +STUB_RESET_SYSTEM(phys, phys_ptr)
 326.202 +
 326.203 +#define id(arg)	arg
 326.204 +
 326.205 +STUB_GET_TIME(virt, id)
 326.206 +STUB_SET_TIME(virt, id)
 326.207 +STUB_GET_WAKEUP_TIME(virt, id)
 326.208 +STUB_SET_WAKEUP_TIME(virt, id)
 326.209 +STUB_GET_VARIABLE(virt, id)
 326.210 +STUB_GET_NEXT_VARIABLE(virt, id)
 326.211 +STUB_SET_VARIABLE(virt, id)
 326.212 +STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id)
 326.213 +STUB_RESET_SYSTEM(virt, id)
 326.214 +
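/*
 * Illustrative aside (not part of this changeset): the STUB_* macros above
 * expand one wrapper body twice, once per calling mode.  The "phys" wrappers
 * run before SetVirtualAddressMap and convert pointer arguments with
 * ia64_tpa(); the "virt" wrappers use the identity macro id().  A miniature,
 * standalone version of that token-pasting pattern, with made-up names:
 */
#include <stdio.h>

#define STUB_HELLO(prefix, adjust_arg)                  \
static int prefix##_hello(int x)                        \
{                                                       \
        return adjust_arg(x) + 1;                       \
}

#define demo_phys_adjust(arg)   ((arg) * 2)     /* stand-in for ia64_tpa() */
#define demo_id(arg)            (arg)

STUB_HELLO(phys, demo_phys_adjust)      /* defines phys_hello() */
STUB_HELLO(virt, demo_id)               /* defines virt_hello() */

int main(void)
{
        printf("%d %d\n", phys_hello(3), virt_hello(3));        /* prints "7 4" */
        return 0;
}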
 326.215 +void
 326.216 +efi_gettimeofday (struct timespec *ts)
 326.217 +{
 326.218 +	efi_time_t tm;
 326.219 +
 326.220 +	memset(ts, 0, sizeof(ts));
 326.221 +	if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS)
 326.222 +		return;
 326.223 +
 326.224 +	ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
 326.225 +	ts->tv_nsec = tm.nanosecond;
 326.226 +}
 326.227 +
 326.228 +static int
 326.229 +is_available_memory (efi_memory_desc_t *md)
 326.230 +{
 326.231 +	if (!(md->attribute & EFI_MEMORY_WB))
 326.232 +		return 0;
 326.233 +
 326.234 +	switch (md->type) {
 326.235 +	      case EFI_LOADER_CODE:
 326.236 +	      case EFI_LOADER_DATA:
 326.237 +	      case EFI_BOOT_SERVICES_CODE:
 326.238 +	      case EFI_BOOT_SERVICES_DATA:
 326.239 +	      case EFI_CONVENTIONAL_MEMORY:
 326.240 +		return 1;
 326.241 +	}
 326.242 +	return 0;
 326.243 +}
 326.244 +
 326.245 +/*
 326.246 + * Trim descriptor MD so its starts at address START_ADDR.  If the descriptor covers
 326.247 + * memory that is normally available to the kernel, issue a warning that some memory
 326.248 + * is being ignored.
 326.249 + */
 326.250 +static void
 326.251 +trim_bottom (efi_memory_desc_t *md, u64 start_addr)
 326.252 +{
 326.253 +	u64 num_skipped_pages;
 326.254 +
 326.255 +	if (md->phys_addr >= start_addr || !md->num_pages)
 326.256 +		return;
 326.257 +
 326.258 +	num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
 326.259 +	if (num_skipped_pages > md->num_pages)
 326.260 +		num_skipped_pages = md->num_pages;
 326.261 +
 326.262 +	if (is_available_memory(md))
 326.263 +		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
 326.264 +		       "at 0x%lx\n", __FUNCTION__,
 326.265 +		       (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
 326.266 +		       md->phys_addr, start_addr - IA64_GRANULE_SIZE);
 326.267 +	/*
 326.268 +	 * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
 326.269 +	 * descriptor list to become unsorted.  In such a case, md->num_pages will be
 326.270 +	 * zero, so the Right Thing will happen.
 326.271 +	 */
 326.272 +	md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
 326.273 +	md->num_pages -= num_skipped_pages;
 326.274 +}
 326.275 +
 326.276 +static void
 326.277 +trim_top (efi_memory_desc_t *md, u64 end_addr)
 326.278 +{
 326.279 +	u64 num_dropped_pages, md_end_addr;
 326.280 +
 326.281 +	md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
 326.282 +
 326.283 +	if (md_end_addr <= end_addr || !md->num_pages)
 326.284 +		return;
 326.285 +
 326.286 +	num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
 326.287 +	if (num_dropped_pages > md->num_pages)
 326.288 +		num_dropped_pages = md->num_pages;
 326.289 +
 326.290 +	if (is_available_memory(md))
 326.291 +		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
 326.292 +		       "at 0x%lx\n", __FUNCTION__,
 326.293 +		       (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
 326.294 +		       md->phys_addr, end_addr);
 326.295 +	md->num_pages -= num_dropped_pages;
 326.296 +}
 326.297 +
 326.298 +/*
 326.299 + * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
 326.300 + * has memory that is available for OS use.
 326.301 + */
 326.302 +void
 326.303 +efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 326.304 +{
 326.305 +	int prev_valid = 0;
 326.306 +	struct range {
 326.307 +		u64 start;
 326.308 +		u64 end;
 326.309 +	} prev, curr;
 326.310 +	void *efi_map_start, *efi_map_end, *p, *q;
 326.311 +	efi_memory_desc_t *md, *check_md;
 326.312 +	u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
 326.313 +	unsigned long total_mem = 0;
 326.314 +
 326.315 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.316 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.317 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.318 +
 326.319 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 326.320 +		md = p;
 326.321 +
 326.322 +		/* skip over non-WB memory descriptors; that's all we're interested in... */
 326.323 +		if (!(md->attribute & EFI_MEMORY_WB))
 326.324 +			continue;
 326.325 +
 326.326 +#ifdef XEN
 326.327 +// this works around a problem in the ski bootloader
 326.328 +{
 326.329 +		extern long running_on_sim;
 326.330 +		if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY)
 326.331 +			continue;
 326.332 +}
 326.333 +// this is a temporary hack to avoid CONFIG_VIRTUAL_MEM_MAP
 326.334 +		if (md->phys_addr >= 0x100000000) continue;
 326.335 +#endif
 326.336 +		/*
 326.337 +		 * granule_addr is the base of md's first granule.
 326.338 +		 * [granule_addr - first_non_wb_addr) is guaranteed to
 326.339 +		 * be contiguous WB memory.
 326.340 +		 */
 326.341 +		granule_addr = GRANULEROUNDDOWN(md->phys_addr);
 326.342 +		first_non_wb_addr = max(first_non_wb_addr, granule_addr);
 326.343 +
 326.344 +		if (first_non_wb_addr < md->phys_addr) {
 326.345 +			trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
 326.346 +			granule_addr = GRANULEROUNDDOWN(md->phys_addr);
 326.347 +			first_non_wb_addr = max(first_non_wb_addr, granule_addr);
 326.348 +		}
 326.349 +
 326.350 +		for (q = p; q < efi_map_end; q += efi_desc_size) {
 326.351 +			check_md = q;
 326.352 +
 326.353 +			if ((check_md->attribute & EFI_MEMORY_WB) &&
 326.354 +			    (check_md->phys_addr == first_non_wb_addr))
 326.355 +				first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
 326.356 +			else
 326.357 +				break;		/* non-WB or hole */
 326.358 +		}
 326.359 +
 326.360 +		last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
 326.361 +		if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
 326.362 +			trim_top(md, last_granule_addr);
 326.363 +
 326.364 +		if (is_available_memory(md)) {
 326.365 +			if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
 326.366 +				if (md->phys_addr >= max_addr)
 326.367 +					continue;
 326.368 +				md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
 326.369 +				first_non_wb_addr = max_addr;
 326.370 +			}
 326.371 +
 326.372 +			if (total_mem >= mem_limit)
 326.373 +				continue;
 326.374 +
 326.375 +			if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
 326.376 +				unsigned long limit_addr = md->phys_addr;
 326.377 +
 326.378 +				limit_addr += mem_limit - total_mem;
 326.379 +				limit_addr = GRANULEROUNDDOWN(limit_addr);
 326.380 +
 326.381 +				if (md->phys_addr > limit_addr)
 326.382 +					continue;
 326.383 +
 326.384 +				md->num_pages = (limit_addr - md->phys_addr) >>
 326.385 +				                EFI_PAGE_SHIFT;
 326.386 +				first_non_wb_addr = max_addr = md->phys_addr +
 326.387 +				              (md->num_pages << EFI_PAGE_SHIFT);
 326.388 +			}
 326.389 +			total_mem += (md->num_pages << EFI_PAGE_SHIFT);
 326.390 +
 326.391 +			if (md->num_pages == 0)
 326.392 +				continue;
 326.393 +
 326.394 +			curr.start = PAGE_OFFSET + md->phys_addr;
 326.395 +			curr.end   = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
 326.396 +
 326.397 +			if (!prev_valid) {
 326.398 +				prev = curr;
 326.399 +				prev_valid = 1;
 326.400 +			} else {
 326.401 +				if (curr.start < prev.start)
 326.402 +					printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
 326.403 +
 326.404 +				if (prev.end == curr.start) {
 326.405 +					/* merge two consecutive memory ranges */
 326.406 +					prev.end = curr.end;
 326.407 +				} else {
 326.408 +					start = PAGE_ALIGN(prev.start);
 326.409 +					end = prev.end & PAGE_MASK;
 326.410 +					if ((end > start) && (*callback)(start, end, arg) < 0)
 326.411 +						return;
 326.412 +					prev = curr;
 326.413 +				}
 326.414 +			}
 326.415 +		}
 326.416 +	}
 326.417 +	if (prev_valid) {
 326.418 +		start = PAGE_ALIGN(prev.start);
 326.419 +		end = prev.end & PAGE_MASK;
 326.420 +		if (end > start)
 326.421 +			(*callback)(start, end, arg);
 326.422 +	}
 326.423 +}
 326.424 +
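/*
 * Illustrative aside (not part of this changeset): every walker in this file
 * (efi_memmap_walk above, efi_get_pal_addr, efi_get_iobase, efi_mem_type,
 * ...) steps through the boot-time memory map with the same idiom.  The
 * subtle point is that descriptors are ia64_boot_param->efi_memdesc_size
 * bytes apart, as reported by firmware, which may exceed
 * sizeof(efi_memory_desc_t); that is why the loops advance a byte pointer
 * rather than indexing an array.  A standalone sketch with stand-in types:
 */
#include <stdint.h>
#include <stdio.h>

struct demo_md {                        /* stand-in for efi_memory_desc_t */
        uint32_t type;
        uint64_t phys_addr;
        uint64_t num_pages;
};

static void demo_walk(char *map_start, char *map_end, uint64_t desc_size)
{
        char *p;

        for (p = map_start; p < map_end; p += desc_size) {
                struct demo_md *md = (struct demo_md *)p;
                printf("type=%u addr=%#llx pages=%llu\n", (unsigned)md->type,
                       (unsigned long long)md->phys_addr,
                       (unsigned long long)md->num_pages);
        }
}

int main(void)
{
        struct demo_md map[2] = {
                { 7, 0x100000, 16 },    /* EFI_CONVENTIONAL_MEMORY */
                { 7, 0x200000, 32 },
        };

        demo_walk((char *)map, (char *)(map + 2), sizeof(map[0]));
        return 0;
}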
 326.425 +/*
 326.426 + * Look for the PAL_CODE region reported by EFI and maps it using an
 326.427 + * ITR to enable safe PAL calls in virtual mode.  See IA-64 Processor
 326.428 + * Abstraction Layer chapter 11 in ADAG
 326.429 + */
 326.430 +
 326.431 +void *
 326.432 +efi_get_pal_addr (void)
 326.433 +{
 326.434 +	void *efi_map_start, *efi_map_end, *p;
 326.435 +	efi_memory_desc_t *md;
 326.436 +	u64 efi_desc_size;
 326.437 +	int pal_code_count = 0;
 326.438 +	u64 vaddr, mask;
 326.439 +
 326.440 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.441 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.442 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.443 +
 326.444 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 326.445 +		md = p;
 326.446 +		if (md->type != EFI_PAL_CODE)
 326.447 +			continue;
 326.448 +
 326.449 +		if (++pal_code_count > 1) {
 326.450 +			printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n",
 326.451 +			       md->phys_addr);
 326.452 +			continue;
 326.453 +		}
 326.454 +		/*
 326.455 +		 * The only ITLB entry in region 7 that is used is the one installed by
 326.456 +		 * __start().  That entry covers a 64MB range.
 326.457 +		 */
 326.458 +		mask  = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
 326.459 +		vaddr = PAGE_OFFSET + md->phys_addr;
 326.460 +
 326.461 +		/*
 326.462 +		 * We must check that the PAL mapping won't overlap with the kernel
 326.463 +		 * mapping.
 326.464 +		 *
 326.465 +		 * PAL code is guaranteed to be aligned on a power of 2 between 4k and
 326.466 +		 * 256KB and that only one ITR is needed to map it. This implies that the
 326.467 +		 * PAL code is always aligned on its size, i.e., the closest matching page
 326.468 +		 * size supported by the TLB. Therefore PAL code is guaranteed never to
 326.469 +		 * cross a 64MB unless it is bigger than 64MB (very unlikely!).  So for
 326.470 +		 * now the following test is enough to determine whether or not we need a
 326.471 +		 * dedicated ITR for the PAL code.
 326.472 +		 */
 326.473 +		if ((vaddr & mask) == (KERNEL_START & mask)) {
 326.474 +			printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
 326.475 +			       __FUNCTION__);
 326.476 +			continue;
 326.477 +		}
 326.478 +
 326.479 +		if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE)
 326.480 +			panic("Woah!  PAL code size bigger than a granule!");
 326.481 +
 326.482 +#if EFI_DEBUG
 326.483 +		mask  = ~((1 << IA64_GRANULE_SHIFT) - 1);
 326.484 +
 326.485 +		printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
 326.486 +			smp_processor_id(), md->phys_addr,
 326.487 +			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
 326.488 +			vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
 326.489 +#endif
 326.490 +		return __va(md->phys_addr);
 326.491 +	}
 326.492 +	printk(KERN_WARNING "%s: no PAL-code memory-descriptor found",
 326.493 +	       __FUNCTION__);
 326.494 +	return NULL;
 326.495 +}
 326.496 +
 326.497 +void
 326.498 +efi_map_pal_code (void)
 326.499 +{
 326.500 +	void *pal_vaddr = efi_get_pal_addr ();
 326.501 +	u64 psr;
 326.502 +
 326.503 +	if (!pal_vaddr)
 326.504 +		return;
 326.505 +
 326.506 +	/*
 326.507 +	 * Cannot write to CRx with PSR.ic=1
 326.508 +	 */
 326.509 +	psr = ia64_clear_ic();
 326.510 +	ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr),
 326.511 +		 pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
 326.512 +		 IA64_GRANULE_SHIFT);
 326.513 +	ia64_set_psr(psr);		/* restore psr */
 326.514 +	ia64_srlz_i();
 326.515 +}
 326.516 +
 326.517 +void __init
 326.518 +efi_init (void)
 326.519 +{
 326.520 +	void *efi_map_start, *efi_map_end;
 326.521 +	efi_config_table_t *config_tables;
 326.522 +	efi_char16_t *c16;
 326.523 +	u64 efi_desc_size;
 326.524 +	char *cp, *end, vendor[100] = "unknown";
 326.525 +	extern char saved_command_line[];
 326.526 +	int i;
 326.527 +
 326.528 +	/* it's too early to be able to use the standard kernel command line support... */
 326.529 +	for (cp = saved_command_line; *cp; ) {
 326.530 +		if (memcmp(cp, "mem=", 4) == 0) {
 326.531 +			cp += 4;
 326.532 +			mem_limit = memparse(cp, &end);
 326.533 +			if (end != cp)
 326.534 +				break;
 326.535 +			cp = end;
 326.536 +		} else if (memcmp(cp, "max_addr=", 9) == 0) {
 326.537 +			cp += 9;
 326.538 +			max_addr = GRANULEROUNDDOWN(memparse(cp, &end));
 326.539 +			if (end != cp)
 326.540 +				break;
 326.541 +			cp = end;
 326.542 +		} else {
 326.543 +			while (*cp != ' ' && *cp)
 326.544 +				++cp;
 326.545 +			while (*cp == ' ')
 326.546 +				++cp;
 326.547 +		}
 326.548 +	}
 326.549 +	if (max_addr != ~0UL)
 326.550 +		printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
 326.551 +
 326.552 +	efi.systab = __va(ia64_boot_param->efi_systab);
 326.553 +
 326.554 +	/*
 326.555 +	 * Verify the EFI Table
 326.556 +	 */
 326.557 +	if (efi.systab == NULL)
 326.558 +		panic("Woah! Can't find EFI system table.\n");
 326.559 +	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
 326.560 +		panic("Woah! EFI system table signature incorrect\n");
 326.561 +	if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
 326.562 +		printk(KERN_WARNING "Warning: EFI system table major version mismatch: "
 326.563 +		       "got %d.%02d, expected %d.%02d\n",
 326.564 +		       efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
 326.565 +		       EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
 326.566 +
 326.567 +	config_tables = __va(efi.systab->tables);
 326.568 +
 326.569 +	/* Show what we know for posterity */
 326.570 +	c16 = __va(efi.systab->fw_vendor);
 326.571 +	if (c16) {
 326.572 +		for (i = 0;i < (int) sizeof(vendor) && *c16; ++i)
 326.573 +			vendor[i] = *c16++;
 326.574 +		vendor[i] = '\0';
 326.575 +	}
 326.576 +
 326.577 +	printk(KERN_INFO "EFI v%u.%.02u by %s:",
 326.578 +	       efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
 326.579 +
 326.580 +	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
 326.581 +		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
 326.582 +			efi.mps = __va(config_tables[i].table);
 326.583 +			printk(" MPS=0x%lx", config_tables[i].table);
 326.584 +		} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
 326.585 +			efi.acpi20 = __va(config_tables[i].table);
 326.586 +			printk(" ACPI 2.0=0x%lx", config_tables[i].table);
 326.587 +		} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
 326.588 +			efi.acpi = __va(config_tables[i].table);
 326.589 +			printk(" ACPI=0x%lx", config_tables[i].table);
 326.590 +		} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
 326.591 +			efi.smbios = __va(config_tables[i].table);
 326.592 +			printk(" SMBIOS=0x%lx", config_tables[i].table);
 326.593 +		} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
 326.594 +			efi.sal_systab = __va(config_tables[i].table);
 326.595 +			printk(" SALsystab=0x%lx", config_tables[i].table);
 326.596 +		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
 326.597 +			efi.hcdp = __va(config_tables[i].table);
 326.598 +			printk(" HCDP=0x%lx", config_tables[i].table);
 326.599 +		}
 326.600 +	}
 326.601 +	printk("\n");
 326.602 +
 326.603 +	runtime = __va(efi.systab->runtime);
 326.604 +	efi.get_time = phys_get_time;
 326.605 +	efi.set_time = phys_set_time;
 326.606 +	efi.get_wakeup_time = phys_get_wakeup_time;
 326.607 +	efi.set_wakeup_time = phys_set_wakeup_time;
 326.608 +	efi.get_variable = phys_get_variable;
 326.609 +	efi.get_next_variable = phys_get_next_variable;
 326.610 +	efi.set_variable = phys_set_variable;
 326.611 +	efi.get_next_high_mono_count = phys_get_next_high_mono_count;
 326.612 +	efi.reset_system = phys_reset_system;
 326.613 +
 326.614 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.615 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.616 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.617 +
 326.618 +#if EFI_DEBUG
 326.619 +	/* print EFI memory map: */
 326.620 +	{
 326.621 +		efi_memory_desc_t *md;
 326.622 +		void *p;
 326.623 +
 326.624 +		for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
 326.625 +			md = p;
 326.626 +			printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
 326.627 +			       i, md->type, md->attribute, md->phys_addr,
 326.628 +			       md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
 326.629 +			       md->num_pages >> (20 - EFI_PAGE_SHIFT));
 326.630 +		}
 326.631 +	}
 326.632 +#endif
 326.633 +
 326.634 +	efi_map_pal_code();
 326.635 +	efi_enter_virtual_mode();
 326.636 +}
 326.637 +
 326.638 +void
 326.639 +efi_enter_virtual_mode (void)
 326.640 +{
 326.641 +	void *efi_map_start, *efi_map_end, *p;
 326.642 +	efi_memory_desc_t *md;
 326.643 +	efi_status_t status;
 326.644 +	u64 efi_desc_size;
 326.645 +
 326.646 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.647 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.648 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.649 +
 326.650 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 326.651 +		md = p;
 326.652 +		if (md->attribute & EFI_MEMORY_RUNTIME) {
 326.653 +			/*
 326.654 +			 * Some descriptors have multiple bits set, so the order of
 326.655 +			 * the tests is relevant.
 326.656 +			 */
 326.657 +			if (md->attribute & EFI_MEMORY_WB) {
 326.658 +				md->virt_addr = (u64) __va(md->phys_addr);
 326.659 +			} else if (md->attribute & EFI_MEMORY_UC) {
 326.660 +				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
 326.661 +			} else if (md->attribute & EFI_MEMORY_WC) {
 326.662 +#if 0
 326.663 +				md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
 326.664 +									   | _PAGE_D
 326.665 +									   | _PAGE_MA_WC
 326.666 +									   | _PAGE_PL_0
 326.667 +									   | _PAGE_AR_RW));
 326.668 +#else
 326.669 +				printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
 326.670 +				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
 326.671 +#endif
 326.672 +			} else if (md->attribute & EFI_MEMORY_WT) {
 326.673 +#if 0
 326.674 +				md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
 326.675 +									   | _PAGE_D | _PAGE_MA_WT
 326.676 +									   | _PAGE_PL_0
 326.677 +									   | _PAGE_AR_RW));
 326.678 +#else
 326.679 +				printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
 326.680 +				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
 326.681 +#endif
 326.682 +			}
 326.683 +		}
 326.684 +	}
 326.685 +
 326.686 +	status = efi_call_phys(__va(runtime->set_virtual_address_map),
 326.687 +			       ia64_boot_param->efi_memmap_size,
 326.688 +			       efi_desc_size, ia64_boot_param->efi_memdesc_version,
 326.689 +			       ia64_boot_param->efi_memmap);
 326.690 +	if (status != EFI_SUCCESS) {
 326.691 +		printk(KERN_WARNING "warning: unable to switch EFI into virtual mode "
 326.692 +		       "(status=%lu)\n", status);
 326.693 +		return;
 326.694 +	}
 326.695 +
 326.696 +	/*
 326.697 +	 * Now that EFI is in virtual mode, we call the EFI functions more efficiently:
 326.698 +	 */
 326.699 +	efi.get_time = virt_get_time;
 326.700 +	efi.set_time = virt_set_time;
 326.701 +	efi.get_wakeup_time = virt_get_wakeup_time;
 326.702 +	efi.set_wakeup_time = virt_set_wakeup_time;
 326.703 +	efi.get_variable = virt_get_variable;
 326.704 +	efi.get_next_variable = virt_get_next_variable;
 326.705 +	efi.set_variable = virt_set_variable;
 326.706 +	efi.get_next_high_mono_count = virt_get_next_high_mono_count;
 326.707 +	efi.reset_system = virt_reset_system;
 326.708 +}
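/*
 * Hedged usage sketch (illustrative only, not one of the interfaces added in
 * this hunk; efi_time_example() is a hypothetical caller): once the function
 * pointers above point at the virt_* wrappers, a runtime service can be
 * reached directly, e.g. to read the firmware RTC:
 */
#if 0
static void efi_time_example(void)
{
	efi_time_t tm;

	if ((*efi.get_time)(&tm, NULL) == EFI_SUCCESS)
		printk(KERN_INFO "EFI time: %04d-%02d-%02d\n",
		       tm.year, tm.month, tm.day);
}
#endif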
 326.709 +
 326.710 +/*
 326.711 + * Walk the EFI memory map looking for the I/O port range.  There can only be one entry of
 326.712 + * this type, other I/O port ranges should be described via ACPI.
 326.713 + */
 326.714 +u64
 326.715 +efi_get_iobase (void)
 326.716 +{
 326.717 +	void *efi_map_start, *efi_map_end, *p;
 326.718 +	efi_memory_desc_t *md;
 326.719 +	u64 efi_desc_size;
 326.720 +
 326.721 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.722 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.723 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.724 +
 326.725 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 326.726 +		md = p;
 326.727 +		if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
 326.728 +			if (md->attribute & EFI_MEMORY_UC)
 326.729 +				return md->phys_addr;
 326.730 +		}
 326.731 +	}
 326.732 +	return 0;
 326.733 +}
 326.734 +
 326.735 +#ifdef XEN
 326.736 +// Variation of efi_get_iobase() which returns the entire memory descriptor.
 326.737 +efi_memory_desc_t *
 326.738 +efi_get_io_md (void)
 326.739 +{
 326.740 +	void *efi_map_start, *efi_map_end, *p;
 326.741 +	efi_memory_desc_t *md;
 326.742 +	u64 efi_desc_size;
 326.743 +
 326.744 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.745 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.746 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.747 +
 326.748 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 326.749 +		md = p;
 326.750 +		if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
 326.751 +			if (md->attribute & EFI_MEMORY_UC)
 326.752 +				return md;
 326.753 +		}
 326.754 +	}
 326.755 +	return NULL;
 326.756 +}
 326.757 +#endif
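/*
 * Hedged sketch of how the Xen-only helper above could be consumed
 * (io_space_example() and its locals are hypothetical names): the returned
 * descriptor gives both the base and the byte size of the memory-mapped
 * I/O port window.
 */
#if 0
static void io_space_example(void)
{
	efi_memory_desc_t *md = efi_get_io_md();

	if (md) {
		u64 io_base = md->phys_addr;
		u64 io_size = md->num_pages << EFI_PAGE_SHIFT;
		printk(KERN_INFO "I/O port space at 0x%lx, %lu bytes\n",
		       io_base, io_size);
	}
}
#endif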
 326.758 +
 326.759 +u32
 326.760 +efi_mem_type (unsigned long phys_addr)
 326.761 +{
 326.762 +	void *efi_map_start, *efi_map_end, *p;
 326.763 +	efi_memory_desc_t *md;
 326.764 +	u64 efi_desc_size;
 326.765 +
 326.766 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.767 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.768 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.769 +
 326.770 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 326.771 +		md = p;
 326.772 +
 326.773 +		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
 326.774 +			 return md->type;
 326.775 +	}
 326.776 +	return 0;
 326.777 +}
 326.778 +
 326.779 +u64
 326.780 +efi_mem_attributes (unsigned long phys_addr)
 326.781 +{
 326.782 +	void *efi_map_start, *efi_map_end, *p;
 326.783 +	efi_memory_desc_t *md;
 326.784 +	u64 efi_desc_size;
 326.785 +
 326.786 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.787 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.788 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.789 +
 326.790 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 326.791 +		md = p;
 326.792 +
 326.793 +		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
 326.794 +			return md->attribute;
 326.795 +	}
 326.796 +	return 0;
 326.797 +}
 326.798 +EXPORT_SYMBOL(efi_mem_attributes);
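/*
 * Hedged usage sketch for the two lookups above (check_range() is a
 * hypothetical caller, not part of this file): a driver can consult the
 * firmware map before deciding how a physical range may be mapped.
 */
#if 0
static int check_range(unsigned long phys)
{
	if (efi_mem_type(phys) == EFI_MEMORY_MAPPED_IO)
		return 1;			/* device memory */
	if (efi_mem_attributes(phys) & EFI_MEMORY_WB)
		return 0;			/* ordinary cacheable RAM */
	return -1;				/* not described or uncacheable */
}
#endif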
 326.799 +
 326.800 +int
 326.801 +valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
 326.802 +{
 326.803 +	void *efi_map_start, *efi_map_end, *p;
 326.804 +	efi_memory_desc_t *md;
 326.805 +	u64 efi_desc_size;
 326.806 +
 326.807 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
 326.808 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 326.809 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 326.810 +
 326.811 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 326.812 +		md = p;
 326.813 +
 326.814 +		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) {
 326.815 +			if (!(md->attribute & EFI_MEMORY_WB))
 326.816 +				return 0;
 326.817 +
 326.818 +			if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr)
 326.819 +				*size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr;
 326.820 +			return 1;
 326.821 +		}
 326.822 +	}
 326.823 +	return 0;
 326.824 +}
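/*
 * Hedged caller sketch (a /dev/mem style read path is assumed;
 * read_phys_example() is hypothetical): the helper both validates the start
 * address and clamps the requested size so the access stays inside the
 * write-back descriptor it falls into.
 */
#if 0
static ssize_t read_phys_example(unsigned long phys, char *buf, size_t len)
{
	unsigned long count = len;

	if (!valid_phys_addr_range(phys, &count))
		return -EFAULT;
	memcpy(buf, __va(phys), count);		/* at most 'count' bytes */
	return count;
}
#endif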
 326.825 +
 326.826 +int __init
 326.827 +efi_uart_console_only(void)
 326.828 +{
 326.829 +	efi_status_t status;
 326.830 +	char *s, name[] = "ConOut";
 326.831 +	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
 326.832 +	efi_char16_t *utf16, name_utf16[32];
 326.833 +	unsigned char data[1024];
 326.834 +	unsigned long size = sizeof(data);
 326.835 +	struct efi_generic_dev_path *hdr, *end_addr;
 326.836 +	int uart = 0;
 326.837 +
 326.838 +	/* Convert to UTF-16 */
 326.839 +	utf16 = name_utf16;
 326.840 +	s = name;
 326.841 +	while (*s)
 326.842 +		*utf16++ = *s++ & 0x7f;
 326.843 +	*utf16 = 0;
 326.844 +
 326.845 +	status = efi.get_variable(name_utf16, &guid, NULL, &size, data);
 326.846 +	if (status != EFI_SUCCESS) {
 326.847 +		printk(KERN_ERR "No EFI %s variable?\n", name);
 326.848 +		return 0;
 326.849 +	}
 326.850 +
 326.851 +	hdr = (struct efi_generic_dev_path *) data;
 326.852 +	end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size);
 326.853 +	while (hdr < end_addr) {
 326.854 +		if (hdr->type == EFI_DEV_MSG &&
 326.855 +		    hdr->sub_type == EFI_DEV_MSG_UART)
 326.856 +			uart = 1;
 326.857 +		else if (hdr->type == EFI_DEV_END_PATH ||
 326.858 +			  hdr->type == EFI_DEV_END_PATH2) {
 326.859 +			if (!uart)
 326.860 +				return 0;
 326.861 +			if (hdr->sub_type == EFI_DEV_END_ENTIRE)
 326.862 +				return 1;
 326.863 +			uart = 0;
 326.864 +		}
 326.865 +		hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length);
 326.866 +	}
 326.867 +	printk(KERN_ERR "Malformed %s value\n", name);
 326.868 +	return 0;
 326.869 +}
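/*
 * Hedged sketch of the device-path walk performed above (count_uart_nodes()
 * is hypothetical): each node starts with a {type, sub_type, length} header
 * and the next node begins 'length' bytes further on, so the whole ConOut
 * blob can be scanned the same way.
 */
#if 0
static int count_uart_nodes(unsigned char *data, unsigned long size)
{
	struct efi_generic_dev_path *hdr = (struct efi_generic_dev_path *) data;
	struct efi_generic_dev_path *end = (struct efi_generic_dev_path *) (data + size);
	int n = 0;

	while (hdr < end) {
		if (hdr->type == EFI_DEV_MSG && hdr->sub_type == EFI_DEV_MSG_UART)
			n++;
		hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length);
	}
	return n;
}
#endif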
   327.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   327.2 +++ b/xen/arch/ia64/linux-xen/entry.S	Tue Aug 16 12:15:23 2005 +0800
   327.3 @@ -0,0 +1,1653 @@
   327.4 +/*
   327.5 + * ia64/kernel/entry.S
   327.6 + *
   327.7 + * Kernel entry points.
   327.8 + *
   327.9 + * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
  327.10 + *	David Mosberger-Tang <davidm@hpl.hp.com>
  327.11 + * Copyright (C) 1999, 2002-2003
  327.12 + *	Asit Mallick <Asit.K.Mallick@intel.com>
  327.13 + * 	Don Dugger <Don.Dugger@intel.com>
  327.14 + *	Suresh Siddha <suresh.b.siddha@intel.com>
  327.15 + *	Fenghua Yu <fenghua.yu@intel.com>
  327.16 + * Copyright (C) 1999 VA Linux Systems
  327.17 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  327.18 + */
  327.19 +/*
  327.20 + * ia64_switch_to now places correct virtual mapping in TR2 for
  327.21 + * kernel stack. This allows us to handle interrupts without changing
  327.22 + * to physical mode.
  327.23 + *
  327.24 + * Jonathan Nicklin	<nicklin@missioncriticallinux.com>
  327.25 + * Patrick O'Rourke	<orourke@missioncriticallinux.com>
  327.26 + * 11/07/2000
  327.27 + */
  327.28 +/*
  327.29 + * Global (preserved) predicate usage on syscall entry/exit path:
  327.30 + *
  327.31 + *	pKStk:		See entry.h.
  327.32 + *	pUStk:		See entry.h.
  327.33 + *	pSys:		See entry.h.
  327.34 + *	pNonSys:	!pSys
  327.35 + */
  327.36 +
  327.37 +#include <linux/config.h>
  327.38 +
  327.39 +#include <asm/asmmacro.h>
  327.40 +#include <asm/cache.h>
  327.41 +#include <asm/errno.h>
  327.42 +#include <asm/kregs.h>
  327.43 +#include <asm/offsets.h>
  327.44 +#include <asm/pgtable.h>
  327.45 +#include <asm/percpu.h>
  327.46 +#include <asm/processor.h>
  327.47 +#include <asm/thread_info.h>
  327.48 +#include <asm/unistd.h>
  327.49 +
  327.50 +#include "minstate.h"
  327.51 +
  327.52 +#ifndef XEN
  327.53 +	/*
  327.54 +	 * execve() is special because in case of success, we need to
  327.55 +	 * setup a null register window frame.
  327.56 +	 */
  327.57 +ENTRY(ia64_execve)
  327.58 +	/*
  327.59 +	 * Allocate 8 input registers since ptrace() may clobber them
  327.60 +	 */
  327.61 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
  327.62 +	alloc loc1=ar.pfs,8,2,4,0
  327.63 +	mov loc0=rp
  327.64 +	.body
  327.65 +	mov out0=in0			// filename
  327.66 +	;;				// stop bit between alloc and call
  327.67 +	mov out1=in1			// argv
  327.68 +	mov out2=in2			// envp
  327.69 +	add out3=16,sp			// regs
  327.70 +	br.call.sptk.many rp=sys_execve
  327.71 +.ret0:
  327.72 +#ifdef CONFIG_IA32_SUPPORT
  327.73 +	/*
  327.74 +	 * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
  327.75 +	 * from pt_regs.
  327.76 +	 */
  327.77 +	adds r16=PT(CR_IPSR)+16,sp
  327.78 +	;;
  327.79 +	ld8 r16=[r16]
  327.80 +#endif
  327.81 +	cmp4.ge p6,p7=r8,r0
  327.82 +	mov ar.pfs=loc1			// restore ar.pfs
  327.83 +	sxt4 r8=r8			// return 64-bit result
  327.84 +	;;
  327.85 +	stf.spill [sp]=f0
  327.86 +(p6)	cmp.ne pKStk,pUStk=r0,r0	// a successful execve() lands us in user-mode...
  327.87 +	mov rp=loc0
  327.88 +(p6)	mov ar.pfs=r0			// clear ar.pfs on success
  327.89 +(p7)	br.ret.sptk.many rp
  327.90 +
  327.91 +	/*
  327.92 +	 * In theory, we'd have to zap this state only to prevent leaking of
  327.93 +	 * security sensitive state (e.g., if current->mm->dumpable is zero).  However,
  327.94 +	 * this executes in less than 20 cycles even on Itanium, so it's not worth
  327.95 +	 * optimizing for...
  327.96 +	 */
  327.97 +	mov ar.unat=0; 		mov ar.lc=0
  327.98 +	mov r4=0;		mov f2=f0;		mov b1=r0
  327.99 +	mov r5=0;		mov f3=f0;		mov b2=r0
 327.100 +	mov r6=0;		mov f4=f0;		mov b3=r0
 327.101 +	mov r7=0;		mov f5=f0;		mov b4=r0
 327.102 +	ldf.fill f12=[sp];	mov f13=f0;		mov b5=r0
 327.103 +	ldf.fill f14=[sp];	ldf.fill f15=[sp];	mov f16=f0
 327.104 +	ldf.fill f17=[sp];	ldf.fill f18=[sp];	mov f19=f0
 327.105 +	ldf.fill f20=[sp];	ldf.fill f21=[sp];	mov f22=f0
 327.106 +	ldf.fill f23=[sp];	ldf.fill f24=[sp];	mov f25=f0
 327.107 +	ldf.fill f26=[sp];	ldf.fill f27=[sp];	mov f28=f0
 327.108 +	ldf.fill f29=[sp];	ldf.fill f30=[sp];	mov f31=f0
 327.109 +#ifdef CONFIG_IA32_SUPPORT
 327.110 +	tbit.nz p6,p0=r16, IA64_PSR_IS_BIT
 327.111 +	movl loc0=ia64_ret_from_ia32_execve
 327.112 +	;;
 327.113 +(p6)	mov rp=loc0
 327.114 +#endif
 327.115 +	br.ret.sptk.many rp
 327.116 +END(ia64_execve)
 327.117 +
 327.118 +/*
 327.119 + * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
 327.120 + *	      u64 tls)
 327.121 + */
 327.122 +GLOBAL_ENTRY(sys_clone2)
 327.123 +	/*
 327.124 +	 * Allocate 8 input registers since ptrace() may clobber them
 327.125 +	 */
 327.126 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
 327.127 +	alloc r16=ar.pfs,8,2,6,0
 327.128 +	DO_SAVE_SWITCH_STACK
 327.129 +	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
 327.130 +	mov loc0=rp
 327.131 +	mov loc1=r16				// save ar.pfs across do_fork
 327.132 +	.body
 327.133 +	mov out1=in1
 327.134 +	mov out3=in2
 327.135 +	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
 327.136 +	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
 327.137 +	;;
 327.138 +(p6)	st8 [r2]=in5				// store TLS in r16 for copy_thread()
 327.139 +	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
 327.140 +	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
 327.141 +	mov out0=in0				// out0 = clone_flags
 327.142 +	br.call.sptk.many rp=do_fork
 327.143 +.ret1:	.restore sp
 327.144 +	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
 327.145 +	mov ar.pfs=loc1
 327.146 +	mov rp=loc0
 327.147 +	br.ret.sptk.many rp
 327.148 +END(sys_clone2)
 327.149 +
 327.150 +/*
 327.151 + * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
 327.152 + *	Deprecated.  Use sys_clone2() instead.
 327.153 + */
 327.154 +GLOBAL_ENTRY(sys_clone)
 327.155 +	/*
 327.156 +	 * Allocate 8 input registers since ptrace() may clobber them
 327.157 +	 */
 327.158 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
 327.159 +	alloc r16=ar.pfs,8,2,6,0
 327.160 +	DO_SAVE_SWITCH_STACK
 327.161 +	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
 327.162 +	mov loc0=rp
 327.163 +	mov loc1=r16				// save ar.pfs across do_fork
 327.164 +	.body
 327.165 +	mov out1=in1
 327.166 +	mov out3=16				// stacksize (compensates for 16-byte scratch area)
 327.167 +	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
 327.168 +	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
 327.169 +	;;
 327.170 +(p6)	st8 [r2]=in4				// store TLS in r13 (tp)
 327.171 +	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
 327.172 +	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
 327.173 +	mov out0=in0				// out0 = clone_flags
 327.174 +	br.call.sptk.many rp=do_fork
 327.175 +.ret2:	.restore sp
 327.176 +	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
 327.177 +	mov ar.pfs=loc1
 327.178 +	mov rp=loc0
 327.179 +	br.ret.sptk.many rp
 327.180 +END(sys_clone)
 327.181 +#endif /* !XEN */
 327.182 +
 327.183 +/*
 327.184 + * prev_task <- ia64_switch_to(struct task_struct *next)
 327.185 + *	With Ingo's new scheduler, interrupts are disabled when this routine gets
 327.186 + *	called.  The code starting at .map relies on this.  The rest of the code
 327.187 + *	doesn't care about the interrupt masking status.
 327.188 + */
 327.189 +GLOBAL_ENTRY(ia64_switch_to)
 327.190 +	.prologue
 327.191 +	alloc r16=ar.pfs,1,0,0,0
 327.192 +	DO_SAVE_SWITCH_STACK
 327.193 +	.body
 327.194 +
 327.195 +	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
 327.196 +	movl r25=init_task
 327.197 +	mov r27=IA64_KR(CURRENT_STACK)
 327.198 +	adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
 327.199 +#ifdef XEN
 327.200 +	dep r20=0,in0,60,4		// physical address of "next"
 327.201 +#else
 327.202 +	dep r20=0,in0,61,3		// physical address of "next"
 327.203 +#endif
 327.204 +	;;
 327.205 +	st8 [r22]=sp			// save kernel stack pointer of old task
 327.206 +	shr.u r26=r20,IA64_GRANULE_SHIFT
 327.207 +	cmp.eq p7,p6=r25,in0
 327.208 +	;;
 327.209 +	/*
 327.210 +	 * If we've already mapped this task's page, we can skip doing it again.
 327.211 +	 */
 327.212 +(p6)	cmp.eq p7,p6=r26,r27
 327.213 +(p6)	br.cond.dpnt .map
 327.214 +	;;
 327.215 +.done:
 327.216 +(p6)	ssm psr.ic			// if we had to map, reenable the psr.ic bit FIRST!!!
 327.217 +	;;
 327.218 +(p6)	srlz.d
 327.219 +	ld8 sp=[r21]			// load kernel stack pointer of new task
 327.220 +	mov IA64_KR(CURRENT)=in0	// update "current" application register
 327.221 +	mov r8=r13			// return pointer to previously running task
 327.222 +	mov r13=in0			// set "current" pointer
 327.223 +	;;
 327.224 +	DO_LOAD_SWITCH_STACK
 327.225 +
 327.226 +#ifdef CONFIG_SMP
 327.227 +	sync.i				// ensure "fc"s done by this CPU are visible on other CPUs
 327.228 +#endif
 327.229 +	br.ret.sptk.many rp		// boogie on out in new context
 327.230 +
 327.231 +.map:
 327.232 +#ifdef XEN
 327.233 +	// avoid overlapping with kernel TR
 327.234 +	movl r25=KERNEL_START
 327.235 +	dep  r23=0,in0,0,KERNEL_TR_PAGE_SHIFT
 327.236 +	;;
 327.237 +	cmp.eq p7,p0=r25,r23
 327.238 +	;;
 327.239 +(p7)	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
 327.240 +(p7)	br.cond.sptk .done
 327.241 +#endif
 327.242 +	rsm psr.ic			// interrupts (psr.i) are already disabled here
 327.243 +	movl r25=PAGE_KERNEL
 327.244 +	;;
 327.245 +	srlz.d
 327.246 +	or r23=r25,r20			// construct PA | page properties
 327.247 +	mov r25=IA64_GRANULE_SHIFT<<2
 327.248 +	;;
 327.249 +	mov cr.itir=r25
 327.250 +	mov cr.ifa=in0			// VA of next task...
 327.251 +	;;
 327.252 +	mov r25=IA64_TR_CURRENT_STACK
 327.253 +	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
 327.254 +	;;
 327.255 +	itr.d dtr[r25]=r23		// wire in new mapping...
 327.256 +	br.cond.sptk .done
 327.257 +END(ia64_switch_to)
 327.258 +
 327.259 +/*
 327.260 + * Note that interrupts are enabled during save_switch_stack and load_switch_stack.  This
 327.261 + * means that we may get an interrupt with "sp" pointing to the new kernel stack while
 327.262 + * ar.bspstore is still pointing to the old kernel backing store area.  Since ar.rsc,
 327.263 + * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
 327.264 + * problem.  Also, we don't need to specify unwind information for preserved registers
 327.265 + * that are not modified in save_switch_stack as the right unwind information is already
 327.266 + * specified at the call-site of save_switch_stack.
 327.267 + */
 327.268 +
 327.269 +/*
 327.270 + * save_switch_stack:
 327.271 + *	- r16 holds ar.pfs
 327.272 + *	- b7 holds address to return to
 327.273 + *	- rp (b0) holds return address to save
 327.274 + */
 327.275 +GLOBAL_ENTRY(save_switch_stack)
 327.276 +	.prologue
 327.277 +	.altrp b7
 327.278 +	flushrs			// flush dirty regs to backing store (must be first in insn group)
 327.279 +	.save @priunat,r17
 327.280 +	mov r17=ar.unat		// preserve caller's
 327.281 +	.body
 327.282 +#ifdef CONFIG_ITANIUM
 327.283 +	adds r2=16+128,sp
 327.284 +	adds r3=16+64,sp
 327.285 +	adds r14=SW(R4)+16,sp
 327.286 +	;;
 327.287 +	st8.spill [r14]=r4,16		// spill r4
 327.288 +	lfetch.fault.excl.nt1 [r3],128
 327.289 +	;;
 327.290 +	lfetch.fault.excl.nt1 [r2],128
 327.291 +	lfetch.fault.excl.nt1 [r3],128
 327.292 +	;;
 327.293 +	lfetch.fault.excl [r2]
 327.294 +	lfetch.fault.excl [r3]
 327.295 +	adds r15=SW(R5)+16,sp
 327.296 +#else
 327.297 +	add r2=16+3*128,sp
 327.298 +	add r3=16,sp
 327.299 +	add r14=SW(R4)+16,sp
 327.300 +	;;
 327.301 +	st8.spill [r14]=r4,SW(R6)-SW(R4)	// spill r4 and prefetch offset 0x1c0
 327.302 +	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x010
 327.303 +	;;
 327.304 +	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x090
 327.305 +	lfetch.fault.excl.nt1 [r2],128	//		prefetch offset 0x190
 327.306 +	;;
 327.307 +	lfetch.fault.excl.nt1 [r3]	//		prefetch offset 0x110
 327.308 +	lfetch.fault.excl.nt1 [r2]	//		prefetch offset 0x210
 327.309 +	adds r15=SW(R5)+16,sp
 327.310 +#endif
 327.311 +	;;
 327.312 +	st8.spill [r15]=r5,SW(R7)-SW(R5)	// spill r5
 327.313 +	mov.m ar.rsc=0			// put RSE in mode: enforced lazy, little endian, pl 0
 327.314 +	add r2=SW(F2)+16,sp		// r2 = &sw->f2
 327.315 +	;;
 327.316 +	st8.spill [r14]=r6,SW(B0)-SW(R6)	// spill r6
 327.317 +	mov.m r18=ar.fpsr		// preserve fpsr
 327.318 +	add r3=SW(F3)+16,sp		// r3 = &sw->f3
 327.319 +	;;
 327.320 +	stf.spill [r2]=f2,32
 327.321 +	mov.m r19=ar.rnat
 327.322 +	mov r21=b0
 327.323 +
 327.324 +	stf.spill [r3]=f3,32
 327.325 +	st8.spill [r15]=r7,SW(B2)-SW(R7)	// spill r7
 327.326 +	mov r22=b1
 327.327 +	;;
 327.328 +	// since we're done with the spills, read and save ar.unat:
 327.329 +	mov.m r29=ar.unat
 327.330 +	mov.m r20=ar.bspstore
 327.331 +	mov r23=b2
 327.332 +	stf.spill [r2]=f4,32
 327.333 +	stf.spill [r3]=f5,32
 327.334 +	mov r24=b3
 327.335 +	;;
 327.336 +	st8 [r14]=r21,SW(B1)-SW(B0)		// save b0
 327.337 +	st8 [r15]=r23,SW(B3)-SW(B2)		// save b2
 327.338 +	mov r25=b4
 327.339 +	mov r26=b5
 327.340 +	;;
 327.341 +	st8 [r14]=r22,SW(B4)-SW(B1)		// save b1
 327.342 +	st8 [r15]=r24,SW(AR_PFS)-SW(B3)		// save b3
 327.343 +	mov r21=ar.lc		// I-unit
 327.344 +	stf.spill [r2]=f12,32
 327.345 +	stf.spill [r3]=f13,32
 327.346 +	;;
 327.347 +	st8 [r14]=r25,SW(B5)-SW(B4)		// save b4
 327.348 +	st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS)	// save ar.pfs
 327.349 +	stf.spill [r2]=f14,32
 327.350 +	stf.spill [r3]=f15,32
 327.351 +	;;
 327.352 +	st8 [r14]=r26				// save b5
 327.353 +	st8 [r15]=r21				// save ar.lc
 327.354 +	stf.spill [r2]=f16,32
 327.355 +	stf.spill [r3]=f17,32
 327.356 +	;;
 327.357 +	stf.spill [r2]=f18,32
 327.358 +	stf.spill [r3]=f19,32
 327.359 +	;;
 327.360 +	stf.spill [r2]=f20,32
 327.361 +	stf.spill [r3]=f21,32
 327.362 +	;;
 327.363 +	stf.spill [r2]=f22,32
 327.364 +	stf.spill [r3]=f23,32
 327.365 +	;;
 327.366 +	stf.spill [r2]=f24,32
 327.367 +	stf.spill [r3]=f25,32
 327.368 +	;;
 327.369 +	stf.spill [r2]=f26,32
 327.370 +	stf.spill [r3]=f27,32
 327.371 +	;;
 327.372 +	stf.spill [r2]=f28,32
 327.373 +	stf.spill [r3]=f29,32
 327.374 +	;;
 327.375 +	stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)
 327.376 +	stf.spill [r3]=f31,SW(PR)-SW(F31)
 327.377 +	add r14=SW(CALLER_UNAT)+16,sp
 327.378 +	;;
 327.379 +	st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT)	// save ar.unat
 327.380 +	st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
 327.381 +	mov r21=pr
 327.382 +	;;
 327.383 +	st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
 327.384 +	st8 [r3]=r21				// save predicate registers
 327.385 +	;;
 327.386 +	st8 [r2]=r20				// save ar.bspstore
 327.387 +	st8 [r14]=r18				// save fpsr
 327.388 +	mov ar.rsc=3		// put RSE back into eager mode, pl 0
 327.389 +	br.cond.sptk.many b7
 327.390 +END(save_switch_stack)
 327.391 +
 327.392 +/*
 327.393 + * load_switch_stack:
 327.394 + *	- "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
 327.395 + *	- b7 holds address to return to
 327.396 + *	- must not touch r8-r11
 327.397 + */
 327.398 +#ifdef XEN
 327.399 +GLOBAL_ENTRY(load_switch_stack)
 327.400 +#else
 327.401 +ENTRY(load_switch_stack)
 327.402 +#endif
 327.403 +	.prologue
 327.404 +	.altrp b7
 327.405 +
 327.406 +	.body
 327.407 +	lfetch.fault.nt1 [sp]
 327.408 +	adds r2=SW(AR_BSPSTORE)+16,sp
 327.409 +	adds r3=SW(AR_UNAT)+16,sp
 327.410 +	mov ar.rsc=0						// put RSE into enforced lazy mode
 327.411 +	adds r14=SW(CALLER_UNAT)+16,sp
 327.412 +	adds r15=SW(AR_FPSR)+16,sp
 327.413 +	;;
 327.414 +	ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE))	// bspstore
 327.415 +	ld8 r29=[r3],(SW(B1)-SW(AR_UNAT))	// unat
 327.416 +	;;
 327.417 +	ld8 r21=[r2],16		// restore b0
 327.418 +	ld8 r22=[r3],16		// restore b1
 327.419 +	;;
 327.420 +	ld8 r23=[r2],16		// restore b2
 327.421 +	ld8 r24=[r3],16		// restore b3
 327.422 +	;;
 327.423 +	ld8 r25=[r2],16		// restore b4
 327.424 +	ld8 r26=[r3],16		// restore b5
 327.425 +	;;
 327.426 +	ld8 r16=[r2],(SW(PR)-SW(AR_PFS))	// ar.pfs
 327.427 +	ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC))	// ar.lc
 327.428 +	;;
 327.429 +	ld8 r28=[r2]		// restore pr
 327.430 +	ld8 r30=[r3]		// restore rnat
 327.431 +	;;
 327.432 +	ld8 r18=[r14],16	// restore caller's unat
 327.433 +	ld8 r19=[r15],24	// restore fpsr
 327.434 +	;;
 327.435 +	ldf.fill f2=[r14],32
 327.436 +	ldf.fill f3=[r15],32
 327.437 +	;;
 327.438 +	ldf.fill f4=[r14],32
 327.439 +	ldf.fill f5=[r15],32
 327.440 +	;;
 327.441 +	ldf.fill f12=[r14],32
 327.442 +	ldf.fill f13=[r15],32
 327.443 +	;;
 327.444 +	ldf.fill f14=[r14],32
 327.445 +	ldf.fill f15=[r15],32
 327.446 +	;;
 327.447 +	ldf.fill f16=[r14],32
 327.448 +	ldf.fill f17=[r15],32
 327.449 +	;;
 327.450 +	ldf.fill f18=[r14],32
 327.451 +	ldf.fill f19=[r15],32
 327.452 +	mov b0=r21
 327.453 +	;;
 327.454 +	ldf.fill f20=[r14],32
 327.455 +	ldf.fill f21=[r15],32
 327.456 +	mov b1=r22
 327.457 +	;;
 327.458 +	ldf.fill f22=[r14],32
 327.459 +	ldf.fill f23=[r15],32
 327.460 +	mov b2=r23
 327.461 +	;;
 327.462 +	mov ar.bspstore=r27
 327.463 +	mov ar.unat=r29		// establish unat holding the NaT bits for r4-r7
 327.464 +	mov b3=r24
 327.465 +	;;
 327.466 +	ldf.fill f24=[r14],32
 327.467 +	ldf.fill f25=[r15],32
 327.468 +	mov b4=r25
 327.469 +	;;
 327.470 +	ldf.fill f26=[r14],32
 327.471 +	ldf.fill f27=[r15],32
 327.472 +	mov b5=r26
 327.473 +	;;
 327.474 +	ldf.fill f28=[r14],32
 327.475 +	ldf.fill f29=[r15],32
 327.476 +	mov ar.pfs=r16
 327.477 +	;;
 327.478 +	ldf.fill f30=[r14],32
 327.479 +	ldf.fill f31=[r15],24
 327.480 +	mov ar.lc=r17
 327.481 +	;;
 327.482 +	ld8.fill r4=[r14],16
 327.483 +	ld8.fill r5=[r15],16
 327.484 +	mov pr=r28,-1
 327.485 +	;;
 327.486 +	ld8.fill r6=[r14],16
 327.487 +	ld8.fill r7=[r15],16
 327.488 +
 327.489 +	mov ar.unat=r18				// restore caller's unat
 327.490 +	mov ar.rnat=r30				// must restore after bspstore but before rsc!
 327.491 +	mov ar.fpsr=r19				// restore fpsr
 327.492 +	mov ar.rsc=3				// put RSE back into eager mode, pl 0
 327.493 +	br.cond.sptk.many b7
 327.494 +END(load_switch_stack)
 327.495 +
 327.496 +#ifndef XEN
 327.497 +GLOBAL_ENTRY(__ia64_syscall)
 327.498 +	.regstk 6,0,0,0
 327.499 +	mov r15=in5				// put syscall number in place
 327.500 +	break __BREAK_SYSCALL
 327.501 +	movl r2=errno
 327.502 +	cmp.eq p6,p7=-1,r10
 327.503 +	;;
 327.504 +(p6)	st4 [r2]=r8
 327.505 +(p6)	mov r8=-1
 327.506 +	br.ret.sptk.many rp
 327.507 +END(__ia64_syscall)
 327.508 +
 327.509 +GLOBAL_ENTRY(execve)
 327.510 +	mov r15=__NR_execve			// put syscall number in place
 327.511 +	break __BREAK_SYSCALL
 327.512 +	br.ret.sptk.many rp
 327.513 +END(execve)
 327.514 +
 327.515 +GLOBAL_ENTRY(clone)
 327.516 +	mov r15=__NR_clone			// put syscall number in place
 327.517 +	break __BREAK_SYSCALL
 327.518 +	br.ret.sptk.many rp
 327.519 +END(clone)
 327.520 +
 327.521 +	/*
 327.522 +	 * Invoke a system call, but do some tracing before and after the call.
 327.523 +	 * We MUST preserve the current register frame throughout this routine
 327.524 +	 * because some system calls (such as ia64_execve) directly
 327.525 +	 * manipulate ar.pfs.
 327.526 +	 */
 327.527 +GLOBAL_ENTRY(ia64_trace_syscall)
 327.528 +	PT_REGS_UNWIND_INFO(0)
 327.529 +	/*
 327.530 +	 * We need to preserve the scratch registers f6-f11 in case the system
 327.531 +	 * call is sigreturn.
 327.532 +	 */
 327.533 +	adds r16=PT(F6)+16,sp
 327.534 +	adds r17=PT(F7)+16,sp
 327.535 +	;;
 327.536 + 	stf.spill [r16]=f6,32
 327.537 + 	stf.spill [r17]=f7,32
 327.538 +	;;
 327.539 + 	stf.spill [r16]=f8,32
 327.540 + 	stf.spill [r17]=f9,32
 327.541 +	;;
 327.542 + 	stf.spill [r16]=f10
 327.543 + 	stf.spill [r17]=f11
 327.544 +	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
 327.545 +	adds r16=PT(F6)+16,sp
 327.546 +	adds r17=PT(F7)+16,sp
 327.547 +	;;
 327.548 +	ldf.fill f6=[r16],32
 327.549 +	ldf.fill f7=[r17],32
 327.550 +	;;
 327.551 +	ldf.fill f8=[r16],32
 327.552 +	ldf.fill f9=[r17],32
 327.553 +	;;
 327.554 +	ldf.fill f10=[r16]
 327.555 +	ldf.fill f11=[r17]
 327.556 +	// the syscall number may have changed, so re-load it and re-calculate the
 327.557 +	// syscall entry-point:
 327.558 +	adds r15=PT(R15)+16,sp			// r15 = &pt_regs.r15 (syscall #)
 327.559 +	;;
 327.560 +	ld8 r15=[r15]
 327.561 +	mov r3=NR_syscalls - 1
 327.562 +	;;
 327.563 +	adds r15=-1024,r15
 327.564 +	movl r16=sys_call_table
 327.565 +	;;
 327.566 +	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
 327.567 +	cmp.leu p6,p7=r15,r3
 327.568 +	;;
 327.569 +(p6)	ld8 r20=[r20]				// load address of syscall entry point
 327.570 +(p7)	movl r20=sys_ni_syscall
 327.571 +	;;
 327.572 +	mov b6=r20
 327.573 +	br.call.sptk.many rp=b6			// do the syscall
 327.574 +.strace_check_retval:
 327.575 +	cmp.lt p6,p0=r8,r0			// syscall failed?
 327.576 +	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
 327.577 +	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
 327.578 +	mov r10=0
 327.579 +(p6)	br.cond.sptk strace_error		// syscall failed ->
 327.580 +	;;					// avoid RAW on r10
 327.581 +.strace_save_retval:
 327.582 +.mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
 327.583 +.mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
 327.584 +	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
 327.585 +.ret3:	br.cond.sptk .work_pending_syscall_end
 327.586 +
 327.587 +strace_error:
 327.588 +	ld8 r3=[r2]				// load pt_regs.r8
 327.589 +	sub r9=0,r8				// negate return value to get errno value
 327.590 +	;;
 327.591 +	cmp.ne p6,p0=r3,r0			// is pt_regs.r8!=0?
 327.592 +	adds r3=16,r2				// r3=&pt_regs.r10
 327.593 +	;;
 327.594 +(p6)	mov r10=-1
 327.595 +(p6)	mov r8=r9
 327.596 +	br.cond.sptk .strace_save_retval
 327.597 +END(ia64_trace_syscall)
 327.598 +
 327.599 +	/*
 327.600 +	 * When traced and returning from sigreturn, we invoke syscall_trace but then
 327.601 +	 * go straight to ia64_leave_kernel rather than ia64_leave_syscall.
 327.602 +	 */
 327.603 +GLOBAL_ENTRY(ia64_strace_leave_kernel)
 327.604 +	PT_REGS_UNWIND_INFO(0)
 327.605 +{	/*
 327.606 +	 * Some versions of gas generate bad unwind info if the first instruction of a
 327.607 +	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
 327.608 +	 */
 327.609 +	nop.m 0
 327.610 +	nop.i 0
 327.611 +	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
 327.612 +}
 327.613 +.ret4:	br.cond.sptk ia64_leave_kernel
 327.614 +END(ia64_strace_leave_kernel)
 327.615 +#endif
 327.616 +
 327.617 +GLOBAL_ENTRY(ia64_ret_from_clone)
 327.618 +	PT_REGS_UNWIND_INFO(0)
 327.619 +{	/*
 327.620 +	 * Some versions of gas generate bad unwind info if the first instruction of a
 327.621 +	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
 327.622 +	 */
 327.623 +	nop.m 0
 327.624 +	nop.i 0
 327.625 +	/*
 327.626 +	 * We need to call schedule_tail() to complete the scheduling process.
 327.627 +	 * Called by ia64_switch_to() after do_fork()->copy_thread().  r8 contains the
 327.628 +	 * address of the previously executing task.
 327.629 +	 */
 327.630 +	br.call.sptk.many rp=ia64_invoke_schedule_tail
 327.631 +}
 327.632 +#ifdef XEN
 327.633 +	// new domains are cloned but not exec'ed so switch to user mode here
 327.634 +	cmp.ne pKStk,pUStk=r0,r0
 327.635 +#ifdef CONFIG_VTI
 327.636 +	br.cond.spnt ia64_leave_hypervisor
 327.637 +#else // CONFIG_VTI
 327.638 +	br.cond.spnt ia64_leave_kernel
 327.639 +#endif // CONFIG_VTI
 327.640 +#else
 327.641 +.ret8:
 327.642 +	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
 327.643 +	;;
 327.644 +	ld4 r2=[r2]
 327.645 +	;;
 327.646 +	mov r8=0
 327.647 +	and r2=_TIF_SYSCALL_TRACEAUDIT,r2
 327.648 +	;;
 327.649 +	cmp.ne p6,p0=r2,r0
 327.650 +(p6)	br.cond.spnt .strace_check_retval
 327.651 +#endif
 327.652 +	;;					// added stop bits to prevent r8 dependency
 327.653 +END(ia64_ret_from_clone)
 327.654 +	// fall through
 327.655 +GLOBAL_ENTRY(ia64_ret_from_syscall)
 327.656 +	PT_REGS_UNWIND_INFO(0)
 327.657 +	cmp.ge p6,p7=r8,r0			// syscall executed successfully?
 327.658 +	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
 327.659 +	mov r10=r0				// clear error indication in r10
 327.660 +(p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
 327.661 +END(ia64_ret_from_syscall)
 327.662 +	// fall through
 327.663 +/*
 327.664 + * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
 327.665 + *	need to switch to bank 0 and doesn't restore the scratch registers.
 327.666 + *	To avoid leaking kernel bits, the scratch registers are set to
 327.667 + *	the following known-to-be-safe values:
 327.668 + *
 327.669 + *		  r1: restored (global pointer)
 327.670 + *		  r2: cleared
 327.671 + *		  r3: 1 (when returning to user-level)
 327.672 + *	      r8-r11: restored (syscall return value(s))
 327.673 + *		 r12: restored (user-level stack pointer)
 327.674 + *		 r13: restored (user-level thread pointer)
 327.675 + *		 r14: cleared
 327.676 + *		 r15: restored (syscall #)
 327.677 + *	     r16-r17: cleared
 327.678 + *		 r18: user-level b6
 327.679 + *		 r19: cleared
 327.680 + *		 r20: user-level ar.fpsr
 327.681 + *		 r21: user-level b0
 327.682 + *		 r22: cleared
 327.683 + *		 r23: user-level ar.bspstore
 327.684 + *		 r24: user-level ar.rnat
 327.685 + *		 r25: user-level ar.unat
 327.686 + *		 r26: user-level ar.pfs
 327.687 + *		 r27: user-level ar.rsc
 327.688 + *		 r28: user-level ip
 327.689 + *		 r29: user-level psr
 327.690 + *		 r30: user-level cfm
 327.691 + *		 r31: user-level pr
 327.692 + *	      f6-f11: cleared
 327.693 + *		  pr: restored (user-level pr)
 327.694 + *		  b0: restored (user-level rp)
 327.695 + *	          b6: restored
 327.696 + *		  b7: cleared
 327.697 + *	     ar.unat: restored (user-level ar.unat)
 327.698 + *	      ar.pfs: restored (user-level ar.pfs)
 327.699 + *	      ar.rsc: restored (user-level ar.rsc)
 327.700 + *	     ar.rnat: restored (user-level ar.rnat)
 327.701 + *	 ar.bspstore: restored (user-level ar.bspstore)
 327.702 + *	     ar.fpsr: restored (user-level ar.fpsr)
 327.703 + *	      ar.ccv: cleared
 327.704 + *	      ar.csd: cleared
 327.705 + *	      ar.ssd: cleared
 327.706 + */
 327.707 +ENTRY(ia64_leave_syscall)
 327.708 +	PT_REGS_UNWIND_INFO(0)
 327.709 +	/*
 327.710 +	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
 327.711 +	 * user- or fsys-mode, hence we disable interrupts early on.
 327.712 +	 *
 327.713 +	 * p6 controls whether current_thread_info()->flags needs to be checked for
 327.714 +	 * extra work.  We always check for extra work when returning to user-level.
 327.715 +	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
 327.716 +	 * is 0.  After extra work processing has been completed, execution
 327.717 +	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
 327.718 +	 * needs to be redone.
 327.719 +	 */
 327.720 +#ifdef CONFIG_PREEMPT
 327.721 +	rsm psr.i				// disable interrupts
 327.722 +	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
 327.723 +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
 327.724 +	;;
 327.725 +	.pred.rel.mutex pUStk,pKStk
 327.726 +(pKStk) ld4 r21=[r20]			// r21 <- preempt_count
 327.727 +(pUStk)	mov r21=0			// r21 <- 0
 327.728 +	;;
 327.729 +	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
 327.730 +#else /* !CONFIG_PREEMPT */
 327.731 +(pUStk)	rsm psr.i
 327.732 +	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
 327.733 +(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 327.734 +#endif
 327.735 +.work_processed_syscall:
 327.736 +	adds r2=PT(LOADRS)+16,r12
 327.737 +	adds r3=PT(AR_BSPSTORE)+16,r12
 327.738 +#ifdef XEN
 327.739 +	;;
 327.740 +#else
 327.741 +	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
 327.742 +	;;
 327.743 +(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
 327.744 +#endif
 327.745 +	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
 327.746 +	mov b7=r0		// clear b7
 327.747 +	;;
 327.748 +	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
 327.749 +	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
 327.750 +#ifndef XEN
 327.751 +(p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
 327.752 +#endif
 327.753 +	;;
 327.754 +	mov r16=ar.bsp				// M2  get existing backing store pointer
 327.755 +#ifndef XEN
 327.756 +(p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
 327.757 +(p6)	br.cond.spnt .work_pending_syscall
 327.758 +#endif
 327.759 +	;;
 327.760 +	// start restoring the state saved on the kernel stack (struct pt_regs):
 327.761 +	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
 327.762 +	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
 327.763 +	mov f6=f0		// clear f6
 327.764 +	;;
 327.765 +	invala			// M0|1 invalidate ALAT
 327.766 +	rsm psr.i | psr.ic	// M2 initiate turning off of interrupt and interruption collection
 327.767 +	mov f9=f0		// clear f9
 327.768 +
 327.769 +	ld8 r29=[r2],16		// load cr.ipsr
 327.770 +	ld8 r28=[r3],16			// load cr.iip
 327.771 +	mov f8=f0		// clear f8
 327.772 +	;;
 327.773 +	ld8 r30=[r2],16		// M0|1 load cr.ifs
 327.774 +	mov.m ar.ssd=r0		// M2 clear ar.ssd
 327.775 +	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
 327.776 +	;;
 327.777 +	ld8 r25=[r3],16		// M0|1 load ar.unat
 327.778 +	mov.m ar.csd=r0		// M2 clear ar.csd
 327.779 +	mov r22=r0		// clear r22
 327.780 +	;;
 327.781 +	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
 327.782 +(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
 327.783 +	mov f10=f0		// clear f10
 327.784 +	;;
 327.785 +	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
 327.786 +	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// load ar.rsc
 327.787 +	mov f11=f0		// clear f11
 327.788 +	;;
 327.789 +	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// load ar.rnat (may be garbage)
 327.790 +	ld8 r31=[r3],PT(R1)-PT(PR)		// load predicates
 327.791 +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 327.792 +	;;
 327.793 +	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// load ar.fpsr
 327.794 +	ld8.fill r1=[r3],16	// load r1
 327.795 +(pUStk) mov r17=1
 327.796 +	;;
 327.797 +	srlz.d			// M0  ensure interruption collection is off
 327.798 +	ld8.fill r13=[r3],16
 327.799 +	mov f7=f0		// clear f7
 327.800 +	;;
 327.801 +	ld8.fill r12=[r2]	// restore r12 (sp)
 327.802 +	ld8.fill r15=[r3]	// restore r15
 327.803 +#ifdef XEN
 327.804 +	movl r3=THIS_CPU(ia64_phys_stacked_size_p8)
 327.805 +#else
 327.806 +	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
 327.807 +#endif
 327.808 +	;;
 327.809 +(pUStk)	ld4 r3=[r3]		// r3 = cpu_data->phys_stacked_size_p8
 327.810 +(pUStk) st1 [r14]=r17
 327.811 +	mov b6=r18		// I0  restore b6
 327.812 +	;;
 327.813 +	mov r14=r0		// clear r14
 327.814 +	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
 327.815 +(pKStk) br.cond.dpnt.many skip_rbs_switch
 327.816 +
 327.817 +	mov.m ar.ccv=r0		// clear ar.ccv
 327.818 +(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
 327.819 +	br.cond.sptk.many rbs_switch
 327.820 +END(ia64_leave_syscall)
 327.821 +
 327.822 +#ifdef CONFIG_IA32_SUPPORT
 327.823 +GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
 327.824 +	PT_REGS_UNWIND_INFO(0)
 327.825 +	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
 327.826 +	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
 327.827 +	;;
 327.828 +	.mem.offset 0,0
 327.829 +	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
 327.830 +	.mem.offset 8,0
 327.831 +	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
 327.832 +END(ia64_ret_from_ia32_execve)
 327.833 +	// fall through
 327.834 +#endif /* CONFIG_IA32_SUPPORT */
 327.835 +GLOBAL_ENTRY(ia64_leave_kernel)
 327.836 +	PT_REGS_UNWIND_INFO(0)
 327.837 +	/*
 327.838 +	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
 327.839 +	 * user- or fsys-mode, hence we disable interrupts early on.
 327.840 +	 *
 327.841 +	 * p6 controls whether current_thread_info()->flags needs to be checked for
 327.842 +	 * extra work.  We always check for extra work when returning to user-level.
 327.843 +	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
 327.844 +	 * is 0.  After extra work processing has been completed, execution
 327.845 +	 * resumes at .work_processed_kernel with p6 set to 1 if the extra-work-check
 327.846 +	 * needs to be redone.
 327.847 +	 */
 327.848 +#ifdef CONFIG_PREEMPT
 327.849 +	rsm psr.i				// disable interrupts
 327.850 +	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
 327.851 +(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
 327.852 +	;;
 327.853 +	.pred.rel.mutex pUStk,pKStk
 327.854 +(pKStk)	ld4 r21=[r20]			// r21 <- preempt_count
 327.855 +(pUStk)	mov r21=0			// r21 <- 0
 327.856 +	;;
 327.857 +	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
 327.858 +#else
 327.859 +(pUStk)	rsm psr.i
 327.860 +	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
 327.861 +(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 327.862 +#endif
 327.863 +.work_processed_kernel:
 327.864 +#ifdef XEN
 327.865 +	alloc loc0=ar.pfs,0,1,1,0
 327.866 +	adds out0=16,r12
 327.867 +	;;
 327.868 +(p6)	br.call.sptk.many b0=deliver_pending_interrupt
 327.869 +	mov ar.pfs=loc0
 327.870 +	mov r31=r0
 327.871 +#else
 327.872 +	adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
 327.873 +	;;
 327.874 +(p6)	ld4 r31=[r17]				// load current_thread_info()->flags
 327.875 +#endif
 327.876 +	adds r21=PT(PR)+16,r12
 327.877 +	;;
 327.878 +
 327.879 +	lfetch [r21],PT(CR_IPSR)-PT(PR)
 327.880 +	adds r2=PT(B6)+16,r12
 327.881 +	adds r3=PT(R16)+16,r12
 327.882 +	;;
 327.883 +	lfetch [r21]
 327.884 +	ld8 r28=[r2],8		// load b6
 327.885 +	adds r29=PT(R24)+16,r12
 327.886 +
 327.887 +	ld8.fill r16=[r3]
 327.888 +	adds r30=PT(AR_CCV)+16,r12
 327.889 +(p6)	and r19=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
 327.890 +	;;
 327.891 +	adds r3=PT(AR_CSD)-PT(R16),r3
 327.892 +	ld8.fill r24=[r29]
 327.893 +	ld8 r15=[r30]		// load ar.ccv
 327.894 +(p6)	cmp4.ne.unc p6,p0=r19, r0		// any special work pending?
 327.895 +	;;
 327.896 +	ld8 r29=[r2],16		// load b7
 327.897 +	ld8 r30=[r3],16		// load ar.csd
 327.898 +#ifndef XEN
 327.899 +(p6)	br.cond.spnt .work_pending
 327.900 +#endif
 327.901 +	;;
 327.902 +	ld8 r31=[r2],16		// load ar.ssd
 327.903 +	ld8.fill r8=[r3],16
 327.904 +	;;
 327.905 +	ld8.fill r9=[r2],16
 327.906 +	ld8.fill r10=[r3],PT(R17)-PT(R10)
 327.907 +	;;
 327.908 +	ld8.fill r11=[r2],PT(R18)-PT(R11)
 327.909 +	ld8.fill r17=[r3],16
 327.910 +	;;
 327.911 +	ld8.fill r18=[r2],16
 327.912 +	ld8.fill r19=[r3],16
 327.913 +	;;
 327.914 +	ld8.fill r20=[r2],16
 327.915 +	ld8.fill r21=[r3],16
 327.916 +	mov ar.csd=r30
 327.917 +	mov ar.ssd=r31
 327.918 +	;;
 327.919 +	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
 327.920 +	invala			// invalidate ALAT
 327.921 +	;;
 327.922 +	ld8.fill r22=[r2],24
 327.923 +	ld8.fill r23=[r3],24
 327.924 +	mov b6=r28
 327.925 +	;;
 327.926 +	ld8.fill r25=[r2],16
 327.927 +	ld8.fill r26=[r3],16
 327.928 +	mov b7=r29
 327.929 +	;;
 327.930 +	ld8.fill r27=[r2],16
 327.931 +	ld8.fill r28=[r3],16
 327.932 +	;;
 327.933 +	ld8.fill r29=[r2],16
 327.934 +	ld8.fill r30=[r3],24
 327.935 +	;;
 327.936 +	ld8.fill r31=[r2],PT(F9)-PT(R31)
 327.937 +	adds r3=PT(F10)-PT(F6),r3
 327.938 +	;;
 327.939 +	ldf.fill f9=[r2],PT(F6)-PT(F9)
 327.940 +	ldf.fill f10=[r3],PT(F8)-PT(F10)
 327.941 +	;;
 327.942 +	ldf.fill f6=[r2],PT(F7)-PT(F6)
 327.943 +	;;
 327.944 +	ldf.fill f7=[r2],PT(F11)-PT(F7)
 327.945 +	ldf.fill f8=[r3],32
 327.946 +	;;
 327.947 +	srlz.i			// ensure interruption collection is off
 327.948 +	mov ar.ccv=r15
 327.949 +	;;
 327.950 +	ldf.fill f11=[r2]
 327.951 +	bsw.0			// switch back to bank 0 (no stop bit required beforehand...)
 327.952 +	;;
 327.953 +(pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
 327.954 +	adds r16=PT(CR_IPSR)+16,r12
 327.955 +	adds r17=PT(CR_IIP)+16,r12
 327.956 +
 327.957 +(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
 327.958 +	nop.i 0
 327.959 +	nop.i 0
 327.960 +	;;
 327.961 +	ld8 r29=[r16],16	// load cr.ipsr
 327.962 +	ld8 r28=[r17],16	// load cr.iip
 327.963 +	;;
 327.964 +	ld8 r30=[r16],16	// load cr.ifs
 327.965 +	ld8 r25=[r17],16	// load ar.unat
 327.966 +	;;
 327.967 +	ld8 r26=[r16],16	// load ar.pfs
 327.968 +	ld8 r27=[r17],16	// load ar.rsc
 327.969 +	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
 327.970 +	;;
 327.971 +	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
 327.972 +	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
 327.973 +	;;
 327.974 +	ld8 r31=[r16],16	// load predicates
 327.975 +	ld8 r21=[r17],16	// load b0
 327.976 +	;;
 327.977 +	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
 327.978 +	ld8.fill r1=[r17],16	// load r1
 327.979 +	;;
 327.980 +	ld8.fill r12=[r16],16
 327.981 +	ld8.fill r13=[r17],16
 327.982 +(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
 327.983 +	;;
 327.984 +	ld8 r20=[r16],16	// ar.fpsr
 327.985 +	ld8.fill r15=[r17],16
 327.986 +	;;
 327.987 +	ld8.fill r14=[r16],16
 327.988 +	ld8.fill r2=[r17]
 327.989 +(pUStk)	mov r17=1
 327.990 +	;;
 327.991 +	ld8.fill r3=[r16]
 327.992 +(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
 327.993 +	shr.u r18=r19,16	// get byte size of existing "dirty" partition
 327.994 +	;;
 327.995 +	mov r16=ar.bsp		// get existing backing store pointer
 327.996 +#ifdef XEN
 327.997 +	movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
 327.998 +#else
 327.999 +	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
327.1000 +#endif
327.1001 +	;;
327.1002 +	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
327.1003 +(pKStk)	br.cond.dpnt skip_rbs_switch
327.1004 +
327.1005 +	/*
327.1006 +	 * Restore user backing store.
327.1007 +	 *
327.1008 +	 * NOTE: alloc, loadrs, and cover can't be predicated.
327.1009 +	 */
327.1010 +(pNonSys) br.cond.dpnt dont_preserve_current_frame
327.1011 +
327.1012 +rbs_switch:
327.1013 +	cover				// add current frame into dirty partition and set cr.ifs
327.1014 +	;;
327.1015 +	mov r19=ar.bsp			// get new backing store pointer
327.1016 +	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
327.1017 +	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
327.1018 +	;;
327.1019 +	sub r19=r19,r16			// calculate total byte size of dirty partition
327.1020 +	add r18=64,r18			// don't force in0-in7 into memory...
327.1021 +	;;
327.1022 +	shl r19=r19,16			// shift size of dirty partition into loadrs position
327.1023 +	;;
327.1024 +dont_preserve_current_frame:
327.1025 +	/*
327.1026 +	 * To prevent leaking bits between the kernel and user-space,
327.1027 +	 * we must clear the stacked registers in the "invalid" partition here.
327.1028 +	 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
327.1029 +	 * 5 registers/cycle on McKinley).
327.1030 +	 */
327.1031 +#	define pRecurse	p6
327.1032 +#	define pReturn	p7
327.1033 +#ifdef CONFIG_ITANIUM
327.1034 +#	define Nregs	10
327.1035 +#else
327.1036 +#	define Nregs	14
327.1037 +#endif
327.1038 +	alloc loc0=ar.pfs,2,Nregs-2,2,0
327.1039 +	shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
327.1040 +	sub r17=r17,r18			// r17 = (physStackedSize + 8) - dirtySize
327.1041 +	;;
327.1042 +	mov ar.rsc=r19			// load ar.rsc to be used for "loadrs"
327.1043 +	shladd in0=loc1,3,r17
327.1044 +	mov in1=0
327.1045 +	;;
327.1046 +	TEXT_ALIGN(32)
327.1047 +rse_clear_invalid:
327.1048 +#ifdef CONFIG_ITANIUM
327.1049 +	// cycle 0
327.1050 + { .mii
327.1051 +	alloc loc0=ar.pfs,2,Nregs-2,2,0
327.1052 +	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
327.1053 +	add out0=-Nregs*8,in0
327.1054 +}{ .mfb
327.1055 +	add out1=1,in1			// increment recursion count
327.1056 +	nop.f 0
327.1057 +	nop.b 0				// can't do br.call here because of alloc (WAW on CFM)
327.1058 +	;;
327.1059 +}{ .mfi	// cycle 1
327.1060 +	mov loc1=0
327.1061 +	nop.f 0
327.1062 +	mov loc2=0
327.1063 +}{ .mib
327.1064 +	mov loc3=0
327.1065 +	mov loc4=0
327.1066 +(pRecurse) br.call.sptk.many b0=rse_clear_invalid
327.1067 +
327.1068 +}{ .mfi	// cycle 2
327.1069 +	mov loc5=0
327.1070 +	nop.f 0
327.1071 +	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
327.1072 +}{ .mib
327.1073 +	mov loc6=0
327.1074 +	mov loc7=0
327.1075 +(pReturn) br.ret.sptk.many b0
327.1076 +}
327.1077 +#else /* !CONFIG_ITANIUM */
327.1078 +	alloc loc0=ar.pfs,2,Nregs-2,2,0
327.1079 +	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
327.1080 +	add out0=-Nregs*8,in0
327.1081 +	add out1=1,in1			// increment recursion count
327.1082 +	mov loc1=0
327.1083 +	mov loc2=0
327.1084 +	;;
327.1085 +	mov loc3=0
327.1086 +	mov loc4=0
327.1087 +	mov loc5=0
327.1088 +	mov loc6=0
327.1089 +	mov loc7=0
327.1090 +(pRecurse) br.call.sptk.few b0=rse_clear_invalid
327.1091 +	;;
327.1092 +	mov loc8=0
327.1093 +	mov loc9=0
327.1094 +	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
327.1095 +	mov loc10=0
327.1096 +	mov loc11=0
327.1097 +(pReturn) br.ret.sptk.many b0
327.1098 +#endif /* !CONFIG_ITANIUM */
327.1099 +#	undef pRecurse
327.1100 +#	undef pReturn
327.1101 +	;;
327.1102 +	alloc r17=ar.pfs,0,0,0,0	// drop current register frame
327.1103 +	;;
327.1104 +	loadrs
327.1105 +	;;
327.1106 +skip_rbs_switch:
327.1107 +	mov ar.unat=r25		// M2
327.1108 +(pKStk)	extr.u r22=r22,21,1	// I0 extract current value of psr.pp from r22
327.1109 +(pLvSys)mov r19=r0		// A  clear r19 for leave_syscall, no-op otherwise
327.1110 +	;;
327.1111 +(pUStk)	mov ar.bspstore=r23	// M2
327.1112 +(pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
327.1113 +(pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
327.1114 +	;;
327.1115 +	mov cr.ipsr=r29		// M2
327.1116 +	mov ar.pfs=r26		// I0
327.1117 +(pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
327.1118 +
327.1119 +(p9)	mov cr.ifs=r30		// M2
327.1120 +	mov b0=r21		// I0
327.1121 +(pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
327.1122 +
327.1123 +	mov ar.fpsr=r20		// M2
327.1124 +	mov cr.iip=r28		// M2
327.1125 +	nop 0
327.1126 +	;;
327.1127 +(pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
327.1128 +	nop 0
327.1129 +(pLvSys)mov r2=r0
327.1130 +
327.1131 +	mov ar.rsc=r27		// M2
327.1132 +	mov pr=r31,-1		// I0
327.1133 +	rfi			// B
327.1134 +
327.1135 +#ifndef XEN
327.1136 +	/*
327.1137 +	 * On entry:
327.1138 +	 *	r20 = &current->thread_info->preempt_count (if CONFIG_PREEMPT)
327.1139 +	 *	r31 = current->thread_info->flags
327.1140 +	 * On exit:
327.1141 +	 *	p6 = TRUE if work-pending-check needs to be redone
327.1142 +	 */
327.1143 +.work_pending_syscall:
327.1144 +	add r2=-8,r2
327.1145 +	add r3=-8,r3
327.1146 +	;;
327.1147 +	st8 [r2]=r8
327.1148 +	st8 [r3]=r10
327.1149 +.work_pending:
327.1150 +	tbit.nz p6,p0=r31,TIF_SIGDELAYED		// signal delayed from  MCA/INIT/NMI/PMI context?
327.1151 +(p6)	br.cond.sptk.few .sigdelayed
327.1152 +	;;
327.1153 +	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
327.1154 +(p6)	br.cond.sptk.few .notify
327.1155 +#ifdef CONFIG_PREEMPT
327.1156 +(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
327.1157 +	;;
327.1158 +(pKStk) st4 [r20]=r21
327.1159 +	ssm psr.i		// enable interrupts
327.1160 +#endif
327.1161 +	br.call.spnt.many rp=schedule
327.1162 +.ret9:	cmp.eq p6,p0=r0,r0				// p6 <- 1
327.1163 +	rsm psr.i		// disable interrupts
327.1164 +	;;
327.1165 +#ifdef CONFIG_PREEMPT
327.1166 +(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
327.1167 +	;;
327.1168 +(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
327.1169 +#endif
327.1170 +(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
327.1171 +	br.cond.sptk.many .work_processed_kernel	// re-check
327.1172 +
327.1173 +.notify:
327.1174 +(pUStk)	br.call.spnt.many rp=notify_resume_user
327.1175 +.ret10:	cmp.ne p6,p0=r0,r0				// p6 <- 0
327.1176 +(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
327.1177 +	br.cond.sptk.many .work_processed_kernel	// don't re-check
327.1178 +
327.1179 +// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
327.1180 +// it could not be delivered.  Deliver it now.  The signal might be for us and
327.1181 +// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
327.1182 +// signal.
327.1183 +
327.1184 +.sigdelayed:
327.1185 +	br.call.sptk.many rp=do_sigdelayed
327.1186 +	cmp.eq p6,p0=r0,r0				// p6 <- 1, always re-check
327.1187 +(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
327.1188 +	br.cond.sptk.many .work_processed_kernel	// re-check
327.1189 +
327.1190 +.work_pending_syscall_end:
327.1191 +	adds r2=PT(R8)+16,r12
327.1192 +	adds r3=PT(R10)+16,r12
327.1193 +	;;
327.1194 +	ld8 r8=[r2]
327.1195 +	ld8 r10=[r3]
327.1196 +	br.cond.sptk.many .work_processed_syscall	// re-check
327.1197 +#endif
327.1198 +
327.1199 +END(ia64_leave_kernel)
327.1200 +
327.1201 +ENTRY(handle_syscall_error)
327.1202 +	/*
327.1203 +	 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
327.1204 +	 * lead us to mistake a negative return value for a failed syscall.  Such syscalls
327.1205 +	 * must deposit a non-zero value in pt_regs.r8 to indicate an error.  If
327.1206 +	 * pt_regs.r8 is zero, we assume that the call completed successfully.
327.1207 +	 */
327.1208 +	PT_REGS_UNWIND_INFO(0)
327.1209 +	ld8 r3=[r2]		// load pt_regs.r8
327.1210 +	;;
327.1211 +	cmp.eq p6,p7=r3,r0	// is pt_regs.r8==0?
327.1212 +	;;
327.1213 +(p7)	mov r10=-1
327.1214 +(p7)	sub r8=0,r8		// negate return value to get errno
327.1215 +	br.cond.sptk ia64_leave_syscall
327.1216 +END(handle_syscall_error)
327.1217 +
327.1218 +	/*
327.1219 +	 * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
327.1220 +	 * in case a system call gets restarted.
327.1221 +	 */
327.1222 +GLOBAL_ENTRY(ia64_invoke_schedule_tail)
327.1223 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
327.1224 +	alloc loc1=ar.pfs,8,2,1,0
327.1225 +	mov loc0=rp
327.1226 +	mov out0=r8				// Address of previous task
327.1227 +	;;
327.1228 +	br.call.sptk.many rp=schedule_tail
327.1229 +.ret11:	mov ar.pfs=loc1
327.1230 +	mov rp=loc0
327.1231 +	br.ret.sptk.many rp
327.1232 +END(ia64_invoke_schedule_tail)
327.1233 +
327.1234 +#ifndef XEN
327.1235 +	/*
327.1236 +	 * Setup stack and call do_notify_resume_user().  Note that pSys and pNonSys need to
327.1237 +	 * be set up by the caller.  We declare 8 input registers so the system call
327.1238 +	 * args get preserved, in case we need to restart a system call.
327.1239 +	 */
327.1240 +ENTRY(notify_resume_user)
327.1241 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
327.1242 +	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
327.1243 +	mov r9=ar.unat
327.1244 +	mov loc0=rp				// save return address
327.1245 +	mov out0=0				// there is no "oldset"
327.1246 +	adds out1=8,sp				// out1=&sigscratch->ar_pfs
327.1247 +(pSys)	mov out2=1				// out2==1 => we're in a syscall
327.1248 +	;;
327.1249 +(pNonSys) mov out2=0				// out2==0 => not a syscall
327.1250 +	.fframe 16
327.1251 +	.spillpsp ar.unat, 16			// (note that offset is relative to psp+0x10!)
327.1252 +	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
327.1253 +	st8 [out1]=loc1,-8			// save ar.pfs, out1=&sigscratch
327.1254 +	.body
327.1255 +	br.call.sptk.many rp=do_notify_resume_user
327.1256 +.ret15:	.restore sp
327.1257 +	adds sp=16,sp				// pop scratch stack space
327.1258 +	;;
327.1259 +	ld8 r9=[sp]				// load new unat from sigscratch->scratch_unat
327.1260 +	mov rp=loc0
327.1261 +	;;
327.1262 +	mov ar.unat=r9
327.1263 +	mov ar.pfs=loc1
327.1264 +	br.ret.sptk.many rp
327.1265 +END(notify_resume_user)
327.1266 +
327.1267 +GLOBAL_ENTRY(sys_rt_sigsuspend)
327.1268 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
327.1269 +	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
327.1270 +	mov r9=ar.unat
327.1271 +	mov loc0=rp				// save return address
327.1272 +	mov out0=in0				// mask
327.1273 +	mov out1=in1				// sigsetsize
327.1274 +	adds out2=8,sp				// out2=&sigscratch->ar_pfs
327.1275 +	;;
327.1276 +	.fframe 16
327.1277 +	.spillpsp ar.unat, 16			// (note that offset is relative to psp+0x10!)
327.1278 +	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
327.1279 +	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
327.1280 +	.body
327.1281 +	br.call.sptk.many rp=ia64_rt_sigsuspend
327.1282 +.ret17:	.restore sp
327.1283 +	adds sp=16,sp				// pop scratch stack space
327.1284 +	;;
327.1285 +	ld8 r9=[sp]				// load new unat from sw->caller_unat
327.1286 +	mov rp=loc0
327.1287 +	;;
327.1288 +	mov ar.unat=r9
327.1289 +	mov ar.pfs=loc1
327.1290 +	br.ret.sptk.many rp
327.1291 +END(sys_rt_sigsuspend)
327.1292 +
327.1293 +ENTRY(sys_rt_sigreturn)
327.1294 +	PT_REGS_UNWIND_INFO(0)
327.1295 +	/*
327.1296 +	 * Allocate 8 input registers since ptrace() may clobber them
327.1297 +	 */
327.1298 +	alloc r2=ar.pfs,8,0,1,0
327.1299 +	.prologue
327.1300 +	PT_REGS_SAVES(16)
327.1301 +	adds sp=-16,sp
327.1302 +	.body
327.1303 +	cmp.eq pNonSys,pSys=r0,r0		// sigreturn isn't a normal syscall...
327.1304 +	;;
327.1305 +	/*
327.1306 +	 * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
327.1307 +	 * syscall-entry path does not save them we save them here instead.  Note: we
327.1308 +	 * don't need to save any other registers that are not saved by the streamlined
327.1309 +	 * syscall path, because restore_sigcontext() restores them.
327.1310 +	 */
327.1311 +	adds r16=PT(F6)+32,sp
327.1312 +	adds r17=PT(F7)+32,sp
327.1313 +	;;
327.1314 + 	stf.spill [r16]=f6,32
327.1315 + 	stf.spill [r17]=f7,32
327.1316 +	;;
327.1317 + 	stf.spill [r16]=f8,32
327.1318 + 	stf.spill [r17]=f9,32
327.1319 +	;;
327.1320 + 	stf.spill [r16]=f10
327.1321 + 	stf.spill [r17]=f11
327.1322 +	adds out0=16,sp				// out0 = &sigscratch
327.1323 +	br.call.sptk.many rp=ia64_rt_sigreturn
327.1324 +.ret19:	.restore sp 0
327.1325 +	adds sp=16,sp
327.1326 +	;;
327.1327 +	ld8 r9=[sp]				// load new ar.unat
327.1328 +	mov.sptk b7=r8,ia64_leave_kernel
327.1329 +	;;
327.1330 +	mov ar.unat=r9
327.1331 +	br.many b7
327.1332 +END(sys_rt_sigreturn)
327.1333 +#endif
327.1334 +
327.1335 +GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
327.1336 +	.prologue
327.1337 +	/*
327.1338 +	 * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
327.1339 +	 */
327.1340 +	mov r16=r0
327.1341 +	DO_SAVE_SWITCH_STACK
327.1342 +	br.call.sptk.many rp=ia64_handle_unaligned	// stack frame setup in ivt
327.1343 +.ret21:	.body
327.1344 +	DO_LOAD_SWITCH_STACK
327.1345 +	br.cond.sptk.many rp				// goes to ia64_leave_kernel
327.1346 +END(ia64_prepare_handle_unaligned)
327.1347 +
327.1348 +#ifndef XEN
327.1349 +	//
327.1350 +	// unw_init_running(void (*callback)(info, arg), void *arg)
327.1351 +	//
327.1352 +#	define EXTRA_FRAME_SIZE	((UNW_FRAME_INFO_SIZE+15)&~15)
327.1353 +
327.1354 +GLOBAL_ENTRY(unw_init_running)
327.1355 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
327.1356 +	alloc loc1=ar.pfs,2,3,3,0
327.1357 +	;;
327.1358 +	ld8 loc2=[in0],8
327.1359 +	mov loc0=rp
327.1360 +	mov r16=loc1
327.1361 +	DO_SAVE_SWITCH_STACK
327.1362 +	.body
327.1363 +
327.1364 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
327.1365 +	.fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
327.1366 +	SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
327.1367 +	adds sp=-EXTRA_FRAME_SIZE,sp
327.1368 +	.body
327.1369 +	;;
327.1370 +	adds out0=16,sp				// &info
327.1371 +	mov out1=r13				// current
327.1372 +	adds out2=16+EXTRA_FRAME_SIZE,sp	// &switch_stack
327.1373 +	br.call.sptk.many rp=unw_init_frame_info
327.1374 +1:	adds out0=16,sp				// &info
327.1375 +	mov b6=loc2
327.1376 +	mov loc2=gp				// save gp across indirect function call
327.1377 +	;;
327.1378 +	ld8 gp=[in0]
327.1379 +	mov out1=in1				// arg
327.1380 +	br.call.sptk.many rp=b6			// invoke the callback function
327.1381 +1:	mov gp=loc2				// restore gp
327.1382 +
327.1383 +	// For now, we don't allow changing registers from within
327.1384 +	// unw_init_running; if we ever want to allow that, we'd
327.1385 +	// have to do a load_switch_stack here:
327.1386 +	.restore sp
327.1387 +	adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp
327.1388 +
327.1389 +	mov ar.pfs=loc1
327.1390 +	mov rp=loc0
327.1391 +	br.ret.sptk.many rp
327.1392 +END(unw_init_running)
327.1393 +
327.1394 +	.rodata
327.1395 +	.align 8
327.1396 +	.globl sys_call_table
327.1397 +sys_call_table:
327.1398 +	data8 sys_ni_syscall		//  This must be sys_ni_syscall!  See ivt.S.
327.1399 +	data8 sys_exit				// 1025
327.1400 +	data8 sys_read
327.1401 +	data8 sys_write
327.1402 +	data8 sys_open
327.1403 +	data8 sys_close
327.1404 +	data8 sys_creat				// 1030
327.1405 +	data8 sys_link
327.1406 +	data8 sys_unlink
327.1407 +	data8 ia64_execve
327.1408 +	data8 sys_chdir
327.1409 +	data8 sys_fchdir			// 1035
327.1410 +	data8 sys_utimes
327.1411 +	data8 sys_mknod
327.1412 +	data8 sys_chmod
327.1413 +	data8 sys_chown
327.1414 +	data8 sys_lseek				// 1040
327.1415 +	data8 sys_getpid
327.1416 +	data8 sys_getppid
327.1417 +	data8 sys_mount
327.1418 +	data8 sys_umount
327.1419 +	data8 sys_setuid			// 1045
327.1420 +	data8 sys_getuid
327.1421 +	data8 sys_geteuid
327.1422 +	data8 sys_ptrace
327.1423 +	data8 sys_access
327.1424 +	data8 sys_sync				// 1050
327.1425 +	data8 sys_fsync
327.1426 +	data8 sys_fdatasync
327.1427 +	data8 sys_kill
327.1428 +	data8 sys_rename
327.1429 +	data8 sys_mkdir				// 1055
327.1430 +	data8 sys_rmdir
327.1431 +	data8 sys_dup
327.1432 +	data8 sys_pipe
327.1433 +	data8 sys_times
327.1434 +	data8 ia64_brk				// 1060
327.1435 +	data8 sys_setgid
327.1436 +	data8 sys_getgid
327.1437 +	data8 sys_getegid
327.1438 +	data8 sys_acct
327.1439 +	data8 sys_ioctl				// 1065
327.1440 +	data8 sys_fcntl
327.1441 +	data8 sys_umask
327.1442 +	data8 sys_chroot
327.1443 +	data8 sys_ustat
327.1444 +	data8 sys_dup2				// 1070
327.1445 +	data8 sys_setreuid
327.1446 +	data8 sys_setregid
327.1447 +	data8 sys_getresuid
327.1448 +	data8 sys_setresuid
327.1449 +	data8 sys_getresgid			// 1075
327.1450 +	data8 sys_setresgid
327.1451 +	data8 sys_getgroups
327.1452 +	data8 sys_setgroups
327.1453 +	data8 sys_getpgid
327.1454 +	data8 sys_setpgid			// 1080
327.1455 +	data8 sys_setsid
327.1456 +	data8 sys_getsid
327.1457 +	data8 sys_sethostname
327.1458 +	data8 sys_setrlimit
327.1459 +	data8 sys_getrlimit			// 1085
327.1460 +	data8 sys_getrusage
327.1461 +	data8 sys_gettimeofday
327.1462 +	data8 sys_settimeofday
327.1463 +	data8 sys_select
327.1464 +	data8 sys_poll				// 1090
327.1465 +	data8 sys_symlink
327.1466 +	data8 sys_readlink
327.1467 +	data8 sys_uselib
327.1468 +	data8 sys_swapon
327.1469 +	data8 sys_swapoff			// 1095
327.1470 +	data8 sys_reboot
327.1471 +	data8 sys_truncate
327.1472 +	data8 sys_ftruncate
327.1473 +	data8 sys_fchmod
327.1474 +	data8 sys_fchown			// 1100
327.1475 +	data8 ia64_getpriority
327.1476 +	data8 sys_setpriority
327.1477 +	data8 sys_statfs
327.1478 +	data8 sys_fstatfs
327.1479 +	data8 sys_gettid			// 1105
327.1480 +	data8 sys_semget
327.1481 +	data8 sys_semop
327.1482 +	data8 sys_semctl
327.1483 +	data8 sys_msgget
327.1484 +	data8 sys_msgsnd			// 1110
327.1485 +	data8 sys_msgrcv
327.1486 +	data8 sys_msgctl
327.1487 +	data8 sys_shmget
327.1488 +	data8 ia64_shmat
327.1489 +	data8 sys_shmdt				// 1115
327.1490 +	data8 sys_shmctl
327.1491 +	data8 sys_syslog
327.1492 +	data8 sys_setitimer
327.1493 +	data8 sys_getitimer
327.1494 +	data8 sys_ni_syscall			// 1120		/* was: ia64_oldstat */
327.1495 +	data8 sys_ni_syscall					/* was: ia64_oldlstat */
327.1496 +	data8 sys_ni_syscall					/* was: ia64_oldfstat */
327.1497 +	data8 sys_vhangup
327.1498 +	data8 sys_lchown
327.1499 +	data8 sys_remap_file_pages		// 1125
327.1500 +	data8 sys_wait4
327.1501 +	data8 sys_sysinfo
327.1502 +	data8 sys_clone
327.1503 +	data8 sys_setdomainname
327.1504 +	data8 sys_newuname			// 1130
327.1505 +	data8 sys_adjtimex
327.1506 +	data8 sys_ni_syscall					/* was: ia64_create_module */
327.1507 +	data8 sys_init_module
327.1508 +	data8 sys_delete_module
327.1509 +	data8 sys_ni_syscall			// 1135		/* was: sys_get_kernel_syms */
327.1510 +	data8 sys_ni_syscall					/* was: sys_query_module */
327.1511 +	data8 sys_quotactl
327.1512 +	data8 sys_bdflush
327.1513 +	data8 sys_sysfs
327.1514 +	data8 sys_personality			// 1140
327.1515 +	data8 sys_ni_syscall		// sys_afs_syscall
327.1516 +	data8 sys_setfsuid
327.1517 +	data8 sys_setfsgid
327.1518 +	data8 sys_getdents
327.1519 +	data8 sys_flock				// 1145
327.1520 +	data8 sys_readv
327.1521 +	data8 sys_writev
327.1522 +	data8 sys_pread64
327.1523 +	data8 sys_pwrite64
327.1524 +	data8 sys_sysctl			// 1150
327.1525 +	data8 sys_mmap
327.1526 +	data8 sys_munmap
327.1527 +	data8 sys_mlock
327.1528 +	data8 sys_mlockall
327.1529 +	data8 sys_mprotect			// 1155
327.1530 +	data8 ia64_mremap
327.1531 +	data8 sys_msync
327.1532 +	data8 sys_munlock
327.1533 +	data8 sys_munlockall
327.1534 +	data8 sys_sched_getparam		// 1160
327.1535 +	data8 sys_sched_setparam
327.1536 +	data8 sys_sched_getscheduler
327.1537 +	data8 sys_sched_setscheduler
327.1538 +	data8 sys_sched_yield
327.1539 +	data8 sys_sched_get_priority_max	// 1165
327.1540 +	data8 sys_sched_get_priority_min
327.1541 +	data8 sys_sched_rr_get_interval
327.1542 +	data8 sys_nanosleep
327.1543 +	data8 sys_nfsservctl
327.1544 +	data8 sys_prctl				// 1170
327.1545 +	data8 sys_getpagesize
327.1546 +	data8 sys_mmap2
327.1547 +	data8 sys_pciconfig_read
327.1548 +	data8 sys_pciconfig_write
327.1549 +	data8 sys_perfmonctl			// 1175
327.1550 +	data8 sys_sigaltstack
327.1551 +	data8 sys_rt_sigaction
327.1552 +	data8 sys_rt_sigpending
327.1553 +	data8 sys_rt_sigprocmask
327.1554 +	data8 sys_rt_sigqueueinfo		// 1180
327.1555 +	data8 sys_rt_sigreturn
327.1556 +	data8 sys_rt_sigsuspend
327.1557 +	data8 sys_rt_sigtimedwait
327.1558 +	data8 sys_getcwd
327.1559 +	data8 sys_capget			// 1185
327.1560 +	data8 sys_capset
327.1561 +	data8 sys_sendfile64
327.1562 +	data8 sys_ni_syscall		// sys_getpmsg (STREAMS)
327.1563 +	data8 sys_ni_syscall		// sys_putpmsg (STREAMS)
327.1564 +	data8 sys_socket			// 1190
327.1565 +	data8 sys_bind
327.1566 +	data8 sys_connect
327.1567 +	data8 sys_listen
327.1568 +	data8 sys_accept
327.1569 +	data8 sys_getsockname			// 1195
327.1570 +	data8 sys_getpeername
327.1571 +	data8 sys_socketpair
327.1572 +	data8 sys_send
327.1573 +	data8 sys_sendto
327.1574 +	data8 sys_recv				// 1200
327.1575 +	data8 sys_recvfrom
327.1576 +	data8 sys_shutdown
327.1577 +	data8 sys_setsockopt
327.1578 +	data8 sys_getsockopt
327.1579 +	data8 sys_sendmsg			// 1205
327.1580 +	data8 sys_recvmsg
327.1581 +	data8 sys_pivot_root
327.1582 +	data8 sys_mincore
327.1583 +	data8 sys_madvise
327.1584 +	data8 sys_newstat			// 1210
327.1585 +	data8 sys_newlstat
327.1586 +	data8 sys_newfstat
327.1587 +	data8 sys_clone2
327.1588 +	data8 sys_getdents64
327.1589 +	data8 sys_getunwind			// 1215
327.1590 +	data8 sys_readahead
327.1591 +	data8 sys_setxattr
327.1592 +	data8 sys_lsetxattr
327.1593 +	data8 sys_fsetxattr
327.1594 +	data8 sys_getxattr			// 1220
327.1595 +	data8 sys_lgetxattr
327.1596 +	data8 sys_fgetxattr
327.1597 +	data8 sys_listxattr
327.1598 +	data8 sys_llistxattr
327.1599 +	data8 sys_flistxattr			// 1225
327.1600 +	data8 sys_removexattr
327.1601 +	data8 sys_lremovexattr
327.1602 +	data8 sys_fremovexattr
327.1603 +	data8 sys_tkill
327.1604 +	data8 sys_futex				// 1230
327.1605 +	data8 sys_sched_setaffinity
327.1606 +	data8 sys_sched_getaffinity
327.1607 +	data8 sys_set_tid_address
327.1608 +	data8 sys_fadvise64_64
327.1609 +	data8 sys_tgkill 			// 1235
327.1610 +	data8 sys_exit_group
327.1611 +	data8 sys_lookup_dcookie
327.1612 +	data8 sys_io_setup
327.1613 +	data8 sys_io_destroy
327.1614 +	data8 sys_io_getevents			// 1240
327.1615 +	data8 sys_io_submit
327.1616 +	data8 sys_io_cancel
327.1617 +	data8 sys_epoll_create
327.1618 +	data8 sys_epoll_ctl
327.1619 +	data8 sys_epoll_wait			// 1245
327.1620 +	data8 sys_restart_syscall
327.1621 +	data8 sys_semtimedop
327.1622 +	data8 sys_timer_create
327.1623 +	data8 sys_timer_settime
327.1624 +	data8 sys_timer_gettime			// 1250
327.1625 +	data8 sys_timer_getoverrun
327.1626 +	data8 sys_timer_delete
327.1627 +	data8 sys_clock_settime
327.1628 +	data8 sys_clock_gettime
327.1629 +	data8 sys_clock_getres			// 1255
327.1630 +	data8 sys_clock_nanosleep
327.1631 +	data8 sys_fstatfs64
327.1632 +	data8 sys_statfs64
327.1633 +	data8 sys_mbind
327.1634 +	data8 sys_get_mempolicy			// 1260
327.1635 +	data8 sys_set_mempolicy
327.1636 +	data8 sys_mq_open
327.1637 +	data8 sys_mq_unlink
327.1638 +	data8 sys_mq_timedsend
327.1639 +	data8 sys_mq_timedreceive		// 1265
327.1640 +	data8 sys_mq_notify
327.1641 +	data8 sys_mq_getsetattr
327.1642 +	data8 sys_ni_syscall			// reserved for kexec_load
327.1643 +	data8 sys_ni_syscall			// reserved for vserver
327.1644 +	data8 sys_waitid			// 1270
327.1645 +	data8 sys_add_key
327.1646 +	data8 sys_request_key
327.1647 +	data8 sys_keyctl
327.1648 +	data8 sys_ni_syscall
327.1649 +	data8 sys_ni_syscall			// 1275
327.1650 +	data8 sys_ni_syscall
327.1651 +	data8 sys_ni_syscall
327.1652 +	data8 sys_ni_syscall
327.1653 +	data8 sys_ni_syscall
327.1654 +
327.1655 +	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
327.1656 +#endif
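
IA-64 native system calls are numbered from 1024, so the handler that consumes this table indexes it with (syscall number - 1024); the closing .org directive makes the assembler fail if the table ever holds fewer than NR_syscalls entries. A minimal C sketch of that lookup follows; syscall_fn_t, dispatch_syscall and the NR_syscalls value are illustrative assumptions, not symbols from this changeset:

	/* Hedged sketch of the table lookup; names here are illustrative only. */
	typedef long (*syscall_fn_t)(long, long, long, long, long, long, long, long);

	extern syscall_fn_t sys_call_table[];	/* the table emitted above */
	#define IA64_FIRST_SYSCALL	1024
	#define NR_syscalls		256	/* assumed table size for the sketch */

	static long dispatch_syscall(long nr, long a0, long a1, long a2, long a3,
				     long a4, long a5, long a6, long a7)
	{
		unsigned long idx = nr - IA64_FIRST_SYSCALL;

		if (idx >= NR_syscalls)
			return -1;		/* the real path returns -ENOSYS */
		return sys_call_table[idx](a0, a1, a2, a3, a4, a5, a6, a7);
	}
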
   328.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   328.2 +++ b/xen/arch/ia64/linux-xen/entry.h	Tue Aug 16 12:15:23 2005 +0800
   328.3 @@ -0,0 +1,97 @@
   328.4 +#include <linux/config.h>
   328.5 +
   328.6 +/*
   328.7 + * Preserved registers that are shared between code in ivt.S and
   328.8 + * entry.S.  Be careful not to step on these!
   328.9 + */
  328.10 +#define PRED_LEAVE_SYSCALL	1 /* TRUE iff leave from syscall */
  328.11 +#define PRED_KERNEL_STACK	2 /* returning to kernel-stacks? */
  328.12 +#define PRED_USER_STACK		3 /* returning to user-stacks? */
  328.13 +#ifdef CONFIG_VTI
  328.14 +#define PRED_EMUL		2 /* Need to save r4-r7 for inst emulation */
  328.15 +#define PRED_NON_EMUL		3 /* No need to save r4-r7 for normal path */
  328.16 +#define PRED_BN0		6 /* Guest is in bank 0 */
  328.17 +#define PRED_BN1		7 /* Guest is in bank 1 */
  328.18 +#endif // CONFIG_VTI
  328.19 +#define PRED_SYSCALL		4 /* inside a system call? */
  328.20 +#define PRED_NON_SYSCALL	5 /* complement of PRED_SYSCALL */
  328.21 +
  328.22 +#ifdef __ASSEMBLY__
  328.23 +# define PASTE2(x,y)	x##y
  328.24 +# define PASTE(x,y)	PASTE2(x,y)
  328.25 +
  328.26 +# define pLvSys		PASTE(p,PRED_LEAVE_SYSCALL)
  328.27 +# define pKStk		PASTE(p,PRED_KERNEL_STACK)
  328.28 +# define pUStk		PASTE(p,PRED_USER_STACK)
  328.29 +#ifdef CONFIG_VTI
  328.30 +# define pEml		PASTE(p,PRED_EMUL)
  328.31 +# define pNonEml	PASTE(p,PRED_NON_EMUL)
  328.32 +# define pBN0		PASTE(p,PRED_BN0)
  328.33 +# define pBN1		PASTE(p,PRED_BN1)
  328.34 +#endif // CONFIG_VTI
  328.35 +# define pSys		PASTE(p,PRED_SYSCALL)
  328.36 +# define pNonSys	PASTE(p,PRED_NON_SYSCALL)
  328.37 +#endif
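
The two-level PASTE/PASTE2 pair is the usual token-pasting idiom: the extra indirection forces the PRED_* argument to be macro-expanded before ## glues it onto the letter p, so pSys really names predicate register p4 rather than pPRED_SYSCALL. A short standalone illustration of the expansion:

	/* Standalone expansion check; mirrors the macros defined above. */
	#define PASTE2(x, y)	x##y
	#define PASTE(x, y)	PASTE2(x, y)

	#define PRED_SYSCALL	4
	#define pSys		PASTE(p, PRED_SYSCALL)	/* expands to p4 */
	/* Without the PASTE2 indirection, p##PRED_SYSCALL would paste to
	   pPRED_SYSCALL instead of p4. */
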
  328.38 +
  328.39 +#define PT(f)		(IA64_PT_REGS_##f##_OFFSET)
  328.40 +#define SW(f)		(IA64_SWITCH_STACK_##f##_OFFSET)
  328.41 +#ifdef CONFIG_VTI
  328.42 +#define VPD(f)      (VPD_##f##_START_OFFSET)
  328.43 +#endif // CONFIG_VTI
  328.44 +
  328.45 +#define PT_REGS_SAVES(off)			\
  328.46 +	.unwabi 3, 'i';				\
  328.47 +	.fframe IA64_PT_REGS_SIZE+16+(off);	\
  328.48 +	.spillsp rp, PT(CR_IIP)+16+(off);	\
  328.49 +	.spillsp ar.pfs, PT(CR_IFS)+16+(off);	\
  328.50 +	.spillsp ar.unat, PT(AR_UNAT)+16+(off);	\
  328.51 +	.spillsp ar.fpsr, PT(AR_FPSR)+16+(off);	\
  328.52 +	.spillsp pr, PT(PR)+16+(off);
  328.53 +
  328.54 +#define PT_REGS_UNWIND_INFO(off)		\
  328.55 +	.prologue;				\
  328.56 +	PT_REGS_SAVES(off);			\
  328.57 +	.body
  328.58 +
  328.59 +#define SWITCH_STACK_SAVES(off)							\
  328.60 +	.savesp ar.unat,SW(CALLER_UNAT)+16+(off);				\
  328.61 +	.savesp ar.fpsr,SW(AR_FPSR)+16+(off);					\
  328.62 +	.spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off);		\
  328.63 +	.spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off);		\
  328.64 +	.spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off);		\
  328.65 +	.spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off);		\
  328.66 +	.spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off);		\
  328.67 +	.spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off);		\
  328.68 +	.spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off);		\
  328.69 +	.spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off);		\
  328.70 +	.spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off);		\
  328.71 +	.spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off);		\
  328.72 +	.spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off);		\
  328.73 +	.spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off);		\
  328.74 +	.spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off);		\
  328.75 +	.spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off);		\
  328.76 +	.spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off);		\
  328.77 +	.spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off);	\
  328.78 +	.spillsp @priunat,SW(AR_UNAT)+16+(off);					\
  328.79 +	.spillsp ar.rnat,SW(AR_RNAT)+16+(off);					\
  328.80 +	.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off);				\
  328.81 +	.spillsp pr,SW(PR)+16+(off))
  328.82 +
  328.83 +#define DO_SAVE_SWITCH_STACK			\
  328.84 +	movl r28=1f;				\
  328.85 +	;;					\
  328.86 +	.fframe IA64_SWITCH_STACK_SIZE;		\
  328.87 +	adds sp=-IA64_SWITCH_STACK_SIZE,sp;	\
  328.88 +	mov.ret.sptk b7=r28,1f;			\
  328.89 +	SWITCH_STACK_SAVES(0);			\
  328.90 +	br.cond.sptk.many save_switch_stack;	\
  328.91 +1:
  328.92 +
  328.93 +#define DO_LOAD_SWITCH_STACK			\
  328.94 +	movl r28=1f;				\
  328.95 +	;;					\
  328.96 +	invala;					\
  328.97 +	mov.ret.sptk b7=r28,1f;			\
  328.98 +	br.cond.sptk.many load_switch_stack;	\
  328.99 +1:	.restore sp;				\
 328.100 +	adds sp=IA64_SWITCH_STACK_SIZE,sp
   329.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   329.2 +++ b/xen/arch/ia64/linux-xen/head.S	Tue Aug 16 12:15:23 2005 +0800
   329.3 @@ -0,0 +1,1026 @@
   329.4 +/*
   329.5 + * Here is where the ball gets rolling as far as the kernel is concerned.
   329.6 + * When control is transferred to _start, the bootloader has already
   329.7 + * loaded us to the correct address.  All that's left to do here is
   329.8 + * to set up the kernel's global pointer and jump to the kernel
   329.9 + * entry point.
  329.10 + *
  329.11 + * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
  329.12 + *	David Mosberger-Tang <davidm@hpl.hp.com>
  329.13 + *	Stephane Eranian <eranian@hpl.hp.com>
  329.14 + * Copyright (C) 1999 VA Linux Systems
  329.15 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  329.16 + * Copyright (C) 1999 Intel Corp.
  329.17 + * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
  329.18 + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
  329.19 + * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
  329.20 + *   -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
  329.21 + */
  329.22 +
  329.23 +#include <linux/config.h>
  329.24 +
  329.25 +#include <asm/asmmacro.h>
  329.26 +#include <asm/fpu.h>
  329.27 +#include <asm/kregs.h>
  329.28 +#include <asm/mmu_context.h>
  329.29 +#include <asm/offsets.h>
  329.30 +#include <asm/pal.h>
  329.31 +#include <asm/pgtable.h>
  329.32 +#include <asm/processor.h>
  329.33 +#include <asm/ptrace.h>
  329.34 +#include <asm/system.h>
  329.35 +
  329.36 +	.section __special_page_section,"ax"
  329.37 +
  329.38 +	.global empty_zero_page
  329.39 +empty_zero_page:
  329.40 +	.skip PAGE_SIZE
  329.41 +
  329.42 +	.global swapper_pg_dir
  329.43 +swapper_pg_dir:
  329.44 +	.skip PAGE_SIZE
  329.45 +
  329.46 +	.rodata
  329.47 +halt_msg:
  329.48 +	stringz "Halting kernel\n"
  329.49 +
  329.50 +	.text
  329.51 +
  329.52 +	.global start_ap
  329.53 +
  329.54 +	/*
  329.55 +	 * Start the kernel.  When the bootloader passes control to _start(), r28
  329.56 +	 * points to the address of the boot parameter area.  Execution reaches
  329.57 +	 * here in physical mode.
  329.58 +	 */
  329.59 +GLOBAL_ENTRY(_start)
  329.60 +start_ap:
  329.61 +	.prologue
  329.62 +	.save rp, r0		// terminate unwind chain with a NULL rp
  329.63 +	.body
  329.64 +
  329.65 +	rsm psr.i | psr.ic
  329.66 +	;;
  329.67 +	srlz.i
  329.68 +	;;
  329.69 +	/*
  329.70 +	 * Initialize kernel region registers:
  329.71 +	 *	rr[0]: VHPT enabled, page size = PAGE_SHIFT
  329.72 +	 *	rr[1]: VHPT enabled, page size = PAGE_SHIFT
  329.73 +	 *	rr[2]: VHPT enabled, page size = PAGE_SHIFT
  329.74 +	 *	rr[3]: VHPT enabled, page size = PAGE_SHIFT
  329.75 +	 *	rr[4]: VHPT enabled, page size = PAGE_SHIFT
  329.76 +	 *	rr[5]: VHPT enabled, page size = PAGE_SHIFT
  329.77 +	 *	rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
  329.78 +	 *	rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
  329.79 +	 * We initialize all of them to prevent inadvertently assuming
  329.80 +	 * something about the state of address translation early in boot.
  329.81 +	 */
  329.82 +	movl r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
  329.83 +	movl r7=(0<<61)
  329.84 +	movl r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
  329.85 +	movl r9=(1<<61)
  329.86 +	movl r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
  329.87 +	movl r11=(2<<61)
  329.88 +	movl r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
  329.89 +	movl r13=(3<<61)
  329.90 +	movl r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
  329.91 +	movl r15=(4<<61)
  329.92 +	movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
  329.93 +	movl r17=(5<<61)
  329.94 +	movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
  329.95 +	movl r19=(6<<61)
  329.96 +	movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
  329.97 +	movl r21=(7<<61)
  329.98 +	;;
  329.99 +	mov rr[r7]=r6
 329.100 +	mov rr[r9]=r8
 329.101 +	mov rr[r11]=r10
 329.102 +	mov rr[r13]=r12
 329.103 +	mov rr[r15]=r14
 329.104 +	mov rr[r17]=r16
 329.105 +	mov rr[r19]=r18
 329.106 +	mov rr[r21]=r20
 329.107 +	;;
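
Each value written to rr[n] above packs a region id, a preferred page size, and the VHPT-walker enable bit: regions 0-5 use PAGE_SHIFT pages with the low bit set (walker on), regions 6-7 use IA64_GRANULE_SHIFT pages with it clear (walker off). A hedged C sketch of that encoding; the helper names below are stand-ins for illustration, not the kernel's definitions:

	#include <stdint.h>

	/* Illustrative encoder for the region-register format used above. */
	static uint64_t make_rr(uint64_t rid, unsigned int page_shift, int vhpt_on)
	{
		return (rid << 8) | ((uint64_t)page_shift << 2) | (vhpt_on ? 1 : 0);
	}
	/* regions 0-5: make_rr(kernel_rid(n), PAGE_SHIFT, 1)
	   regions 6-7: make_rr(kernel_rid(n), IA64_GRANULE_SHIFT, 0) */
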
 329.108 +	/*
 329.109 +	 * Now pin mappings into the TLB for kernel text and data
 329.110 +	 */
 329.111 +	mov r18=KERNEL_TR_PAGE_SHIFT<<2
 329.112 +	movl r17=KERNEL_START
 329.113 +	;;
 329.114 +	mov cr.itir=r18
 329.115 +	mov cr.ifa=r17
 329.116 +	mov r16=IA64_TR_KERNEL
 329.117 +	mov r3=ip
 329.118 +	movl r18=PAGE_KERNEL
 329.119 +	;;
 329.120 +	dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
 329.121 +	;;
 329.122 +	or r18=r2,r18
 329.123 +	;;
 329.124 +	srlz.i
 329.125 +	;;
 329.126 +	itr.i itr[r16]=r18
 329.127 +	;;
 329.128 +	itr.d dtr[r16]=r18
 329.129 +	;;
 329.130 +	srlz.i
 329.131 +
 329.132 +	/*
 329.133 +	 * Switch into virtual mode:
 329.134 +	 */
 329.135 +#ifdef CONFIG_VTI
 329.136 +	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH \
 329.137 +		  |IA64_PSR_DI)
 329.138 +#else // CONFIG_VTI
 329.139 +	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
 329.140 +		  |IA64_PSR_DI)
 329.141 +#endif // CONFIG_VTI
 329.142 +	;;
 329.143 +	mov cr.ipsr=r16
 329.144 +	movl r17=1f
 329.145 +	;;
 329.146 +	mov cr.iip=r17
 329.147 +	mov cr.ifs=r0
 329.148 +	;;
 329.149 +	rfi
 329.150 +	;;
 329.151 +1:	// now we are in virtual mode
 329.152 +
 329.153 +	// set IVT entry point---can't access I/O ports without it
 329.154 +#ifdef CONFIG_VTI
 329.155 +    movl r3=vmx_ia64_ivt
 329.156 +#else // CONFIG_VTI
 329.157 +	movl r3=ia64_ivt
 329.158 +#endif // CONFIG_VTI
 329.159 +	;;
 329.160 +	mov cr.iva=r3
 329.161 +	movl r2=FPSR_DEFAULT
 329.162 +	;;
 329.163 +	srlz.i
 329.164 +	movl gp=__gp
 329.165 +
 329.166 +	mov ar.fpsr=r2
 329.167 +	;;
 329.168 +
 329.169 +#define isAP	p2	// are we an Application Processor?
 329.170 +#define isBP	p3	// are we the Bootstrap Processor?
 329.171 +
 329.172 +#ifdef CONFIG_SMP
 329.173 +	/*
 329.174 +	 * Find the init_task for the currently booting CPU.  At poweron, and in
 329.175 +	 * UP mode, task_for_booting_cpu is NULL.
 329.176 +	 */
 329.177 +	movl r3=task_for_booting_cpu
 329.178 + 	;;
 329.179 +	ld8 r3=[r3]
 329.180 +	movl r2=init_task
 329.181 +	;;
 329.182 +	cmp.eq isBP,isAP=r3,r0
 329.183 +	;;
 329.184 +(isAP)	mov r2=r3
 329.185 +#else
 329.186 +	movl r2=init_task
 329.187 +	cmp.eq isBP,isAP=r0,r0
 329.188 +#endif
 329.189 +	;;
 329.190 +	tpa r3=r2		// r3 == phys addr of task struct
 329.191 +	mov r16=-1
 329.192 +(isBP)	br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
 329.193 +
 329.194 +	// load mapping for stack (virtaddr in r2, physaddr in r3)
 329.195 +	rsm psr.ic
 329.196 +	movl r17=PAGE_KERNEL
 329.197 +	;;
 329.198 +	srlz.d
 329.199 +	dep r18=0,r3,0,12
 329.200 +	;;
 329.201 +	or r18=r17,r18
 329.202 +#ifdef XEN
 329.203 +	dep r2=-1,r3,60,4	// IMVA of task
 329.204 +#else
 329.205 +	dep r2=-1,r3,61,3	// IMVA of task
 329.206 +#endif
 329.207 +	;;
 329.208 +	mov r17=rr[r2]
 329.209 +	shr.u r16=r3,IA64_GRANULE_SHIFT
 329.210 +	;;
 329.211 +	dep r17=0,r17,8,24
 329.212 +	;;
 329.213 +	mov cr.itir=r17
 329.214 +	mov cr.ifa=r2
 329.215 +
 329.216 +	mov r19=IA64_TR_CURRENT_STACK
 329.217 +	;;
 329.218 +	itr.d dtr[r19]=r18
 329.219 +	;;
 329.220 +	ssm psr.ic
 329.221 +	srlz.d
 329.222 +  	;;
 329.223 +
 329.224 +.load_current:
 329.225 +	// load the "current" pointer (r13) and ar.k6 with the current task
 329.226 +#ifdef CONFIG_VTI
 329.227 +	mov r21=r2		// virtual address
 329.228 +	;;
 329.229 +	bsw.1
 329.230 +	;;
 329.231 +#else // CONFIG_VTI
 329.232 +	mov IA64_KR(CURRENT)=r2		// virtual address
 329.233 +	mov IA64_KR(CURRENT_STACK)=r16
 329.234 +#endif // CONFIG_VTI
 329.235 +	mov r13=r2
 329.236 +	/*
 329.237 +	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel threads
 329.238 +	 * don't store interesting values in that structure, but the space still needs
 329.239 +	 * to be there because time-critical stuff such as the context switching can
 329.240 +	 * be implemented more efficiently (for example, __switch_to()
 329.241 +	 * always sets the psr.dfh bit of the task it is switching to).
 329.242 +	 */
 329.243 +	addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
 329.244 +	addl r2=IA64_RBS_OFFSET,r2	// initialize the RSE
 329.245 +	mov ar.rsc=0		// place RSE in enforced lazy mode
 329.246 +	;;
 329.247 +	loadrs			// clear the dirty partition
 329.248 +	;;
 329.249 +	mov ar.bspstore=r2	// establish the new RSE stack
 329.250 +	;;
 329.251 +	mov ar.rsc=0x3		// place RSE in eager mode
 329.252 +
 329.253 +#ifdef XEN
 329.254 +(isBP)	dep r28=-1,r28,60,4	// make address virtual
 329.255 +#else
 329.256 +(isBP)	dep r28=-1,r28,61,3	// make address virtual
 329.257 +#endif
 329.258 +(isBP)	movl r2=ia64_boot_param
 329.259 +	;;
 329.260 +(isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader
 329.261 +
 329.262 +#ifdef CONFIG_SMP
 329.263 +(isAP)	br.call.sptk.many rp=start_secondary
 329.264 +.ret0:
 329.265 +(isAP)	br.cond.sptk self
 329.266 +#endif
 329.267 +
 329.268 +	// This is executed by the bootstrap processor (bsp) only:
 329.269 +
 329.270 +#ifdef CONFIG_IA64_FW_EMU
 329.271 +	// initialize PAL & SAL emulator:
 329.272 +	br.call.sptk.many rp=sys_fw_init
 329.273 +.ret1:
 329.274 +#endif
 329.275 +	br.call.sptk.many rp=start_kernel
 329.276 +.ret2:	addl r3=@ltoff(halt_msg),gp
 329.277 +	;;
 329.278 +	alloc r2=ar.pfs,8,0,2,0
 329.279 +	;;
 329.280 +	ld8 out0=[r3]
 329.281 +	br.call.sptk.many b0=console_print
 329.282 +
 329.283 +self:	hint @pause
 329.284 +	;;
 329.285 +	br.sptk.many self		// endless loop
 329.286 +	;;
 329.287 +END(_start)
 329.288 +
 329.289 +GLOBAL_ENTRY(ia64_save_debug_regs)
 329.290 +	alloc r16=ar.pfs,1,0,0,0
 329.291 +	mov r20=ar.lc			// preserve ar.lc
 329.292 +	mov ar.lc=IA64_NUM_DBG_REGS-1
 329.293 +	mov r18=0
 329.294 +	add r19=IA64_NUM_DBG_REGS*8,in0
 329.295 +	;;
 329.296 +1:	mov r16=dbr[r18]
 329.297 +#ifdef CONFIG_ITANIUM
 329.298 +	;;
 329.299 +	srlz.d
 329.300 +#endif
 329.301 +	mov r17=ibr[r18]
 329.302 +	add r18=1,r18
 329.303 +	;;
 329.304 +	st8.nta [in0]=r16,8
 329.305 +	st8.nta [r19]=r17,8
 329.306 +	br.cloop.sptk.many 1b
 329.307 +	;;
 329.308 +	mov ar.lc=r20			// restore ar.lc
 329.309 +	br.ret.sptk.many rp
 329.310 +END(ia64_save_debug_regs)
 329.311 +
 329.312 +GLOBAL_ENTRY(ia64_load_debug_regs)
 329.313 +	alloc r16=ar.pfs,1,0,0,0
 329.314 +	lfetch.nta [in0]
 329.315 +	mov r20=ar.lc			// preserve ar.lc
 329.316 +	add r19=IA64_NUM_DBG_REGS*8,in0
 329.317 +	mov ar.lc=IA64_NUM_DBG_REGS-1
 329.318 +	mov r18=-1
 329.319 +	;;
 329.320 +1:	ld8.nta r16=[in0],8
 329.321 +	ld8.nta r17=[r19],8
 329.322 +	add r18=1,r18
 329.323 +	;;
 329.324 +	mov dbr[r18]=r16
 329.325 +#ifdef CONFIG_ITANIUM
 329.326 +	;;
 329.327 +	srlz.d				// Errata 132 (NoFix status)
 329.328 +#endif
 329.329 +	mov ibr[r18]=r17
 329.330 +	br.cloop.sptk.many 1b
 329.331 +	;;
 329.332 +	mov ar.lc=r20			// restore ar.lc
 329.333 +	br.ret.sptk.many rp
 329.334 +END(ia64_load_debug_regs)
 329.335 +
 329.336 +GLOBAL_ENTRY(__ia64_save_fpu)
 329.337 +	alloc r2=ar.pfs,1,4,0,0
 329.338 +	adds loc0=96*16-16,in0
 329.339 +	adds loc1=96*16-16-128,in0
 329.340 +	;;
 329.341 +	stf.spill.nta [loc0]=f127,-256
 329.342 +	stf.spill.nta [loc1]=f119,-256
 329.343 +	;;
 329.344 +	stf.spill.nta [loc0]=f111,-256
 329.345 +	stf.spill.nta [loc1]=f103,-256
 329.346 +	;;
 329.347 +	stf.spill.nta [loc0]=f95,-256
 329.348 +	stf.spill.nta [loc1]=f87,-256
 329.349 +	;;
 329.350 +	stf.spill.nta [loc0]=f79,-256
 329.351 +	stf.spill.nta [loc1]=f71,-256
 329.352 +	;;
 329.353 +	stf.spill.nta [loc0]=f63,-256
 329.354 +	stf.spill.nta [loc1]=f55,-256
 329.355 +	adds loc2=96*16-32,in0
 329.356 +	;;
 329.357 +	stf.spill.nta [loc0]=f47,-256
 329.358 +	stf.spill.nta [loc1]=f39,-256
 329.359 +	adds loc3=96*16-32-128,in0
 329.360 +	;;
 329.361 +	stf.spill.nta [loc2]=f126,-256
 329.362 +	stf.spill.nta [loc3]=f118,-256
 329.363 +	;;
 329.364 +	stf.spill.nta [loc2]=f110,-256
 329.365 +	stf.spill.nta [loc3]=f102,-256
 329.366 +	;;
 329.367 +	stf.spill.nta [loc2]=f94,-256
 329.368 +	stf.spill.nta [loc3]=f86,-256
 329.369 +	;;
 329.370 +	stf.spill.nta [loc2]=f78,-256
 329.371 +	stf.spill.nta [loc3]=f70,-256
 329.372 +	;;
 329.373 +	stf.spill.nta [loc2]=f62,-256
 329.374 +	stf.spill.nta [loc3]=f54,-256
 329.375 +	adds loc0=96*16-48,in0
 329.376 +	;;
 329.377 +	stf.spill.nta [loc2]=f46,-256
 329.378 +	stf.spill.nta [loc3]=f38,-256
 329.379 +	adds loc1=96*16-48-128,in0
 329.380 +	;;
 329.381 +	stf.spill.nta [loc0]=f125,-256
 329.382 +	stf.spill.nta [loc1]=f117,-256
 329.383 +	;;
 329.384 +	stf.spill.nta [loc0]=f109,-256
 329.385 +	stf.spill.nta [loc1]=f101,-256
 329.386 +	;;
 329.387 +	stf.spill.nta [loc0]=f93,-256
 329.388 +	stf.spill.nta [loc1]=f85,-256
 329.389 +	;;
 329.390 +	stf.spill.nta [loc0]=f77,-256
 329.391 +	stf.spill.nta [loc1]=f69,-256
 329.392 +	;;
 329.393 +	stf.spill.nta [loc0]=f61,-256
 329.394 +	stf.spill.nta [loc1]=f53,-256
 329.395 +	adds loc2=96*16-64,in0
 329.396 +	;;
 329.397 +	stf.spill.nta [loc0]=f45,-256
 329.398 +	stf.spill.nta [loc1]=f37,-256
 329.399 +	adds loc3=96*16-64-128,in0
 329.400 +	;;
 329.401 +	stf.spill.nta [loc2]=f124,-256
 329.402 +	stf.spill.nta [loc3]=f116,-256
 329.403 +	;;
 329.404 +	stf.spill.nta [loc2]=f108,-256
 329.405 +	stf.spill.nta [loc3]=f100,-256
 329.406 +	;;
 329.407 +	stf.spill.nta [loc2]=f92,-256
 329.408 +	stf.spill.nta [loc3]=f84,-256
 329.409 +	;;
 329.410 +	stf.spill.nta [loc2]=f76,-256
 329.411 +	stf.spill.nta [loc3]=f68,-256
 329.412 +	;;
 329.413 +	stf.spill.nta [loc2]=f60,-256
 329.414 +	stf.spill.nta [loc3]=f52,-256
 329.415 +	adds loc0=96*16-80,in0
 329.416 +	;;
 329.417 +	stf.spill.nta [loc2]=f44,-256
 329.418 +	stf.spill.nta [loc3]=f36,-256
 329.419 +	adds loc1=96*16-80-128,in0
 329.420 +	;;
 329.421 +	stf.spill.nta [loc0]=f123,-256
 329.422 +	stf.spill.nta [loc1]=f115,-256
 329.423 +	;;
 329.424 +	stf.spill.nta [loc0]=f107,-256
 329.425 +	stf.spill.nta [loc1]=f99,-256
 329.426 +	;;
 329.427 +	stf.spill.nta [loc0]=f91,-256
 329.428 +	stf.spill.nta [loc1]=f83,-256
 329.429 +	;;
 329.430 +	stf.spill.nta [loc0]=f75,-256
 329.431 +	stf.spill.nta [loc1]=f67,-256
 329.432 +	;;
 329.433 +	stf.spill.nta [loc0]=f59,-256
 329.434 +	stf.spill.nta [loc1]=f51,-256
 329.435 +	adds loc2=96*16-96,in0
 329.436 +	;;
 329.437 +	stf.spill.nta [loc0]=f43,-256
 329.438 +	stf.spill.nta [loc1]=f35,-256
 329.439 +	adds loc3=96*16-96-128,in0
 329.440 +	;;
 329.441 +	stf.spill.nta [loc2]=f122,-256
 329.442 +	stf.spill.nta [loc3]=f114,-256
 329.443 +	;;
 329.444 +	stf.spill.nta [loc2]=f106,-256
 329.445 +	stf.spill.nta [loc3]=f98,-256
 329.446 +	;;
 329.447 +	stf.spill.nta [loc2]=f90,-256
 329.448 +	stf.spill.nta [loc3]=f82,-256
 329.449 +	;;
 329.450 +	stf.spill.nta [loc2]=f74,-256
 329.451 +	stf.spill.nta [loc3]=f66,-256
 329.452 +	;;
 329.453 +	stf.spill.nta [loc2]=f58,-256
 329.454 +	stf.spill.nta [loc3]=f50,-256
 329.455 +	adds loc0=96*16-112,in0
 329.456 +	;;
 329.457 +	stf.spill.nta [loc2]=f42,-256
 329.458 +	stf.spill.nta [loc3]=f34,-256
 329.459 +	adds loc1=96*16-112-128,in0
 329.460 +	;;
 329.461 +	stf.spill.nta [loc0]=f121,-256
 329.462 +	stf.spill.nta [loc1]=f113,-256
 329.463 +	;;
 329.464 +	stf.spill.nta [loc0]=f105,-256
 329.465 +	stf.spill.nta [loc1]=f97,-256
 329.466 +	;;
 329.467 +	stf.spill.nta [loc0]=f89,-256
 329.468 +	stf.spill.nta [loc1]=f81,-256
 329.469 +	;;
 329.470 +	stf.spill.nta [loc0]=f73,-256
 329.471 +	stf.spill.nta [loc1]=f65,-256
 329.472 +	;;
 329.473 +	stf.spill.nta [loc0]=f57,-256
 329.474 +	stf.spill.nta [loc1]=f49,-256
 329.475 +	adds loc2=96*16-128,in0
 329.476 +	;;
 329.477 +	stf.spill.nta [loc0]=f41,-256
 329.478 +	stf.spill.nta [loc1]=f33,-256
 329.479 +	adds loc3=96*16-128-128,in0
 329.480 +	;;
 329.481 +	stf.spill.nta [loc2]=f120,-256
 329.482 +	stf.spill.nta [loc3]=f112,-256
 329.483 +	;;
 329.484 +	stf.spill.nta [loc2]=f104,-256
 329.485 +	stf.spill.nta [loc3]=f96,-256
 329.486 +	;;
 329.487 +	stf.spill.nta [loc2]=f88,-256
 329.488 +	stf.spill.nta [loc3]=f80,-256
 329.489 +	;;
 329.490 +	stf.spill.nta [loc2]=f72,-256
 329.491 +	stf.spill.nta [loc3]=f64,-256
 329.492 +	;;
 329.493 +	stf.spill.nta [loc2]=f56,-256
 329.494 +	stf.spill.nta [loc3]=f48,-256
 329.495 +	;;
 329.496 +	stf.spill.nta [loc2]=f40
 329.497 +	stf.spill.nta [loc3]=f32
 329.498 +	br.ret.sptk.many rp
 329.499 +END(__ia64_save_fpu)
 329.500 +
 329.501 +GLOBAL_ENTRY(__ia64_load_fpu)
 329.502 +	alloc r2=ar.pfs,1,2,0,0
 329.503 +	adds r3=128,in0
 329.504 +	adds r14=256,in0
 329.505 +	adds r15=384,in0
 329.506 +	mov loc0=512
 329.507 +	mov loc1=-1024+16
 329.508 +	;;
 329.509 +	ldf.fill.nta f32=[in0],loc0
 329.510 +	ldf.fill.nta f40=[ r3],loc0
 329.511 +	ldf.fill.nta f48=[r14],loc0
 329.512 +	ldf.fill.nta f56=[r15],loc0
 329.513 +	;;
 329.514 +	ldf.fill.nta f64=[in0],loc0
 329.515 +	ldf.fill.nta f72=[ r3],loc0
 329.516 +	ldf.fill.nta f80=[r14],loc0
 329.517 +	ldf.fill.nta f88=[r15],loc0
 329.518 +	;;
 329.519 +	ldf.fill.nta f96=[in0],loc1
 329.520 +	ldf.fill.nta f104=[ r3],loc1
 329.521 +	ldf.fill.nta f112=[r14],loc1
 329.522 +	ldf.fill.nta f120=[r15],loc1
 329.523 +	;;
 329.524 +	ldf.fill.nta f33=[in0],loc0
 329.525 +	ldf.fill.nta f41=[ r3],loc0
 329.526 +	ldf.fill.nta f49=[r14],loc0
 329.527 +	ldf.fill.nta f57=[r15],loc0
 329.528 +	;;
 329.529 +	ldf.fill.nta f65=[in0],loc0
 329.530 +	ldf.fill.nta f73=[ r3],loc0
 329.531 +	ldf.fill.nta f81=[r14],loc0
 329.532 +	ldf.fill.nta f89=[r15],loc0
 329.533 +	;;
 329.534 +	ldf.fill.nta f97=[in0],loc1
 329.535 +	ldf.fill.nta f105=[ r3],loc1
 329.536 +	ldf.fill.nta f113=[r14],loc1
 329.537 +	ldf.fill.nta f121=[r15],loc1
 329.538 +	;;
 329.539 +	ldf.fill.nta f34=[in0],loc0
 329.540 +	ldf.fill.nta f42=[ r3],loc0
 329.541 +	ldf.fill.nta f50=[r14],loc0
 329.542 +	ldf.fill.nta f58=[r15],loc0
 329.543 +	;;
 329.544 +	ldf.fill.nta f66=[in0],loc0
 329.545 +	ldf.fill.nta f74=[ r3],loc0
 329.546 +	ldf.fill.nta f82=[r14],loc0
 329.547 +	ldf.fill.nta f90=[r15],loc0
 329.548 +	;;
 329.549 +	ldf.fill.nta f98=[in0],loc1
 329.550 +	ldf.fill.nta f106=[ r3],loc1
 329.551 +	ldf.fill.nta f114=[r14],loc1
 329.552 +	ldf.fill.nta f122=[r15],loc1
 329.553 +	;;
 329.554 +	ldf.fill.nta f35=[in0],loc0
 329.555 +	ldf.fill.nta f43=[ r3],loc0
 329.556 +	ldf.fill.nta f51=[r14],loc0
 329.557 +	ldf.fill.nta f59=[r15],loc0
 329.558 +	;;
 329.559 +	ldf.fill.nta f67=[in0],loc0
 329.560 +	ldf.fill.nta f75=[ r3],loc0
 329.561 +	ldf.fill.nta f83=[r14],loc0
 329.562 +	ldf.fill.nta f91=[r15],loc0
 329.563 +	;;
 329.564 +	ldf.fill.nta f99=[in0],loc1
 329.565 +	ldf.fill.nta f107=[ r3],loc1
 329.566 +	ldf.fill.nta f115=[r14],loc1
 329.567 +	ldf.fill.nta f123=[r15],loc1
 329.568 +	;;
 329.569 +	ldf.fill.nta f36=[in0],loc0
 329.570 +	ldf.fill.nta f44=[ r3],loc0
 329.571 +	ldf.fill.nta f52=[r14],loc0
 329.572 +	ldf.fill.nta f60=[r15],loc0
 329.573 +	;;
 329.574 +	ldf.fill.nta f68=[in0],loc0
 329.575 +	ldf.fill.nta f76=[ r3],loc0
 329.576 +	ldf.fill.nta f84=[r14],loc0
 329.577 +	ldf.fill.nta f92=[r15],loc0
 329.578 +	;;
 329.579 +	ldf.fill.nta f100=[in0],loc1
 329.580 +	ldf.fill.nta f108=[ r3],loc1
 329.581 +	ldf.fill.nta f116=[r14],loc1
 329.582 +	ldf.fill.nta f124=[r15],loc1
 329.583 +	;;
 329.584 +	ldf.fill.nta f37=[in0],loc0
 329.585 +	ldf.fill.nta f45=[ r3],loc0
 329.586 +	ldf.fill.nta f53=[r14],loc0
 329.587 +	ldf.fill.nta f61=[r15],loc0
 329.588 +	;;
 329.589 +	ldf.fill.nta f69=[in0],loc0
 329.590 +	ldf.fill.nta f77=[ r3],loc0
 329.591 +	ldf.fill.nta f85=[r14],loc0
 329.592 +	ldf.fill.nta f93=[r15],loc0
 329.593 +	;;
 329.594 +	ldf.fill.nta f101=[in0],loc1
 329.595 +	ldf.fill.nta f109=[ r3],loc1
 329.596 +	ldf.fill.nta f117=[r14],loc1
 329.597 +	ldf.fill.nta f125=[r15],loc1
 329.598 +	;;
 329.599 +	ldf.fill.nta f38 =[in0],loc0
 329.600 +	ldf.fill.nta f46 =[ r3],loc0
 329.601 +	ldf.fill.nta f54 =[r14],loc0
 329.602 +	ldf.fill.nta f62 =[r15],loc0
 329.603 +	;;
 329.604 +	ldf.fill.nta f70 =[in0],loc0
 329.605 +	ldf.fill.nta f78 =[ r3],loc0
 329.606 +	ldf.fill.nta f86 =[r14],loc0
 329.607 +	ldf.fill.nta f94 =[r15],loc0
 329.608 +	;;
 329.609 +	ldf.fill.nta f102=[in0],loc1
 329.610 +	ldf.fill.nta f110=[ r3],loc1
 329.611 +	ldf.fill.nta f118=[r14],loc1
 329.612 +	ldf.fill.nta f126=[r15],loc1
 329.613 +	;;
 329.614 +	ldf.fill.nta f39 =[in0],loc0
 329.615 +	ldf.fill.nta f47 =[ r3],loc0
 329.616 +	ldf.fill.nta f55 =[r14],loc0
 329.617 +	ldf.fill.nta f63 =[r15],loc0
 329.618 +	;;
 329.619 +	ldf.fill.nta f71 =[in0],loc0
 329.620 +	ldf.fill.nta f79 =[ r3],loc0
 329.621 +	ldf.fill.nta f87 =[r14],loc0
 329.622 +	ldf.fill.nta f95 =[r15],loc0
 329.623 +	;;
 329.624 +	ldf.fill.nta f103=[in0]
 329.625 +	ldf.fill.nta f111=[ r3]
 329.626 +	ldf.fill.nta f119=[r14]
 329.627 +	ldf.fill.nta f127=[r15]
 329.628 +	br.ret.sptk.many rp
 329.629 +END(__ia64_load_fpu)
 329.630 +
 329.631 +GLOBAL_ENTRY(__ia64_init_fpu)
 329.632 +	stf.spill [sp]=f0		// M3
 329.633 +	mov	 f32=f0			// F
 329.634 +	nop.b	 0
 329.635 +
 329.636 +	ldfps	 f33,f34=[sp]		// M0
 329.637 +	ldfps	 f35,f36=[sp]		// M1
 329.638 +	mov      f37=f0			// F
 329.639 +	;;
 329.640 +
 329.641 +	setf.s	 f38=r0			// M2
 329.642 +	setf.s	 f39=r0			// M3
 329.643 +	mov      f40=f0			// F
 329.644 +
 329.645 +	ldfps	 f41,f42=[sp]		// M0
 329.646 +	ldfps	 f43,f44=[sp]		// M1
 329.647 +	mov      f45=f0			// F
 329.648 +
 329.649 +	setf.s	 f46=r0			// M2
 329.650 +	setf.s	 f47=r0			// M3
 329.651 +	mov      f48=f0			// F
 329.652 +
 329.653 +	ldfps	 f49,f50=[sp]		// M0
 329.654 +	ldfps	 f51,f52=[sp]		// M1
 329.655 +	mov      f53=f0			// F
 329.656 +
 329.657 +	setf.s	 f54=r0			// M2
 329.658 +	setf.s	 f55=r0			// M3
 329.659 +	mov      f56=f0			// F
 329.660 +
 329.661 +	ldfps	 f57,f58=[sp]		// M0
 329.662 +	ldfps	 f59,f60=[sp]		// M1
 329.663 +	mov      f61=f0			// F
 329.664 +
 329.665 +	setf.s	 f62=r0			// M2
 329.666 +	setf.s	 f63=r0			// M3
 329.667 +	mov      f64=f0			// F
 329.668 +
 329.669 +	ldfps	 f65,f66=[sp]		// M0
 329.670 +	ldfps	 f67,f68=[sp]		// M1
 329.671 +	mov      f69=f0			// F
 329.672 +
 329.673 +	setf.s	 f70=r0			// M2
 329.674 +	setf.s	 f71=r0			// M3
 329.675 +	mov      f72=f0			// F
 329.676 +
 329.677 +	ldfps	 f73,f74=[sp]		// M0
 329.678 +	ldfps	 f75,f76=[sp]		// M1
 329.679 +	mov      f77=f0			// F
 329.680 +
 329.681 +	setf.s	 f78=r0			// M2
 329.682 +	setf.s	 f79=r0			// M3
 329.683 +	mov      f80=f0			// F
 329.684 +
 329.685 +	ldfps	 f81,f82=[sp]		// M0
 329.686 +	ldfps	 f83,f84=[sp]		// M1
 329.687 +	mov      f85=f0			// F
 329.688 +
 329.689 +	setf.s	 f86=r0			// M2
 329.690 +	setf.s	 f87=r0			// M3
 329.691 +	mov      f88=f0			// F
 329.692 +
 329.693 +	/*
 329.694 +	 * When the instructions are cached, it would be faster to initialize
  329.695 +	 * the remaining registers with simple mov instructions (F-unit).
 329.696 +	 * This gets the time down to ~29 cycles.  However, this would use up
 329.697 +	 * 33 bundles, whereas continuing with the above pattern yields
 329.698 +	 * 10 bundles and ~30 cycles.
 329.699 +	 */
 329.700 +
 329.701 +	ldfps	 f89,f90=[sp]		// M0
 329.702 +	ldfps	 f91,f92=[sp]		// M1
 329.703 +	mov      f93=f0			// F
 329.704 +
 329.705 +	setf.s	 f94=r0			// M2
 329.706 +	setf.s	 f95=r0			// M3
 329.707 +	mov      f96=f0			// F
 329.708 +
 329.709 +	ldfps	 f97,f98=[sp]		// M0
 329.710 +	ldfps	 f99,f100=[sp]		// M1
 329.711 +	mov      f101=f0		// F
 329.712 +
 329.713 +	setf.s	 f102=r0		// M2
 329.714 +	setf.s	 f103=r0		// M3
 329.715 +	mov      f104=f0		// F
 329.716 +
 329.717 +	ldfps	 f105,f106=[sp]		// M0
 329.718 +	ldfps	 f107,f108=[sp]		// M1
 329.719 +	mov      f109=f0		// F
 329.720 +
 329.721 +	setf.s	 f110=r0		// M2
 329.722 +	setf.s	 f111=r0		// M3
 329.723 +	mov      f112=f0		// F
 329.724 +
 329.725 +	ldfps	 f113,f114=[sp]		// M0
 329.726 +	ldfps	 f115,f116=[sp]		// M1
 329.727 +	mov      f117=f0		// F
 329.728 +
 329.729 +	setf.s	 f118=r0		// M2
 329.730 +	setf.s	 f119=r0		// M3
 329.731 +	mov      f120=f0		// F
 329.732 +
 329.733 +	ldfps	 f121,f122=[sp]		// M0
 329.734 +	ldfps	 f123,f124=[sp]		// M1
 329.735 +	mov      f125=f0		// F
 329.736 +
 329.737 +	setf.s	 f126=r0		// M2
 329.738 +	setf.s	 f127=r0		// M3
 329.739 +	br.ret.sptk.many rp		// F
 329.740 +END(__ia64_init_fpu)
 329.741 +
 329.742 +/*
 329.743 + * Switch execution mode from virtual to physical
 329.744 + *
 329.745 + * Inputs:
 329.746 + *	r16 = new psr to establish
 329.747 + * Output:
 329.748 + *	r19 = old virtual address of ar.bsp
 329.749 + *	r20 = old virtual address of sp
 329.750 + *
 329.751 + * Note: RSE must already be in enforced lazy mode
 329.752 + */
 329.753 +GLOBAL_ENTRY(ia64_switch_mode_phys)
 329.754 + {
 329.755 +	alloc r2=ar.pfs,0,0,0,0
 329.756 +	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
 329.757 +	mov r15=ip
 329.758 + }
 329.759 +	;;
 329.760 + {
 329.761 +	flushrs				// must be first insn in group
 329.762 +	srlz.i
 329.763 + }
 329.764 +	;;
 329.765 +	mov cr.ipsr=r16			// set new PSR
 329.766 +	add r3=1f-ia64_switch_mode_phys,r15
 329.767 +
 329.768 +	mov r19=ar.bsp
 329.769 +	mov r20=sp
 329.770 +	mov r14=rp			// get return address into a general register
 329.771 +	;;
 329.772 +
 329.773 +	// going to physical mode, use tpa to translate virt->phys
 329.774 +	tpa r17=r19
 329.775 +	tpa r3=r3
 329.776 +	tpa sp=sp
 329.777 +	tpa r14=r14
 329.778 +	;;
 329.779 +
 329.780 +	mov r18=ar.rnat			// save ar.rnat
 329.781 +	mov ar.bspstore=r17		// this steps on ar.rnat
 329.782 +	mov cr.iip=r3
 329.783 +	mov cr.ifs=r0
 329.784 +	;;
 329.785 +	mov ar.rnat=r18			// restore ar.rnat
 329.786 +	rfi				// must be last insn in group
 329.787 +	;;
 329.788 +1:	mov rp=r14
 329.789 +	br.ret.sptk.many rp
 329.790 +END(ia64_switch_mode_phys)
 329.791 +
 329.792 +/*
 329.793 + * Switch execution mode from physical to virtual
 329.794 + *
 329.795 + * Inputs:
 329.796 + *	r16 = new psr to establish
 329.797 + *	r19 = new bspstore to establish
 329.798 + *	r20 = new sp to establish
 329.799 + *
 329.800 + * Note: RSE must already be in enforced lazy mode
 329.801 + */
 329.802 +GLOBAL_ENTRY(ia64_switch_mode_virt)
 329.803 + {
 329.804 +	alloc r2=ar.pfs,0,0,0,0
 329.805 +	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
 329.806 +	mov r15=ip
 329.807 + }
 329.808 +	;;
 329.809 + {
 329.810 +	flushrs				// must be first insn in group
 329.811 +	srlz.i
 329.812 + }
 329.813 +	;;
 329.814 +	mov cr.ipsr=r16			// set new PSR
 329.815 +	add r3=1f-ia64_switch_mode_virt,r15
 329.816 +
 329.817 +	mov r14=rp			// get return address into a general register
 329.818 +	;;
 329.819 +
 329.820 +	// going to virtual
 329.821 +	//   - for code addresses, set upper bits of addr to KERNEL_START
 329.822 +	//   - for stack addresses, copy from input argument
 329.823 +	movl r18=KERNEL_START
 329.824 +	dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
 329.825 +	dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
 329.826 +	mov sp=r20
 329.827 +	;;
 329.828 +	or r3=r3,r18
 329.829 +	or r14=r14,r18
 329.830 +	;;
 329.831 +
 329.832 +	mov r18=ar.rnat			// save ar.rnat
 329.833 +	mov ar.bspstore=r19		// this steps on ar.rnat
 329.834 +	mov cr.iip=r3
 329.835 +	mov cr.ifs=r0
 329.836 +	;;
 329.837 +	mov ar.rnat=r18			// restore ar.rnat
 329.838 +	rfi				// must be last insn in group
 329.839 +	;;
 329.840 +1:	mov rp=r14
 329.841 +	br.ret.sptk.many rp
 329.842 +END(ia64_switch_mode_virt)
 329.843 +
 329.844 +GLOBAL_ENTRY(ia64_delay_loop)
 329.845 +	.prologue
 329.846 +{	nop 0			// work around GAS unwind info generation bug...
 329.847 +	.save ar.lc,r2
 329.848 +	mov r2=ar.lc
 329.849 +	.body
 329.850 +	;;
 329.851 +	mov ar.lc=r32
 329.852 +}
 329.853 +	;;
 329.854 +	// force loop to be 32-byte aligned (GAS bug means we cannot use .align
 329.855 +	// inside function body without corrupting unwind info).
 329.856 +{	nop 0 }
 329.857 +1:	br.cloop.sptk.few 1b
 329.858 +	;;
 329.859 +	mov ar.lc=r2
 329.860 +	br.ret.sptk.many rp
 329.861 +END(ia64_delay_loop)
 329.862 +
 329.863 +/*
 329.864 + * Return a CPU-local timestamp in nano-seconds.  This timestamp is
  329.865 + * NOT synchronized across CPUs, so its return value must never be
 329.866 + * compared against the values returned on another CPU.  The usage in
 329.867 + * kernel/sched.c ensures that.
 329.868 + *
 329.869 + * The return-value of sched_clock() is NOT supposed to wrap-around.
 329.870 + * If it did, it would cause some scheduling hiccups (at the worst).
 329.871 + * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
 329.872 + * that would happen only once every 5+ years.
 329.873 + *
 329.874 + * The code below basically calculates:
 329.875 + *
 329.876 + *   (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
 329.877 + *
 329.878 + * except that the multiplication and the shift are done with 128-bit
 329.879 + * intermediate precision so that we can produce a full 64-bit result.
 329.880 + */
 329.881 +GLOBAL_ENTRY(sched_clock)
 329.882 +#ifdef XEN
 329.883 +	movl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET
 329.884 +#else
 329.885 +	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
 329.886 +#endif
 329.887 +	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
 329.888 +	;;
 329.889 +	ldf8 f8=[r8]
 329.890 +	;;
 329.891 +	setf.sig f9=r9		// certain to stall, so issue it _after_ ldf8...
 329.892 +	;;
 329.893 +	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
 329.894 +	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
 329.895 +	;;
 329.896 +	getf.sig r8=f10		//						(5 cyc)
 329.897 +	getf.sig r9=f11
 329.898 +	;;
 329.899 +	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 329.900 +	br.ret.sptk.many rp
 329.901 +END(sched_clock)
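
The computation spelled out in the comment above is a 64x64-to-128-bit multiply followed by a right shift: xmpy.lu/xmpy.hu produce the low and high halves of the product and shrp extracts the middle 64 bits. A hedged C equivalent using GCC's unsigned __int128; the shift constant is assumed here for illustration:

	#include <stdint.h>

	#define IA64_NSEC_PER_CYC_SHIFT	30	/* assumed value for the sketch */

	/* Same fixed-point scaling as sched_clock() above, done with a
	   128-bit intermediate instead of xmpy.lu/xmpy.hu + shrp. */
	static uint64_t cycles_to_ns(uint64_t itc, uint64_t nsec_per_cyc)
	{
		unsigned __int128 prod = (unsigned __int128)itc * nsec_per_cyc;
		return (uint64_t)(prod >> IA64_NSEC_PER_CYC_SHIFT);
	}
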
 329.902 +
 329.903 +GLOBAL_ENTRY(start_kernel_thread)
 329.904 +	.prologue
 329.905 +	.save rp, r0				// this is the end of the call-chain
 329.906 +	.body
 329.907 +	alloc r2 = ar.pfs, 0, 0, 2, 0
 329.908 +	mov out0 = r9
 329.909 +	mov out1 = r11;;
 329.910 +	br.call.sptk.many rp = kernel_thread_helper;;
 329.911 +	mov out0 = r8
 329.912 +	br.call.sptk.many rp = sys_exit;;
 329.913 +1:	br.sptk.few 1b				// not reached
 329.914 +END(start_kernel_thread)
 329.915 +
 329.916 +#ifdef CONFIG_IA64_BRL_EMU
 329.917 +
 329.918 +/*
 329.919 + *  Assembly routines used by brl_emu.c to set preserved register state.
 329.920 + */
 329.921 +
 329.922 +#define SET_REG(reg)				\
 329.923 + GLOBAL_ENTRY(ia64_set_##reg);			\
 329.924 +	alloc r16=ar.pfs,1,0,0,0;		\
 329.925 +	mov reg=r32;				\
 329.926 +	;;					\
 329.927 +	br.ret.sptk.many rp;			\
 329.928 + END(ia64_set_##reg)
 329.929 +
 329.930 +SET_REG(b1);
 329.931 +SET_REG(b2);
 329.932 +SET_REG(b3);
 329.933 +SET_REG(b4);
 329.934 +SET_REG(b5);
 329.935 +
 329.936 +#endif /* CONFIG_IA64_BRL_EMU */
 329.937 +
 329.938 +#ifdef CONFIG_SMP
 329.939 +	/*
 329.940 +	 * This routine handles spinlock contention.  It uses a non-standard calling
 329.941 +	 * convention to avoid converting leaf routines into interior routines.  Because
 329.942 +	 * of this special convention, there are several restrictions:
 329.943 +	 *
 329.944 +	 * - do not use gp relative variables, this code is called from the kernel
 329.945 +	 *   and from modules, r1 is undefined.
 329.946 +	 * - do not use stacked registers, the caller owns them.
 329.947 +	 * - do not use the scratch stack space, the caller owns it.
 329.948 +	 * - do not use any registers other than the ones listed below
 329.949 +	 *
 329.950 +	 * Inputs:
 329.951 +	 *   ar.pfs - saved CFM of caller
 329.952 +	 *   ar.ccv - 0 (and available for use)
 329.953 +	 *   r27    - flags from spin_lock_irqsave or 0.  Must be preserved.
 329.954 +	 *   r28    - available for use.
 329.955 +	 *   r29    - available for use.
 329.956 +	 *   r30    - available for use.
 329.957 +	 *   r31    - address of lock, available for use.
 329.958 +	 *   b6     - return address
 329.959 +	 *   p14    - available for use.
 329.960 +	 *   p15    - used to track flag status.
 329.961 +	 *
 329.962 +	 * If you patch this code to use more registers, do not forget to update
 329.963 +	 * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
 329.964 +	 */
 329.965 +
 329.966 +#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 329.967 +
 329.968 +GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
 329.969 +	.prologue
 329.970 +	.save ar.pfs, r0	// this code effectively has a zero frame size
 329.971 +	.save rp, r28
 329.972 +	.body
 329.973 +	nop 0
 329.974 +	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
 329.975 +	.restore sp		// pop existing prologue after next insn
 329.976 +	mov b6 = r28
 329.977 +	.prologue
 329.978 +	.save ar.pfs, r0
 329.979 +	.altrp b6
 329.980 +	.body
 329.981 +	;;
 329.982 +(p15)	ssm psr.i		// reenable interrupts if they were on
 329.983 +				// DavidM says that srlz.d is slow and is not required in this case
 329.984 +.wait:
 329.985 +	// exponential backoff, kdb, lockmeter etc. go in here
 329.986 +	hint @pause
 329.987 +	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
 329.988 +	nop 0
 329.989 +	;;
 329.990 +	cmp4.ne p14,p0=r30,r0
 329.991 +(p14)	br.cond.sptk.few .wait
 329.992 +(p15)	rsm psr.i		// disable interrupts if we reenabled them
 329.993 +	br.cond.sptk.few b6	// lock is now free, try to acquire
 329.994 +	.global ia64_spinlock_contention_pre3_4_end	// for kernprof
 329.995 +ia64_spinlock_contention_pre3_4_end:
 329.996 +END(ia64_spinlock_contention_pre3_4)
 329.997 +
 329.998 +#else
 329.999 +
329.1000 +GLOBAL_ENTRY(ia64_spinlock_contention)
329.1001 +	.prologue
329.1002 +	.altrp b6
329.1003 +	.body
329.1004 +	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
329.1005 +	;;
329.1006 +.wait:
329.1007 +(p15)	ssm psr.i		// reenable interrupts if they were on
329.1008 +				// DavidM says that srlz.d is slow and is not required in this case
329.1009 +.wait2:
329.1010 +	// exponential backoff, kdb, lockmeter etc. go in here
329.1011 +	hint @pause
329.1012 +	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
329.1013 +	;;
329.1014 +	cmp4.ne p14,p0=r30,r0
329.1015 +	mov r30 = 1
329.1016 +(p14)	br.cond.sptk.few .wait2
329.1017 +(p15)	rsm psr.i		// disable interrupts if we reenabled them
329.1018 +	;;
329.1019 +	cmpxchg4.acq r30=[r31], r30, ar.ccv
329.1020 +	;;
329.1021 +	cmp4.ne p14,p0=r0,r30
329.1022 +(p14)	br.cond.sptk.few .wait
329.1023 +
329.1024 +	br.ret.sptk.many b6	// lock is now taken
329.1025 +END(ia64_spinlock_contention)
329.1026 +
329.1027 +#endif
329.1028 +
329.1029 +#endif /* CONFIG_SMP */
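
In C terms, the contended path above (the gcc >= 3.3 variant) spins reading the lock word until it sees zero, re-enabling interrupts while it waits if they were on at entry, then attempts an acquire-cmpxchg of 1 into the word and resumes spinning if that races and fails. A hedged sketch with a GCC builtin standing in for cmpxchg4.acq and the interrupt toggling omitted:

	/* Illustrative C version of the ia64_spinlock_contention wait loop. */
	static void spin_contended(volatile unsigned int *lock)
	{
		for (;;) {
			while (*lock != 0)
				;	/* hint @pause / exponential backoff goes here */
			if (__sync_val_compare_and_swap(lock, 0U, 1U) == 0U)
				return;	/* lock acquired */
		}
	}
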
   330.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   330.2 +++ b/xen/arch/ia64/linux-xen/irq_ia64.c	Tue Aug 16 12:15:23 2005 +0800
   330.3 @@ -0,0 +1,381 @@
   330.4 +/*
   330.5 + * linux/arch/ia64/kernel/irq.c
   330.6 + *
   330.7 + * Copyright (C) 1998-2001 Hewlett-Packard Co
   330.8 + *	Stephane Eranian <eranian@hpl.hp.com>
   330.9 + *	David Mosberger-Tang <davidm@hpl.hp.com>
  330.10 + *
  330.11 + *  6/10/99: Updated to bring in sync with x86 version to facilitate
  330.12 + *	     support for SMP and different interrupt controllers.
  330.13 + *
  330.14 + * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
  330.15 + *                      PCI to vector allocation routine.
  330.16 + * 04/14/2004 Ashok Raj <ashok.raj@intel.com>
  330.17 + *						Added CPU Hotplug handling for IPF.
  330.18 + */
  330.19 +
  330.20 +#include <linux/config.h>
  330.21 +#include <linux/module.h>
  330.22 +
  330.23 +#include <linux/jiffies.h>
  330.24 +#include <linux/errno.h>
  330.25 +#include <linux/init.h>
  330.26 +#include <linux/interrupt.h>
  330.27 +#include <linux/ioport.h>
  330.28 +#include <linux/kernel_stat.h>
  330.29 +#include <linux/slab.h>
  330.30 +#include <linux/ptrace.h>
  330.31 +#include <linux/random.h>	/* for rand_initialize_irq() */
  330.32 +#include <linux/signal.h>
  330.33 +#include <linux/smp.h>
  330.34 +#include <linux/smp_lock.h>
  330.35 +#include <linux/threads.h>
  330.36 +#include <linux/bitops.h>
  330.37 +
  330.38 +#include <asm/delay.h>
  330.39 +#include <asm/intrinsics.h>
  330.40 +#include <asm/io.h>
  330.41 +#include <asm/hw_irq.h>
  330.42 +#include <asm/machvec.h>
  330.43 +#include <asm/pgtable.h>
  330.44 +#include <asm/system.h>
  330.45 +
  330.46 +#ifdef CONFIG_PERFMON
  330.47 +# include <asm/perfmon.h>
  330.48 +#endif
  330.49 +
  330.50 +#define IRQ_DEBUG	0
  330.51 +
  330.52 +/* default base addr of IPI table */
  330.53 +void __iomem *ipi_base_addr = ((void __iomem *)
  330.54 +			       (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
  330.55 +
  330.56 +/*
  330.57 + * Legacy IRQ to IA-64 vector translation table.
  330.58 + */
  330.59 +__u8 isa_irq_to_vector_map[16] = {
  330.60 +	/* 8259 IRQ translation, first 16 entries */
  330.61 +	0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
  330.62 +	0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
  330.63 +};
  330.64 +EXPORT_SYMBOL(isa_irq_to_vector_map);
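
Translating a legacy 8259 IRQ to its IA-64 vector is a plain table lookup into the 16-entry array above. A tiny hedged sketch; the wrapper name is hypothetical:

	/* Illustrative lookup into isa_irq_to_vector_map defined above. */
	static inline unsigned char isa_irq_vector(unsigned int isa_irq)
	{
		return isa_irq_to_vector_map[isa_irq & 15];	/* 16-entry table */
	}
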
  330.65 +
  330.66 +static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
  330.67 +
  330.68 +int
  330.69 +assign_irq_vector (int irq)
  330.70 +{
  330.71 +	int pos, vector;
  330.72 + again:
  330.73 +	pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
  330.74 +	vector = IA64_FIRST_DEVICE_VECTOR + pos;
  330.75 +	if (vector > IA64_LAST_DEVICE_VECTOR)
  330.76 +		/* XXX could look for sharable vectors instead of panic'ing... */
  330.77 +		panic("assign_irq_vector: out of interrupt vectors!");
  330.78 +	if (test_and_set_bit(pos, ia64_vector_mask))
  330.79 +		goto again;
  330.80 +	return vector;
  330.81 +}
  330.82 +
  330.83 +void
  330.84 +free_irq_vector (int vector)
  330.85 +{
  330.86 +	int pos;
  330.87 +
  330.88 +	if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR)
  330.89 +		return;
  330.90 +
  330.91 +	pos = vector - IA64_FIRST_DEVICE_VECTOR;
  330.92 +	if (!test_and_clear_bit(pos, ia64_vector_mask))
  330.93 +		printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
  330.94 +}
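
assign_irq_vector() above is a small bitmap allocator: it finds the first clear bit in ia64_vector_mask, converts the position to a device vector, and retries if another CPU claims the bit between the search and the atomic test_and_set_bit(). A hedged usage sketch; setup_device_vector() and the routing step are illustrative, not part of this changeset:

	/* Hypothetical caller pairing the two helpers defined above. */
	static int setup_device_vector(int irq)
	{
		int vector = assign_irq_vector(irq);	/* claim a free device vector */
		/* ... program the interrupt source to raise 'vector' ... */
		return vector;	/* released later with free_irq_vector(vector) */
	}
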
  330.95 +
  330.96 +#ifdef CONFIG_SMP
  330.97 +#	define IS_RESCHEDULE(vec)	(vec == IA64_IPI_RESCHEDULE)
  330.98 +#else
  330.99 +#	define IS_RESCHEDULE(vec)	(0)
 330.100 +#endif
 330.101 +/*
 330.102 + * That's where the IVT branches when we get an external
 330.103 + * interrupt. This branches to the correct hardware IRQ handler via
 330.104 + * function ptr.
 330.105 + */
 330.106 +void
 330.107 +ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 330.108 +{
 330.109 +	unsigned long saved_tpr;
 330.110 +
 330.111 +#if IRQ_DEBUG
 330.112 +#ifdef XEN
 330.113 +	xen_debug_irq(vector, regs);
 330.114 +#endif
 330.115 +	{
 330.116 +		unsigned long bsp, sp;
 330.117 +
 330.118 +		/*
 330.119 +		 * Note: if the interrupt happened while executing in
 330.120 +		 * the context switch routine (ia64_switch_to), we may
 330.121 +		 * get a spurious stack overflow here.  This is
 330.122 +		 * because the register and the memory stack are not
 330.123 +		 * switched atomically.
 330.124 +		 */
 330.125 +		bsp = ia64_getreg(_IA64_REG_AR_BSP);
 330.126 +		sp = ia64_getreg(_IA64_REG_SP);
 330.127 +
 330.128 +		if ((sp - bsp) < 1024) {
 330.129 +			static unsigned char count;
 330.130 +			static long last_time;
 330.131 +
 330.132 +			if (jiffies - last_time > 5*HZ)
 330.133 +				count = 0;
 330.134 +			if (++count < 5) {
 330.135 +				last_time = jiffies;
 330.136 +				printk("ia64_handle_irq: DANGER: less than "
 330.137 +				       "1KB of free stack space!!\n"
 330.138 +				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
 330.139 +			}
 330.140 +		}
 330.141 +	}
 330.142 +#endif /* IRQ_DEBUG */
 330.143 +
 330.144 +	/*
 330.145 +	 * Always set TPR to limit maximum interrupt nesting depth to
 330.146 +	 * 16 (without this, it would be ~240, which could easily lead
 330.147 +	 * to kernel stack overflows).
 330.148 +	 */
 330.149 +	irq_enter();
 330.150 +	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
 330.151 +	ia64_srlz_d();
 330.152 +	while (vector != IA64_SPURIOUS_INT_VECTOR) {
 330.153 +		if (!IS_RESCHEDULE(vector)) {
 330.154 +			ia64_setreg(_IA64_REG_CR_TPR, vector);
 330.155 +			ia64_srlz_d();
 330.156 +
 330.157 +#ifdef XEN
 330.158 +			if (!xen_do_IRQ(vector))
 330.159 +#endif
 330.160 +			__do_IRQ(local_vector_to_irq(vector), regs);
 330.161 +
 330.162 +			/*
 330.163 +			 * Disable interrupts and send EOI:
 330.164 +			 */
 330.165 +			local_irq_disable();
 330.166 +			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
 330.167 +		}
 330.168 +		ia64_eoi();
 330.169 +		vector = ia64_get_ivr();
 330.170 +	}
 330.171 +	/*
 330.172 +	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
 330.173 +	 * handler needs to be able to wait for further keyboard interrupts, which can't
 330.174 +	 * come through until ia64_eoi() has been done.
 330.175 +	 */
 330.176 +	irq_exit();
 330.177 +}
 330.178 +
 330.179 +#ifdef  CONFIG_VTI
 330.180 +#define vmx_irq_enter()		\
 330.181 +	add_preempt_count(HARDIRQ_OFFSET);
 330.182 +
  330.183 +/* Softirqs are now checked when leaving the hypervisor; otherwise the
  330.184 + * scheduler irq would be executed too early.
  330.185 + */
 330.186 +#define vmx_irq_exit(void)	\
 330.187 +	sub_preempt_count(HARDIRQ_OFFSET);
 330.188 +/*
 330.189 + * This is where the IVT branches to when we get an external
 330.190 + * interrupt. It then branches to the correct hardware IRQ handler
 330.191 + * via a function pointer.
 330.192 + */
 330.193 +void
 330.194 +vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 330.195 +{
 330.196 +	unsigned long saved_tpr;
 330.197 +	int	wake_dom0 = 0;
 330.198 +
 330.199 +
 330.200 +#if IRQ_DEBUG
 330.201 +	{
 330.202 +		unsigned long bsp, sp;
 330.203 +
 330.204 +		/*
 330.205 +		 * Note: if the interrupt happened while executing in
 330.206 +		 * the context switch routine (ia64_switch_to), we may
 330.207 +		 * get a spurious stack overflow here.  This is
 330.208 +		 * because the register and the memory stack are not
 330.209 +		 * switched atomically.
 330.210 +		 */
 330.211 +		bsp = ia64_getreg(_IA64_REG_AR_BSP);
 330.212 +		sp = ia64_getreg(_IA64_REG_SP);
 330.213 +
 330.214 +		if ((sp - bsp) < 1024) {
 330.215 +			static unsigned char count;
 330.216 +			static long last_time;
 330.217 +
 330.218 +			if (jiffies - last_time > 5*HZ)
 330.219 +				count = 0;
 330.220 +			if (++count < 5) {
 330.221 +				last_time = jiffies;
 330.222 +				printk("vmx_ia64_handle_irq: DANGER: less than "
 330.223 +				       "1KB of free stack space!!\n"
 330.224 +				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
 330.225 +			}
 330.226 +		}
 330.227 +	}
 330.228 +#endif /* IRQ_DEBUG */
 330.229 +
 330.230 +	/*
 330.231 +	 * Always set TPR to limit maximum interrupt nesting depth to
 330.232 +	 * 16 (without this, it would be ~240, which could easily lead
 330.233 +	 * to kernel stack overflows).
 330.234 +	 */
 330.235 +	vmx_irq_enter();
 330.236 +	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
 330.237 +	ia64_srlz_d();
 330.238 +	while (vector != IA64_SPURIOUS_INT_VECTOR) {
 330.239 +	    if (!IS_RESCHEDULE(vector)) {
 330.240 +		ia64_setreg(_IA64_REG_CR_TPR, vector);
 330.241 +		ia64_srlz_d();
 330.242 +
 330.243 +		if (vector != IA64_TIMER_VECTOR) {
 330.244 +			/* FIXME: Leave IRQ re-route later */
 330.245 +			vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector);
 330.246 +			wake_dom0 = 1;
 330.247 +		}
 330.248 +		else {	// FIXME: Handle Timer only now
 330.249 +			__do_IRQ(local_vector_to_irq(vector), regs);
 330.250 +		}
 330.251 +		
 330.252 +		/*
 330.253 +		 * Disable interrupts and send EOI:
 330.254 +		 */
 330.255 +		local_irq_disable();
 330.256 +		ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
 330.257 +	    }
 330.258 +	    else {
 330.259 +                printf("Oops: RESCHEDULE IPI absorbed by HV\n");
 330.260 +            }
 330.261 +	    ia64_eoi();
 330.262 +	    vector = ia64_get_ivr();
 330.263 +	}
 330.264 +	/*
 330.265 +	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
 330.266 +	 * handler needs to be able to wait for further keyboard interrupts, which can't
 330.267 +	 * come through until ia64_eoi() has been done.
 330.268 +	 */
 330.269 +	vmx_irq_exit();
 330.270 +	if ( wake_dom0 && current != dom0 ) 
 330.271 +		domain_wake(dom0->vcpu[0]);
 330.272 +}
 330.273 +#endif
 330.274 +
 330.275 +
 330.276 +#ifdef CONFIG_HOTPLUG_CPU
 330.277 +/*
 330.278 + * This function emulates interrupt processing when a CPU is about to be
 330.279 + * brought down.
 330.280 + */
 330.281 +void ia64_process_pending_intr(void)
 330.282 +{
 330.283 +	ia64_vector vector;
 330.284 +	unsigned long saved_tpr;
 330.285 +	extern unsigned int vectors_in_migration[NR_IRQS];
 330.286 +
 330.287 +	vector = ia64_get_ivr();
 330.288 +
 330.289 +	 irq_enter();
 330.290 +	 saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
 330.291 +	 ia64_srlz_d();
 330.292 +
 330.293 +	 /*
 330.294 +	  * Perform normal interrupt style processing
 330.295 +	  */
 330.296 +	while (vector != IA64_SPURIOUS_INT_VECTOR) {
 330.297 +		if (!IS_RESCHEDULE(vector)) {
 330.298 +			ia64_setreg(_IA64_REG_CR_TPR, vector);
 330.299 +			ia64_srlz_d();
 330.300 +
 330.301 +			/*
 330.302 +			 * Call the normal IRQ dispatch path as it would have been
 330.303 +			 * invoked from a real interrupt handler, passing NULL for
 330.304 +			 * pt_regs since none is available here.
 330.305 +			 * This could probably share code with ia64_handle_irq().
 330.306 +			 */
 330.307 +			vectors_in_migration[local_vector_to_irq(vector)]=0;
 330.308 +			__do_IRQ(local_vector_to_irq(vector), NULL);
 330.309 +
 330.310 +			/*
 330.311 +			 * Disable interrupts and send EOI
 330.312 +			 */
 330.313 +			local_irq_disable();
 330.314 +			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
 330.315 +		}
 330.316 +		ia64_eoi();
 330.317 +		vector = ia64_get_ivr();
 330.318 +	}
 330.319 +	irq_exit();
 330.320 +}
 330.321 +#endif
 330.322 +
 330.323 +
 330.324 +#ifdef CONFIG_SMP
 330.325 +extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
 330.326 +
 330.327 +static struct irqaction ipi_irqaction = {
 330.328 +	.handler =	handle_IPI,
 330.329 +	.flags =	SA_INTERRUPT,
 330.330 +	.name =		"IPI"
 330.331 +};
 330.332 +#endif
 330.333 +
 330.334 +void
 330.335 +register_percpu_irq (ia64_vector vec, struct irqaction *action)
 330.336 +{
 330.337 +	irq_desc_t *desc;
 330.338 +	unsigned int irq;
 330.339 +
 330.340 +	for (irq = 0; irq < NR_IRQS; ++irq)
 330.341 +		if (irq_to_vector(irq) == vec) {
 330.342 +			desc = irq_descp(irq);
 330.343 +			desc->status |= IRQ_PER_CPU;
 330.344 +			desc->handler = &irq_type_ia64_lsapic;
 330.345 +			if (action)
 330.346 +				setup_irq(irq, action);
 330.347 +		}
 330.348 +}
 330.349 +
 330.350 +void __init
 330.351 +init_IRQ (void)
 330.352 +{
 330.353 +	register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
 330.354 +#ifdef CONFIG_SMP
 330.355 +	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
 330.356 +#endif
 330.357 +#ifdef CONFIG_PERFMON
 330.358 +	pfm_init_percpu();
 330.359 +#endif
 330.360 +	platform_irq_init();
 330.361 +}
 330.362 +
 330.363 +void
 330.364 +ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
 330.365 +{
 330.366 +	void __iomem *ipi_addr;
 330.367 +	unsigned long ipi_data;
 330.368 +	unsigned long phys_cpu_id;
 330.369 +
 330.370 +#ifdef CONFIG_SMP
 330.371 +	phys_cpu_id = cpu_physical_id(cpu);
 330.372 +#else
 330.373 +	phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff;
 330.374 +#endif
 330.375 +
 330.376 +	/*
 330.377 +	 * The CPU number is encoded as an 8-bit ID and an 8-bit EID.
 330.378 +	 */
 330.379 +
 330.380 +	ipi_data = (delivery_mode << 8) | (vector & 0xff);
 330.381 +	ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));
 330.382 +
 330.383 +	writeq(ipi_data, ipi_addr);
 330.384 +}
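
The ipi_data/ipi_addr packing at the end of ia64_send_ipi() above is easier to see in isolation. The sketch below is a stand-alone illustration, not hypervisor code: pack_ipi_data(), pack_ipi_addr(), the interrupt-block base and the CPU id are made-up names and example values; only the bit layout (delivery mode above the 8-bit vector, one 16-byte slot per 16-bit ID/EID target, redirect hint in bit 3) follows the code above.

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: mirrors the packing done by ia64_send_ipi(). */
static uint64_t pack_ipi_data(unsigned delivery_mode, unsigned vector)
{
	/* delivery mode sits above the 8-bit vector */
	return ((uint64_t)delivery_mode << 8) | (vector & 0xff);
}

static uint64_t pack_ipi_addr(uint64_t ipi_block, unsigned phys_cpu_id,
			      unsigned redirect)
{
	/* one 16-byte slot per (EID:ID) target, redirect hint in bit 3 */
	return ipi_block + (((uint64_t)phys_cpu_id << 4) | ((redirect & 1) << 3));
}

int main(void)
{
	uint64_t data = pack_ipi_data(0 /* INT */, 0xef);
	uint64_t addr = pack_ipi_addr(0xfee00000ULL, 0x0102, 0);	/* example values */

	printf("ipi_data = 0x%llx, ipi_addr = 0x%llx\n",
	       (unsigned long long)data, (unsigned long long)addr);
	return 0;
}
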
   331.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   331.2 +++ b/xen/arch/ia64/linux-xen/mm_contig.c	Tue Aug 16 12:15:23 2005 +0800
   331.3 @@ -0,0 +1,305 @@
   331.4 +/*
   331.5 + * This file is subject to the terms and conditions of the GNU General Public
   331.6 + * License.  See the file "COPYING" in the main directory of this archive
   331.7 + * for more details.
   331.8 + *
   331.9 + * Copyright (C) 1998-2003 Hewlett-Packard Co
  331.10 + *	David Mosberger-Tang <davidm@hpl.hp.com>
  331.11 + *	Stephane Eranian <eranian@hpl.hp.com>
  331.12 + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
  331.13 + * Copyright (C) 1999 VA Linux Systems
  331.14 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  331.15 + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
  331.16 + *
  331.17 + * Routines used by ia64 machines with contiguous (or virtually contiguous)
  331.18 + * memory.
  331.19 + */
  331.20 +#include <linux/config.h>
  331.21 +#include <linux/bootmem.h>
  331.22 +#include <linux/efi.h>
  331.23 +#include <linux/mm.h>
  331.24 +#include <linux/swap.h>
  331.25 +
  331.26 +#include <asm/meminit.h>
  331.27 +#include <asm/pgalloc.h>
  331.28 +#include <asm/pgtable.h>
  331.29 +#include <asm/sections.h>
  331.30 +#include <asm/mca.h>
  331.31 +
  331.32 +#ifdef CONFIG_VIRTUAL_MEM_MAP
  331.33 +static unsigned long num_dma_physpages;
  331.34 +#endif
  331.35 +
  331.36 +/**
  331.37 + * show_mem - display a memory statistics summary
  331.38 + *
  331.39 + * Just walks the pages in the system and describes where they're allocated.
  331.40 + */
  331.41 +#ifndef XEN
  331.42 +void
  331.43 +show_mem (void)
  331.44 +{
  331.45 +	int i, total = 0, reserved = 0;
  331.46 +	int shared = 0, cached = 0;
  331.47 +
  331.48 +	printk("Mem-info:\n");
  331.49 +	show_free_areas();
  331.50 +
  331.51 +	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
  331.52 +	i = max_mapnr;
  331.53 +	while (i-- > 0) {
  331.54 +		if (!pfn_valid(i))
  331.55 +			continue;
  331.56 +		total++;
  331.57 +		if (PageReserved(mem_map+i))
  331.58 +			reserved++;
  331.59 +		else if (PageSwapCache(mem_map+i))
  331.60 +			cached++;
  331.61 +		else if (page_count(mem_map + i))
  331.62 +			shared += page_count(mem_map + i) - 1;
  331.63 +	}
  331.64 +	printk("%d pages of RAM\n", total);
  331.65 +	printk("%d reserved pages\n", reserved);
  331.66 +	printk("%d pages shared\n", shared);
  331.67 +	printk("%d pages swap cached\n", cached);
  331.68 +	printk("%ld pages in page table cache\n", pgtable_cache_size);
  331.69 +}
  331.70 +#endif
  331.71 +
  331.72 +/* physical address where the bootmem map is located */
  331.73 +unsigned long bootmap_start;
  331.74 +
  331.75 +/**
  331.76 + * find_max_pfn - adjust the maximum page number callback
  331.77 + * @start: start of range
  331.78 + * @end: end of range
  331.79 + * @arg: address of pointer to global max_pfn variable
  331.80 + *
  331.81 + * Passed as a callback function to efi_memmap_walk() to determine the highest
  331.82 + * available page frame number in the system.
  331.83 + */
  331.84 +int
  331.85 +find_max_pfn (unsigned long start, unsigned long end, void *arg)
  331.86 +{
  331.87 +	unsigned long *max_pfnp = arg, pfn;
  331.88 +
  331.89 +	pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
  331.90 +	if (pfn > *max_pfnp)
  331.91 +		*max_pfnp = pfn;
  331.92 +	return 0;
  331.93 +}
  331.94 +
  331.95 +/**
  331.96 + * find_bootmap_location - callback to find a memory area for the bootmap
  331.97 + * @start: start of region
  331.98 + * @end: end of region
  331.99 + * @arg: unused callback data
 331.100 + *
 331.101 + * Find a place to put the bootmap and return its starting address in
 331.102 + * bootmap_start.  This address must be page-aligned.
 331.103 + */
 331.104 +int
 331.105 +find_bootmap_location (unsigned long start, unsigned long end, void *arg)
 331.106 +{
 331.107 +	unsigned long needed = *(unsigned long *)arg;
 331.108 +	unsigned long range_start, range_end, free_start;
 331.109 +	int i;
 331.110 +
 331.111 +#if IGNORE_PFN0
 331.112 +	if (start == PAGE_OFFSET) {
 331.113 +		start += PAGE_SIZE;
 331.114 +		if (start >= end)
 331.115 +			return 0;
 331.116 +	}
 331.117 +#endif
 331.118 +
 331.119 +	free_start = PAGE_OFFSET;
 331.120 +
 331.121 +	for (i = 0; i < num_rsvd_regions; i++) {
 331.122 +		range_start = max(start, free_start);
 331.123 +		range_end   = min(end, rsvd_region[i].start & PAGE_MASK);
 331.124 +
 331.125 +		free_start = PAGE_ALIGN(rsvd_region[i].end);
 331.126 +
 331.127 +		if (range_end <= range_start)
 331.128 +			continue; /* skip over empty range */
 331.129 +
 331.130 +		if (range_end - range_start >= needed) {
 331.131 +			bootmap_start = __pa(range_start);
 331.132 +			return -1;	/* done */
 331.133 +		}
 331.134 +
 331.135 +		/* nothing more available in this segment */
 331.136 +		if (range_end == end)
 331.137 +			return 0;
 331.138 +	}
 331.139 +	return 0;
 331.140 +}
 331.141 +
 331.142 +/**
 331.143 + * find_memory - setup memory map
 331.144 + *
 331.145 + * Walk the EFI memory map and find usable memory for the system, taking
 331.146 + * into account reserved areas.
 331.147 + */
 331.148 +#ifndef XEN
 331.149 +void
 331.150 +find_memory (void)
 331.151 +{
 331.152 +	unsigned long bootmap_size;
 331.153 +
 331.154 +	reserve_memory();
 331.155 +
 331.156 +	/* first find highest page frame number */
 331.157 +	max_pfn = 0;
 331.158 +	efi_memmap_walk(find_max_pfn, &max_pfn);
 331.159 +
 331.160 +	/* how many bytes to cover all the pages */
 331.161 +	bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
 331.162 +
 331.163 +	/* look for a location to hold the bootmap */
 331.164 +	bootmap_start = ~0UL;
 331.165 +	efi_memmap_walk(find_bootmap_location, &bootmap_size);
 331.166 +	if (bootmap_start == ~0UL)
 331.167 +		panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
 331.168 +
 331.169 +	bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
 331.170 +
 331.171 +	/* Free all available memory, then mark bootmem-map as being in use. */
 331.172 +	efi_memmap_walk(filter_rsvd_memory, free_bootmem);
 331.173 +	reserve_bootmem(bootmap_start, bootmap_size);
 331.174 +
 331.175 +	find_initrd();
 331.176 +}
 331.177 +#endif
 331.178 +
 331.179 +#ifdef CONFIG_SMP
 331.180 +/**
 331.181 + * per_cpu_init - setup per-cpu variables
 331.182 + *
 331.183 + * Allocate and setup per-cpu data areas.
 331.184 + */
 331.185 +void *
 331.186 +per_cpu_init (void)
 331.187 +{
 331.188 +	void *cpu_data;
 331.189 +	int cpu;
 331.190 +
 331.191 +	/*
 331.192 +	 * get_free_pages() cannot be used before cpu_init() is done.  The BSP
 331.193 +	 * allocates "NR_CPUS" pages for all CPUs so that the APs don't have to
 331.194 +	 * call get_zeroed_page().
 331.195 +	 */
 331.196 +	if (smp_processor_id() == 0) {
 331.197 +		cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
 331.198 +					   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 331.199 +		for (cpu = 0; cpu < NR_CPUS; cpu++) {
 331.200 +			memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
 331.201 +			__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
 331.202 +			cpu_data += PERCPU_PAGE_SIZE;
 331.203 +			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 331.204 +		}
 331.205 +	}
 331.206 +	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 331.207 +}
 331.208 +#endif /* CONFIG_SMP */
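
per_cpu_init() above gives every CPU its own copy of the per-CPU data image and records, in __per_cpu_offset[], the distance from the linked section to that copy. The user-space sketch below shows the same arithmetic under stated assumptions: the names (percpu_image, percpu_offset, per_cpu_ptr, NCPUS) and sizes are illustrative, not the kernel's, and the raw pointer subtraction is not strictly portable C, though it mirrors what the kernel does.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NCPUS		4
#define PERCPU_SIZE	64		/* stand-in for PERCPU_PAGE_SIZE */

/* "Linked" per-CPU image, analogous to __per_cpu_start..__per_cpu_end. */
static char percpu_image[PERCPU_SIZE];
static long percpu_offset[NCPUS];	/* analogous to __per_cpu_offset[] */

/* Address of a per-CPU variable for a given CPU: its linked address plus
 * that CPU's offset, as in the kernel's per_cpu() machinery. */
static void *per_cpu_ptr(void *linked_addr, int cpu)
{
	return (char *)linked_addr + percpu_offset[cpu];
}

int main(void)
{
	char *area = malloc((size_t)PERCPU_SIZE * NCPUS);
	int cpu;

	/* Give each CPU a copy of the image and remember the delta. */
	for (cpu = 0; cpu < NCPUS; cpu++) {
		memcpy(area + cpu * PERCPU_SIZE, percpu_image, PERCPU_SIZE);
		percpu_offset[cpu] = (area + cpu * PERCPU_SIZE) - percpu_image;
	}

	/* A "per-CPU variable" living at offset 16 of the image. */
	int *counter = (int *)(percpu_image + 16);

	for (cpu = 0; cpu < NCPUS; cpu++)
		(*(int *)per_cpu_ptr(counter, cpu))++;

	printf("CPU 0 copy of counter = %d\n", *(int *)per_cpu_ptr(counter, 0));
	free(area);
	return 0;
}
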
 331.209 +
 331.210 +static int
 331.211 +count_pages (u64 start, u64 end, void *arg)
 331.212 +{
 331.213 +	unsigned long *count = arg;
 331.214 +
 331.215 +	*count += (end - start) >> PAGE_SHIFT;
 331.216 +	return 0;
 331.217 +}
 331.218 +
 331.219 +#ifdef CONFIG_VIRTUAL_MEM_MAP
 331.220 +static int
 331.221 +count_dma_pages (u64 start, u64 end, void *arg)
 331.222 +{
 331.223 +	unsigned long *count = arg;
 331.224 +
 331.225 +	if (start < MAX_DMA_ADDRESS)
 331.226 +		*count += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT;
 331.227 +	return 0;
 331.228 +}
 331.229 +#endif
 331.230 +
 331.231 +/*
 331.232 + * Set up the page tables.
 331.233 + */
 331.234 +
 331.235 +#ifndef XEN
 331.236 +void
 331.237 +paging_init (void)
 331.238 +{
 331.239 +	unsigned long max_dma;
 331.240 +	unsigned long zones_size[MAX_NR_ZONES];
 331.241 +#ifdef CONFIG_VIRTUAL_MEM_MAP
 331.242 +	unsigned long zholes_size[MAX_NR_ZONES];
 331.243 +	unsigned long max_gap;
 331.244 +#endif
 331.245 +
 331.246 +	/* initialize mem_map[] */
 331.247 +
 331.248 +	memset(zones_size, 0, sizeof(zones_size));
 331.249 +
 331.250 +	num_physpages = 0;
 331.251 +	efi_memmap_walk(count_pages, &num_physpages);
 331.252 +
 331.253 +	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 331.254 +
 331.255 +#ifdef CONFIG_VIRTUAL_MEM_MAP
 331.256 +	memset(zholes_size, 0, sizeof(zholes_size));
 331.257 +
 331.258 +	num_dma_physpages = 0;
 331.259 +	efi_memmap_walk(count_dma_pages, &num_dma_physpages);
 331.260 +
 331.261 +	if (max_low_pfn < max_dma) {
 331.262 +		zones_size[ZONE_DMA] = max_low_pfn;
 331.263 +		zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
 331.264 +	} else {
 331.265 +		zones_size[ZONE_DMA] = max_dma;
 331.266 +		zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
 331.267 +		if (num_physpages > num_dma_physpages) {
 331.268 +			zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
 331.269 +			zholes_size[ZONE_NORMAL] =
 331.270 +				((max_low_pfn - max_dma) -
 331.271 +				 (num_physpages - num_dma_physpages));
 331.272 +		}
 331.273 +	}
 331.274 +
 331.275 +	max_gap = 0;
 331.276 +	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
 331.277 +	if (max_gap < LARGE_GAP) {
 331.278 +		vmem_map = (struct page *) 0;
 331.279 +		free_area_init_node(0, &contig_page_data, zones_size, 0,
 331.280 +				    zholes_size);
 331.281 +	} else {
 331.282 +		unsigned long map_size;
 331.283 +
 331.284 +		/* allocate virtual_mem_map */
 331.285 +
 331.286 +		map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
 331.287 +		vmalloc_end -= map_size;
 331.288 +		vmem_map = (struct page *) vmalloc_end;
 331.289 +		efi_memmap_walk(create_mem_map_page_table, NULL);
 331.290 +
 331.291 +		mem_map = contig_page_data.node_mem_map = vmem_map;
 331.292 +		free_area_init_node(0, &contig_page_data, zones_size,
 331.293 +				    0, zholes_size);
 331.294 +
 331.295 +		printk("Virtual mem_map starts at 0x%p\n", mem_map);
 331.296 +	}
 331.297 +#else /* !CONFIG_VIRTUAL_MEM_MAP */
 331.298 +	if (max_low_pfn < max_dma)
 331.299 +		zones_size[ZONE_DMA] = max_low_pfn;
 331.300 +	else {
 331.301 +		zones_size[ZONE_DMA] = max_dma;
 331.302 +		zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
 331.303 +	}
 331.304 +	free_area_init(zones_size);
 331.305 +#endif /* !CONFIG_VIRTUAL_MEM_MAP */
 331.306 +	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 331.307 +}
 331.308 +#endif /* !XEN */
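
find_bootmap_location() above walks the free gaps between the (sorted) reserved regions, looking for the first one large enough to hold the bootmem bitmap. The stand-alone sketch below replays that walk with a made-up region list; find_gap() and the sample addresses are illustrative, not kernel code, and the PFN-0 special case is omitted.

#include <stdio.h>

struct region { unsigned long start, end; };	/* stand-in for struct rsvd_region */

#define PAGE_SIZE	(1UL << 14)		/* 16KB pages, as on ia64 */
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(a)	(((a) + PAGE_SIZE - 1) & PAGE_MASK)
#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

/* Same walk as find_bootmap_location(): scan the gaps between the memory
 * range [start,end) and the sorted reserved regions for the first gap
 * that can hold `needed` bytes. */
static int find_gap(unsigned long start, unsigned long end, unsigned long needed,
		    const struct region *rsvd, int nr, unsigned long *where)
{
	unsigned long free_start = 0, range_start, range_end;
	int i;

	for (i = 0; i < nr; i++) {
		range_start = MAX(start, free_start);
		range_end   = MIN(end, rsvd[i].start & PAGE_MASK);
		free_start  = PAGE_ALIGN(rsvd[i].end);

		if (range_end <= range_start)
			continue;		/* empty gap */
		if (range_end - range_start >= needed) {
			*where = range_start;
			return 0;		/* found */
		}
		if (range_end == end)
			break;			/* nothing more in this segment */
	}
	return -1;
}

int main(void)
{
	/* Made-up layout: two reserved blobs plus the ~0 end-of-memory marker. */
	struct region rsvd[] = {
		{ 0x0004000, 0x0008000 },
		{ 0x0100000, 0x0180000 },
		{ ~0UL, ~0UL },
	};
	unsigned long where;

	if (find_gap(0, 0x400000, 0x40000, rsvd, 3, &where) == 0)
		printf("bootmap fits at 0x%lx\n", where);
	return 0;
}
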
   332.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   332.2 +++ b/xen/arch/ia64/linux-xen/pal.S	Tue Aug 16 12:15:23 2005 +0800
   332.3 @@ -0,0 +1,310 @@
   332.4 +/*
   332.5 + * PAL Firmware support
   332.6 + * IA-64 Processor Programmers Reference Vol 2
   332.7 + *
   332.8 + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
   332.9 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  332.10 + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
  332.11 + *	David Mosberger <davidm@hpl.hp.com>
  332.12 + *	Stephane Eranian <eranian@hpl.hp.com>
  332.13 + *
  332.14 + * 05/22/2000 eranian Added support for stacked register calls
  332.15 + * 05/24/2000 eranian Added support for physical mode static calls
  332.16 + */
  332.17 +
  332.18 +#include <asm/asmmacro.h>
  332.19 +#include <asm/processor.h>
  332.20 +
  332.21 +	.data
  332.22 +pal_entry_point:
  332.23 +	data8 ia64_pal_default_handler
  332.24 +	.text
  332.25 +
  332.26 +/*
  332.27 + * Set the PAL entry point address.  This could be written in C code, but we do it here
  332.28 + * to keep it all in one module (besides, it's so trivial that it's
  332.29 + * not a big deal).
  332.30 + *
  332.31 + * in0		Address of the PAL entry point (text address, NOT a function descriptor).
  332.32 + */
  332.33 +GLOBAL_ENTRY(ia64_pal_handler_init)
  332.34 +	alloc r3=ar.pfs,1,0,0,0
  332.35 +	movl r2=pal_entry_point
  332.36 +	;;
  332.37 +	st8 [r2]=in0
  332.38 +	br.ret.sptk.many rp
  332.39 +END(ia64_pal_handler_init)
  332.40 +
  332.41 +/*
  332.42 + * Default PAL call handler.  This needs to be coded in assembly because it uses
  332.43 + * the static calling convention, i.e., the RSE may not be used and calls are
  332.44 + * done via "br.cond" (not "br.call").
  332.45 + */
  332.46 +GLOBAL_ENTRY(ia64_pal_default_handler)
  332.47 +	mov r8=-1
  332.48 +	br.cond.sptk.many rp
  332.49 +END(ia64_pal_default_handler)
  332.50 +
  332.51 +/*
  332.52 + * Make a PAL call using the static calling convention.
  332.53 + *
  332.54 + * in0         Index of PAL service
  332.55 + * in1 - in3   Remaining PAL arguments
  332.56 + * in4	       1 ==> clear psr.ic,  0 ==> don't clear psr.ic
  332.57 + *
  332.58 + */
  332.59 +GLOBAL_ENTRY(ia64_pal_call_static)
  332.60 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
  332.61 +	alloc loc1 = ar.pfs,5,5,0,0
  332.62 +	movl loc2 = pal_entry_point
  332.63 +1:	{
  332.64 +	  mov r28 = in0
  332.65 +	  mov r29 = in1
  332.66 +	  mov r8 = ip
  332.67 +	}
  332.68 +	;;
  332.69 +	ld8 loc2 = [loc2]		// loc2 <- entry point
  332.70 +	tbit.nz p6,p7 = in4, 0
  332.71 +	adds r8 = 1f-1b,r8
  332.72 +	mov loc4=ar.rsc			// save RSE configuration
  332.73 +	;;
  332.74 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
  332.75 +	mov loc3 = psr
  332.76 +	mov loc0 = rp
  332.77 +	.body
  332.78 +	mov r30 = in2
  332.79 +
  332.80 +(p6)	rsm psr.i | psr.ic
  332.81 +	mov r31 = in3
  332.82 +	mov b7 = loc2
  332.83 +
  332.84 +(p7)	rsm psr.i
  332.85 +	;;
  332.86 +(p6)	srlz.i
  332.87 +	mov rp = r8
  332.88 +	br.cond.sptk.many b7
  332.89 +1:	mov psr.l = loc3
  332.90 +	mov ar.rsc = loc4		// restore RSE configuration
  332.91 +	mov ar.pfs = loc1
  332.92 +	mov rp = loc0
  332.93 +	;;
  332.94 +	srlz.d				// serialize restoration of psr.l
  332.95 +	br.ret.sptk.many b0
  332.96 +END(ia64_pal_call_static)
  332.97 +
  332.98 +/*
  332.99 + * Make a PAL call using the stacked registers calling convention.
 332.100 + *
 332.101 + * Inputs:
 332.102 + * 	in0         Index of PAL service
 332.103 + * 	in1 - in3   Remaining PAL arguments
 332.104 + */
 332.105 +GLOBAL_ENTRY(ia64_pal_call_stacked)
 332.106 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
 332.107 +	alloc loc1 = ar.pfs,4,4,4,0
 332.108 +	movl loc2 = pal_entry_point
 332.109 +
 332.110 +	mov r28  = in0			// Index MUST be copied to r28
 332.111 +	mov out0 = in0			// AND in0 of PAL function
 332.112 +	mov loc0 = rp
 332.113 +	.body
 332.114 +	;;
 332.115 +	ld8 loc2 = [loc2]		// loc2 <- entry point
 332.116 +	mov out1 = in1
 332.117 +	mov out2 = in2
 332.118 +	mov out3 = in3
 332.119 +	mov loc3 = psr
 332.120 +	;;
 332.121 +	rsm psr.i
 332.122 +	mov b7 = loc2
 332.123 +	;;
 332.124 +	br.call.sptk.many rp=b7		// now make the call
 332.125 +.ret0:	mov psr.l  = loc3
 332.126 +	mov ar.pfs = loc1
 332.127 +	mov rp = loc0
 332.128 +	;;
 332.129 +	srlz.d				// serialize restoration of psr.l
 332.130 +	br.ret.sptk.many b0
 332.131 +END(ia64_pal_call_stacked)
 332.132 +
 332.133 +/*
 332.134 + * Make a physical mode PAL call using the static registers calling convention.
 332.135 + *
 332.136 + * Inputs:
 332.137 + * 	in0         Index of PAL service
 332.138 + * 	in1 - in3   Remaining PAL arguments
 332.139 + *
 332.140 + * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
 332.141 + * So we don't need to clear them.
 332.142 + */
 332.143 +#define PAL_PSR_BITS_TO_CLEAR							\
 332.144 +	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT  | IA64_PSR_DB | IA64_PSR_RT |	\
 332.145 +	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |		\
 332.146 +	 IA64_PSR_DFL | IA64_PSR_DFH)
 332.147 +
 332.148 +#define PAL_PSR_BITS_TO_SET							\
 332.149 +	(IA64_PSR_BN)
 332.150 +
 332.151 +
 332.152 +GLOBAL_ENTRY(ia64_pal_call_phys_static)
 332.153 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
 332.154 +	alloc loc1 = ar.pfs,4,7,0,0
 332.155 +	movl loc2 = pal_entry_point
 332.156 +1:	{
 332.157 +	  mov r28  = in0		// copy procedure index
 332.158 +	  mov r8   = ip			// save ip to compute branch
 332.159 +	  mov loc0 = rp			// save rp
 332.160 +	}
 332.161 +	.body
 332.162 +	;;
 332.163 +	ld8 loc2 = [loc2]		// loc2 <- entry point
 332.164 +	mov r29  = in1			// first argument
 332.165 +	mov r30  = in2			// copy arg2
 332.166 +	mov r31  = in3			// copy arg3
 332.167 +	;;
 332.168 +	mov loc3 = psr			// save psr
 332.169 +	adds r8  = 1f-1b,r8		// calculate return address for call
 332.170 +	;;
 332.171 +	mov loc4=ar.rsc			// save RSE configuration
 332.172 +#ifdef XEN
 332.173 +	dep.z loc2=loc2,0,60		// convert pal entry point to physical
 332.174 +#else // XEN
 332.175 +	dep.z loc2=loc2,0,61		// convert pal entry point to physical
 332.176 +#endif // XEN
 332.177 +	tpa r8=r8			// convert rp to physical
 332.178 +	;;
 332.179 +	mov b7 = loc2			// install target to branch reg
 332.180 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
 332.181 +	movl r16=PAL_PSR_BITS_TO_CLEAR
 332.182 +	movl r17=PAL_PSR_BITS_TO_SET
 332.183 +	;;
 332.184 +	or loc3=loc3,r17		// add in psr the bits to set
 332.185 +	;;
 332.186 +	andcm r16=loc3,r16		// removes bits to clear from psr
 332.187 +	br.call.sptk.many rp=ia64_switch_mode_phys
 332.188 +.ret1:	mov rp = r8			// install return address (physical)
 332.189 +	mov loc5 = r19
 332.190 +	mov loc6 = r20
 332.191 +	br.cond.sptk.many b7
 332.192 +1:
 332.193 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
 332.194 +	mov r16=loc3			// r16= original psr
 332.195 +	mov r19=loc5
 332.196 +	mov r20=loc6
 332.197 +	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
 332.198 +.ret2:
 332.199 +	mov psr.l = loc3		// restore init PSR
 332.200 +
 332.201 +	mov ar.pfs = loc1
 332.202 +	mov rp = loc0
 332.203 +	;;
 332.204 +	mov ar.rsc=loc4			// restore RSE configuration
 332.205 +	srlz.d				// serialize restoration of psr.l
 332.206 +	br.ret.sptk.many b0
 332.207 +END(ia64_pal_call_phys_static)
 332.208 +
 332.209 +/*
 332.210 + * Make a PAL call using the stacked registers in physical mode.
 332.211 + *
 332.212 + * Inputs:
 332.213 + * 	in0         Index of PAL service
 332.214 + * 	in1 - in3   Remaining PAL arguments
 332.215 + */
 332.216 +GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
 332.217 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
 332.218 +	alloc	loc1 = ar.pfs,5,7,4,0
 332.219 +	movl	loc2 = pal_entry_point
 332.220 +1:	{
 332.221 +	  mov r28  = in0		// copy procedure index
 332.222 +	  mov loc0 = rp		// save rp
 332.223 +	}
 332.224 +	.body
 332.225 +	;;
 332.226 +	ld8 loc2 = [loc2]		// loc2 <- entry point
 332.227 +	mov out0 = in0		// first argument
 332.228 +	mov out1 = in1		// copy arg2
 332.229 +	mov out2 = in2		// copy arg3
 332.230 +	mov out3 = in3		// copy arg4
 332.231 +	;;
 332.232 +	mov loc3 = psr		// save psr
 332.233 +	;;
 332.234 +	mov loc4=ar.rsc			// save RSE configuration
 332.235 +#ifdef XEN
 332.236 +	dep.z loc2=loc2,0,60		// convert pal entry point to physical
 332.237 +#else // XEN
 332.238 +	dep.z loc2=loc2,0,61		// convert pal entry point to physical
 332.239 +#endif // XEN
 332.240 +	;;
 332.241 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
 332.242 +	movl r16=PAL_PSR_BITS_TO_CLEAR
 332.243 +	movl r17=PAL_PSR_BITS_TO_SET
 332.244 +	;;
 332.245 +	or loc3=loc3,r17		// add in psr the bits to set
 332.246 +	mov b7 = loc2			// install target to branch reg
 332.247 +	;;
 332.248 +	andcm r16=loc3,r16		// removes bits to clear from psr
 332.249 +	br.call.sptk.many rp=ia64_switch_mode_phys
 332.250 +.ret6:
 332.251 +	mov loc5 = r19
 332.252 +	mov loc6 = r20
 332.253 +	br.call.sptk.many rp=b7		// now make the call
 332.254 +.ret7:
 332.255 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
 332.256 +	mov r16=loc3			// r16= original psr
 332.257 +	mov r19=loc5
 332.258 +	mov r20=loc6
 332.259 +	br.call.sptk.many rp=ia64_switch_mode_virt	// return to virtual mode
 332.260 +
 332.261 +.ret8:	mov psr.l  = loc3		// restore init PSR
 332.262 +	mov ar.pfs = loc1
 332.263 +	mov rp = loc0
 332.264 +	;;
 332.265 +	mov ar.rsc=loc4			// restore RSE configuration
 332.266 +	srlz.d				// serialize restoration of psr.l
 332.267 +	br.ret.sptk.many b0
 332.268 +END(ia64_pal_call_phys_stacked)
 332.269 +
 332.270 +/*
 332.271 + * Save the scratch fp regs which aren't saved in pt_regs already (fp10-fp15).
 332.272 + *
 332.273 + * NOTE: We need to do this since firmware (SAL and PAL) may use any of the
 332.274 + * scratch regs in the fp-low partition.
 332.275 + *
 332.276 + * Inputs:
 332.277 + *      in0	Address of stack storage for fp regs
 332.278 + */
 332.279 +GLOBAL_ENTRY(ia64_save_scratch_fpregs)
 332.280 +	alloc r3=ar.pfs,1,0,0,0
 332.281 +	add r2=16,in0
 332.282 +	;;
 332.283 +	stf.spill [in0] = f10,32
 332.284 +	stf.spill [r2]  = f11,32
 332.285 +	;;
 332.286 +	stf.spill [in0] = f12,32
 332.287 +	stf.spill [r2]  = f13,32
 332.288 +	;;
 332.289 +	stf.spill [in0] = f14,32
 332.290 +	stf.spill [r2]  = f15,32
 332.291 +	br.ret.sptk.many rp
 332.292 +END(ia64_save_scratch_fpregs)
 332.293 +
 332.294 +/*
 332.295 + * Load the scratch fp regs (fp10-fp15).
 332.296 + *
 332.297 + * Inputs:
 332.298 + *      in0	Address of stack storage for fp regs
 332.299 + */
 332.300 +GLOBAL_ENTRY(ia64_load_scratch_fpregs)
 332.301 +	alloc r3=ar.pfs,1,0,0,0
 332.302 +	add r2=16,in0
 332.303 +	;;
 332.304 +	ldf.fill  f10 = [in0],32
 332.305 +	ldf.fill  f11 = [r2],32
 332.306 +	;;
 332.307 +	ldf.fill  f12 = [in0],32
 332.308 +	ldf.fill  f13 = [r2],32
 332.309 +	;;
 332.310 +	ldf.fill  f14 = [in0],32
 332.311 +	ldf.fill  f15 = [r2],32
 332.312 +	br.ret.sptk.many rp
 332.313 +END(ia64_load_scratch_fpregs)
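
Both physical-mode entry points above convert the virtual PAL entry point to a physical address with dep.z (61 low bits kept in the Linux build, 60 under XEN, per the #ifdef). dep.z rX = rY, 0, N deposits the low N bits of rY into an otherwise-zero rX, i.e. it masks off the region bits. A rough C equivalent of just that masking, with an arbitrary example address, is sketched below; depz() and the sample value are illustrative only.

#include <stdio.h>
#include <stdint.h>

/* dep.z rX = rY, 0, N: copy the low N bits of rY into a zeroed rX.  Used
 * above to strip the region bits from the virtual PAL entry point before
 * switching to physical mode. */
static uint64_t depz(uint64_t v, unsigned nbits)
{
	return nbits >= 64 ? v : v & ((1ULL << nbits) - 1);
}

int main(void)
{
	uint64_t virt = 0xe000000001a3c000ULL;	/* hypothetical region-7 entry point */

	printf("linux mask (61 bits): 0x%llx\n", (unsigned long long)depz(virt, 61));
	printf("xen mask   (60 bits): 0x%llx\n", (unsigned long long)depz(virt, 60));
	return 0;
}
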
   333.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   333.2 +++ b/xen/arch/ia64/linux-xen/setup.c	Tue Aug 16 12:15:23 2005 +0800
   333.3 @@ -0,0 +1,773 @@
   333.4 +/*
   333.5 + * Architecture-specific setup.
   333.6 + *
   333.7 + * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
   333.8 + *	David Mosberger-Tang <davidm@hpl.hp.com>
   333.9 + *	Stephane Eranian <eranian@hpl.hp.com>
  333.10 + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
  333.11 + * Copyright (C) 1999 VA Linux Systems
  333.12 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  333.13 + *
  333.14 + * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
  333.15 + * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
  333.16 + * 03/31/00 R.Seth	cpu_initialized and current->processor fixes
  333.17 + * 02/04/00 D.Mosberger	some more get_cpuinfo fixes...
  333.18 + * 02/01/00 R.Seth	fixed get_cpuinfo for SMP
  333.19 + * 01/07/99 S.Eranian	added the support for command line argument
  333.20 + * 06/24/99 W.Drummond	added boot_cpu_data.
  333.21 + */
  333.22 +#include <linux/config.h>
  333.23 +#include <linux/module.h>
  333.24 +#include <linux/init.h>
  333.25 +
  333.26 +#include <linux/acpi.h>
  333.27 +#include <linux/bootmem.h>
  333.28 +#include <linux/console.h>
  333.29 +#include <linux/delay.h>
  333.30 +#include <linux/kernel.h>
  333.31 +#include <linux/reboot.h>
  333.32 +#include <linux/sched.h>
  333.33 +#include <linux/seq_file.h>
  333.34 +#include <linux/string.h>
  333.35 +#include <linux/threads.h>
  333.36 +#include <linux/tty.h>
  333.37 +#include <linux/serial.h>
  333.38 +#include <linux/serial_core.h>
  333.39 +#include <linux/efi.h>
  333.40 +#include <linux/initrd.h>
  333.41 +
  333.42 +#include <asm/ia32.h>
  333.43 +#include <asm/machvec.h>
  333.44 +#include <asm/mca.h>
  333.45 +#include <asm/meminit.h>
  333.46 +#include <asm/page.h>
  333.47 +#include <asm/patch.h>
  333.48 +#include <asm/pgtable.h>
  333.49 +#include <asm/processor.h>
  333.50 +#include <asm/sal.h>
  333.51 +#include <asm/sections.h>
  333.52 +#include <asm/serial.h>
  333.53 +#include <asm/setup.h>
  333.54 +#include <asm/smp.h>
  333.55 +#include <asm/system.h>
  333.56 +#include <asm/unistd.h>
  333.57 +#ifdef CONFIG_VTI
  333.58 +#include <asm/vmx.h>
  333.59 +#endif // CONFIG_VTI
  333.60 +#include <asm/io.h>
  333.61 +
  333.62 +#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
  333.63 +# error "struct cpuinfo_ia64 too big!"
  333.64 +#endif
  333.65 +
  333.66 +#ifdef CONFIG_SMP
  333.67 +unsigned long __per_cpu_offset[NR_CPUS];
  333.68 +EXPORT_SYMBOL(__per_cpu_offset);
  333.69 +#endif
  333.70 +
  333.71 +DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
  333.72 +DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
  333.73 +DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
  333.74 +unsigned long ia64_cycles_per_usec;
  333.75 +struct ia64_boot_param *ia64_boot_param;
  333.76 +struct screen_info screen_info;
  333.77 +
  333.78 +unsigned long ia64_max_cacheline_size;
  333.79 +unsigned long ia64_iobase;	/* virtual address for I/O accesses */
  333.80 +EXPORT_SYMBOL(ia64_iobase);
  333.81 +struct io_space io_space[MAX_IO_SPACES];
  333.82 +EXPORT_SYMBOL(io_space);
  333.83 +unsigned int num_io_spaces;
  333.84 +
  333.85 +unsigned char aux_device_present = 0xaa;        /* XXX remove this when legacy I/O is gone */
  333.86 +
  333.87 +/*
  333.88 + * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
  333.89 + * mask specifies a mask of address bits that must be 0 in order for two buffers to be
  333.90 + * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
  333.91 + * address of the second buffer must be aligned to (merge_mask+1) in order to be
  333.92 + * mergeable).  By default, we assume there is no I/O MMU which can merge physically
  333.93 + * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to an iommu
  333.94 + * page-size of 2^64.
  333.95 + */
  333.96 +unsigned long ia64_max_iommu_merge_mask = ~0UL;
  333.97 +EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
  333.98 +
  333.99 +/*
 333.100 + * We use a special marker for the end of memory and it uses the extra (+1) slot
 333.101 + */
 333.102 +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
 333.103 +int num_rsvd_regions;
 333.104 +
 333.105 +
 333.106 +/*
 333.107 + * Filter incoming memory segments based on the primitive map created from the boot
 333.108 + * parameters. Segments contained in the map are removed from the memory ranges. A
 333.109 + * caller-specified function is called with the memory ranges that remain after filtering.
 333.110 + * This routine does not assume the incoming segments are sorted.
 333.111 + */
 333.112 +int
 333.113 +filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
 333.114 +{
 333.115 +	unsigned long range_start, range_end, prev_start;
 333.116 +	void (*func)(unsigned long, unsigned long, int);
 333.117 +	int i;
 333.118 +
 333.119 +#if IGNORE_PFN0
 333.120 +	if (start == PAGE_OFFSET) {
 333.121 +		printk(KERN_WARNING "warning: skipping physical page 0\n");
 333.122 +		start += PAGE_SIZE;
 333.123 +		if (start >= end) return 0;
 333.124 +	}
 333.125 +#endif
 333.126 +	/*
 333.127 +	 * lowest possible address (the walker uses virtual addresses)
 333.128 +	 */
 333.129 +	prev_start = PAGE_OFFSET;
 333.130 +	func = arg;
 333.131 +
 333.132 +	for (i = 0; i < num_rsvd_regions; ++i) {
 333.133 +		range_start = max(start, prev_start);
 333.134 +		range_end   = min(end, rsvd_region[i].start);
 333.135 +
 333.136 +		if (range_start < range_end)
 333.137 +#ifdef XEN
 333.138 +		{
 333.139 +		/* init_boot_pages requires "ps, pe" */
 333.140 +			printk("Init boot pages: 0x%lx -> 0x%lx.\n",
 333.141 +				__pa(range_start), __pa(range_end));
 333.142 +			(*func)(__pa(range_start), __pa(range_end), 0);
 333.143 +		}
 333.144 +#else
 333.145 +			call_pernode_memory(__pa(range_start), range_end - range_start, func);
 333.146 +#endif
 333.147 +
 333.148 +		/* nothing more available in this segment */
 333.149 +		if (range_end == end) return 0;
 333.150 +
 333.151 +		prev_start = rsvd_region[i].end;
 333.152 +	}
 333.153 +	/* end of memory marker allows full processing inside loop body */
 333.154 +	return 0;
 333.155 +}
 333.156 +
 333.157 +static void
 333.158 +sort_regions (struct rsvd_region *rsvd_region, int max)
 333.159 +{
 333.160 +	int j;
 333.161 +
 333.162 +	/* simple bubble sorting */
 333.163 +	while (max--) {
 333.164 +		for (j = 0; j < max; ++j) {
 333.165 +			if (rsvd_region[j].start > rsvd_region[j+1].start) {
 333.166 +				struct rsvd_region tmp;
 333.167 +				tmp = rsvd_region[j];
 333.168 +				rsvd_region[j] = rsvd_region[j + 1];
 333.169 +				rsvd_region[j + 1] = tmp;
 333.170 +			}
 333.171 +		}
 333.172 +	}
 333.173 +}
 333.174 +
 333.175 +/**
 333.176 + * reserve_memory - setup reserved memory areas
 333.177 + *
 333.178 + * Setup the reserved memory areas set aside for the boot parameters,
 333.179 + * initrd, etc.  There are currently %IA64_MAX_RSVD_REGIONS defined,
 333.180 + * see include/asm-ia64/meminit.h if you need to define more.
 333.181 + */
 333.182 +void
 333.183 +reserve_memory (void)
 333.184 +{
 333.185 +	int n = 0;
 333.186 +
 333.187 +	/*
 333.188 +	 * none of the entries in this table overlap
 333.189 +	 */
 333.190 +	rsvd_region[n].start = (unsigned long) ia64_boot_param;
 333.191 +	rsvd_region[n].end   = rsvd_region[n].start + sizeof(*ia64_boot_param);
 333.192 +	n++;
 333.193 +
 333.194 +	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap);
 333.195 +	rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->efi_memmap_size;
 333.196 +	n++;
 333.197 +
 333.198 +	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line);
 333.199 +	rsvd_region[n].end   = (rsvd_region[n].start
 333.200 +				+ strlen(__va(ia64_boot_param->command_line)) + 1);
 333.201 +	n++;
 333.202 +
 333.203 +	rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
 333.204 +#ifdef XEN
 333.205 +	/* Reserve xen image/bitmap/xen-heap */
 333.206 +	rsvd_region[n].end   = rsvd_region[n].start + xenheap_size;
 333.207 +#else
 333.208 +	rsvd_region[n].end   = (unsigned long) ia64_imva(_end);
 333.209 +#endif
 333.210 +	n++;
 333.211 +
 333.212 +#ifdef CONFIG_BLK_DEV_INITRD
 333.213 +	if (ia64_boot_param->initrd_start) {
 333.214 +		rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
 333.215 +		rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->initrd_size;
 333.216 +		n++;
 333.217 +	}
 333.218 +#endif
 333.219 +
 333.220 +	/* end of memory marker */
 333.221 +	rsvd_region[n].start = ~0UL;
 333.222 +	rsvd_region[n].end   = ~0UL;
 333.223 +	n++;
 333.224 +
 333.225 +	num_rsvd_regions = n;
 333.226 +
 333.227 +	sort_regions(rsvd_region, num_rsvd_regions);
 333.228 +}
 333.229 +
 333.230 +/**
 333.231 + * find_initrd - get initrd parameters from the boot parameter structure
 333.232 + *
 333.233 + * Grab the initrd start and end from the boot parameter struct given us by
 333.234 + * the boot loader.
 333.235 + */
 333.236 +void
 333.237 +find_initrd (void)
 333.238 +{
 333.239 +#ifdef CONFIG_BLK_DEV_INITRD
 333.240 +	if (ia64_boot_param->initrd_start) {
 333.241 +		initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
 333.242 +		initrd_end   = initrd_start+ia64_boot_param->initrd_size;
 333.243 +
 333.244 +		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
 333.245 +		       initrd_start, ia64_boot_param->initrd_size);
 333.246 +	}
 333.247 +#endif
 333.248 +}
 333.249 +
 333.250 +static void __init
 333.251 +io_port_init (void)
 333.252 +{
 333.253 +	extern unsigned long ia64_iobase;
 333.254 +	unsigned long phys_iobase;
 333.255 +
 333.256 +	/*
 333.257 +	 *  Set `iobase' to the appropriate address in region 6 (uncached access range).
 333.258 +	 *
 333.259 +	 *  The EFI memory map is the "preferred" location to get the I/O port space base,
 333.261 + *  rather than relying on AR.KR0. This should become clearer in future SAL
 333.261 +	 *  specs. We'll fall back to getting it out of AR.KR0 if no appropriate entry is
 333.262 +	 *  found in the memory map.
 333.263 +	 */
 333.264 +	phys_iobase = efi_get_iobase();
 333.265 +	if (phys_iobase)
 333.266 +		/* set AR.KR0 since this is all we use it for anyway */
 333.267 +		ia64_set_kr(IA64_KR_IO_BASE, phys_iobase);
 333.268 +	else {
 333.269 +		phys_iobase = ia64_get_kr(IA64_KR_IO_BASE);
 333.270 +		printk(KERN_INFO "No I/O port range found in EFI memory map, falling back "
 333.271 +		       "to AR.KR0\n");
 333.272 +		printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase);
 333.273 +	}
 333.274 +	ia64_iobase = (unsigned long) ioremap(phys_iobase, 0);
 333.275 +
 333.276 +	/* setup legacy IO port space */
 333.277 +	io_space[0].mmio_base = ia64_iobase;
 333.278 +	io_space[0].sparse = 1;
 333.279 +	num_io_spaces = 1;
 333.280 +}
 333.281 +
 333.282 +/**
 333.283 + * early_console_setup - setup debugging console
 333.284 + *
 333.285 + * Consoles started here require little enough setup that we can start using
 333.286 + * them very early in the boot process, either right after the machine
 333.287 + * vector initialization, or even before if the drivers can detect their hw.
 333.288 + *
 333.289 + * Returns non-zero if a console couldn't be set up.
 333.290 + */
 333.291 +static inline int __init
 333.292 +early_console_setup (char *cmdline)
 333.293 +{
 333.294 +#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
 333.295 +	{
 333.296 +		extern int sn_serial_console_early_setup(void);
 333.297 +		if (!sn_serial_console_early_setup())
 333.298 +			return 0;
 333.299 +	}
 333.300 +#endif
 333.301 +#ifdef CONFIG_EFI_PCDP
 333.302 +	if (!efi_setup_pcdp_console(cmdline))
 333.303 +		return 0;
 333.304 +#endif
 333.305 +#ifdef CONFIG_SERIAL_8250_CONSOLE
 333.306 +	if (!early_serial_console_init(cmdline))
 333.307 +		return 0;
 333.308 +#endif
 333.309 +
 333.310 +	return -1;
 333.311 +}
 333.312 +
 333.313 +static inline void
 333.314 +mark_bsp_online (void)
 333.315 +{
 333.316 +#ifdef CONFIG_SMP
 333.317 +	/* If we register an early console, allow CPU 0 to printk */
 333.318 +	cpu_set(smp_processor_id(), cpu_online_map);
 333.319 +#endif
 333.320 +}
 333.321 +
 333.322 +void __init
 333.323 +#ifdef XEN
 333.324 +early_setup_arch (char **cmdline_p)
 333.325 +#else
 333.326 +setup_arch (char **cmdline_p)
 333.327 +#endif
 333.328 +{
 333.329 +	unw_init();
 333.330 +
 333.331 +	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
 333.332 +
 333.333 +	*cmdline_p = __va(ia64_boot_param->command_line);
 333.334 +#ifdef XEN
 333.335 +	efi_init();
 333.336 +#else
 333.337 +	strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
 333.338 +
 333.339 +	efi_init();
 333.340 +	io_port_init();
 333.341 +#endif
 333.342 +
 333.343 +#ifdef CONFIG_IA64_GENERIC
 333.344 +	{
 333.345 +		const char *mvec_name = strstr (*cmdline_p, "machvec=");
 333.346 +		char str[64];
 333.347 +
 333.348 +		if (mvec_name) {
 333.349 +			const char *end;
 333.350 +			size_t len;
 333.351 +
 333.352 +			mvec_name += 8;
 333.353 +			end = strchr (mvec_name, ' ');
 333.354 +			if (end)
 333.355 +				len = end - mvec_name;
 333.356 +			else
 333.357 +				len = strlen (mvec_name);
 333.358 +			len = min(len, sizeof (str) - 1);
 333.359 +			strncpy (str, mvec_name, len);
 333.360 +			str[len] = '\0';
 333.361 +			mvec_name = str;
 333.362 +		} else
 333.363 +			mvec_name = acpi_get_sysname();
 333.364 +		machvec_init(mvec_name);
 333.365 +	}
 333.366 +#endif
 333.367 +
 333.368 +#ifdef XEN
 333.369 +	early_cmdline_parse(cmdline_p);
 333.370 +	cmdline_parse(*cmdline_p);
 333.371 +#undef CONFIG_ACPI_BOOT
 333.372 +#endif
 333.373 +	if (early_console_setup(*cmdline_p) == 0)
 333.374 +		mark_bsp_online();
 333.375 +
 333.376 +#ifdef CONFIG_ACPI_BOOT
 333.377 +	/* Initialize the ACPI boot-time table parser */
 333.378 +	acpi_table_init();
 333.379 +# ifdef CONFIG_ACPI_NUMA
 333.380 +	acpi_numa_init();
 333.381 +# endif
 333.382 +#else
 333.383 +# ifdef CONFIG_SMP
 333.384 +	smp_build_cpu_map();	/* happens, e.g., with the Ski simulator */
 333.385 +# endif
 333.386 +#endif /* CONFIG_ACPI_BOOT */
 333.387 +
 333.388 +#ifndef XEN
 333.389 +	find_memory();
 333.390 +#else
 333.391 +	io_port_init();
 333.392 +}
 333.393 +
 333.394 +void __init
 333.395 +late_setup_arch (char **cmdline_p)
 333.396 +{
 333.397 +#undef CONFIG_ACPI_BOOT
 333.398 +	acpi_table_init();
 333.399 +#endif
 333.400 +	/* process SAL system table: */
 333.401 +	ia64_sal_init(efi.sal_systab);
 333.402 +
 333.403 +#ifdef CONFIG_SMP
 333.404 +	cpu_physical_id(0) = hard_smp_processor_id();
 333.405 +#endif
 333.406 +
 333.407 +#ifdef CONFIG_VTI
 333.408 +	identify_vmx_feature();
 333.409 +#endif // CONFIG_VTI
 333.410 +
 333.411 +	cpu_init();	/* initialize the bootstrap CPU */
 333.412 +
 333.413 +#ifdef CONFIG_ACPI_BOOT
 333.414 +	acpi_boot_init();
 333.415 +#endif
 333.416 +
 333.417 +#ifdef CONFIG_VT
 333.418 +	if (!conswitchp) {
 333.419 +# if defined(CONFIG_DUMMY_CONSOLE)
 333.420 +		conswitchp = &dummy_con;
 333.421 +# endif
 333.422 +# if defined(CONFIG_VGA_CONSOLE)
 333.423 +		/*
 333.424 +		 * Non-legacy systems may route legacy VGA MMIO range to system
 333.425 +		 * memory.  vga_con probes the MMIO hole, so memory looks like
 333.426 +		 * a VGA device to it.  The EFI memory map can tell us if it's
 333.427 +		 * memory so we can avoid this problem.
 333.428 +		 */
 333.429 +		if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
 333.430 +			conswitchp = &vga_con;
 333.431 +# endif
 333.432 +	}
 333.433 +#endif
 333.434 +
 333.435 +	/* enable IA-64 Machine Check Abort Handling unless disabled */
 333.436 +	if (!strstr(saved_command_line, "nomca"))
 333.437 +		ia64_mca_init();
 333.438 +
 333.439 +	platform_setup(cmdline_p);
 333.440 +	paging_init();
 333.441 +}
 333.442 +
 333.443 +/*
 333.444 + * Display cpu info for all cpu's.
 333.445 + */
 333.446 +static int
 333.447 +show_cpuinfo (struct seq_file *m, void *v)
 333.448 +{
 333.449 +#ifdef CONFIG_SMP
 333.450 +#	define lpj	c->loops_per_jiffy
 333.451 +#	define cpunum	c->cpu
 333.452 +#else
 333.453 +#	define lpj	loops_per_jiffy
 333.454 +#	define cpunum	0
 333.455 +#endif
 333.456 +	static struct {
 333.457 +		unsigned long mask;
 333.458 +		const char *feature_name;
 333.459 +	} feature_bits[] = {
 333.460 +		{ 1UL << 0, "branchlong" },
 333.461 +		{ 1UL << 1, "spontaneous deferral"},
 333.462 +		{ 1UL << 2, "16-byte atomic ops" }
 333.463 +	};
 333.464 +	char family[32], features[128], *cp, sep;
 333.465 +	struct cpuinfo_ia64 *c = v;
 333.466 +	unsigned long mask;
 333.467 +	int i;
 333.468 +
 333.469 +	mask = c->features;
 333.470 +
 333.471 +	switch (c->family) {
 333.472 +	      case 0x07:	memcpy(family, "Itanium", 8); break;
 333.473 +	      case 0x1f:	memcpy(family, "Itanium 2", 10); break;
 333.474 +	      default:		sprintf(family, "%u", c->family); break;
 333.475 +	}
 333.476 +
 333.477 +	/* build the feature string: */
 333.478 +	memcpy(features, " standard", 10);
 333.479 +	cp = features;
 333.480 +	sep = 0;
 333.481 +	for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
 333.482 +		if (mask & feature_bits[i].mask) {
 333.483 +			if (sep)
 333.484 +				*cp++ = sep;
 333.485 +			sep = ',';
 333.486 +			*cp++ = ' ';
 333.487 +			strcpy(cp, feature_bits[i].feature_name);
 333.488 +			cp += strlen(feature_bits[i].feature_name);
 333.489 +			mask &= ~feature_bits[i].mask;
 333.490 +		}
 333.491 +	}
 333.492 +	if (mask) {
 333.493 +		/* print unknown features as a hex value: */
 333.494 +		if (sep)
 333.495 +			*cp++ = sep;
 333.496 +		sprintf(cp, " 0x%lx", mask);
 333.497 +	}
 333.498 +
 333.499 +	seq_printf(m,
 333.500 +		   "processor  : %d\n"
 333.501 +		   "vendor     : %s\n"
 333.502 +		   "arch       : IA-64\n"
 333.503 +		   "family     : %s\n"
 333.504 +		   "model      : %u\n"
 333.505 +		   "revision   : %u\n"
 333.506 +		   "archrev    : %u\n"
 333.507 +		   "features   :%s\n"	/* don't change this---it _is_ right! */
 333.508 +		   "cpu number : %lu\n"
 333.509 +		   "cpu regs   : %u\n"
 333.510 +		   "cpu MHz    : %lu.%06lu\n"
 333.511 +		   "itc MHz    : %lu.%06lu\n"
 333.512 +		   "BogoMIPS   : %lu.%02lu\n\n",
 333.513 +		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
 333.514 +		   features, c->ppn, c->number,
 333.515 +		   c->proc_freq / 1000000, c->proc_freq % 1000000,
 333.516 +		   c->itc_freq / 1000000, c->itc_freq % 1000000,
 333.517 +		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
 333.518 +	return 0;
 333.519 +}
 333.520 +
 333.521 +static void *
 333.522 +c_start (struct seq_file *m, loff_t *pos)
 333.523 +{
 333.524 +#ifdef CONFIG_SMP
 333.525 +	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
 333.526 +		++*pos;
 333.527 +#endif
 333.528 +	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
 333.529 +}
 333.530 +
 333.531 +static void *
 333.532 +c_next (struct seq_file *m, void *v, loff_t *pos)
 333.533 +{
 333.534 +	++*pos;
 333.535 +	return c_start(m, pos);
 333.536 +}
 333.537 +
 333.538 +static void
 333.539 +c_stop (struct seq_file *m, void *v)
 333.540 +{
 333.541 +}
 333.542 +
 333.543 +#ifndef XEN
 333.544 +struct seq_operations cpuinfo_op = {
 333.545 +	.start =	c_start,
 333.546 +	.next =		c_next,
 333.547 +	.stop =		c_stop,
 333.548 +	.show =		show_cpuinfo
 333.549 +};
 333.550 +#endif
 333.551 +
 333.552 +void
 333.553 +identify_cpu (struct cpuinfo_ia64 *c)
 333.554 +{
 333.555 +	union {
 333.556 +		unsigned long bits[5];
 333.557 +		struct {
 333.558 +			/* id 0 & 1: */
 333.559 +			char vendor[16];
 333.560 +
 333.561 +			/* id 2 */
 333.562 +			u64 ppn;		/* processor serial number */
 333.563 +
 333.564 +			/* id 3: */
 333.565 +			unsigned number		:  8;
 333.566 +			unsigned revision	:  8;
 333.567 +			unsigned model		:  8;
 333.568 +			unsigned family		:  8;
 333.569 +			unsigned archrev	:  8;
 333.570 +			unsigned reserved	: 24;
 333.571 +
 333.572 +			/* id 4: */
 333.573 +			u64 features;
 333.574 +		} field;
 333.575 +	} cpuid;
 333.576 +	pal_vm_info_1_u_t vm1;
 333.577 +	pal_vm_info_2_u_t vm2;
 333.578 +	pal_status_t status;
 333.579 +	unsigned long impl_va_msb = 50, phys_addr_size = 44;	/* Itanium defaults */
 333.580 +	int i;
 333.581 +
 333.582 +	for (i = 0; i < 5; ++i)
 333.583 +		cpuid.bits[i] = ia64_get_cpuid(i);
 333.584 +
 333.585 +	memcpy(c->vendor, cpuid.field.vendor, 16);
 333.586 +#ifdef CONFIG_SMP
 333.587 +	c->cpu = smp_processor_id();
 333.588 +#endif
 333.589 +	c->ppn = cpuid.field.ppn;
 333.590 +	c->number = cpuid.field.number;
 333.591 +	c->revision = cpuid.field.revision;
 333.592 +	c->model = cpuid.field.model;
 333.593 +	c->family = cpuid.field.family;
 333.594 +	c->archrev = cpuid.field.archrev;
 333.595 +	c->features = cpuid.field.features;
 333.596 +
 333.597 +	status = ia64_pal_vm_summary(&vm1, &vm2);
 333.598 +	if (status == PAL_STATUS_SUCCESS) {
 333.599 +		impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
 333.600 +		phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
 333.601 +	}
 333.602 +	c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
 333.603 +	c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
 333.604 +
 333.605 +#ifdef CONFIG_VTI
 333.606 +	/* If vmx feature is on, do necessary initialization for vmx */
 333.607 +	if (vmx_enabled)
 333.608 +		vmx_init_env();
 333.609 +#endif
 333.610 +}
 333.611 +
 333.612 +void
 333.613 +setup_per_cpu_areas (void)
 333.614 +{
 333.615 +	/* start_kernel() requires this... */
 333.616 +}
 333.617 +
 333.618 +static void
 333.619 +get_max_cacheline_size (void)
 333.620 +{
 333.621 +	unsigned long line_size, max = 1;
 333.622 +	u64 l, levels, unique_caches;
 333.623 +        pal_cache_config_info_t cci;
 333.624 +        s64 status;
 333.625 +
 333.626 +        status = ia64_pal_cache_summary(&levels, &unique_caches);
 333.627 +        if (status != 0) {
 333.628 +                printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
 333.629 +                       __FUNCTION__, status);
 333.630 +                max = SMP_CACHE_BYTES;
 333.631 +		goto out;
 333.632 +        }
 333.633 +
 333.634 +	for (l = 0; l < levels; ++l) {
 333.635 +		status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
 333.636 +						    &cci);
 333.637 +		if (status != 0) {
 333.638 +			printk(KERN_ERR
 333.639 +			       "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
 333.640 +			       __FUNCTION__, l, status);
 333.641 +			max = SMP_CACHE_BYTES;
 333.642 +		}
 333.643 +		line_size = 1 << cci.pcci_line_size;
 333.644 +		if (line_size > max)
 333.645 +			max = line_size;
 333.646 +        }
 333.647 +  out:
 333.648 +	if (max > ia64_max_cacheline_size)
 333.649 +		ia64_max_cacheline_size = max;
 333.650 +}
 333.651 +
 333.652 +/*
 333.653 + * cpu_init() initializes state that is per-CPU.  This function acts
 333.654 + * as a 'CPU state barrier', nothing should get across.
 333.655 + */
 333.656 +void
 333.657 +cpu_init (void)
 333.658 +{
 333.659 +	extern void __devinit ia64_mmu_init (void *);
 333.660 +	unsigned long num_phys_stacked;
 333.661 +	pal_vm_info_2_u_t vmi;
 333.662 +	unsigned int max_ctx;
 333.663 +	struct cpuinfo_ia64 *cpu_info;
 333.664 +	void *cpu_data;
 333.665 +
 333.666 +	cpu_data = per_cpu_init();
 333.667 +
 333.668 +	/*
 333.669 +	 * We set ar.k3 so that assembly code in MCA handler can compute
 333.670 +	 * physical addresses of per cpu variables with a simple:
 333.671 +	 *   phys = ar.k3 + &per_cpu_var
 333.672 +	 */
 333.673 +	ia64_set_kr(IA64_KR_PER_CPU_DATA,
 333.674 +		    ia64_tpa(cpu_data) - (long) __per_cpu_start);
 333.675 +
 333.676 +	get_max_cacheline_size();
 333.677 +
 333.678 +	/*
 333.679 +	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
 333.680 +	 * ia64_mmu_init() yet.  And we can't call ia64_mmu_init() first because it
 333.681 +	 * depends on the data returned by identify_cpu().  We break the dependency by
 333.682 +	 * accessing cpu_data() through the canonical per-CPU address.
 333.683 +	 */
 333.684 +	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
 333.685 +	identify_cpu(cpu_info);
 333.686 +
 333.687 +#ifdef CONFIG_MCKINLEY
 333.688 +	{
 333.689 +#		define FEATURE_SET 16
 333.690 +		struct ia64_pal_retval iprv;
 333.691 +
 333.692 +		if (cpu_info->family == 0x1f) {
 333.693 +			PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
 333.694 +			if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
 333.695 +				PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
 333.696 +				              (iprv.v1 | 0x80), FEATURE_SET, 0);
 333.697 +		}
 333.698 +	}
 333.699 +#endif
 333.700 +
 333.701 +	/* Clear the stack memory reserved for pt_regs: */
 333.702 +	memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
 333.703 +
 333.704 +	ia64_set_kr(IA64_KR_FPU_OWNER, 0);
 333.705 +
 333.706 +	/*
 333.707 +	 * Initialize default control register to defer all speculative faults.  The
 333.708 +	 * kernel MUST NOT depend on a particular setting of these bits (in other words,
 333.709 +	 * the kernel must have recovery code for all speculative accesses).  Turn on
 333.710 +	 * dcr.lc as per recommendation by the architecture team.  Most IA-32 apps
 333.711 +	 * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
 333.712 +	 * be fine).
 333.713 +	 */
 333.714 +	ia64_setreg(_IA64_REG_CR_DCR,  (  IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
 333.715 +					| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
 333.716 +	atomic_inc(&init_mm.mm_count);
 333.717 +	current->active_mm = &init_mm;
 333.718 +#ifdef XEN
 333.719 +	if (current->domain->arch.mm)
 333.720 +#else
 333.721 +	if (current->mm)
 333.722 +#endif
 333.723 +		BUG();
 333.724 +
 333.725 +	ia64_mmu_init(ia64_imva(cpu_data));
 333.726 +	ia64_mca_cpu_init(ia64_imva(cpu_data));
 333.727 +
 333.728 +#ifdef CONFIG_IA32_SUPPORT
 333.729 +	ia32_cpu_init();
 333.730 +#endif
 333.731 +
 333.732 +	/* Clear ITC to eliminate sched_clock() overflows in human time.  */
 333.733 +	ia64_set_itc(0);
 333.734 +
 333.735 +	/* disable all local interrupt sources: */
 333.736 +	ia64_set_itv(1 << 16);
 333.737 +	ia64_set_lrr0(1 << 16);
 333.738 +	ia64_set_lrr1(1 << 16);
 333.739 +	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
 333.740 +	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
 333.741 +
 333.742 +	/* clear TPR & XTP to enable all interrupt classes: */
 333.743 +	ia64_setreg(_IA64_REG_CR_TPR, 0);
 333.744 +#ifdef CONFIG_SMP
 333.745 +	normal_xtp();
 333.746 +#endif
 333.747 +
 333.748 +	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
 333.749 +	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
 333.750 +		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
 333.751 +	else {
 333.752 +		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
 333.753 +		max_ctx = (1U << 15) - 1;	/* use architected minimum */
 333.754 +	}
 333.755 +	while (max_ctx < ia64_ctx.max_ctx) {
 333.756 +		unsigned int old = ia64_ctx.max_ctx;
 333.757 +		if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
 333.758 +			break;
 333.759 +	}
 333.760 +
 333.761 +	if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
 333.762 +		printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
 333.763 +		       "stacked regs\n");
 333.764 +		num_phys_stacked = 96;
 333.765 +	}
 333.766 +	/* size of physical stacked register partition plus 8 bytes: */
 333.767 +	__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
 333.768 +	platform_cpu_init();
 333.769 +}
 333.770 +
 333.771 +void
 333.772 +check_bugs (void)
 333.773 +{
 333.774 +	ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
 333.775 +			       (unsigned long) __end___mckinley_e9_bundles);
 333.776 +}
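
identify_cpu() above derives unimpl_va_mask and unimpl_pa_mask from the PAL VM summary, falling back to the Itanium defaults impl_va_msb = 50 and phys_addr_size = 44. Those expressions are easy to sanity-check in isolation; the sketch below simply evaluates them for the fallback values, assuming a 64-bit unsigned long as on ia64.

#include <stdio.h>

int main(void)
{
	/* Fallback values used by identify_cpu(); the real ones come from
	 * ia64_pal_vm_summary(). */
	unsigned long impl_va_msb = 50, phys_addr_size = 44;

	/* VA bits that are neither region bits (63:61) nor implemented. */
	unsigned long unimpl_va_mask =
		~((7UL << 61) | ((1UL << (impl_va_msb + 1)) - 1));

	/* PA bits above the implemented size, excluding the uncached bit 63. */
	unsigned long unimpl_pa_mask =
		~((1UL << 63) | ((1UL << phys_addr_size) - 1));

	printf("unimpl_va_mask = 0x%016lx\n", unimpl_va_mask);
	printf("unimpl_pa_mask = 0x%016lx\n", unimpl_pa_mask);
	return 0;
}
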
   334.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   334.2 +++ b/xen/arch/ia64/linux-xen/time.c	Tue Aug 16 12:15:23 2005 +0800
   334.3 @@ -0,0 +1,264 @@
   334.4 +/*
   334.5 + * linux/arch/ia64/kernel/time.c
   334.6 + *
   334.7 + * Copyright (C) 1998-2003 Hewlett-Packard Co
   334.8 + *	Stephane Eranian <eranian@hpl.hp.com>
   334.9 + *	David Mosberger <davidm@hpl.hp.com>
  334.10 + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
  334.11 + * Copyright (C) 1999-2000 VA Linux Systems
  334.12 + * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
  334.13 + */
  334.14 +#include <linux/config.h>
  334.15 +
  334.16 +#include <linux/cpu.h>
  334.17 +#include <linux/init.h>
  334.18 +#include <linux/kernel.h>
  334.19 +#include <linux/module.h>
  334.20 +#include <linux/profile.h>
  334.21 +#include <linux/sched.h>
  334.22 +#include <linux/time.h>
  334.23 +#include <linux/interrupt.h>
  334.24 +#include <linux/efi.h>
  334.25 +#include <linux/profile.h>
  334.26 +#include <linux/timex.h>
  334.27 +
  334.28 +#include <asm/machvec.h>
  334.29 +#include <asm/delay.h>
  334.30 +#include <asm/hw_irq.h>
  334.31 +#include <asm/ptrace.h>
  334.32 +#include <asm/sal.h>
  334.33 +#include <asm/sections.h>
  334.34 +#include <asm/system.h>
  334.35 +#ifdef XEN
  334.36 +#include <linux/jiffies.h>	// not included by xen/sched.h
  334.37 +#endif
  334.38 +
  334.39 +extern unsigned long wall_jiffies;
  334.40 +
  334.41 +u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
  334.42 +
  334.43 +EXPORT_SYMBOL(jiffies_64);
  334.44 +
  334.45 +#define TIME_KEEPER_ID	0	/* smp_processor_id() of time-keeper */
  334.46 +
  334.47 +#ifdef CONFIG_IA64_DEBUG_IRQ
  334.48 +
  334.49 +unsigned long last_cli_ip;
  334.50 +EXPORT_SYMBOL(last_cli_ip);
  334.51 +
  334.52 +#endif
  334.53 +
  334.54 +#ifndef XEN
  334.55 +static struct time_interpolator itc_interpolator = {
  334.56 +	.shift = 16,
  334.57 +	.mask = 0xffffffffffffffffLL,
  334.58 +	.source = TIME_SOURCE_CPU
  334.59 +};
  334.60 +
  334.61 +static irqreturn_t
  334.62 +timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
  334.63 +{
  334.64 +	unsigned long new_itm;
  334.65 +
  334.66 +	if (unlikely(cpu_is_offline(smp_processor_id()))) {
  334.67 +		return IRQ_HANDLED;
  334.68 +	}
  334.69 +
  334.70 +	platform_timer_interrupt(irq, dev_id, regs);
  334.71 +
  334.72 +	new_itm = local_cpu_data->itm_next;
  334.73 +
  334.74 +	if (!time_after(ia64_get_itc(), new_itm))
  334.75 +		printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
  334.76 +		       ia64_get_itc(), new_itm);
  334.77 +
  334.78 +	profile_tick(CPU_PROFILING, regs);
  334.79 +
  334.80 +	while (1) {
  334.81 +		update_process_times(user_mode(regs));
  334.82 +
  334.83 +		new_itm += local_cpu_data->itm_delta;
  334.84 +
  334.85 +		if (smp_processor_id() == TIME_KEEPER_ID) {
  334.86 +			/*
  334.87 +			 * Here we are in the timer irq handler. We have irqs locally
  334.88 +			 * disabled, but we don't know if the timer_bh is running on
334.89 +			 * another CPU. We need to avoid an SMP race by acquiring the
  334.90 +			 * xtime_lock.
  334.91 +			 */
  334.92 +			write_seqlock(&xtime_lock);
  334.93 +			do_timer(regs);
  334.94 +			local_cpu_data->itm_next = new_itm;
  334.95 +			write_sequnlock(&xtime_lock);
  334.96 +		} else
  334.97 +			local_cpu_data->itm_next = new_itm;
  334.98 +
  334.99 +		if (time_after(new_itm, ia64_get_itc()))
 334.100 +			break;
 334.101 +	}
 334.102 +
 334.103 +	do {
 334.104 +		/*
 334.105 +		 * If we're too close to the next clock tick for
 334.106 +		 * comfort, we increase the safety margin by
 334.107 +		 * intentionally dropping the next tick(s).  We do NOT
 334.108 +		 * update itm.next because that would force us to call
 334.109 +		 * do_timer() which in turn would let our clock run
 334.110 +		 * too fast (with the potentially devastating effect
334.111 +		 * of losing monotonicity of time).
 334.112 +		 */
 334.113 +		while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
 334.114 +			new_itm += local_cpu_data->itm_delta;
 334.115 +		ia64_set_itm(new_itm);
 334.116 +		/* double check, in case we got hit by a (slow) PMI: */
 334.117 +	} while (time_after_eq(ia64_get_itc(), new_itm));
 334.118 +	return IRQ_HANDLED;
 334.119 +}
 334.120 +#endif
 334.121 +
 334.122 +/*
 334.123 + * Encapsulate access to the itm structure for SMP.
 334.124 + */
 334.125 +void
 334.126 +ia64_cpu_local_tick (void)
 334.127 +{
 334.128 +	int cpu = smp_processor_id();
 334.129 +	unsigned long shift = 0, delta;
 334.130 +
 334.131 +	/* arrange for the cycle counter to generate a timer interrupt: */
 334.132 +	ia64_set_itv(IA64_TIMER_VECTOR);
 334.133 +
 334.134 +	delta = local_cpu_data->itm_delta;
 334.135 +	/*
 334.136 +	 * Stagger the timer tick for each CPU so they don't occur all at (almost) the
 334.137 +	 * same time:
 334.138 +	 */
 334.139 +	if (cpu) {
 334.140 +		unsigned long hi = 1UL << ia64_fls(cpu);
 334.141 +		shift = (2*(cpu - hi) + 1) * delta/hi/2;
 334.142 +	}
 334.143 +	local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
 334.144 +	ia64_set_itm(local_cpu_data->itm_next);
 334.145 +}
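The stagger formula above spreads the per-CPU timer ticks across the tick period in a bit-reversal-like pattern. A minimal stand-alone sketch of that arithmetic (values invented; fls_ul() is a hypothetical stand-in for ia64_fls(), assumed to return floor(log2(x))):

#include <stdio.h>

/* floor(log2(x)) for x > 0 */
static unsigned long fls_ul(unsigned long x)
{
	unsigned long n = 0;
	while (x >>= 1)
		++n;
	return n;
}

int main(void)
{
	unsigned long delta = 1000000;	/* hypothetical itm_delta (cycles per tick) */
	unsigned long cpu, hi, shift;

	for (cpu = 0; cpu < 8; ++cpu) {
		shift = 0;
		if (cpu) {
			hi = 1UL << fls_ul(cpu);
			shift = (2*(cpu - hi) + 1) * delta/hi/2;
		}
		printf("cpu %lu -> shift %lu\n", cpu, shift);
	}
	/* prints 0, 500000, 250000, 750000, 125000, 375000, 625000, 875000 */
	return 0;
}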
 334.146 +
 334.147 +static int nojitter;
 334.148 +
 334.149 +static int __init nojitter_setup(char *str)
 334.150 +{
 334.151 +	nojitter = 1;
 334.152 +	printk("Jitter checking for ITC timers disabled\n");
 334.153 +	return 1;
 334.154 +}
 334.155 +
 334.156 +__setup("nojitter", nojitter_setup);
 334.157 +
 334.158 +
 334.159 +void __devinit
 334.160 +ia64_init_itm (void)
 334.161 +{
 334.162 +	unsigned long platform_base_freq, itc_freq;
 334.163 +	struct pal_freq_ratio itc_ratio, proc_ratio;
 334.164 +	long status, platform_base_drift, itc_drift;
 334.165 +
 334.166 +	/*
 334.167 +	 * According to SAL v2.6, we need to use a SAL call to determine the platform base
 334.168 +	 * frequency and then a PAL call to determine the frequency ratio between the ITC
 334.169 +	 * and the base frequency.
 334.170 +	 */
 334.171 +	status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
 334.172 +				    &platform_base_freq, &platform_base_drift);
 334.173 +	if (status != 0) {
 334.174 +		printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
 334.175 +	} else {
 334.176 +		status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
 334.177 +		if (status != 0)
 334.178 +			printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
 334.179 +	}
 334.180 +	if (status != 0) {
 334.181 +		/* invent "random" values */
 334.182 +		printk(KERN_ERR
 334.183 +		       "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
 334.184 +		platform_base_freq = 100000000;
 334.185 +		platform_base_drift = -1;	/* no drift info */
 334.186 +		itc_ratio.num = 3;
 334.187 +		itc_ratio.den = 1;
 334.188 +	}
 334.189 +	if (platform_base_freq < 40000000) {
 334.190 +		printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
 334.191 +		       platform_base_freq);
 334.192 +		platform_base_freq = 75000000;
 334.193 +		platform_base_drift = -1;
 334.194 +	}
 334.195 +	if (!proc_ratio.den)
 334.196 +		proc_ratio.den = 1;	/* avoid division by zero */
 334.197 +	if (!itc_ratio.den)
 334.198 +		itc_ratio.den = 1;	/* avoid division by zero */
 334.199 +
 334.200 +	itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
 334.201 +
 334.202 +	local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
 334.203 +	printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, "
 334.204 +	       "ITC freq=%lu.%03luMHz", smp_processor_id(),
 334.205 +	       platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
 334.206 +	       itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
 334.207 +
 334.208 +	if (platform_base_drift != -1) {
 334.209 +		itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
 334.210 +		printk("+/-%ldppm\n", itc_drift);
 334.211 +	} else {
 334.212 +		itc_drift = -1;
 334.213 +		printk("\n");
 334.214 +	}
 334.215 +
 334.216 +	local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
 334.217 +	local_cpu_data->itc_freq = itc_freq;
 334.218 +	local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
 334.219 +	local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
 334.220 +					+ itc_freq/2)/itc_freq;
 334.221 +
 334.222 +	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
 334.223 +#ifndef XEN
 334.224 +		itc_interpolator.frequency = local_cpu_data->itc_freq;
 334.225 +		itc_interpolator.drift = itc_drift;
 334.226 +#ifdef CONFIG_SMP
 334.227 +		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
 334.228 +		 * Jitter compensation requires a cmpxchg which may limit
 334.229 +		 * the scalability of the syscalls for retrieving time.
 334.230 +		 * The ITC synchronization is usually successful to within a few
 334.231 +		 * ITC ticks but this is not a sure thing. If you need to improve
 334.232 +		 * timer performance in SMP situations then boot the kernel with the
 334.233 +		 * "nojitter" option. However, doing so may result in time fluctuating (maybe
 334.234 +		 * even going backward) if the ITC offsets between the individual CPUs
 334.235 +		 * are too large.
 334.236 +		 */
 334.237 +		if (!nojitter) itc_interpolator.jitter = 1;
 334.238 +#endif
 334.239 +		register_time_interpolator(&itc_interpolator);
 334.240 +#endif
 334.241 +	}
 334.242 +
 334.243 +	/* Setup the CPU local timer tick */
 334.244 +	ia64_cpu_local_tick();
 334.245 +}
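As a quick sanity check of the arithmetic above, a minimal sketch using the same fallback numbers the function invents when SAL/PAL fail (HZ = 1024 is an assumption here; real configurations vary):

#include <stdio.h>

int main(void)
{
	unsigned long hz = 1024;			/* assumed HZ              */
	unsigned long base = 100000000;			/* fallback base frequency */
	unsigned long num = 3, den = 1;			/* fallback ITC ratio      */
	unsigned long itc_freq = (base * num) / den;	/* 300 MHz                 */
	unsigned long itm_delta = (itc_freq + hz/2) / hz;
	unsigned long cyc_per_usec = (itc_freq + 500000) / 1000000;

	printf("itc_freq=%lu itm_delta=%lu cyc/us=%lu\n",
	       itc_freq, itm_delta, cyc_per_usec);	/* 300000000 292969 300 */
	return 0;
}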
 334.246 +
 334.247 +#ifndef XEN
 334.248 +static struct irqaction timer_irqaction = {
 334.249 +	.handler =	timer_interrupt,
 334.250 +	.flags =	SA_INTERRUPT,
 334.251 +	.name =		"timer"
 334.252 +};
 334.253 +
 334.254 +void __init
 334.255 +time_init (void)
 334.256 +{
 334.257 +	register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
 334.258 +	efi_gettimeofday(&xtime);
 334.259 +	ia64_init_itm();
 334.260 +
 334.261 +	/*
 334.262 +	 * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
 334.263 +	 * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
 334.264 +	 */
 334.265 +	set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
 334.266 +}
 334.267 +#endif
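A worked example of the wall_to_monotonic initialisation above, with an invented boot time:

/* Assume xtime = { .tv_sec = 1124165723, .tv_nsec = 500000000 } (invented).
 * set_normalized_timespec(&wall_to_monotonic, -1124165723, -500000000)
 * borrows one second to make tv_nsec non-negative, giving
 * { -1124165724, 500000000 }; adding that back to xtime component-wise
 * yields { -1, 1000000000 }, which normalises to exactly zero, as intended. */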
   335.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   335.2 +++ b/xen/arch/ia64/linux-xen/tlb.c	Tue Aug 16 12:15:23 2005 +0800
   335.3 @@ -0,0 +1,199 @@
   335.4 +/*
   335.5 + * TLB support routines.
   335.6 + *
   335.7 + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
   335.8 + *	David Mosberger-Tang <davidm@hpl.hp.com>
   335.9 + *
  335.10 + * 08/02/00 A. Mallick <asit.k.mallick@intel.com>
  335.11 + *		Modified RID allocation for SMP
  335.12 + *          Goutham Rao <goutham.rao@intel.com>
  335.13 + *              IPI based ptc implementation and A-step IPI implementation.
  335.14 + */
  335.15 +#include <linux/config.h>
  335.16 +#include <linux/module.h>
  335.17 +#include <linux/init.h>
  335.18 +#include <linux/kernel.h>
  335.19 +#include <linux/sched.h>
  335.20 +#include <linux/smp.h>
  335.21 +#include <linux/mm.h>
  335.22 +
  335.23 +#include <asm/delay.h>
  335.24 +#include <asm/mmu_context.h>
  335.25 +#include <asm/pgalloc.h>
  335.26 +#include <asm/pal.h>
  335.27 +#include <asm/tlbflush.h>
  335.28 +
  335.29 +static struct {
  335.30 +	unsigned long mask;	/* mask of supported purge page-sizes */
  335.31 +	unsigned long max_bits;	/* log2() of largest supported purge page-size */
  335.32 +} purge;
  335.33 +
  335.34 +struct ia64_ctx ia64_ctx = {
  335.35 +	.lock =		SPIN_LOCK_UNLOCKED,
  335.36 +	.next =		1,
  335.37 +	.limit =	(1 << 15) - 1,		/* start out with the safe (architected) limit */
  335.38 +	.max_ctx =	~0U
  335.39 +};
  335.40 +
  335.41 +DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
  335.42 +
  335.43 +/*
  335.44 + * Acquire the ia64_ctx.lock before calling this function!
  335.45 + */
  335.46 +void
  335.47 +wrap_mmu_context (struct mm_struct *mm)
  335.48 +{
  335.49 +#ifdef XEN
335.50 +	printf("wrap_mmu_context: called, not implemented\n");
  335.51 +#else
  335.52 +	unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
  335.53 +	struct task_struct *tsk;
  335.54 +	int i;
  335.55 +
  335.56 +	if (ia64_ctx.next > max_ctx)
  335.57 +		ia64_ctx.next = 300;	/* skip daemons */
  335.58 +	ia64_ctx.limit = max_ctx + 1;
  335.59 +
  335.60 +	/*
  335.61 +	 * Scan all the task's mm->context and set proper safe range
  335.62 +	 */
  335.63 +
  335.64 +	read_lock(&tasklist_lock);
  335.65 +  repeat:
  335.66 +	for_each_process(tsk) {
  335.67 +		if (!tsk->mm)
  335.68 +			continue;
  335.69 +		tsk_context = tsk->mm->context;
  335.70 +		if (tsk_context == ia64_ctx.next) {
  335.71 +			if (++ia64_ctx.next >= ia64_ctx.limit) {
  335.72 +				/* empty range: reset the range limit and start over */
  335.73 +				if (ia64_ctx.next > max_ctx)
  335.74 +					ia64_ctx.next = 300;
  335.75 +				ia64_ctx.limit = max_ctx + 1;
  335.76 +				goto repeat;
  335.77 +			}
  335.78 +		}
  335.79 +		if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
  335.80 +			ia64_ctx.limit = tsk_context;
  335.81 +	}
  335.82 +	read_unlock(&tasklist_lock);
  335.83 +	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
  335.84 +	{
  335.85 +		int cpu = get_cpu(); /* prevent preemption/migration */
  335.86 +		for (i = 0; i < NR_CPUS; ++i)
  335.87 +			if (cpu_online(i) && (i != cpu))
  335.88 +				per_cpu(ia64_need_tlb_flush, i) = 1;
  335.89 +		put_cpu();
  335.90 +	}
  335.91 +	local_flush_tlb_all();
  335.92 +#endif
  335.93 +}
  335.94 +
  335.95 +void
  335.96 +ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
  335.97 +{
  335.98 +	static DEFINE_SPINLOCK(ptcg_lock);
  335.99 +
 335.100 +	/* HW requires global serialization of ptc.ga.  */
 335.101 +	spin_lock(&ptcg_lock);
 335.102 +	{
 335.103 +		do {
 335.104 +			/*
 335.105 +			 * Flush ALAT entries also.
 335.106 +			 */
 335.107 +			ia64_ptcga(start, (nbits<<2));
 335.108 +			ia64_srlz_i();
 335.109 +			start += (1UL << nbits);
 335.110 +		} while (start < end);
 335.111 +	}
 335.112 +	spin_unlock(&ptcg_lock);
 335.113 +}
 335.114 +
 335.115 +void
 335.116 +local_flush_tlb_all (void)
 335.117 +{
 335.118 +	unsigned long i, j, flags, count0, count1, stride0, stride1, addr;
 335.119 +
 335.120 +	addr    = local_cpu_data->ptce_base;
 335.121 +	count0  = local_cpu_data->ptce_count[0];
 335.122 +	count1  = local_cpu_data->ptce_count[1];
 335.123 +	stride0 = local_cpu_data->ptce_stride[0];
 335.124 +	stride1 = local_cpu_data->ptce_stride[1];
 335.125 +
 335.126 +	local_irq_save(flags);
 335.127 +	for (i = 0; i < count0; ++i) {
 335.128 +		for (j = 0; j < count1; ++j) {
 335.129 +			ia64_ptce(addr);
 335.130 +			addr += stride1;
 335.131 +		}
 335.132 +		addr += stride0;
 335.133 +	}
 335.134 +	local_irq_restore(flags);
 335.135 +	ia64_srlz_i();			/* srlz.i implies srlz.d */
 335.136 +}
 335.137 +EXPORT_SYMBOL(local_flush_tlb_all);
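The nested loop above walks a count0 x count1 grid of purge addresses. A stand-alone sketch of that address pattern, with made-up PTCE parameters (the real values come from PAL via ia64_get_ptce() and are per-CPU):

#include <stdio.h>

int main(void)
{
	/* hypothetical PTCE parameters, purely illustrative */
	unsigned long base = 0x80000000UL, count0 = 2, count1 = 3;
	unsigned long stride0 = 0x200000, stride1 = 0x1000;
	unsigned long addr = base, i, j;

	for (i = 0; i < count0; ++i) {
		for (j = 0; j < count1; ++j) {
			printf("ptc.e %#lx\n", addr);	/* one purge per point */
			addr += stride1;
		}
		addr += stride0;
	}
	return 0;
}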
 335.138 +
 335.139 +void
 335.140 +flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
 335.141 +{
 335.142 +#ifdef XEN
335.143 +	printf("flush_tlb_range: called, not implemented\n");
 335.144 +#else
 335.145 +	struct mm_struct *mm = vma->vm_mm;
 335.146 +	unsigned long size = end - start;
 335.147 +	unsigned long nbits;
 335.148 +
 335.149 +	if (mm != current->active_mm) {
 335.150 +		/* this does happen, but perhaps it's not worth optimizing for? */
 335.151 +#ifdef CONFIG_SMP
 335.152 +		flush_tlb_all();
 335.153 +#else
 335.154 +		mm->context = 0;
 335.155 +#endif
 335.156 +		return;
 335.157 +	}
 335.158 +
 335.159 +	nbits = ia64_fls(size + 0xfff);
 335.160 +	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
 335.161 +		++nbits;
 335.162 +	if (nbits > purge.max_bits)
 335.163 +		nbits = purge.max_bits;
 335.164 +	start &= ~((1UL << nbits) - 1);
 335.165 +
 335.166 +# ifdef CONFIG_SMP
 335.167 +	platform_global_tlb_purge(start, end, nbits);
 335.168 +# else
 335.169 +	do {
 335.170 +		ia64_ptcl(start, (nbits<<2));
 335.171 +		start += (1UL << nbits);
 335.172 +	} while (start < end);
 335.173 +# endif
 335.174 +
 335.175 +	ia64_srlz_i();			/* srlz.i implies srlz.d */
 335.176 +#endif
 335.177 +}
 335.178 +EXPORT_SYMBOL(flush_tlb_range);
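A minimal sketch of the purge-granule selection performed above, assuming the architected fallback purge mask (0x115557000UL, as used by ia64_tlb_init() below) and a 20 KB range; all values are illustrative:

#include <stdio.h>

static unsigned long fls_ul(unsigned long x)	/* floor(log2(x)), x > 0 */
{
	unsigned long n = 0;
	while (x >>= 1)
		++n;
	return n;
}

int main(void)
{
	unsigned long mask = 0x115557000UL;	/* architected purge page-sizes */
	unsigned long max_bits = fls_ul(mask);
	unsigned long start = 0x4000a000UL, end = 0x4000f000UL;
	unsigned long nbits = fls_ul((end - start) + 0xfff);

	while ((((1UL << nbits) & mask) == 0) && (nbits < max_bits))
		++nbits;
	if (nbits > max_bits)
		nbits = max_bits;
	start &= ~((1UL << nbits) - 1);
	/* 20 KB range -> nbits = 14, i.e. 16 KB purges starting at 0x40008000 */
	printf("nbits=%lu start=%#lx\n", nbits, start);
	return 0;
}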
 335.179 +
 335.180 +void __devinit
 335.181 +ia64_tlb_init (void)
 335.182 +{
 335.183 +	ia64_ptce_info_t ptce_info;
 335.184 +	unsigned long tr_pgbits;
 335.185 +	long status;
 335.186 +
 335.187 +	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
 335.188 +		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld;"
335.189 +		       " defaulting to architected purge page-sizes.\n", status);
 335.190 +		purge.mask = 0x115557000UL;
 335.191 +	}
 335.192 +	purge.max_bits = ia64_fls(purge.mask);
 335.193 +
 335.194 +	ia64_get_ptce(&ptce_info);
 335.195 +	local_cpu_data->ptce_base = ptce_info.base;
 335.196 +	local_cpu_data->ptce_count[0] = ptce_info.count[0];
 335.197 +	local_cpu_data->ptce_count[1] = ptce_info.count[1];
 335.198 +	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
 335.199 +	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
 335.200 +
 335.201 +	local_flush_tlb_all();		/* nuke left overs from bootstrapping... */
 335.202 +}
   336.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   336.2 +++ b/xen/arch/ia64/linux-xen/unaligned.c	Tue Aug 16 12:15:23 2005 +0800
   336.3 @@ -0,0 +1,1653 @@
   336.4 +/*
   336.5 + * Architecture-specific unaligned trap handling.
   336.6 + *
   336.7 + * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
   336.8 + *	Stephane Eranian <eranian@hpl.hp.com>
   336.9 + *	David Mosberger-Tang <davidm@hpl.hp.com>
  336.10 + *
  336.11 + * 2002/12/09   Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
  336.12 + *		get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
  336.13 + *		stacked register returns an undefined value; it does NOT trigger a
  336.14 + *		"rsvd register fault").
  336.15 + * 2001/10/11	Fix unaligned access to rotating registers in s/w pipelined loops.
  336.16 + * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
336.17 + * 2001/01/17	Add support for emulation of unaligned kernel accesses.
  336.18 + */
  336.19 +#include <linux/kernel.h>
  336.20 +#include <linux/sched.h>
  336.21 +#include <linux/smp_lock.h>
  336.22 +#include <linux/tty.h>
  336.23 +
  336.24 +#include <asm/intrinsics.h>
  336.25 +#include <asm/processor.h>
  336.26 +#include <asm/rse.h>
  336.27 +#include <asm/uaccess.h>
  336.28 +#include <asm/unaligned.h>
  336.29 +
  336.30 +extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
  336.31 +
  336.32 +#undef DEBUG_UNALIGNED_TRAP
  336.33 +
  336.34 +#ifdef DEBUG_UNALIGNED_TRAP
  336.35 +# define DPRINT(a...)	do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
  336.36 +# define DDUMP(str,vp,len)	dump(str, vp, len)
  336.37 +
  336.38 +static void
  336.39 +dump (const char *str, void *vp, size_t len)
  336.40 +{
  336.41 +	unsigned char *cp = vp;
  336.42 +	int i;
  336.43 +
  336.44 +	printk("%s", str);
  336.45 +	for (i = 0; i < len; ++i)
  336.46 +		printk (" %02x", *cp++);
  336.47 +	printk("\n");
  336.48 +}
  336.49 +#else
  336.50 +# define DPRINT(a...)
  336.51 +# define DDUMP(str,vp,len)
  336.52 +#endif
  336.53 +
  336.54 +#define IA64_FIRST_STACKED_GR	32
  336.55 +#define IA64_FIRST_ROTATING_FR	32
  336.56 +#define SIGN_EXT9		0xffffffffffffff00ul
  336.57 +
  336.58 +/*
  336.59 + * For M-unit:
  336.60 + *
  336.61 + *  opcode |   m  |   x6    |
  336.62 + * --------|------|---------|
  336.63 + * [40-37] | [36] | [35:30] |
  336.64 + * --------|------|---------|
  336.65 + *     4   |   1  |    6    | = 11 bits
  336.66 + * --------------------------
  336.67 + * However bits [31:30] are not directly useful to distinguish between
  336.68 + * load/store so we can use [35:32] instead, which gives the following
  336.69 + * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
  336.70 + * checking the m-bit until later in the load/store emulation.
  336.71 + */
  336.72 +#define IA64_OPCODE_MASK	0x1ef
  336.73 +#define IA64_OPCODE_SHIFT	32
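A short sketch of how these two macros are meant to be applied to a 41-bit instruction slot (the decode itself happens in the handler further down; the variable names here are illustrative):

/* Sketch only: 'slot' would hold the 41-bit instruction extracted from the
 * faulting bundle; masking with 0x1ef deliberately drops the m bit (bit 36). */
unsigned long slot = /* instruction bits from the bundle */ 0;
unsigned long opcode = (slot >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
/* opcode then matches one of the LD_OP/ST_OP/... constants defined below,
 * e.g. 0x080 for a plain ldX or 0x08c for a plain stX. */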
  336.74 +
  336.75 +/*
  336.76 + * Table C-28 Integer Load/Store
  336.77 + *
  336.78 + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
  336.79 + *
  336.80 + * ld8.fill, st8.fill  MUST be aligned because the RNATs are based on
336.81 + * the address (bits [8:3]), so we must fail.
  336.82 + */
  336.83 +#define LD_OP            0x080
  336.84 +#define LDS_OP           0x081
  336.85 +#define LDA_OP           0x082
  336.86 +#define LDSA_OP          0x083
  336.87 +#define LDBIAS_OP        0x084
  336.88 +#define LDACQ_OP         0x085
  336.89 +/* 0x086, 0x087 are not relevant */
  336.90 +#define LDCCLR_OP        0x088
  336.91 +#define LDCNC_OP         0x089
  336.92 +#define LDCCLRACQ_OP     0x08a
  336.93 +#define ST_OP            0x08c
  336.94 +#define STREL_OP         0x08d
  336.95 +/* 0x08e,0x8f are not relevant */
  336.96 +
  336.97 +/*
  336.98 + * Table C-29 Integer Load +Reg
  336.99 + *
 336.100 + * we use the ld->m (bit [36:36]) field to determine whether or not we have
 336.101 + * a load/store of this form.
 336.102 + */
 336.103 +
 336.104 +/*
 336.105 + * Table C-30 Integer Load/Store +Imm
 336.106 + *
 336.107 + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
 336.108 + *
336.109 + * ld8.fill, st8.fill  must be aligned because the NaT bits are based on
 336.110 + * the address, so we must fail and the program must be fixed.
 336.111 + */
 336.112 +#define LD_IMM_OP            0x0a0
 336.113 +#define LDS_IMM_OP           0x0a1
 336.114 +#define LDA_IMM_OP           0x0a2
 336.115 +#define LDSA_IMM_OP          0x0a3
 336.116 +#define LDBIAS_IMM_OP        0x0a4
 336.117 +#define LDACQ_IMM_OP         0x0a5
 336.118 +/* 0x0a6, 0xa7 are not relevant */
 336.119 +#define LDCCLR_IMM_OP        0x0a8
 336.120 +#define LDCNC_IMM_OP         0x0a9
 336.121 +#define LDCCLRACQ_IMM_OP     0x0aa
 336.122 +#define ST_IMM_OP            0x0ac
 336.123 +#define STREL_IMM_OP         0x0ad
 336.124 +/* 0x0ae,0xaf are not relevant */
 336.125 +
 336.126 +/*
 336.127 + * Table C-32 Floating-point Load/Store
 336.128 + */
 336.129 +#define LDF_OP           0x0c0
 336.130 +#define LDFS_OP          0x0c1
 336.131 +#define LDFA_OP          0x0c2
 336.132 +#define LDFSA_OP         0x0c3
 336.133 +/* 0x0c6 is irrelevant */
 336.134 +#define LDFCCLR_OP       0x0c8
 336.135 +#define LDFCNC_OP        0x0c9
 336.136 +/* 0x0cb is irrelevant  */
 336.137 +#define STF_OP           0x0cc
 336.138 +
 336.139 +/*
 336.140 + * Table C-33 Floating-point Load +Reg
 336.141 + *
 336.142 + * we use the ld->m (bit [36:36]) field to determine whether or not we have
 336.143 + * a load/store of this form.
 336.144 + */
 336.145 +
 336.146 +/*
 336.147 + * Table C-34 Floating-point Load/Store +Imm
 336.148 + */
 336.149 +#define LDF_IMM_OP       0x0e0
 336.150 +#define LDFS_IMM_OP      0x0e1
 336.151 +#define LDFA_IMM_OP      0x0e2
 336.152 +#define LDFSA_IMM_OP     0x0e3
 336.153 +/* 0x0e6 is irrelevant */
 336.154 +#define LDFCCLR_IMM_OP   0x0e8
 336.155 +#define LDFCNC_IMM_OP    0x0e9
 336.156 +#define STF_IMM_OP       0x0ec
 336.157 +
 336.158 +typedef struct {
 336.159 +	unsigned long	 qp:6;	/* [0:5]   */
 336.160 +	unsigned long    r1:7;	/* [6:12]  */
 336.161 +	unsigned long   imm:7;	/* [13:19] */
 336.162 +	unsigned long    r3:7;	/* [20:26] */
 336.163 +	unsigned long     x:1;  /* [27:27] */
 336.164 +	unsigned long  hint:2;	/* [28:29] */
 336.165 +	unsigned long x6_sz:2;	/* [30:31] */
 336.166 +	unsigned long x6_op:4;	/* [32:35], x6 = x6_sz|x6_op */
 336.167 +	unsigned long     m:1;	/* [36:36] */
 336.168 +	unsigned long    op:4;	/* [37:40] */
 336.169 +	unsigned long   pad:23; /* [41:63] */
 336.170 +} load_store_t;
 336.171 +
 336.172 +
 336.173 +typedef enum {
 336.174 +	UPD_IMMEDIATE,	/* ldXZ r1=[r3],imm(9) */
 336.175 +	UPD_REG		/* ldXZ r1=[r3],r2     */
 336.176 +} update_t;
 336.177 +
 336.178 +/*
 336.179 + * We use tables to keep track of the offsets of registers in the saved state.
 336.180 + * This way we save having big switch/case statements.
 336.181 + *
 336.182 + * We use bit 0 to indicate switch_stack or pt_regs.
 336.183 + * The offset is simply shifted by 1 bit.
336.184 + * A 2-byte value should be enough to hold any kind of offset.
 336.185 + *
 336.186 + * In case the calling convention changes (and thus pt_regs/switch_stack)
 336.187 + * simply use RSW instead of RPT or vice-versa.
 336.188 + */
 336.189 +
 336.190 +#define RPO(x)	((size_t) &((struct pt_regs *)0)->x)
 336.191 +#define RSO(x)	((size_t) &((struct switch_stack *)0)->x)
 336.192 +
 336.193 +#define RPT(x)		(RPO(x) << 1)
 336.194 +#define RSW(x)		(1| RSO(x)<<1)
 336.195 +
 336.196 +#define GR_OFFS(x)	(gr_info[x]>>1)
 336.197 +#define GR_IN_SW(x)	(gr_info[x] & 0x1)
 336.198 +
 336.199 +#define FR_OFFS(x)	(fr_info[x]>>1)
 336.200 +#define FR_IN_SW(x)	(fr_info[x] & 0x1)
 336.201 +
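A small usage sketch of the encoding described above (hypothetical snippet; GR_OFFS()/GR_IN_SW() above do exactly this, and the gr_info[] table below supplies the entries):

unsigned short entry = gr_info[8];	/* table entry for r8               */
int in_sw = entry & 0x1;		/* 0 -> pt_regs, 1 -> switch_stack  */
unsigned off = entry >> 1;		/* byte offset within that struct   */
/* For r8 the entry was built with RPT(r8), so in_sw is 0 and off is
 * offsetof(struct pt_regs, r8). */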
 336.202 +static u16 gr_info[32]={
 336.203 +	0,			/* r0 is read-only : WE SHOULD NEVER GET THIS */
 336.204 +
 336.205 +	RPT(r1), RPT(r2), RPT(r3),
 336.206 +
 336.207 +#ifdef  CONFIG_VTI
 336.208 +	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
 336.209 +#else   //CONFIG_VTI
 336.210 +	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
 336.211 +#endif  //CONFIG_VTI
 336.212 +
 336.213 +	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
 336.214 +	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
 336.215 +
 336.216 +	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
 336.217 +	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
 336.218 +	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
 336.219 +	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
 336.220 +};
 336.221 +
 336.222 +static u16 fr_info[32]={
 336.223 +	0,			/* constant : WE SHOULD NEVER GET THIS */
 336.224 +	0,			/* constant : WE SHOULD NEVER GET THIS */
 336.225 +
 336.226 +	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
 336.227 +
 336.228 +	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
 336.229 +	RPT(f10), RPT(f11),
 336.230 +
 336.231 +	RSW(f12), RSW(f13), RSW(f14),
 336.232 +	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
 336.233 +	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
 336.234 +	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
 336.235 +	RSW(f30), RSW(f31)
 336.236 +};
 336.237 +
 336.238 +/* Invalidate ALAT entry for integer register REGNO.  */
 336.239 +static void
 336.240 +invala_gr (int regno)
 336.241 +{
 336.242 +#	define F(reg)	case reg: ia64_invala_gr(reg); break
 336.243 +
 336.244 +	switch (regno) {
 336.245 +		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
 336.246 +		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
 336.247 +		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
 336.248 +		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
 336.249 +		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
 336.250 +		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
 336.251 +		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
 336.252 +		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
 336.253 +		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
 336.254 +		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
 336.255 +		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
 336.256 +		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
 336.257 +		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
 336.258 +		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
 336.259 +		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
 336.260 +		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
 336.261 +	}
 336.262 +#	undef F
 336.263 +}
 336.264 +
 336.265 +/* Invalidate ALAT entry for floating-point register REGNO.  */
 336.266 +static void
 336.267 +invala_fr (int regno)
 336.268 +{
 336.269 +#	define F(reg)	case reg: ia64_invala_fr(reg); break
 336.270 +
 336.271 +	switch (regno) {
 336.272 +		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
 336.273 +		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
 336.274 +		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
 336.275 +		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
 336.276 +		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
 336.277 +		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
 336.278 +		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
 336.279 +		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
 336.280 +		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
 336.281 +		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
 336.282 +		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
 336.283 +		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
 336.284 +		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
 336.285 +		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
 336.286 +		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
 336.287 +		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
 336.288 +	}
 336.289 +#	undef F
 336.290 +}
 336.291 +
 336.292 +static inline unsigned long
 336.293 +rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
 336.294 +{
 336.295 +	reg += rrb;
 336.296 +	if (reg >= sor)
 336.297 +		reg -= sor;
 336.298 +	return reg;
 336.299 +}
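Two illustrative data points for rotate_reg() (numbers invented):

/* e.g. with sor = 16 and rrb = 5:
 *	rotate_reg(16, 5, 13) == 2	-- wraps around the rotating region
 *	rotate_reg(16, 5,  3) == 8	-- no wrap needed                   */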
 336.300 +
 336.301 +#ifdef CONFIG_VTI
 336.302 +static void
 336.303 +set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, unsigned long nat)
 336.304 +{
 336.305 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 336.306 +	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
 336.307 +	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 336.308 +	unsigned long rnats, nat_mask;
336.309 +	unsigned long old_rsc, new_rsc;
336.310 +	unsigned long on_kbs, rnat;
 336.311 +	long sof = (regs->cr_ifs) & 0x7f;
 336.312 +	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 336.313 +	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 336.314 +	long ridx = r1 - 32;
 336.315 +
 336.316 +	if (ridx >= sof) {
 336.317 +		/* this should never happen, as the "rsvd register fault" has higher priority */
 336.318 +		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
 336.319 +		return;
 336.320 +	}
 336.321 +
 336.322 +	if (ridx < sor)
 336.323 +		ridx = rotate_reg(sor, rrb_gr, ridx);
 336.324 +
336.325 +	old_rsc = ia64_get_rsc();
336.326 +	new_rsc = old_rsc & ~0x3;
336.327 +	ia64_set_rsc(new_rsc);
336.328 +
336.329 +	bspstore = ia64_get_bspstore();
336.330 +	bsp = kbs + (regs->loadrs >> 19);	/* 16 + 3 */
336.331 +
336.332 +	addr = ia64_rse_skip_regs(bsp, -sof + ridx);
336.333 +	nat_mask = 1UL << ia64_rse_slot_num(addr);
 336.334 +	rnat_addr = ia64_rse_rnat_addr(addr);
 336.335 +
336.336 +	if (addr >= bspstore) {
336.337 +
336.338 +		ia64_flushrs();
336.339 +		ia64_mf();
336.340 +		*addr = val;
336.341 +		bspstore = ia64_get_bspstore();
336.342 +		rnat = ia64_get_rnat();
336.343 +		if (bspstore < rnat_addr) {
336.344 +			rnat = rnat & ~nat_mask;
336.345 +		} else {
336.346 +			*rnat_addr = (*rnat_addr) & ~nat_mask;
336.347 +		}
336.348 +		ia64_mf();
336.349 +		ia64_loadrs();
336.350 +		ia64_set_rnat(rnat);
336.351 +	} else {
336.352 +
336.353 +		rnat = ia64_get_rnat();
336.354 +		*addr = val;
336.355 +		if (bspstore < rnat_addr) {
336.356 +			rnat = rnat & ~nat_mask;
336.357 +		} else {
336.358 +			*rnat_addr = (*rnat_addr) & ~nat_mask;
336.359 +		}
336.360 +		ia64_set_bspstore(bspstore);
336.361 +		ia64_set_rnat(rnat);
336.362 +	}
336.363 +	ia64_set_rsc(old_rsc);
 336.364 +}
 336.365 +
 336.366 +
 336.367 +static void
 336.368 +get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, unsigned long *nat)
 336.369 +{
 336.370 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 336.371 +	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
 336.372 +	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 336.373 +	unsigned long rnats, nat_mask;
 336.374 +	unsigned long on_kbs;
336.375 +	unsigned long old_rsc, new_rsc;
 336.376 +	long sof = (regs->cr_ifs) & 0x7f;
 336.377 +	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 336.378 +	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 336.379 +	long ridx = r1 - 32;
 336.380 +
 336.381 +	if (ridx >= sof) {
 336.382 +		/* read of out-of-frame register returns an undefined value; 0 in our case.  */
 336.383 +		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
 336.384 +		panic("wrong stack register number");
 336.385 +	}
 336.386 +
 336.387 +	if (ridx < sor)
 336.388 +		ridx = rotate_reg(sor, rrb_gr, ridx);
 336.389 +
336.390 +	old_rsc = ia64_get_rsc();
336.391 +	new_rsc = old_rsc & ~0x3;
336.392 +	ia64_set_rsc(new_rsc);
336.393 +
336.394 +	bspstore = ia64_get_bspstore();
336.395 +	bsp = kbs + (regs->loadrs >> 19);	/* 16 + 3 */
336.396 +
336.397 +	addr = ia64_rse_skip_regs(bsp, -sof + ridx);
336.398 +	nat_mask = 1UL << ia64_rse_slot_num(addr);
336.399 +	rnat_addr = ia64_rse_rnat_addr(addr);
336.400 +
336.401 +	if (addr >= bspstore) {
336.402 +
336.403 +		ia64_flushrs();
336.404 +		ia64_mf();
336.405 +		bspstore = ia64_get_bspstore();
336.406 +	}
336.407 +	*val = *addr;
336.408 +	if (bspstore < rnat_addr) {
336.409 +		*nat = !!(ia64_get_rnat() & nat_mask);
336.410 +	} else {
336.411 +		*nat = !!((*rnat_addr) & nat_mask);
336.412 +	}
336.413 +	ia64_set_rsc(old_rsc);
 336.414 +}
 336.415 +#else // CONFIG_VTI
 336.416 +static void
 336.417 +set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
 336.418 +{
 336.419 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 336.420 +	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
 336.421 +	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 336.422 +	unsigned long rnats, nat_mask;
 336.423 +	unsigned long on_kbs;
 336.424 +	long sof = (regs->cr_ifs) & 0x7f;
 336.425 +	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 336.426 +	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 336.427 +	long ridx = r1 - 32;
 336.428 +
 336.429 +	if (ridx >= sof) {
 336.430 +		/* this should never happen, as the "rsvd register fault" has higher priority */
 336.431 +		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
 336.432 +		return;
 336.433 +	}
 336.434 +
 336.435 +	if (ridx < sor)
 336.436 +		ridx = rotate_reg(sor, rrb_gr, ridx);
 336.437 +
 336.438 +	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 336.439 +	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 336.440 +
 336.441 +	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 336.442 +	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 336.443 +	if (addr >= kbs) {
 336.444 +		/* the register is on the kernel backing store: easy... */
 336.445 +		rnat_addr = ia64_rse_rnat_addr(addr);
 336.446 +		if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 336.447 +			rnat_addr = &sw->ar_rnat;
 336.448 +		nat_mask = 1UL << ia64_rse_slot_num(addr);
 336.449 +
 336.450 +		*addr = val;
 336.451 +		if (nat)
 336.452 +			*rnat_addr |=  nat_mask;
 336.453 +		else
 336.454 +			*rnat_addr &= ~nat_mask;
 336.455 +		return;
 336.456 +	}
 336.457 +
 336.458 +	if (!user_stack(current, regs)) {
 336.459 +		DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
 336.460 +		return;
 336.461 +	}
 336.462 +
 336.463 +	bspstore = (unsigned long *)regs->ar_bspstore;
 336.464 +	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 336.465 +	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 336.466 +	addr    = ia64_rse_skip_regs(bsp, ridx);
 336.467 +
 336.468 +	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 336.469 +
 336.470 +	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 336.471 +
 336.472 +	rnat_addr = ia64_rse_rnat_addr(addr);
 336.473 +
 336.474 +	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 336.475 +	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
 336.476 +	       (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
 336.477 +
 336.478 +	nat_mask = 1UL << ia64_rse_slot_num(addr);
 336.479 +	if (nat)
 336.480 +		rnats |=  nat_mask;
 336.481 +	else
 336.482 +		rnats &= ~nat_mask;
 336.483 +	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
 336.484 +
 336.485 +	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 336.486 +}
 336.487 +
 336.488 +
 336.489 +static void
 336.490 +get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
 336.491 +{
 336.492 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 336.493 +	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
 336.494 +	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 336.495 +	unsigned long rnats, nat_mask;
 336.496 +	unsigned long on_kbs;
 336.497 +	long sof = (regs->cr_ifs) & 0x7f;
 336.498 +	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 336.499 +	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 336.500 +	long ridx = r1 - 32;
 336.501 +
 336.502 +	if (ridx >= sof) {
 336.503 +		/* read of out-of-frame register returns an undefined value; 0 in our case.  */
 336.504 +		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
 336.505 +		goto fail;
 336.506 +	}
 336.507 +
 336.508 +	if (ridx < sor)
 336.509 +		ridx = rotate_reg(sor, rrb_gr, ridx);
 336.510 +
 336.511 +	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 336.512 +	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 336.513 +
 336.514 +	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 336.515 +	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 336.516 +	if (addr >= kbs) {
 336.517 +		/* the register is on the kernel backing store: easy... */
 336.518 +		*val = *addr;
 336.519 +		if (nat) {
 336.520 +			rnat_addr = ia64_rse_rnat_addr(addr);
 336.521 +			if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 336.522 +				rnat_addr = &sw->ar_rnat;
 336.523 +			nat_mask = 1UL << ia64_rse_slot_num(addr);
 336.524 +			*nat = (*rnat_addr & nat_mask) != 0;
 336.525 +		}
 336.526 +		return;
 336.527 +	}
 336.528 +
 336.529 +	if (!user_stack(current, regs)) {
 336.530 +		DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
 336.531 +		goto fail;
 336.532 +	}
 336.533 +
 336.534 +	bspstore = (unsigned long *)regs->ar_bspstore;
 336.535 +	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 336.536 +	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 336.537 +	addr    = ia64_rse_skip_regs(bsp, ridx);
 336.538 +
 336.539 +	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 336.540 +
 336.541 +	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 336.542 +
 336.543 +	if (nat) {
 336.544 +		rnat_addr = ia64_rse_rnat_addr(addr);
 336.545 +		nat_mask = 1UL << ia64_rse_slot_num(addr);
 336.546 +
336.547 +		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
336.548 +
336.549 +		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 336.550 +		*nat = (rnats & nat_mask) != 0;
 336.551 +	}
 336.552 +	return;
 336.553 +
 336.554 +  fail:
 336.555 +	*val = 0;
 336.556 +	if (nat)
 336.557 +		*nat = 0;
 336.558 +	return;
 336.559 +}
 336.560 +#endif // CONFIG_VTI
 336.561 +
 336.562 +
 336.563 +#ifdef XEN
 336.564 +void
 336.565 +#else
 336.566 +static void
 336.567 +#endif
 336.568 +setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
 336.569 +{
 336.570 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 336.571 +	unsigned long addr;
 336.572 +	unsigned long bitmask;
 336.573 +	unsigned long *unat;
 336.574 +
 336.575 +	/*
 336.576 +	 * First takes care of stacked registers
 336.577 +	 */
 336.578 +	if (regnum >= IA64_FIRST_STACKED_GR) {
 336.579 +		set_rse_reg(regs, regnum, val, nat);
 336.580 +		return;
 336.581 +	}
 336.582 +
 336.583 +	/*
 336.584 +	 * Using r0 as a target raises a General Exception fault which has higher priority
 336.585 +	 * than the Unaligned Reference fault.
 336.586 +	 */
 336.587 +
 336.588 +	/*
 336.589 +	 * Now look at registers in [0-31] range and init correct UNAT
 336.590 +	 */
 336.591 +	if (GR_IN_SW(regnum)) {
 336.592 +		addr = (unsigned long)sw;
 336.593 +		unat = &sw->ar_unat;
 336.594 +	} else {
 336.595 +		addr = (unsigned long)regs;
 336.596 +#ifdef CONFIG_VTI
 336.597 +		unat = &regs->eml_unat;
 336.598 +#else //CONFIG_VTI
 336.599 +		unat = &sw->caller_unat;
 336.600 +#endif  //CONFIG_VTI
 336.601 +	}
 336.602 +	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
 336.603 +	       addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
 336.604 +	/*
 336.605 +	 * add offset from base of struct
 336.606 +	 * and do it !
 336.607 +	 */
 336.608 +	addr += GR_OFFS(regnum);
 336.609 +
 336.610 +	*(unsigned long *)addr = val;
 336.611 +
 336.612 +	/*
 336.613 +	 * We need to clear the corresponding UNAT bit to fully emulate the load
336.614 +	 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
 336.615 +	 */
 336.616 +	bitmask   = 1UL << (addr >> 3 & 0x3f);
 336.617 +	DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
 336.618 +	if (nat) {
 336.619 +		*unat |= bitmask;
 336.620 +	} else {
 336.621 +		*unat &= ~bitmask;
 336.622 +	}
 336.623 +	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
 336.624 +}
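A worked example of the UNAT bit selection above (the spill address is invented):

/* addr = 0xe000000000a0e0d8 (hypothetical): (addr >> 3) & 0x3f = 27, so the
 * emulation clears (or sets, for a NaT) bit 27 of the relevant UNAT,
 * matching the GR[r3]{8:3} rule quoted from the EAS. */
unsigned long addr = 0xe000000000a0e0d8UL;
unsigned long bitmask = 1UL << ((addr >> 3) & 0x3f);	/* 1UL << 27 */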
 336.625 +
 336.626 +/*
 336.627 + * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
336.628 + * range from 32-127; the result is in the range from 0-95).
 336.629 + */
 336.630 +static inline unsigned long
 336.631 +fph_index (struct pt_regs *regs, long regnum)
 336.632 +{
 336.633 +	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
 336.634 +	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
 336.635 +}
 336.636 +
 336.637 +static void
 336.638 +setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 336.639 +{
 336.640 +	struct switch_stack *sw = (struct switch_stack *)regs - 1;
 336.641 +	unsigned long addr;
 336.642 +
 336.643 +	/*
 336.644 +	 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
 336.645 +	 * Fault. Thus, when we get here, we know the partition is enabled.
 336.646 +	 * To update f32-f127, there are three choices:
 336.647 +	 *
 336.648 +	 *	(1) save f32-f127 to thread.fph and update the values there
 336.649 +	 *	(2) use a gigantic switch statement to directly access the registers
 336.650 +	 *	(3) generate code on the fly to update the desired register
 336.651 +	 *
 336.652 +	 * For now, we are using approach (1).
 336.653 +	 */
 336.654 +	if (regnum >= IA64_FIRST_ROTATING_FR) {
 336.655 +		ia64_sync_fph(current);
 336.656 +#ifdef XEN
 336.657 +		current->arch._thread.fph[fph_index(regs, regnum)] = *fpval;
 336.658 +#else
 336.659 +		current->thread.fph[fph_index(regs, regnum)] = *fpval;
 336.660 +#endif
 336.661 +	} else {
 336.662 +		/*
 336.663 +		 * pt_regs or switch_stack ?
 336.664 +		 */
 336.665 +		if (FR_IN_SW(regnum)) {
 336.666 +			addr = (unsigned long)sw;
 336.667 +		} else {
 336.668 +			addr = (unsigned long)regs;
 336.669 +		}
 336.670 +
 336.671 +		DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
 336.672 +
 336.673 +		addr += FR_OFFS(regnum);
 336.674 +		*(struct ia64_fpreg *)addr = *fpval;
 336.675 +
 336.676 +		/*
 336.677 +		 * mark the low partition as being used now
 336.678 +		 *
 336.679 +		 * It is highly unlikely that this bit is not already set, but
 336.680 +		 * let's do it for safety.
 336.681 +		 */
 336.682 +		regs->cr_ipsr |= IA64_PSR_MFL;
 336.683 +	}
 336.684 +}
 336.685 +
 336.686 +/*
 336.687 + * Those 2 inline functions generate the spilled versions of the constant floating point
 336.688 + * registers which can be used with stfX
 336.689 + */
 336.690 +static inline void
 336.691 +float_spill_f0 (struct ia64_fpreg *final)
 336.692 +{
 336.693 +	ia64_stf_spill(final, 0);
 336.694 +}
 336.695 +
 336.696 +static inline void
 336.697 +float_spill_f1 (struct ia64_fpreg *final)
 336.698 +{
 336.699 +	ia64_stf_spill(final, 1);
 336.700 +}
 336.701 +
 336.702 +static void
 336.703 +getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 336.704 +{
 336.705 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 336.706 +	unsigned long addr;
 336.707 +
 336.708 +	/*
 336.709 +	 * From EAS-2.5: FPDisableFault has higher priority than
 336.710 +	 * Unaligned Fault. Thus, when we get here, we know the partition is
 336.711 +	 * enabled.
 336.712 +	 *
 336.713 +	 * When regnum > 31, the register is still live and we need to force a save
 336.714 +	 * to current->thread.fph to get access to it.  See discussion in setfpreg()
 336.715 +	 * for reasons and other ways of doing this.
 336.716 +	 */
 336.717 +	if (regnum >= IA64_FIRST_ROTATING_FR) {
 336.718 +		ia64_flush_fph(current);
 336.719 +#ifdef XEN
 336.720 +		*fpval = current->arch._thread.fph[fph_index(regs, regnum)];
 336.721 +#else
 336.722 +		*fpval = current->thread.fph[fph_index(regs, regnum)];
 336.723 +#endif
 336.724 +	} else {
 336.725 +		/*
 336.726 +		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
 336.727 +		 * not saved, we must generate their spilled form on the fly
 336.728 +		 */
 336.729 +		switch(regnum) {
 336.730 +		case 0:
 336.731 +			float_spill_f0(fpval);
 336.732 +			break;
 336.733 +		case 1:
 336.734 +			float_spill_f1(fpval);
 336.735 +			break;
 336.736 +		default:
 336.737 +			/*
 336.738 +			 * pt_regs or switch_stack ?
 336.739 +			 */
 336.740 +			addr =  FR_IN_SW(regnum) ? (unsigned long)sw
 336.741 +						 : (unsigned long)regs;
 336.742 +
 336.743 +			DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
 336.744 +			       FR_IN_SW(regnum), addr, FR_OFFS(regnum));
 336.745 +
 336.746 +			addr  += FR_OFFS(regnum);
 336.747 +			*fpval = *(struct ia64_fpreg *)addr;
 336.748 +		}
 336.749 +	}
 336.750 +}
 336.751 +
 336.752 +
 336.753 +#ifdef XEN
 336.754 +void
 336.755 +#else
 336.756 +static void
 336.757 +#endif
 336.758 +getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
 336.759 +{
 336.760 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 336.761 +	unsigned long addr, *unat;
 336.762 +
 336.763 +	if (regnum >= IA64_FIRST_STACKED_GR) {
 336.764 +		get_rse_reg(regs, regnum, val, nat);
 336.765 +		return;
 336.766 +	}
 336.767 +
 336.768 +	/*
 336.769 +	 * take care of r0 (read-only always evaluate to 0)
 336.770 +	 */
 336.771 +	if (regnum == 0) {
 336.772 +		*val = 0;
 336.773 +		if (nat)
 336.774 +			*nat = 0;
 336.775 +		return;
 336.776 +	}
 336.777 +
 336.778 +	/*
 336.779 +	 * Now look at registers in [0-31] range and init correct UNAT
 336.780 +	 */
 336.781 +	if (GR_IN_SW(regnum)) {
 336.782 +		addr = (unsigned long)sw;
 336.783 +		unat = &sw->ar_unat;
 336.784 +	} else {
 336.785 +		addr = (unsigned long)regs;
 336.786 +#ifdef  CONFIG_VTI
336.787 +		unat = &regs->eml_unat;
 336.788 +#else   //CONFIG_VTI
 336.789 +		unat = &sw->caller_unat;
 336.790 +#endif  //CONFIG_VTI
 336.791 +	}
 336.792 +
 336.793 +	DPRINT("addr_base=%lx offset=0x%x\n", addr,  GR_OFFS(regnum));
 336.794 +
 336.795 +	addr += GR_OFFS(regnum);
 336.796 +
 336.797 +	*val  = *(unsigned long *)addr;
 336.798 +
 336.799 +	/*
 336.800 +	 * do it only when requested
 336.801 +	 */
 336.802 +	if (nat)
 336.803 +		*nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
 336.804 +}
 336.805 +
 336.806 +static void
 336.807 +emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
 336.808 +{
 336.809 +	/*
 336.810 +	 * IMPORTANT:
 336.811 +	 * Given the way we handle unaligned speculative loads, we should
 336.812 +	 * not get to this point in the code but we keep this sanity check,
 336.813 +	 * just in case.
 336.814 +	 */
 336.815 +	if (ld.x6_op == 1 || ld.x6_op == 3) {
 336.816 +		printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
 336.817 +		die_if_kernel("unaligned reference on speculative load with register update\n",
 336.818 +			      regs, 30);
 336.819 +	}
 336.820 +
 336.821 +
 336.822 +	/*
 336.823 +	 * at this point, we know that the base register to update is valid i.e.,
 336.824 +	 * it's not r0
 336.825 +	 */
 336.826 +	if (type == UPD_IMMEDIATE) {
 336.827 +		unsigned long imm;
 336.828 +
 336.829 +		/*
 336.830 +		 * Load +Imm: ldXZ r1=[r3],imm(9)
 336.831 +		 *
 336.832 +		 *
 336.833 +		 * form imm9: [13:19] contain the first 7 bits
 336.834 +		 */
 336.835 +		imm = ld.x << 7 | ld.imm;
 336.836 +
 336.837 +		/*
 336.838 +		 * sign extend (1+8bits) if m set
 336.839 +		 */
 336.840 +		if (ld.m) imm |= SIGN_EXT9;
 336.841 +
 336.842 +		/*
 336.843 +		 * ifa == r3 and we know that the NaT bit on r3 was clear so
 336.844 +		 * we can directly use ifa.
 336.845 +		 */
 336.846 +		ifa += imm;
 336.847 +
 336.848 +		setreg(ld.r3, ifa, 0, regs);
 336.849 +
 336.850 +		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
 336.851 +
 336.852 +	} else if (ld.m) {
 336.853 +		unsigned long r2;
 336.854 +		int nat_r2;
 336.855 +
 336.856 +		/*
 336.857 +		 * Load +Reg Opcode: ldXZ r1=[r3],r2
 336.858 +		 *
 336.859 +		 * Note: that we update r3 even in the case of ldfX.a
 336.860 +		 * (where the load does not happen)
 336.861 +		 *
 336.862 +		 * The way the load algorithm works, we know that r3 does not
 336.863 +		 * have its NaT bit set (would have gotten NaT consumption
 336.864 +		 * before getting the unaligned fault). So we can use ifa
 336.865 +		 * which equals r3 at this point.
 336.866 +		 *
 336.867 +		 * IMPORTANT:
 336.868 +		 * The above statement holds ONLY because we know that we
 336.869 +		 * never reach this code when trying to do a ldX.s.
336.870 +		 * If we ever make it to here on an ldfX.s then that assumption no longer holds.
 336.871 +		 */
 336.872 +		getreg(ld.imm, &r2, &nat_r2, regs);
 336.873 +
 336.874 +		ifa += r2;
 336.875 +
 336.876 +		/*
 336.877 +		 * propagate Nat r2 -> r3
 336.878 +		 */
 336.879 +		setreg(ld.r3, ifa, nat_r2, regs);
 336.880 +
 336.881 +		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
 336.882 +	}
 336.883 +}
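A worked example of the imm9 reconstruction above (field values invented):

/* x = 1, imm field = 0x05, m = 1: the raw value is (1 << 7) | 0x05 = 0x85;
 * the m bit is the sign, so OR-ing in SIGN_EXT9 yields 0xffffffffffffff85,
 * i.e. a displacement of -123 added to ifa (== r3). */
unsigned long imm = (1UL << 7) | 0x05;
imm |= SIGN_EXT9;	/* sign-extend because m was set */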
 336.884 +
 336.885 +
 336.886 +static int
 336.887 +emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 336.888 +{
 336.889 +	unsigned int len = 1 << ld.x6_sz;
 336.890 +	unsigned long val = 0;
 336.891 +
 336.892 +	/*
 336.893 +	 * r0, as target, doesn't need to be checked because Illegal Instruction
 336.894 +	 * faults have higher priority than unaligned faults.
 336.895 +	 *
 336.896 +	 * r0 cannot be found as the base as it would never generate an
 336.897 +	 * unaligned reference.
 336.898 +	 */
 336.899 +
 336.900 +	/*
 336.901 +	 * ldX.a we will emulate load and also invalidate the ALAT entry.
 336.902 +	 * See comment below for explanation on how we handle ldX.a
 336.903 +	 */
 336.904 +
 336.905 +	if (len != 2 && len != 4 && len != 8) {
 336.906 +		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 336.907 +		return -1;
 336.908 +	}
 336.909 +	/* this assumes little-endian byte-order: */
 336.910 +	if (copy_from_user(&val, (void __user *) ifa, len))
 336.911 +		return -1;
 336.912 +	setreg(ld.r1, val, 0, regs);
 336.913 +
 336.914 +	/*
 336.915 +	 * check for updates on any kind of loads
 336.916 +	 */
 336.917 +	if (ld.op == 0x5 || ld.m)
 336.918 +		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
 336.919 +
 336.920 +	/*
 336.921 +	 * handling of various loads (based on EAS2.4):
 336.922 +	 *
 336.923 +	 * ldX.acq (ordered load):
 336.924 +	 *	- acquire semantics would have been used, so force fence instead.
 336.925 +	 *
 336.926 +	 * ldX.c.clr (check load and clear):
 336.927 +	 *	- if we get to this handler, it's because the entry was not in the ALAT.
 336.928 +	 *	  Therefore the operation reverts to a normal load
 336.929 +	 *
 336.930 +	 * ldX.c.nc (check load no clear):
 336.931 +	 *	- same as previous one
 336.932 +	 *
 336.933 +	 * ldX.c.clr.acq (ordered check load and clear):
 336.934 +	 *	- same as above for c.clr part. The load needs to have acquire semantics. So
 336.935 +	 *	  we use the fence semantics which is stronger and thus ensures correctness.
 336.936 +	 *
 336.937 +	 * ldX.a (advanced load):
 336.938 +	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
 336.939 +	 *	  address doesn't match requested size alignment. This means that we would
 336.940 +	 *	  possibly need more than one load to get the result.
 336.941 +	 *
 336.942 +	 *	  The load part can be handled just like a normal load, however the difficult
 336.943 +	 *	  part is to get the right thing into the ALAT. The critical piece of information
336.944 +	 *	  is the base address of the load & its size. To do that, a ld.a must be executed,
 336.945 +	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
 336.946 +	 *	  if we use the same target register, we will be okay for the check.a instruction.
 336.947 +	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
336.948 +	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
 336.949 +	 *	  ALAT). If such an entry is found the entry is invalidated. But this is not good
 336.950 +	 *	  enough, take the following example:
 336.951 +	 *		r3=3
 336.952 +	 *		ld4.a r1=[r3]
 336.953 +	 *
 336.954 +	 *	  Could be emulated by doing:
 336.955 +	 *		ld1.a r1=[r3],1
 336.956 +	 *		store to temporary;
 336.957 +	 *		ld1.a r1=[r3],1
 336.958 +	 *		store & shift to temporary;
 336.959 +	 *		ld1.a r1=[r3],1
 336.960 +	 *		store & shift to temporary;
 336.961 +	 *		ld1.a r1=[r3]
 336.962 +	 *		store & shift to temporary;
 336.963 +	 *		r1=temporary
 336.964 +	 *
336.965 +	 *	  So in this case, you would get the right value in r1 but the wrong info in
 336.966 +	 *	  the ALAT.  Notice that you could do it in reverse to finish with address 3
 336.967 +	 *	  but you would still get the size wrong.  To get the size right, one needs to
336.968 +	 *	  execute exactly the same kind of load. You could do it from an aligned
 336.969 +	 *	  temporary location, but you would get the address wrong.
 336.970 +	 *
 336.971 +	 *	  So no matter what, it is not possible to emulate an advanced load
336.972 +	 *	  correctly. But is that really critical?
 336.973 +	 *
 336.974 +	 *	  We will always convert ld.a into a normal load with ALAT invalidated.  This
336.975 +	 *	  will enable the compiler to do optimizations where certain code paths after ld.a
336.976 +	 *	  are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
 336.977 +	 *
 336.978 +	 *	  If there is a store after the advanced load, one must either do a ld.c.* or
 336.979 +	 *	  chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
 336.980 +	 *	  entry found in ALAT), and that's perfectly ok because:
 336.981 +	 *
 336.982 +	 *		- ld.c.*, if the entry is not present a  normal load is executed
 336.983 +	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
 336.984 +	 *
 336.985 +	 *	  In either case, the load can be potentially retried in another form.
 336.986 +	 *
 336.987 +	 *	  ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
 336.988 +	 *	  up a stale entry later). The register base update MUST also be performed.
 336.989 +	 */
 336.990 +
 336.991 +	/*
 336.992 +	 * when the load has the .acq completer then
 336.993 +	 * use ordering fence.
 336.994 +	 */
 336.995 +	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
 336.996 +		mb();
 336.997 +
 336.998 +	/*
 336.999 +	 * invalidate ALAT entry in case of advanced load
336.1000 +	 */
336.1001 +	if (ld.x6_op == 0x2)
336.1002 +		invala_gr(ld.r1);
336.1003 +
336.1004 +	return 0;
336.1005 +}
336.1006 +
336.1007 +static int
336.1008 +emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
336.1009 +{
336.1010 +	unsigned long r2;
336.1011 +	unsigned int len = 1 << ld.x6_sz;
336.1012 +
336.1013 +	/*
336.1014 +	 * if we get to this handler, NaT bits on both r3 and r2 have already
336.1015 +	 * been checked, so we don't need to do it again.
336.1016 +	 *
336.1017 +	 * extract the value to be stored
336.1018 +	 */
336.1019 +	getreg(ld.imm, &r2, NULL, regs);
336.1020 +
336.1021 +	/*
336.1022 +	 * we rely on the macros in unaligned.h for now i.e.,
336.1023 +	 * we let the compiler figure out how to read memory gracefully.
336.1024 +	 *
336.1025 +	 * We need this switch/case because of the way the inline function
336.1026 +	 * works. The code is optimized by the compiler and looks like
336.1027 +	 * a single switch/case.
336.1028 +	 */
336.1029 +	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
336.1030 +
336.1031 +	if (len != 2 && len != 4 && len != 8) {
336.1032 +		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
336.1033 +		return -1;
336.1034 +	}
336.1035 +
336.1036 +	/* this assumes little-endian byte-order: */
336.1037 +	if (copy_to_user((void __user *) ifa, &r2, len))
336.1038 +		return -1;
336.1039 +
336.1040 +	/*
336.1041 +	 * stX [r3]=r2,imm(9)
336.1042 +	 *
336.1043 +	 * NOTE:
336.1044 +	 * ld.r3 can never be r0, because r0 would not generate an
336.1045 +	 * unaligned access.
336.1046 +	 */
336.1047 +	if (ld.op == 0x5) {
336.1048 +		unsigned long imm;
336.1049 +
336.1050 +		/*
336.1051 +		 * form imm9: [12:6] contains the first 7 bits
336.1052 +		 */
336.1053 +		imm = ld.x << 7 | ld.r1;
336.1054 +		/*
336.1055 +		 * sign extend (8bits) if m set
336.1056 +		 */
336.1057 +		if (ld.m) imm |= SIGN_EXT9;
336.1058 +		/*
336.1059 +		 * ifa == r3 (NaT is necessarily cleared)
336.1060 +		 */
336.1061 +		ifa += imm;
336.1062 +
336.1063 +		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
336.1064 +
336.1065 +		setreg(ld.r3, ifa, 0, regs);
336.1066 +	}
336.1067 +	/*
336.1068 +	 * we don't have alat_invalidate_multiple() so we need
336.1069 +	 * to do the complete flush :-<<
336.1070 +	 */
336.1071 +	ia64_invala();
336.1072 +
336.1073 +	/*
336.1074 +	 * stX.rel: use fence instead of release
336.1075 +	 */
336.1076 +	if (ld.x6_op == 0xd)
336.1077 +		mb();
336.1078 +
336.1079 +	return 0;
336.1080 +}
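The imm9 handling above (repeated for the floating-point store further below) builds the immediate from the x bit and the r1 field and then sign-extends it via the m bit. A minimal standalone sketch of that decode, assuming SIGN_EXT9 is the usual mask with all bits above bit 7 set (the helper name is made up for illustration):

static unsigned long decode_imm9(unsigned int m, unsigned int x, unsigned int r1_field)
{
	/* low 8 bits: x supplies bit 7, the r1 field supplies bits [6:0] */
	unsigned long imm = ((unsigned long) x << 7) | r1_field;

	/* m is the sign bit of the 9-bit immediate: extend it downward */
	if (m)
		imm |= ~0xffUL;		/* what SIGN_EXT9 is assumed to be */
	return imm;
}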
336.1081 +
336.1082 +/*
336.1083 + * floating point operations sizes in bytes
336.1084 + */
336.1085 +static const unsigned char float_fsz[4]={
336.1086 +	10, /* extended precision (e) */
336.1087 +	8,  /* integer (8)            */
336.1088 +	4,  /* single precision (s)   */
336.1089 +	8   /* double precision (d)   */
336.1090 +};
336.1091 +
336.1092 +static inline void
336.1093 +mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
336.1094 +{
336.1095 +	ia64_ldfe(6, init);
336.1096 +	ia64_stop();
336.1097 +	ia64_stf_spill(final, 6);
336.1098 +}
336.1099 +
336.1100 +static inline void
336.1101 +mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
336.1102 +{
336.1103 +	ia64_ldf8(6, init);
336.1104 +	ia64_stop();
336.1105 +	ia64_stf_spill(final, 6);
336.1106 +}
336.1107 +
336.1108 +static inline void
336.1109 +mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
336.1110 +{
336.1111 +	ia64_ldfs(6, init);
336.1112 +	ia64_stop();
336.1113 +	ia64_stf_spill(final, 6);
336.1114 +}
336.1115 +
336.1116 +static inline void
336.1117 +mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
336.1118 +{
336.1119 +	ia64_ldfd(6, init);
336.1120 +	ia64_stop();
336.1121 +	ia64_stf_spill(final, 6);
336.1122 +}
336.1123 +
336.1124 +static inline void
336.1125 +float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
336.1126 +{
336.1127 +	ia64_ldf_fill(6, init);
336.1128 +	ia64_stop();
336.1129 +	ia64_stfe(final, 6);
336.1130 +}
336.1131 +
336.1132 +static inline void
336.1133 +float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
336.1134 +{
336.1135 +	ia64_ldf_fill(6, init);
336.1136 +	ia64_stop();
336.1137 +	ia64_stf8(final, 6);
336.1138 +}
336.1139 +
336.1140 +static inline void
336.1141 +float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
336.1142 +{
336.1143 +	ia64_ldf_fill(6, init);
336.1144 +	ia64_stop();
336.1145 +	ia64_stfs(final, 6);
336.1146 +}
336.1147 +
336.1148 +static inline void
336.1149 +float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
336.1150 +{
336.1151 +	ia64_ldf_fill(6, init);
336.1152 +	ia64_stop();
336.1153 +	ia64_stfd(final, 6);
336.1154 +}
336.1155 +
336.1156 +static int
336.1157 +emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
336.1158 +{
336.1159 +	struct ia64_fpreg fpr_init[2];
336.1160 +	struct ia64_fpreg fpr_final[2];
336.1161 +	unsigned long len = float_fsz[ld.x6_sz];
336.1162 +
336.1163 +	/*
336.1164 +	 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
336.1165 +	 * higher priority than unaligned faults.
336.1166 +	 *
336.1167 +	 * r0 cannot be found as the base as it would never generate an unaligned
336.1168 +	 * reference.
336.1169 +	 */
336.1170 +
336.1171 +	/*
336.1172 +	 * make sure we get clean buffers
336.1173 +	 */
336.1174 +	memset(&fpr_init, 0, sizeof(fpr_init));
336.1175 +	memset(&fpr_final, 0, sizeof(fpr_final));
336.1176 +
336.1177 +	/*
336.1178 +	 * ldfpX.a: we don't try to emulate anything but we must
336.1179 +	 * invalidate the ALAT entry and execute updates, if any.
336.1180 +	 */
336.1181 +	if (ld.x6_op != 0x2) {
336.1182 +		/*
336.1183 +		 * This assumes little-endian byte-order.  Note that there is no "ldfpe"
336.1184 +		 * instruction:
336.1185 +		 */
336.1186 +		if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
336.1187 +		    || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
336.1188 +			return -1;
336.1189 +
336.1190 +		DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
336.1191 +		DDUMP("fpr_init =", &fpr_init, 2*len);
336.1192 +		/*
336.1193 +		 * XXX fixme
336.1194 +		 * Could optimize inlines by using ldfpX & 2 spills
336.1195 +		 */
336.1196 +		switch( ld.x6_sz ) {
336.1197 +			case 0:
336.1198 +				mem2float_extended(&fpr_init[0], &fpr_final[0]);
336.1199 +				mem2float_extended(&fpr_init[1], &fpr_final[1]);
336.1200 +				break;
336.1201 +			case 1:
336.1202 +				mem2float_integer(&fpr_init[0], &fpr_final[0]);
336.1203 +				mem2float_integer(&fpr_init[1], &fpr_final[1]);
336.1204 +				break;
336.1205 +			case 2:
336.1206 +				mem2float_single(&fpr_init[0], &fpr_final[0]);
336.1207 +				mem2float_single(&fpr_init[1], &fpr_final[1]);
336.1208 +				break;
336.1209 +			case 3:
336.1210 +				mem2float_double(&fpr_init[0], &fpr_final[0]);
336.1211 +				mem2float_double(&fpr_init[1], &fpr_final[1]);
336.1212 +				break;
336.1213 +		}
336.1214 +		DDUMP("fpr_final =", &fpr_final, 2*len);
336.1215 +		/*
336.1216 +		 * XXX fixme
336.1217 +		 *
336.1218 +		 * A possible optimization would be to drop fpr_final and directly
336.1219 +		 * use the storage from the saved context i.e., the actual final
336.1220 +		 * destination (pt_regs, switch_stack or thread structure).
336.1221 +		 */
336.1222 +		setfpreg(ld.r1, &fpr_final[0], regs);
336.1223 +		setfpreg(ld.imm, &fpr_final[1], regs);
336.1224 +	}
336.1225 +
336.1226 +	/*
336.1227 +	 * Check for updates: only immediate updates are available for this
336.1228 +	 * instruction.
336.1229 +	 */
336.1230 +	if (ld.m) {
336.1231 +		/*
336.1232 +		 * the immediate is implicit given the ldsz of the operation:
336.1233 +		 * single: 8 (2x4) and for all others it's 16 (2x8)
336.1234 +		 */
336.1235 +		ifa += len<<1;
336.1236 +
336.1237 +		/*
336.1238 +		 * IMPORTANT:
336.1239 +		 * the fact that we force the NaT of r3 to zero is ONLY valid
336.1240 +		 * as long as we don't come here with a ldfpX.s.
336.1241 +		 * For this reason we keep this sanity check
336.1242 +		 */
336.1243 +		if (ld.x6_op == 1 || ld.x6_op == 3)
336.1244 +			printk(KERN_ERR "%s: register update on speculative load pair, error\n",
336.1245 +			       __FUNCTION__);
336.1246 +
336.1247 +		setreg(ld.r3, ifa, 0, regs);
336.1248 +	}
336.1249 +
336.1250 +	/*
336.1251 +	 * Invalidate ALAT entries, if any, for both registers.
336.1252 +	 */
336.1253 +	if (ld.x6_op == 0x2) {
336.1254 +		invala_fr(ld.r1);
336.1255 +		invala_fr(ld.imm);
336.1256 +	}
336.1257 +	return 0;
336.1258 +}
336.1259 +
336.1260 +
336.1261 +static int
336.1262 +emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
336.1263 +{
336.1264 +	struct ia64_fpreg fpr_init;
336.1265 +	struct ia64_fpreg fpr_final;
336.1266 +	unsigned long len = float_fsz[ld.x6_sz];
336.1267 +
336.1268 +	/*
336.1269 +	 * fr0 & fr1 don't need to be checked because Illegal Instruction
336.1270 +	 * faults have higher priority than unaligned faults.
336.1271 +	 *
336.1272 +	 * r0 cannot be found as the base as it would never generate an
336.1273 +	 * unaligned reference.
336.1274 +	 */
336.1275 +
336.1276 +	/*
336.1277 +	 * make sure we get clean buffers
336.1278 +	 */
336.1279 +	memset(&fpr_init,0, sizeof(fpr_init));
336.1280 +	memset(&fpr_final,0, sizeof(fpr_final));
336.1281 +
336.1282 +	/*
336.1283 +	 * ldfX.a: we don't try to emulate anything but we must
336.1284 +	 * invalidate the ALAT entry.
336.1285 +	 * See comments in ldX for descriptions on how the various loads are handled.
336.1286 +	 */
336.1287 +	if (ld.x6_op != 0x2) {
336.1288 +		if (copy_from_user(&fpr_init, (void __user *) ifa, len))
336.1289 +			return -1;
336.1290 +
336.1291 +		DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
336.1292 +		DDUMP("fpr_init =", &fpr_init, len);
336.1293 +		/*
336.1294 +		 * we only do something for x6_op={0,8,9}
336.1295 +		 */
336.1296 +		switch( ld.x6_sz ) {
336.1297 +			case 0:
336.1298 +				mem2float_extended(&fpr_init, &fpr_final);
336.1299 +				break;
336.1300 +			case 1:
336.1301 +				mem2float_integer(&fpr_init, &fpr_final);
336.1302 +				break;
336.1303 +			case 2:
336.1304 +				mem2float_single(&fpr_init, &fpr_final);
336.1305 +				break;
336.1306 +			case 3:
336.1307 +				mem2float_double(&fpr_init, &fpr_final);
336.1308 +				break;
336.1309 +		}
336.1310 +		DDUMP("fpr_final =", &fpr_final, len);
336.1311 +		/*
336.1312 +		 * XXX fixme
336.1313 +		 *
336.1314 +		 * A possible optimization would be to drop fpr_final and directly
336.1315 +		 * use the storage from the saved context i.e., the actual final
336.1316 +		 * destination (pt_regs, switch_stack or thread structure).
336.1317 +		 */
336.1318 +		setfpreg(ld.r1, &fpr_final, regs);
336.1319 +	}
336.1320 +
336.1321 +	/*
336.1322 +	 * check for updates on any loads
336.1323 +	 */
336.1324 +	if (ld.op == 0x7 || ld.m)
336.1325 +		emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
336.1326 +
336.1327 +	/*
336.1328 +	 * invalidate ALAT entry in case of advanced floating point loads
336.1329 +	 */
336.1330 +	if (ld.x6_op == 0x2)
336.1331 +		invala_fr(ld.r1);
336.1332 +
336.1333 +	return 0;
336.1334 +}
336.1335 +
336.1336 +
336.1337 +static int
336.1338 +emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
336.1339 +{
336.1340 +	struct ia64_fpreg fpr_init;
336.1341 +	struct ia64_fpreg fpr_final;
336.1342 +	unsigned long len = float_fsz[ld.x6_sz];
336.1343 +
336.1344 +	/*
336.1345 +	 * make sure we get clean buffers
336.1346 +	 */
336.1347 +	memset(&fpr_init,0, sizeof(fpr_init));
336.1348 +	memset(&fpr_final,0, sizeof(fpr_final));
336.1349 +
336.1350 +	/*
336.1351 +	 * if we get to this handler, NaT bits on both r3 and r2 have already
336.1352 +	 * been checked, so we don't need to do it again.
336.1353 +	 *
336.1354 +	 * extract the value to be stored
336.1355 +	 */
336.1356 +	getfpreg(ld.imm, &fpr_init, regs);
336.1357 +	/*
336.1358 +	 * during this step, we extract the spilled registers from the saved
336.1359 +	 * context i.e., we refill. Then we store (no spill) to temporary
336.1360 +	 * aligned location
336.1361 +	 */
336.1362 +	switch( ld.x6_sz ) {
336.1363 +		case 0:
336.1364 +			float2mem_extended(&fpr_init, &fpr_final);
336.1365 +			break;
336.1366 +		case 1:
336.1367 +			float2mem_integer(&fpr_init, &fpr_final);
336.1368 +			break;
336.1369 +		case 2:
336.1370 +			float2mem_single(&fpr_init, &fpr_final);
336.1371 +			break;
336.1372 +		case 3:
336.1373 +			float2mem_double(&fpr_init, &fpr_final);
336.1374 +			break;
336.1375 +	}
336.1376 +	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
336.1377 +	DDUMP("fpr_init =", &fpr_init, len);
336.1378 +	DDUMP("fpr_final =", &fpr_final, len);
336.1379 +
336.1380 +	if (copy_to_user((void __user *) ifa, &fpr_final, len))
336.1381 +		return -1;
336.1382 +
336.1383 +	/*
336.1384 +	 * stfX [r3]=r2,imm(9)
336.1385 +	 *
336.1386 +	 * NOTE:
336.1387 +	 * ld.r3 can never be r0, because r0 would not generate an
336.1388 +	 * unaligned access.
336.1389 +	 */
336.1390 +	if (ld.op == 0x7) {
336.1391 +		unsigned long imm;
336.1392 +
336.1393 +		/*
336.1394 +		 * form imm9: [12:6] contains the first 7 bits
336.1395 +		 */
336.1396 +		imm = ld.x << 7 | ld.r1;
336.1397 +		/*
336.1398 +		 * sign extend (8bits) if m set
336.1399 +		 */
336.1400 +		if (ld.m)
336.1401 +			imm |= SIGN_EXT9;
336.1402 +		/*
336.1403 +		 * ifa == r3 (NaT is necessarily cleared)
336.1404 +		 */
336.1405 +		ifa += imm;
336.1406 +
336.1407 +		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
336.1408 +
336.1409 +		setreg(ld.r3, ifa, 0, regs);
336.1410 +	}
336.1411 +	/*
336.1412 +	 * we don't have alat_invalidate_multiple() so we need
336.1413 +	 * to do the complete flush :-<<
336.1414 +	 */
336.1415 +	ia64_invala();
336.1416 +
336.1417 +	return 0;
336.1418 +}
336.1419 +
336.1420 +/*
336.1421 + * Make sure we log the unaligned access, so that user/sysadmin can notice it and
336.1422 + * eventually fix the program.  However, we don't want to do that for every access so we
336.1423 + * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
336.1424 + * either...
336.1425 + */
336.1426 +static int
336.1427 +within_logging_rate_limit (void)
336.1428 +{
336.1429 +	static unsigned long count, last_time;
336.1430 +
336.1431 +	if (jiffies - last_time > 5*HZ)
336.1432 +		count = 0;
336.1433 +	if (++count < 5) {
336.1434 +		last_time = jiffies;
336.1435 +		return 1;
336.1436 +	}
336.1437 +	return 0;
336.1438 +
336.1439 +}
336.1440 +
336.1441 +void
336.1442 +ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
336.1443 +{
336.1444 +#ifdef XEN
336.1445 +	printk("ia64_handle_unaligned: called, not working yet\n");
336.1446 +#else
336.1447 +	struct ia64_psr *ipsr = ia64_psr(regs);
336.1448 +	mm_segment_t old_fs = get_fs();
336.1449 +	unsigned long bundle[2];
336.1450 +	unsigned long opcode;
336.1451 +	struct siginfo si;
336.1452 +	const struct exception_table_entry *eh = NULL;
336.1453 +	union {
336.1454 +		unsigned long l;
336.1455 +		load_store_t insn;
336.1456 +	} u;
336.1457 +	int ret = -1;
336.1458 +
336.1459 +	if (ia64_psr(regs)->be) {
336.1460 +		/* we don't support big-endian accesses */
336.1461 +		die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
336.1462 +		goto force_sigbus;
336.1463 +	}
336.1464 +
336.1465 +	/*
336.1466 +	 * Treat kernel accesses for which there is an exception handler entry the same as
336.1467 +	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
336.1468 +	 * handler into reading arbitrary kernel addresses...
336.1469 +	 */
336.1470 +	if (!user_mode(regs))
336.1471 +		eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
336.1472 +	if (user_mode(regs) || eh) {
336.1473 +		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
336.1474 +			goto force_sigbus;
336.1475 +
336.1476 +		if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
336.1477 +		    && within_logging_rate_limit())
336.1478 +		{
336.1479 +			char buf[200];	/* comm[] is at most 16 bytes... */
336.1480 +			size_t len;
336.1481 +
336.1482 +			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
336.1483 +				      "ip=0x%016lx\n\r", current->comm, current->pid,
336.1484 +				      ifa, regs->cr_iip + ipsr->ri);
336.1485 +			/*
336.1486 +			 * Don't call tty_write_message() if we're in the kernel; we might
336.1487 +			 * be holding locks...
336.1488 +			 */
336.1489 +			if (user_mode(regs))
336.1490 +				tty_write_message(current->signal->tty, buf);
336.1491 +			buf[len-1] = '\0';	/* drop '\r' */
336.1492 +			printk(KERN_WARNING "%s", buf);	/* watch for command names containing %s */
336.1493 +		}
336.1494 +	} else {
336.1495 +		if (within_logging_rate_limit())
336.1496 +			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
336.1497 +			       ifa, regs->cr_iip + ipsr->ri);
336.1498 +		set_fs(KERNEL_DS);
336.1499 +	}
336.1500 +
336.1501 +	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
336.1502 +	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
336.1503 +
336.1504 +	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
336.1505 +		goto failure;
336.1506 +
336.1507 +	/*
336.1508 +	 * extract the instruction from the bundle given the slot number
336.1509 +	 */
336.1510 +	switch (ipsr->ri) {
336.1511 +	      case 0: u.l = (bundle[0] >>  5); break;
336.1512 +	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
336.1513 +	      case 2: u.l = (bundle[1] >> 23); break;
336.1514 +	}
336.1515 +	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
336.1516 +
336.1517 +	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
336.1518 +	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
336.1519 +	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
336.1520 +
336.1521 +	/*
336.1522 +	 * IMPORTANT:
336.1523 +	 * Notice that the switch statement does NOT cover all possible instructions
336.1524 +	 * that DO generate unaligned references. This is done on purpose because for some
336.1525 +	 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
336.1526 +	 * is WRONG to try and emulate. Here is a list of instructions we don't emulate, i.e.,
336.1527 +	 * the program will get a signal and die:
336.1528 +	 *
336.1529 +	 *	load/store:
336.1530 +	 *		- ldX.spill
336.1531 +	 *		- stX.spill
336.1532 +	 *	Reason: RNATs are based on addresses
336.1533 +	 *
336.1534 +	 *	synchronization:
336.1535 +	 *		- cmpxchg
336.1536 +	 *		- fetchadd
336.1537 +	 *		- xchg
336.1538 +	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
336.1539 +	 *	        instructions.
336.1540 +	 *
336.1541 +	 *	speculative loads:
336.1542 +	 *		- ldX.sZ
336.1543 +	 *	Reason: side effects, code must be ready to deal with failure so simpler
336.1544 +	 *		to let the load fail.
336.1545 +	 * ---------------------------------------------------------------------------------
336.1546 +	 * XXX fixme
336.1547 +	 *
336.1548 +	 * I would like to get rid of this switch case and do something
336.1549 +	 * more elegant.
336.1550 +	 */
336.1551 +	switch (opcode) {
336.1552 +	      case LDS_OP:
336.1553 +	      case LDSA_OP:
336.1554 +	      case LDS_IMM_OP:
336.1555 +	      case LDSA_IMM_OP:
336.1556 +	      case LDFS_OP:
336.1557 +	      case LDFSA_OP:
336.1558 +	      case LDFS_IMM_OP:
336.1559 +		/*
336.1560 +		 * The instruction will be retried with deferred exceptions turned on, and
336.1561 +		 * we should get the NaT bit installed
336.1562 +		 *
336.1563 +		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
336.1564 +		 * are actually executed even though the operation failed. So we don't
336.1565 +		 * need to take care of this.
336.1566 +		 */
336.1567 +		DPRINT("forcing PSR_ED\n");
336.1568 +		regs->cr_ipsr |= IA64_PSR_ED;
336.1569 +		goto done;
336.1570 +
336.1571 +	      case LD_OP:
336.1572 +	      case LDA_OP:
336.1573 +	      case LDBIAS_OP:
336.1574 +	      case LDACQ_OP:
336.1575 +	      case LDCCLR_OP:
336.1576 +	      case LDCNC_OP:
336.1577 +	      case LDCCLRACQ_OP:
336.1578 +	      case LD_IMM_OP:
336.1579 +	      case LDA_IMM_OP:
336.1580 +	      case LDBIAS_IMM_OP:
336.1581 +	      case LDACQ_IMM_OP:
336.1582 +	      case LDCCLR_IMM_OP:
336.1583 +	      case LDCNC_IMM_OP:
336.1584 +	      case LDCCLRACQ_IMM_OP:
336.1585 +		ret = emulate_load_int(ifa, u.insn, regs);
336.1586 +		break;
336.1587 +
336.1588 +	      case ST_OP:
336.1589 +	      case STREL_OP:
336.1590 +	      case ST_IMM_OP:
336.1591 +	      case STREL_IMM_OP:
336.1592 +		ret = emulate_store_int(ifa, u.insn, regs);
336.1593 +		break;
336.1594 +
336.1595 +	      case LDF_OP:
336.1596 +	      case LDFA_OP:
336.1597 +	      case LDFCCLR_OP:
336.1598 +	      case LDFCNC_OP:
336.1599 +	      case LDF_IMM_OP:
336.1600 +	      case LDFA_IMM_OP:
336.1601 +	      case LDFCCLR_IMM_OP:
336.1602 +	      case LDFCNC_IMM_OP:
336.1603 +		if (u.insn.x)
336.1604 +			ret = emulate_load_floatpair(ifa, u.insn, regs);
336.1605 +		else
336.1606 +			ret = emulate_load_float(ifa, u.insn, regs);
336.1607 +		break;
336.1608 +
336.1609 +	      case STF_OP:
336.1610 +	      case STF_IMM_OP:
336.1611 +		ret = emulate_store_float(ifa, u.insn, regs);
336.1612 +		break;
336.1613 +
336.1614 +	      default:
336.1615 +		goto failure;
336.1616 +	}
336.1617 +	DPRINT("ret=%d\n", ret);
336.1618 +	if (ret)
336.1619 +		goto failure;
336.1620 +
336.1621 +	if (ipsr->ri == 2)
336.1622 +		/*
336.1623 +		 * given today's architecture this case is not likely to happen because a
336.1624 +		 * memory access instruction (M) can never be in the last slot of a
336.1625 +		 * bundle. But let's keep it for now.
336.1626 +		 */
336.1627 +		regs->cr_iip += 16;
336.1628 +	ipsr->ri = (ipsr->ri + 1) & 0x3;
336.1629 +
336.1630 +	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
336.1631 +  done:
336.1632 +	set_fs(old_fs);		/* restore original address limit */
336.1633 +	return;
336.1634 +
336.1635 +  failure:
336.1636 +	/* something went wrong... */
336.1637 +	if (!user_mode(regs)) {
336.1638 +		if (eh) {
336.1639 +			ia64_handle_exception(regs, eh);
336.1640 +			goto done;
336.1641 +		}
336.1642 +		die_if_kernel("error during unaligned kernel access\n", regs, ret);
336.1643 +		/* NOT_REACHED */
336.1644 +	}
336.1645 +  force_sigbus:
336.1646 +	si.si_signo = SIGBUS;
336.1647 +	si.si_errno = 0;
336.1648 +	si.si_code = BUS_ADRALN;
336.1649 +	si.si_addr = (void __user *) ifa;
336.1650 +	si.si_flags = 0;
336.1651 +	si.si_isr = 0;
336.1652 +	si.si_imm = 0;
336.1653 +	force_sig_info(SIGBUS, &si, current);
336.1654 +	goto done;
336.1655 +#endif
336.1656 +}
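For reference, the slot extraction used in the switch near the top of ia64_handle_unaligned() can be written as a standalone helper. This is only an illustrative sketch (the handler itself keeps the raw 64-bit value and lets the load_store_t bitfields do the masking); it assumes the standard IA-64 bundle layout of a 5-bit template followed by three 41-bit instruction slots:

static unsigned long bundle_slot(const unsigned long bundle[2], unsigned int slot)
{
	const unsigned long mask = (1UL << 41) - 1;

	switch (slot) {
	case 0: return (bundle[0] >> 5) & mask;				/* bits  5..45  */
	case 1: return ((bundle[0] >> 46) | (bundle[1] << 18)) & mask;	/* bits 46..86  */
	case 2: return (bundle[1] >> 23) & mask;			/* bits 87..127 */
	}
	return 0;
}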
   337.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   337.2 +++ b/xen/arch/ia64/linux/cmdline.c	Tue Aug 16 12:15:23 2005 +0800
   337.3 @@ -0,0 +1,120 @@
   337.4 +/*
   337.5 + * linux/lib/cmdline.c
   337.6 + * Helper functions generally used for parsing kernel command line
   337.7 + * and module options.
   337.8 + *
   337.9 + * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c.
  337.10 + *
  337.11 + * This source code is licensed under the GNU General Public License,
  337.12 + * Version 2.  See the file COPYING for more details.
  337.13 + *
  337.14 + * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs
  337.15 + *
  337.16 + */
  337.17 +
  337.18 +#include <linux/module.h>
  337.19 +#include <linux/kernel.h>
  337.20 +#include <linux/string.h>
  337.21 +
  337.22 +
  337.23 +/**
  337.24 + *	get_option - Parse integer from an option string
  337.25 + *	@str: option string
  337.26 + *	@pint: (output) integer value parsed from @str
  337.27 + *
  337.28 + *	Read an int from an option string; if available, accept a subsequent
  337.29 + *	comma as well.
  337.30 + *
  337.31 + *	Return values:
  337.32 + *	0 : no int in string
  337.33 + *	1 : int found, no subsequent comma
  337.34 + *	2 : int found including a subsequent comma
  337.35 + */
  337.36 +
  337.37 +int get_option (char **str, int *pint)
  337.38 +{
  337.39 +	char *cur = *str;
  337.40 +
  337.41 +	if (!cur || !(*cur))
  337.42 +		return 0;
  337.43 +	*pint = simple_strtol (cur, str, 0);
  337.44 +	if (cur == *str)
  337.45 +		return 0;
  337.46 +	if (**str == ',') {
  337.47 +		(*str)++;
  337.48 +		return 2;
  337.49 +	}
  337.50 +
  337.51 +	return 1;
  337.52 +}
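A hypothetical usage sketch for get_option(), as it would appear in a __setup()-style parameter parser (the "maxdoms=" option and the opt_max_doms variable are made up for illustration):

static int opt_max_doms;			/* made-up variable, for the example only */

static int __init maxdoms_setup(char *str)
{
	int val;

	if (get_option(&str, &val) != 0)	/* 1 or 2 => an integer was parsed */
		opt_max_doms = val;
	return 1;
}
/* registered with something like: __setup("maxdoms=", maxdoms_setup); */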
  337.53 +
  337.54 +/**
  337.55 + *	get_options - Parse a string into a list of integers
  337.56 + *	@str: String to be parsed
  337.57 + *	@nints: size of integer array
  337.58 + *	@ints: integer array
  337.59 + *
  337.60 + *	This function parses a string containing a comma-separated
  337.61 + *	list of integers.  The parse halts when the array is
  337.62 + *	full, or when no more numbers can be retrieved from the
  337.63 + *	string.
  337.64 + *
  337.65 + *	Return value is a pointer to the character in the string which caused
  337.66 + *	the parse to end (typically a null terminator, if @str is
  337.67 + *	completely parseable).
  337.68 + */
  337.69 + 
  337.70 +char *get_options(const char *str, int nints, int *ints)
  337.71 +{
  337.72 +	int res, i = 1;
  337.73 +
  337.74 +	while (i < nints) {
  337.75 +		res = get_option ((char **)&str, ints + i);
  337.76 +		if (res == 0)
  337.77 +			break;
  337.78 +		i++;
  337.79 +		if (res == 1)
  337.80 +			break;
  337.81 +	}
  337.82 +	ints[0] = i - 1;
  337.83 +	return (char *)str;
  337.84 +}
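And a sketch for get_options() on a comma-separated list; note that ints[0] receives the count and the parsed values start at ints[1] (the "ports=" option and array are hypothetical):

static int ports[4];			/* ports[0] = count, ports[1..3] = values */

static int __init ports_setup(char *str)
{
	/* "1,2,3" yields ports[0]=3, ports[1]=1, ports[2]=2, ports[3]=3 */
	get_options(str, ARRAY_SIZE(ports), ports);
	return 1;
}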
  337.85 +
  337.86 +/**
  337.87 + *	memparse - parse a string with mem suffixes into a number
  337.88 + *	@ptr: Where parse begins
  337.89 + *	@retptr: (output) Pointer to next char after parse completes
  337.90 + *
  337.91 + *	Parses a string into a number.  The number stored at @ptr is
  337.92 + *	potentially suffixed with %K (for kilobytes, or 1024 bytes),
  337.93 + *	%M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
  337.94 + *	1073741824 bytes).  If the number is suffixed with K, M, or G, then
  337.95 + *	the return value is the number multiplied by one kilobyte, one
  337.96 + *	megabyte, or one gigabyte, respectively.
  337.97 + */
  337.98 +
  337.99 +unsigned long long memparse (char *ptr, char **retptr)
 337.100 +{
 337.101 +	unsigned long long ret = simple_strtoull (ptr, retptr, 0);
 337.102 +
 337.103 +	switch (**retptr) {
 337.104 +	case 'G':
 337.105 +	case 'g':
 337.106 +		ret <<= 10;
 337.107 +	case 'M':
 337.108 +	case 'm':
 337.109 +		ret <<= 10;
 337.110 +	case 'K':
 337.111 +	case 'k':
 337.112 +		ret <<= 10;
 337.113 +		(*retptr)++;
 337.114 +	default:
 337.115 +		break;
 337.116 +	}
 337.117 +	return ret;
 337.118 +}
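A short usage sketch for memparse(); the "512K" value is just an example:

static unsigned long long parse_mem_example(void)
{
	char arg[] = "512K";		/* e.g. the value of a hypothetical "mem=512K" option */
	char *end;
	unsigned long long bytes = memparse(arg, &end);

	/* bytes == 524288 here, and end points just past the 'K' suffix */
	return bytes;
}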
 337.119 +
 337.120 +
 337.121 +EXPORT_SYMBOL(memparse);
 337.122 +EXPORT_SYMBOL(get_option);
 337.123 +EXPORT_SYMBOL(get_options);
   338.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   338.2 +++ b/xen/arch/ia64/linux/efi_stub.S	Tue Aug 16 12:15:23 2005 +0800
   338.3 @@ -0,0 +1,86 @@
   338.4 +/*
   338.5 + * EFI call stub.
   338.6 + *
   338.7 + * Copyright (C) 1999-2001 Hewlett-Packard Co
   338.8 + *	David Mosberger <davidm@hpl.hp.com>
   338.9 + *
  338.10 + * This stub allows us to make EFI calls in physical mode with interrupts
  338.11 + * turned off.  We need this because we can't call SetVirtualMap() until
  338.12 + * the kernel has booted far enough to allow allocation of struct vma_struct
  338.13 + * entries (which we would need to map stuff with memory attributes other
  338.14 + * than uncached or writeback...).  Since the GetTime() service gets called
  338.15 + * earlier than that, we need to be able to make physical mode EFI calls from
  338.16 + * the kernel.
  338.17 + */
  338.18 +
  338.19 +/*
  338.20 + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
  338.21 + * Abstraction Layer Specification", revision 2.6e).  Note that
  338.22 + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
  338.23 + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
  338.24 + * (the br.ia instruction fails unless psr.dfl and psr.dfh are
  338.25 + * cleared).  Fortunately, SAL promises not to touch the floating
  338.26 + * point regs, so at least we don't have to save f2-f127.
  338.27 + */
  338.28 +#define PSR_BITS_TO_CLEAR						\
  338.29 +	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |		\
  338.30 +	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	\
  338.31 +	 IA64_PSR_DFL | IA64_PSR_DFH)
  338.32 +
  338.33 +#define PSR_BITS_TO_SET							\
  338.34 +	(IA64_PSR_BN)
  338.35 +
  338.36 +#include <asm/processor.h>
  338.37 +#include <asm/asmmacro.h>
  338.38 +
  338.39 +/*
  338.40 + * Inputs:
  338.41 + *	in0 = address of function descriptor of EFI routine to call
  338.42 + *	in1..in7 = arguments to routine
  338.43 + *
  338.44 + * Outputs:
  338.45 + *	r8 = EFI_STATUS returned by called function
  338.46 + */
  338.47 +
  338.48 +GLOBAL_ENTRY(efi_call_phys)
  338.49 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
  338.50 +	alloc loc1=ar.pfs,8,7,7,0
  338.51 +	ld8 r2=[in0],8			// load EFI function's entry point
  338.52 +	mov loc0=rp
  338.53 +	.body
  338.54 +	;;
  338.55 +	mov loc2=gp			// save global pointer
  338.56 +	mov loc4=ar.rsc			// save RSE configuration
  338.57 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
  338.58 +	;;
  338.59 +	ld8 gp=[in0]			// load EFI function's global pointer
  338.60 +	movl r16=PSR_BITS_TO_CLEAR
  338.61 +	mov loc3=psr			// save processor status word
  338.62 +	movl r17=PSR_BITS_TO_SET
  338.63 +	;;
  338.64 +	or loc3=loc3,r17
  338.65 +	mov b6=r2
  338.66 +	;;
  338.67 +	andcm r16=loc3,r16		// get psr with IT, DT, and RT bits cleared
  338.68 +	br.call.sptk.many rp=ia64_switch_mode_phys
  338.69 +.ret0:	mov out4=in5
  338.70 +	mov out0=in1
  338.71 +	mov out1=in2
  338.72 +	mov out2=in3
  338.73 +	mov out3=in4
  338.74 +	mov out5=in6
  338.75 +	mov out6=in7
  338.76 +	mov loc5=r19
  338.77 +	mov loc6=r20
  338.78 +	br.call.sptk.many rp=b6		// call the EFI function
  338.79 +.ret1:	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
  338.80 +	mov r16=loc3
  338.81 +	mov r19=loc5
  338.82 +	mov r20=loc6
  338.83 +	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
  338.84 +.ret2:	mov ar.rsc=loc4			// restore RSE configuration
  338.85 +	mov ar.pfs=loc1
  338.86 +	mov rp=loc0
  338.87 +	mov gp=loc2
  338.88 +	br.ret.sptk.many rp
  338.89 +END(efi_call_phys)
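From C, efi_call_phys() takes the address of the EFI function descriptor as its first argument and forwards up to seven further arguments. A hedged usage sketch; the prototype and the runtime-services pointer below follow how the ia64 Linux efi.c typically declares them and are assumptions, not part of this patch:

#include <linux/efi.h>			/* efi_status_t, efi_time_t, efi_runtime_services_t */

extern u64 efi_call_phys (void *, ...);	/* assumed prototype */

static efi_status_t get_time_phys(efi_runtime_services_t *runtime,
				  efi_time_t *tm, efi_time_cap_t *tc)
{
	/* runtime->get_time is assumed to hold the physical address of the GetTime() descriptor */
	return efi_call_phys(__va(runtime->get_time), tm, tc);
}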
   339.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   339.2 +++ b/xen/arch/ia64/linux/extable.c	Tue Aug 16 12:15:23 2005 +0800
   339.3 @@ -0,0 +1,93 @@
   339.4 +/*
   339.5 + * Kernel exception handling table support.  Derived from arch/alpha/mm/extable.c.
   339.6 + *
   339.7 + * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co
   339.8 + *	David Mosberger-Tang <davidm@hpl.hp.com>
   339.9 + */
  339.10 +
  339.11 +#include <linux/config.h>
  339.12 +
  339.13 +#include <asm/uaccess.h>
  339.14 +#include <asm/module.h>
  339.15 +
  339.16 +static inline int
  339.17 +compare_entries (struct exception_table_entry *l, struct exception_table_entry *r)
  339.18 +{
  339.19 +	u64 lip = (u64) &l->addr + l->addr;
  339.20 +	u64 rip = (u64) &r->addr + r->addr;
  339.21 +
  339.22 +	if (lip < rip)
  339.23 +		return -1;
  339.24 +	if (lip == rip)
  339.25 +		return 0;
  339.26 +	else
  339.27 +		return 1;
  339.28 +}
  339.29 +
  339.30 +static inline void
  339.31 +swap_entries (struct exception_table_entry *l, struct exception_table_entry *r)
  339.32 +{
  339.33 +	u64 delta = (u64) r - (u64) l;
  339.34 +	struct exception_table_entry tmp;
  339.35 +
  339.36 +	tmp = *l;
  339.37 +	l->addr = r->addr + delta;
  339.38 +	l->cont = r->cont + delta;
  339.39 +	r->addr = tmp.addr - delta;
  339.40 +	r->cont = tmp.cont - delta;
  339.41 +}
  339.42 +
  339.43 +/*
  339.44 + * Sort the exception table.  It's usually already sorted, but there may be unordered
  339.45 + * entries due to multiple text sections (such as the .init text section).  Note that the
  339.46 + * exception-table-entries contain location-relative addresses, which requires a bit of
  339.47 + * care during sorting to avoid overflows in the offset members (e.g., it would not be
  339.48 + * safe to make a temporary copy of an exception-table entry on the stack, because the
  339.49 + * stack may be more than 2GB away from the exception-table).
  339.50 + */
  339.51 +void
  339.52 +sort_extable (struct exception_table_entry *start, struct exception_table_entry *finish)
  339.53 +{
  339.54 +	struct exception_table_entry *p, *q;
  339.55 +
  339.56 + 	/* insertion sort */
  339.57 +	for (p = start + 1; p < finish; ++p)
  339.58 +		/* start .. p-1 is sorted; push p down to its proper place */
  339.59 +		for (q = p; q > start && compare_entries(&q[0], &q[-1]) < 0; --q)
  339.60 +			swap_entries(&q[0], &q[-1]);
  339.61 +}
  339.62 +
  339.63 +const struct exception_table_entry *
  339.64 +search_extable (const struct exception_table_entry *first,
  339.65 +		const struct exception_table_entry *last,
  339.66 +		unsigned long ip)
  339.67 +{
  339.68 +	const struct exception_table_entry *mid;
  339.69 +	unsigned long mid_ip;
  339.70 +	long diff;
  339.71 +
  339.72 +	while (first <= last) {
  339.73 +		mid = &first[(last - first)/2];
  339.74 +		mid_ip = (u64) &mid->addr + mid->addr;
  339.75 +		diff = mid_ip - ip;
  339.76 +		if (diff == 0)
  339.77 +			return mid;
  339.78 +		else if (diff < 0)
  339.79 +			first = mid + 1;
  339.80 +		else
  339.81 +			last = mid - 1;
  339.82 +	}
  339.83 +	return NULL;
  339.84 +}
  339.85 +
  339.86 +void
  339.87 +ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e)
  339.88 +{
  339.89 +	long fix = (u64) &e->cont + e->cont;
  339.90 +
  339.91 +	regs->r8 = -EFAULT;
  339.92 +	if (fix & 4)
  339.93 +		regs->r9 = 0;
  339.94 +	regs->cr_iip = fix & ~0xf;
  339.95 +	ia64_psr(regs)->ri = fix & 0x3;		/* set continuation slot number */
  339.96 +}
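To make the location-relative encoding concrete, here is a small sketch of how a single entry is interpreted, following the conventions visible in search_extable() and ia64_handle_exception() above (the helper is purely illustrative):

static void decode_entry_example(const struct exception_table_entry *e)
{
	u64 fault_ip = (u64) &e->addr + e->addr;  /* absolute address of the faulting slot  */
	u64 fixup    = (u64) &e->cont + e->cont;  /* absolute continuation "address"        */

	u64 cont_iip  = fixup & ~0xfUL;           /* bundle holding the continuation code   */
	unsigned slot = fixup & 0x3;              /* slot number execution resumes at       */
	int clears_r9 = (fixup & 0x4) != 0;       /* ia64_handle_exception() also zeroes r9 */

	(void) fault_ip; (void) cont_iip; (void) slot; (void) clears_r9;
}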
   340.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   340.2 +++ b/xen/arch/ia64/linux/hpsim.S	Tue Aug 16 12:15:23 2005 +0800
   340.3 @@ -0,0 +1,10 @@
   340.4 +#include <asm/asmmacro.h>
   340.5 +
   340.6 +/*
   340.7 + * Simulator system call.
   340.8 + */
   340.9 +GLOBAL_ENTRY(ia64_ssc)
  340.10 +	mov r15=r36
  340.11 +	break 0x80001
  340.12 +	br.ret.sptk.many rp
  340.13 +END(ia64_ssc)
   341.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   341.2 +++ b/xen/arch/ia64/linux/ia64_ksyms.c	Tue Aug 16 12:15:23 2005 +0800
   341.3 @@ -0,0 +1,127 @@
   341.4 +/*
   341.5 + * Architecture-specific kernel symbols
   341.6 + *
   341.7 + * Don't put any exports here unless it's defined in an assembler file.
   341.8 + * All other exports should be put directly after the definition.
   341.9 + */
  341.10 +
  341.11 +#include <linux/config.h>
  341.12 +#include <linux/module.h>
  341.13 +
  341.14 +#include <linux/string.h>
  341.15 +EXPORT_SYMBOL(memset);
  341.16 +EXPORT_SYMBOL(memchr);
  341.17 +EXPORT_SYMBOL(memcmp);
  341.18 +EXPORT_SYMBOL(memcpy);
  341.19 +EXPORT_SYMBOL(memmove);
  341.20 +EXPORT_SYMBOL(memscan);
  341.21 +EXPORT_SYMBOL(strcat);
  341.22 +EXPORT_SYMBOL(strchr);
  341.23 +EXPORT_SYMBOL(strcmp);
  341.24 +EXPORT_SYMBOL(strcpy);
  341.25 +EXPORT_SYMBOL(strlen);
  341.26 +EXPORT_SYMBOL(strncat);
  341.27 +EXPORT_SYMBOL(strncmp);
  341.28 +EXPORT_SYMBOL(strncpy);
  341.29 +EXPORT_SYMBOL(strnlen);
  341.30 +EXPORT_SYMBOL(strrchr);
  341.31 +EXPORT_SYMBOL(strstr);
  341.32 +EXPORT_SYMBOL(strpbrk);
  341.33 +
  341.34 +#include <asm/checksum.h>
  341.35 +EXPORT_SYMBOL(ip_fast_csum);		/* hand-coded assembly */
  341.36 +
  341.37 +#include <asm/semaphore.h>
  341.38 +EXPORT_SYMBOL(__down);
  341.39 +EXPORT_SYMBOL(__down_interruptible);
  341.40 +EXPORT_SYMBOL(__down_trylock);
  341.41 +EXPORT_SYMBOL(__up);
  341.42 +
  341.43 +#include <asm/page.h>
  341.44 +EXPORT_SYMBOL(clear_page);
  341.45 +
  341.46 +#ifdef CONFIG_VIRTUAL_MEM_MAP
  341.47 +#include <linux/bootmem.h>
  341.48 +EXPORT_SYMBOL(max_low_pfn);	/* defined by bootmem.c, but not exported by generic code */
  341.49 +#endif
  341.50 +
  341.51 +#include <asm/processor.h>
  341.52 +EXPORT_SYMBOL(per_cpu__cpu_info);
  341.53 +#ifdef CONFIG_SMP
  341.54 +EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
  341.55 +#endif
  341.56 +
  341.57 +#include <asm/uaccess.h>
  341.58 +EXPORT_SYMBOL(__copy_user);
  341.59 +EXPORT_SYMBOL(__do_clear_user);
  341.60 +EXPORT_SYMBOL(__strlen_user);
  341.61 +EXPORT_SYMBOL(__strncpy_from_user);
  341.62 +EXPORT_SYMBOL(__strnlen_user);
  341.63 +
  341.64 +#include <asm/unistd.h>
  341.65 +EXPORT_SYMBOL(__ia64_syscall);
  341.66 +
  341.67 +/* from arch/ia64/lib */
  341.68 +extern void __divsi3(void);
  341.69 +extern void __udivsi3(void);
  341.70 +extern void __modsi3(void);
  341.71 +extern void __umodsi3(void);
  341.72 +extern void __divdi3(void);
  341.73 +extern void __udivdi3(void);
  341.74 +extern void __moddi3(void);
  341.75 +extern void __umoddi3(void);
  341.76 +
  341.77 +EXPORT_SYMBOL(__divsi3);
  341.78 +EXPORT_SYMBOL(__udivsi3);
  341.79 +EXPORT_SYMBOL(__modsi3);
  341.80 +EXPORT_SYMBOL(__umodsi3);
  341.81 +EXPORT_SYMBOL(__divdi3);
  341.82 +EXPORT_SYMBOL(__udivdi3);
  341.83 +EXPORT_SYMBOL(__moddi3);
  341.84 +EXPORT_SYMBOL(__umoddi3);
  341.85 +
  341.86 +#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
  341.87 +extern void xor_ia64_2(void);
  341.88 +extern void xor_ia64_3(void);
  341.89 +extern void xor_ia64_4(void);
  341.90 +extern void xor_ia64_5(void);
  341.91 +
  341.92 +EXPORT_SYMBOL(xor_ia64_2);
  341.93 +EXPORT_SYMBOL(xor_ia64_3);
  341.94 +EXPORT_SYMBOL(xor_ia64_4);
  341.95 +EXPORT_SYMBOL(xor_ia64_5);
  341.96 +#endif
  341.97 +
  341.98 +#include <asm/pal.h>
  341.99 +EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
 341.100 +EXPORT_SYMBOL(ia64_pal_call_phys_static);
 341.101 +EXPORT_SYMBOL(ia64_pal_call_stacked);
 341.102 +EXPORT_SYMBOL(ia64_pal_call_static);
 341.103 +EXPORT_SYMBOL(ia64_load_scratch_fpregs);
 341.104 +EXPORT_SYMBOL(ia64_save_scratch_fpregs);
 341.105 +
 341.106 +#include <asm/unwind.h>
 341.107 +EXPORT_SYMBOL(unw_init_running);
 341.108 +
 341.109 +#ifdef ASM_SUPPORTED
 341.110 +# ifdef CONFIG_SMP
 341.111 +#  if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 341.112 +/*
 341.113 + * This is not a normal routine and we don't want a function descriptor for it, so we use
 341.114 + * a fake declaration here.
 341.115 + */
 341.116 +extern char ia64_spinlock_contention_pre3_4;
 341.117 +EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4);
 341.118 +#  else
 341.119 +/*
 341.120 + * This is not a normal routine and we don't want a function descriptor for it, so we use
 341.121 + * a fake declaration here.
 341.122 + */
 341.123 +extern char ia64_spinlock_contention;
 341.124 +EXPORT_SYMBOL(ia64_spinlock_contention);
 341.125 +#  endif
 341.126 +# endif
 341.127 +#endif
 341.128 +
 341.129 +extern char ia64_ivt[];
 341.130 +EXPORT_SYMBOL(ia64_ivt);
   342.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   342.2 +++ b/xen/arch/ia64/linux/irq_lsapic.c	Tue Aug 16 12:15:23 2005 +0800
   342.3 @@ -0,0 +1,37 @@
   342.4 +/*
   342.5 + * LSAPIC Interrupt Controller
   342.6 + *
   342.7 + * This takes care of interrupts that are generated by the CPU's
   342.8 + * internal Streamlined Advanced Programmable Interrupt Controller
   342.9 + * (LSAPIC), such as the ITC and IPI interrupts.
  342.10 + *
  342.11 + * Copyright (C) 1999 VA Linux Systems
  342.12 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  342.13 + * Copyright (C) 2000 Hewlett-Packard Co
  342.14 + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
  342.15 + */
  342.16 +
  342.17 +#include <linux/sched.h>
  342.18 +#include <linux/irq.h>
  342.19 +
  342.20 +static unsigned int
  342.21 +lsapic_noop_startup (unsigned int irq)
  342.22 +{
  342.23 +	return 0;
  342.24 +}
  342.25 +
  342.26 +static void
  342.27 +lsapic_noop (unsigned int irq)
  342.28 +{
  342.29 +	/* nothing to do... */
  342.30 +}
  342.31 +
  342.32 +struct hw_interrupt_type irq_type_ia64_lsapic = {
  342.33 +	.typename =	"LSAPIC",
  342.34 +	.startup =	lsapic_noop_startup,
  342.35 +	.shutdown =	lsapic_noop,
  342.36 +	.enable =	lsapic_noop,
  342.37 +	.disable =	lsapic_noop,
  342.38 +	.ack =		lsapic_noop,
  342.39 +	.end =		lsapic_noop
  342.40 +};
   343.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   343.2 +++ b/xen/arch/ia64/linux/lib/Makefile	Tue Aug 16 12:15:23 2005 +0800
   343.3 @@ -0,0 +1,44 @@
   343.4 +#
   343.5 +# Makefile for ia64-specific library routines..
   343.6 +#
   343.7 +
   343.8 +include $(BASEDIR)/Rules.mk
   343.9 +
  343.10 +OBJS := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o			\
  343.11 +	__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o			\
  343.12 +	bitop.o checksum.o clear_page.o csum_partial_copy.o copy_page.o	\
  343.13 +	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o	\
  343.14 +	flush.o ip_fast_csum.o do_csum.o copy_user.o			\
  343.15 +	memset.o strlen.o memcpy.o 
  343.16 +
  343.17 +default: $(OBJS)
  343.18 +	$(LD) -r -o ia64lib.o $(OBJS)
  343.19 +
  343.20 +AFLAGS += -I$(BASEDIR)/include -D__ASSEMBLY__
  343.21 +
  343.22 +__divdi3.o: idiv64.S
  343.23 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $<
  343.24 +
  343.25 +__udivdi3.o: idiv64.S
  343.26 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DUNSIGNED -c -o $@ $<
  343.27 +
  343.28 +__moddi3.o: idiv64.S
  343.29 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -c -o $@ $<
  343.30 +
  343.31 +__umoddi3.o: idiv64.S
  343.32 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -DUNSIGNED -c -o $@ $<
  343.33 +
  343.34 +__divsi3.o: idiv32.S
  343.35 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $<
  343.36 +
  343.37 +__udivsi3.o: idiv32.S
  343.38 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DUNSIGNED -c -o $@ $<
  343.39 +
  343.40 +__modsi3.o: idiv32.S
  343.41 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -c -o $@ $<
  343.42 +
  343.43 +__umodsi3.o: idiv32.S
  343.44 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -DUNSIGNED -c -o $@ $<
  343.45 +
  343.46 +clean:
  343.47 +	rm -f *.o *~
   344.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   344.2 +++ b/xen/arch/ia64/linux/lib/bitop.c	Tue Aug 16 12:15:23 2005 +0800
   344.3 @@ -0,0 +1,88 @@
   344.4 +#include <linux/compiler.h>
   344.5 +#include <linux/types.h>
   344.6 +#include <asm/intrinsics.h>
   344.7 +#include <linux/module.h>
   344.8 +#include <linux/bitops.h>
   344.9 +
  344.10 +/*
  344.11 + * Find next zero bit in a bitmap reasonably efficiently..
  344.12 + */
  344.13 +
  344.14 +int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset)
  344.15 +{
  344.16 +	unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
  344.17 +	unsigned long result = offset & ~63UL;
  344.18 +	unsigned long tmp;
  344.19 +
  344.20 +	if (offset >= size)
  344.21 +		return size;
  344.22 +	size -= result;
  344.23 +	offset &= 63UL;
  344.24 +	if (offset) {
  344.25 +		tmp = *(p++);
  344.26 +		tmp |= ~0UL >> (64-offset);
  344.27 +		if (size < 64)
  344.28 +			goto found_first;
  344.29 +		if (~tmp)
  344.30 +			goto found_middle;
  344.31 +		size -= 64;
  344.32 +		result += 64;
  344.33 +	}
  344.34 +	while (size & ~63UL) {
  344.35 +		if (~(tmp = *(p++)))
  344.36 +			goto found_middle;
  344.37 +		result += 64;
  344.38 +		size -= 64;
  344.39 +	}
  344.40 +	if (!size)
  344.41 +		return result;
  344.42 +	tmp = *p;
  344.43 +found_first:
  344.44 +	tmp |= ~0UL << size;
  344.45 +	if (tmp == ~0UL)		/* any bits zero? */
  344.46 +		return result + size;	/* nope */
  344.47 +found_middle:
  344.48 +	return result + ffz(tmp);
  344.49 +}
  344.50 +EXPORT_SYMBOL(__find_next_zero_bit);
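A usage sketch; in practice this is normally reached through the find_next_zero_bit() wrapper in the bitops headers, but calling the out-of-line version directly looks like this:

static int find_zero_example(void)
{
	unsigned long map[2] = { ~0UL, ~0UL };	/* 128-bit bitmap, all bits set */

	__clear_bit(70, map);			/* bit 70 becomes the only zero bit */

	/* returns 70; if no zero bit existed it would return the size, 128 */
	return __find_next_zero_bit(map, 128, 0);
}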
  344.51 +
  344.52 +/*
  344.53 + * Find next bit in a bitmap reasonably efficiently..
  344.54 + */
  344.55 +int __find_next_bit(const void *addr, unsigned long size, unsigned long offset)
  344.56 +{
  344.57 +	unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
  344.58 +	unsigned long result = offset & ~63UL;
  344.59 +	unsigned long tmp;
  344.60 +
  344.61 +	if (offset >= size)
  344.62 +		return size;
  344.63 +	size -= result;
  344.64 +	offset &= 63UL;
  344.65 +	if (offset) {
  344.66 +		tmp = *(p++);
  344.67 +		tmp &= ~0UL << offset;
  344.68 +		if (size < 64)
  344.69 +			goto found_first;
  344.70 +		if (tmp)
  344.71 +			goto found_middle;
  344.72 +		size -= 64;
  344.73 +		result += 64;
  344.74 +	}
  344.75 +	while (size & ~63UL) {
  344.76 +		if ((tmp = *(p++)))
  344.77 +			goto found_middle;
  344.78 +		result += 64;
  344.79 +		size -= 64;
  344.80 +	}
  344.81 +	if (!size)
  344.82 +		return result;
  344.83 +	tmp = *p;
  344.84 +  found_first:
  344.85 +	tmp &= ~0UL >> (64-size);
  344.86 +	if (tmp == 0UL)		/* Are any bits set? */
  344.87 +		return result + size; /* Nope. */
  344.88 +  found_middle:
  344.89 +	return result + __ffs(tmp);
  344.90 +}
  344.91 +EXPORT_SYMBOL(__find_next_bit);
   345.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   345.2 +++ b/xen/arch/ia64/linux/lib/carta_random.S	Tue Aug 16 12:15:23 2005 +0800
   345.3 @@ -0,0 +1,54 @@
   345.4 +/*
   345.5 + * Fast, simple, yet decent quality random number generator based on
   345.6 + * a paper by David G. Carta ("Two Fast Implementations of the
   345.7 + * `Minimal Standard' Random Number Generator," Communications of the
   345.8 + * ACM, January, 1990).
   345.9 + *
  345.10 + * Copyright (C) 2002 Hewlett-Packard Co
  345.11 + *	David Mosberger-Tang <davidm@hpl.hp.com>
  345.12 + */
  345.13 +
  345.14 +#include <asm/asmmacro.h>
  345.15 +
  345.16 +#define a	r2
  345.17 +#define m	r3
  345.18 +#define lo	r8
  345.19 +#define hi	r9
  345.20 +#define t0	r16
  345.21 +#define t1	r17
  345.22 +#define	seed	r32
  345.23 +
  345.24 +GLOBAL_ENTRY(carta_random32)
  345.25 +	movl	a = (16807 << 16) | 16807
  345.26 +	;;
  345.27 +	pmpyshr2.u t0 = a, seed, 0
  345.28 +	pmpyshr2.u t1 = a, seed, 16
  345.29 +	;;
  345.30 +	unpack2.l t0 = t1, t0
  345.31 +	dep	m = -1, r0, 0, 31
  345.32 +	;;
  345.33 +	zxt4	lo = t0
  345.34 +	shr.u	hi = t0, 32
  345.35 +	;;
  345.36 +	dep	t0 = 0, hi, 15, 49	// t0 = (hi & 0x7fff)
  345.37 +	;;
  345.38 +	shl	t0 = t0, 16		// t0 = (hi & 0x7fff) << 16
  345.39 +	shr	t1 = hi, 15		// t1 = (hi >> 15)
  345.40 +	;;
  345.41 +	add	lo = lo, t0
  345.42 +	;;
  345.43 +	cmp.gtu	p6, p0 = lo, m
  345.44 +	;;
  345.45 +(p6)	and	lo = lo, m
  345.46 +	;;
  345.47 +(p6)	add	lo = 1, lo
  345.48 +	;;
  345.49 +	add	lo = lo, t1
  345.50 +	;;
  345.51 +	cmp.gtu p6, p0 = lo, m
  345.52 +	;;
  345.53 +(p6)	and	lo = lo, m
  345.54 +	;;
  345.55 +(p6)	add	lo = 1, lo
  345.56 +	br.ret.sptk.many rp
  345.57 +END(carta_random32)
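For comparison, a plain C formulation of the technique from the cited paper: the 16807*seed product is split and the high part is folded back in by addition rather than by a division. This is only a sketch of the method; the assembly above uses IA-64 parallel multiplies on a full 32-bit seed and is not claimed to be bit-for-bit identical:

/* minimal-standard PRNG: seed' = 16807 * seed mod (2^31 - 1) */
static unsigned long carta_random31(unsigned long seed)
{
	const unsigned long m = 0x7fffffffUL;		/* 2^31 - 1 */
	unsigned long long p = 16807ULL * seed;		/* at most ~46 bits */
	unsigned long lo = p & m;			/* low 31 bits       */
	unsigned long hi = p >> 31;			/* remaining bits    */

	lo += hi;					/* 2^31 == 1 (mod m) */
	if (lo > m)
		lo = (lo & m) + 1;
	return lo;
}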
   346.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   346.2 +++ b/xen/arch/ia64/linux/lib/checksum.c	Tue Aug 16 12:15:23 2005 +0800
   346.3 @@ -0,0 +1,102 @@
   346.4 +/*
   346.5 + * Network checksum routines
   346.6 + *
   346.7 + * Copyright (C) 1999, 2003 Hewlett-Packard Co
   346.8 + *	Stephane Eranian <eranian@hpl.hp.com>
   346.9 + *
  346.10 + * Most of the code coming from arch/alpha/lib/checksum.c
  346.11 + *
  346.12 + * This file contains network checksum routines that are better done
  346.13 + * in an architecture-specific manner due to speed..
  346.14 + */
  346.15 +
  346.16 +#include <linux/module.h>
  346.17 +#include <linux/string.h>
  346.18 +
  346.19 +#include <asm/byteorder.h>
  346.20 +
  346.21 +static inline unsigned short
  346.22 +from64to16 (unsigned long x)
  346.23 +{
  346.24 +	/* add up 32-bit words for 33 bits */
  346.25 +	x = (x & 0xffffffff) + (x >> 32);
  346.26 +	/* add up 16-bit and 17-bit words for 17+c bits */
  346.27 +	x = (x & 0xffff) + (x >> 16);
  346.28 +	/* add up 16-bit and 2-bit for 16+c bit */
  346.29 +	x = (x & 0xffff) + (x >> 16);
  346.30 +	/* add up carry.. */
  346.31 +	x = (x & 0xffff) + (x >> 16);
  346.32 +	return x;
  346.33 +}
  346.34 +
  346.35 +/*
  346.36 + * computes the checksum of the TCP/UDP pseudo-header
  346.37 + * returns a 16-bit checksum, already complemented.
  346.38 + */
  346.39 +unsigned short int
  346.40 +csum_tcpudp_magic (unsigned long saddr, unsigned long daddr, unsigned short len,
  346.41 +		   unsigned short proto, unsigned int sum)
  346.42 +{
  346.43 +	return ~from64to16(saddr + daddr + sum + ((unsigned long) ntohs(len) << 16) +
  346.44 +			   ((unsigned long) proto << 8));
  346.45 +}
  346.46 +
  346.47 +EXPORT_SYMBOL(csum_tcpudp_magic);
  346.48 +
  346.49 +unsigned int
  346.50 +csum_tcpudp_nofold (unsigned long saddr, unsigned long daddr, unsigned short len,
  346.51 +		    unsigned short proto, unsigned int sum)
  346.52 +{
  346.53 +	unsigned long result;
  346.54 +
  346.55 +	result = (saddr + daddr + sum +
  346.56 +		  ((unsigned long) ntohs(len) << 16) +
  346.57 +		  ((unsigned long) proto << 8));
  346.58 +
  346.59 +	/* Fold down to 32-bits so we don't lose in the typedef-less network stack.  */
  346.60 +	/* 64 to 33 */
  346.61 +	result = (result & 0xffffffff) + (result >> 32);
  346.62 +	/* 33 to 32 */
  346.63 +	result = (result & 0xffffffff) + (result >> 32);
  346.64 +	return result;
  346.65 +}
  346.66 +
  346.67 +extern unsigned long do_csum (const unsigned char *, long);
  346.68 +
  346.69 +/*
  346.70 + * computes the checksum of a memory block at buff, length len,
  346.71 + * and adds in "sum" (32-bit)
  346.72 + *
  346.73 + * returns a 32-bit number suitable for feeding into itself
  346.74 + * or csum_tcpudp_magic
  346.75 + *
  346.76 + * this function must be called with even lengths, except
  346.77 + * for the last fragment, which may be odd
  346.78 + *
  346.79 + * it's best to have buff aligned on a 32-bit boundary
  346.80 + */
  346.81 +unsigned int
  346.82 +csum_partial (const unsigned char * buff, int len, unsigned int sum)
  346.83 +{
  346.84 +	unsigned long result = do_csum(buff, len);
  346.85 +
  346.86 +	/* add in old sum, and carry.. */
  346.87 +	result += sum;
  346.88 +	/* 32+c bits -> 32 bits */
  346.89 +	result = (result & 0xffffffff) + (result >> 32);
  346.90 +	return result;
  346.91 +}
  346.92 +
  346.93 +EXPORT_SYMBOL(csum_partial);
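A hedged sketch of how the two routines above combine for a UDP checksum over a linear buffer (17 is IPPROTO_UDP; scatter-gather and error handling omitted):

static unsigned short udp_csum_example(unsigned long saddr, unsigned long daddr,
				       unsigned char *udp_hdr, int udp_len)
{
	/* sum the UDP header + payload, then fold in the pseudo-header */
	unsigned int sum = csum_partial(udp_hdr, udp_len, 0);

	return csum_tcpudp_magic(saddr, daddr, udp_len, 17 /* IPPROTO_UDP */, sum);
}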
  346.94 +
  346.95 +/*
  346.96 + * this routine is used for miscellaneous IP-like checksums, mainly
  346.97 + * in icmp.c
  346.98 + */
  346.99 +unsigned short
 346.100 +ip_compute_csum (unsigned char * buff, int len)
 346.101 +{
 346.102 +	return ~do_csum(buff,len);
 346.103 +}
 346.104 +
 346.105 +EXPORT_SYMBOL(ip_compute_csum);
   347.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   347.2 +++ b/xen/arch/ia64/linux/lib/clear_page.S	Tue Aug 16 12:15:23 2005 +0800
   347.3 @@ -0,0 +1,77 @@
   347.4 +/*
   347.5 + * Copyright (C) 1999-2002 Hewlett-Packard Co
   347.6 + *	Stephane Eranian <eranian@hpl.hp.com>
   347.7 + *	David Mosberger-Tang <davidm@hpl.hp.com>
   347.8 + * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
   347.9 + *
  347.10 + * 1/06/01 davidm	Tuned for Itanium.
  347.11 + * 2/12/02 kchen	Tuned for both Itanium and McKinley
  347.12 + * 3/08/02 davidm	Some more tweaking
  347.13 + */
  347.14 +#include <linux/config.h>
  347.15 +
  347.16 +#include <asm/asmmacro.h>
  347.17 +#include <asm/page.h>
  347.18 +
  347.19 +#ifdef CONFIG_ITANIUM
  347.20 +# define L3_LINE_SIZE	64	// Itanium L3 line size
  347.21 +# define PREFETCH_LINES	9	// magic number
  347.22 +#else
  347.23 +# define L3_LINE_SIZE	128	// McKinley L3 line size
  347.24 +# define PREFETCH_LINES	12	// magic number
  347.25 +#endif
  347.26 +
  347.27 +#define saved_lc	r2
  347.28 +#define dst_fetch	r3
  347.29 +#define dst1		r8
  347.30 +#define dst2		r9
  347.31 +#define dst3		r10
  347.32 +#define dst4		r11
  347.33 +
  347.34 +#define dst_last	r31
  347.35 +
  347.36 +GLOBAL_ENTRY(clear_page)
  347.37 +	.prologue
  347.38 +	.regstk 1,0,0,0
  347.39 +	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
  347.40 +	.save ar.lc, saved_lc
  347.41 +	mov saved_lc = ar.lc
  347.42 +
  347.43 +	.body
  347.44 +	mov ar.lc = (PREFETCH_LINES - 1)
  347.45 +	mov dst_fetch = in0
  347.46 +	adds dst1 = 16, in0
  347.47 +	adds dst2 = 32, in0
  347.48 +	;;
  347.49 +.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
  347.50 +	adds dst3 = 48, in0		// executing this multiple times is harmless
  347.51 +	br.cloop.sptk.few .fetch
  347.52 +	;;
  347.53 +	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
  347.54 +	mov ar.lc = r16			// one L3 line per iteration
  347.55 +	adds dst4 = 64, in0
  347.56 +	;;
  347.57 +#ifdef CONFIG_ITANIUM
  347.58 +	// Optimized for Itanium
  347.59 +1:	stf.spill.nta [dst1] = f0, 64
  347.60 +	stf.spill.nta [dst2] = f0, 64
  347.61 +	cmp.lt p8,p0=dst_fetch, dst_last
  347.62 +	;;
  347.63 +#else
  347.64 +	// Optimized for McKinley
  347.65 +1:	stf.spill.nta [dst1] = f0, 64
  347.66 +	stf.spill.nta [dst2] = f0, 64
  347.67 +	stf.spill.nta [dst3] = f0, 64
  347.68 +	stf.spill.nta [dst4] = f0, 128
  347.69 +	cmp.lt p8,p0=dst_fetch, dst_last
  347.70 +	;;
  347.71 +	stf.spill.nta [dst1] = f0, 64
  347.72 +	stf.spill.nta [dst2] = f0, 64
  347.73 +#endif
  347.74 +	stf.spill.nta [dst3] = f0, 64
  347.75 +(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
  347.76 +	br.cloop.sptk.few 1b
  347.77 +	;;
  347.78 +	mov ar.lc = saved_lc		// restore lc
  347.79 +	br.ret.sptk.many rp
  347.80 +END(clear_page)
   348.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   348.2 +++ b/xen/arch/ia64/linux/lib/clear_user.S	Tue Aug 16 12:15:23 2005 +0800
   348.3 @@ -0,0 +1,209 @@
   348.4 +/*
   348.5 + * This routine clears to zero a linear memory buffer in user space.
   348.6 + *
   348.7 + * Inputs:
   348.8 + *	in0:	address of buffer
   348.9 + *	in1:	length of buffer in bytes
  348.10 + * Outputs:
  348.11 + *	r8:	number of bytes that didn't get cleared due to a fault
  348.12 + *
  348.13 + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
  348.14 + *	Stephane Eranian <eranian@hpl.hp.com>
  348.15 + */
  348.16 +
  348.17 +#include <asm/asmmacro.h>
  348.18 +
  348.19 +//
  348.20 +// arguments
  348.21 +//
  348.22 +#define buf		r32
  348.23 +#define len		r33
  348.24 +
  348.25 +//
  348.26 +// local registers
  348.27 +//
  348.28 +#define cnt		r16
  348.29 +#define buf2		r17
  348.30 +#define saved_lc	r18
  348.31 +#define saved_pfs	r19
  348.32 +#define tmp		r20
  348.33 +#define len2		r21
  348.34 +#define len3		r22
  348.35 +
  348.36 +//
  348.37 +// Theory of operations:
  348.38 +//	- we check whether or not the buffer is small, i.e., less than 17 bytes,
  348.39 +//	  in which case we do the byte-by-byte loop.
  348.40 +//
  348.41 +//	- Otherwise we go progressively from a 1-byte store to an 8-byte store in
  348.42 +//	  the head part, the body is a 16-byte store loop, and we finish with the
  348.43 +//	  tail for the last 15 bytes.
  348.44 +//	  The good point about this breakdown is that the long buffer handling
  348.45 +//	  contains only 2 branches.
  348.46 +//
  348.47 +//	The reason for not using shifting & masking for both the head and the
  348.48 +//	tail is to stay semantically correct. This routine is not supposed
  348.49 +//	to write bytes outside of the buffer. While most of the time this would
  348.50 +//	be ok, we can't tolerate a mistake. A classical example is the case
  348.51 +//	of multithreaded code where the extra bytes touched are actually owned
  348.52 +//	by another thread which runs concurrently to ours. Another, less likely,
  348.53 +//	example is with device drivers where reading an I/O mapped location may
  348.54 +//	have side effects (same thing for writing).
  348.55 +//
  348.56 +
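The same strategy written as a plain C sketch, purely to illustrate the head/body/tail breakdown described above (no fault handling, no EX() annotations, and obviously not the code that gets assembled; u16/u32/u64 are the usual <linux/types.h> types):

static void clear_sketch(unsigned char *buf, unsigned long len)
{
	if (len <= 16) {			/* small buffer: byte-by-byte loop */
		while (len--)
			*buf++ = 0;
		return;
	}

	/* head: align progressively on 2-, 4- and 8-byte boundaries */
	if ((unsigned long) buf & 1) { *buf = 0;          buf += 1; len -= 1; }
	if ((unsigned long) buf & 2) { *(u16 *) buf = 0;  buf += 2; len -= 2; }
	if ((unsigned long) buf & 4) { *(u32 *) buf = 0;  buf += 4; len -= 4; }
	if ((unsigned long) buf & 8) { *(u64 *) buf = 0;  buf += 8; len -= 8; }

	/* body: 16 bytes (two 8-byte stores) per iteration */
	while (len >= 16) {
		((u64 *) buf)[0] = 0;
		((u64 *) buf)[1] = 0;
		buf += 16;
		len -= 16;
	}

	/* tail: at most 8+4+2+1 bytes left */
	if (len & 8) { *(u64 *) buf = 0;  buf += 8; }
	if (len & 4) { *(u32 *) buf = 0;  buf += 4; }
	if (len & 2) { *(u16 *) buf = 0;  buf += 2; }
	if (len & 1) { *buf = 0; }
}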
  348.57 +GLOBAL_ENTRY(__do_clear_user)
  348.58 +	.prologue
  348.59 +	.save ar.pfs, saved_pfs
  348.60 +	alloc	saved_pfs=ar.pfs,2,0,0,0
  348.61 +	cmp.eq p6,p0=r0,len		// check for zero length
  348.62 +	.save ar.lc, saved_lc
  348.63 +	mov saved_lc=ar.lc		// preserve ar.lc (slow)
  348.64 +	.body
  348.65 +	;;				// avoid WAW on CFM
  348.66 +	adds tmp=-1,len			// br.ctop is repeat/until
  348.67 +	mov ret0=len			// return value is length at this point
  348.68 +(p6)	br.ret.spnt.many rp
  348.69 +	;;
  348.70 +	cmp.lt p6,p0=16,len		// if len > 16 then long memset
  348.71 +	mov ar.lc=tmp			// initialize lc for small count
  348.72 +(p6)	br.cond.dptk .long_do_clear
  348.73 +	;;				// WAR on ar.lc
  348.74 +	//
  348.75 +	// worst case 16 iterations, avg 8 iterations
  348.76 +	//
  348.77 +	// We could have played with the predicates to use the extra
  348.78 +	// M slot for 2 stores/iteration but the cost of initializing
  348.79 +	// the various counters compared to how long the loop is supposed
  348.80 +	// to last on average does not make this solution viable.
  348.81 +	//
  348.82 +1:
  348.83 +	EX( .Lexit1, st1 [buf]=r0,1 )
  348.84 +	adds len=-1,len			// countdown length using len
  348.85 +	br.cloop.dptk 1b
  348.86 +	;;				// avoid RAW on ar.lc
  348.87 +	//
  348.88 +	// .Lexit1: comes from the byte-by-byte loop
  348.89 +	//	    len contains bytes left
  348.90 +.Lexit1:
  348.91 +	mov ret0=len			// faster than using ar.lc
  348.92 +	mov ar.lc=saved_lc
  348.93 +	br.ret.sptk.many rp		// end of short clear_user
  348.94 +
  348.95 +
  348.96 +	//
  348.97 +	// At this point we know we have more than 16 bytes to clear
  348.98 +	// so we focus on alignment (no branches required)
  348.99 +	//
 348.100 +	// The use of len/len2 for countdown of the number of bytes left
 348.101 +	// instead of ret0 is due to the fact that the exception code
 348.102 +	// changes the value of r8.
 348.103 +	//
 348.104 +.long_do_clear:
 348.105 +	tbit.nz p6,p0=buf,0		// odd alignment (for long_do_clear)
 348.106 +	;;
 348.107 +	EX( .Lexit3, (p6) st1 [buf]=r0,1 )	// 1-byte aligned
 348.108 +(p6)	adds len=-1,len;;		// sync because buf is modified
 348.109 +	tbit.nz p6,p0=buf,1
 348.110 +	;;
 348.111 +	EX( .Lexit3, (p6) st2 [buf]=r0,2 )	// 2-byte aligned
 348.112 +(p6)	adds len=-2,len;;
 348.113 +	tbit.nz p6,p0=buf,2
 348.114 +	;;
 348.115 +	EX( .Lexit3, (p6) st4 [buf]=r0,4 )	// 4-byte aligned
 348.116 +(p6)	adds len=-4,len;;
 348.117 +	tbit.nz p6,p0=buf,3
 348.118 +	;;
 348.119 +	EX( .Lexit3, (p6) st8 [buf]=r0,8 )	// 8-byte aligned
 348.120 +(p6)	adds len=-8,len;;
 348.121 +	shr.u cnt=len,4		// number of 128-bit (2x64bit) words
 348.122 +	;;
 348.123 +	cmp.eq p6,p0=r0,cnt
 348.124 +	adds tmp=-1,cnt
 348.125 +(p6)	br.cond.dpnt .dotail		// we have less than 16 bytes left
 348.126 +	;;
 348.127 +	adds buf2=8,buf			// setup second base pointer
 348.128 +	mov ar.lc=tmp
 348.129 +	;;
 348.130 +
 348.131 +	//
 348.132 +	// 16bytes/iteration core loop
 348.133 +	//
 348.134 +	// The second store can never generate a fault because
 348.135 +	// we come into the loop only when we are 16-byte aligned.
 348.136 +	// This means that if we cross a page then it will always be
 348.137 +	// in the first store and never in the second.
 348.138 +	//
 348.139 +	//
 348.140 +	// We need to keep track of the remaining length. A possible (optimistic)
 348.141 +	// way would be to use ar.lc and derive how many bytes were left by
 348.142 +	// doing: left = 16*ar.lc + 16.  This would avoid the addition at
 348.143 +	// every iteration.
 348.144 +	// However we need to keep the synchronization point. A template
 348.145 +	// M;;MB does not exist and thus we can keep the addition at no
 348.146 +	// extra cycle cost (use a nop slot anyway). It also simplifies the
 348.147 +	// (unlikely) error recovery code.
 348.148 +	//
 348.149 +
 348.150 +2:	EX(.Lexit3, st8 [buf]=r0,16 )
 348.151 +	;;				// needed to get len correct when error
 348.152 +	st8 [buf2]=r0,16
 348.153 +	adds len=-16,len
 348.154 +	br.cloop.dptk 2b
 348.155 +	;;
 348.156 +	mov ar.lc=saved_lc
 348.157 +	//
 348.158 +	// tail correction based on len only
 348.159 +	//
 348.160 +	// We alternate the use of len3,len2 to allow parallelism and correct
 348.161 +	// error handling. We also reuse p6/p7 to return correct value.
 348.162 +	// The addition of len2/len3 does not cost anything more compared to
 348.163 +	// the regular memset as we had empty slots.
 348.164 +	//
 348.165 +.dotail:
 348.166 +	mov len2=len			// for parallelization of error handling
 348.167 +	mov len3=len
 348.168 +	tbit.nz p6,p0=len,3
 348.169 +	;;
 348.170 +	EX( .Lexit2, (p6) st8 [buf]=r0,8 )	// at least 8 bytes
 348.171 +(p6)	adds len3=-8,len2
 348.172 +	tbit.nz p7,p6=len,2
 348.173 +	;;
 348.174 +	EX( .Lexit2, (p7) st4 [buf]=r0,4 )	// at least 4 bytes
 348.175 +(p7)	adds len2=-4,len3
 348.176 +	tbit.nz p6,p7=len,1
 348.177 +	;;
 348.178 +	EX( .Lexit2, (p6) st2 [buf]=r0,2 )	// at least 2 bytes
 348.179 +(p6)	adds len3=-2,len2
 348.180 +	tbit.nz p7,p6=len,0
 348.181 +	;;
 348.182 +	EX( .Lexit2, (p7) st1 [buf]=r0 )	// only 1 byte left
 348.183 +	mov ret0=r0				// success
 348.184 +	br.ret.sptk.many rp			// end of most likely path
 348.185 +
 348.186 +	//
 348.187 +	// Outlined error handling code
 348.188 +	//
 348.189 +
 348.190 +	//
 348.191 +	// .Lexit3: comes from core loop, need restore pr/lc
 348.192 +	//	    len contains bytes left
 348.193 +	//
 348.194 +	//
 348.195 +	// .Lexit2:
 348.196 +	//	if p6 -> coming from st8 or st2 : len2 contains what's left
 348.197 +	//	if p7 -> coming from st4 or st1 : len3 contains what's left
 348.198 +	// We must restore lc/pr even though might not have been used.
 348.199 +.Lexit2:
 348.200 +	.pred.rel "mutex", p6, p7
 348.201 +(p6)	mov len=len2
 348.202 +(p7)	mov len=len3
 348.203 +	;;
 348.204 +	//
 348.205 +	// .Lexit3: comes from head, need not restore pr/lc
 348.206 +	//	    len contains bytes left
 348.207 +	//
 348.208 +.Lexit3:
 348.209 +	mov ret0=len
 348.210 +	mov ar.lc=saved_lc
 348.211 +	br.ret.sptk.many rp
 348.212 +END(__do_clear_user)
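The head/body/tail strategy described in the comments above can be summarised in C. The sketch below is a minimal model and not the kernel routine: it assumes a fault-free environment (so nothing here corresponds to the EX() fixup machinery), and the name clear_user_model is purely illustrative.

/*
 * Minimal C model of the head/body/tail breakdown used by
 * __do_clear_user.  Assumes no store can fault; the real routine
 * returns the number of bytes left uncleared when one does.
 */
#include <stddef.h>
#include <stdint.h>

static size_t clear_user_model(void *buf, size_t len)
{
	uint8_t *p = buf;

	if (len < 17) {			/* short case: byte-by-byte loop */
		while (len--)
			*p++ = 0;
		return 0;
	}

	/* head: 1/2/4/8-byte stores until p is 8-byte aligned */
	if ((uintptr_t)p & 1) { *p = 0;             p += 1; len -= 1; }
	if ((uintptr_t)p & 2) { *(uint16_t *)p = 0; p += 2; len -= 2; }
	if ((uintptr_t)p & 4) { *(uint32_t *)p = 0; p += 4; len -= 4; }
	if ((uintptr_t)p & 8) { *(uint64_t *)p = 0; p += 8; len -= 8; }

	/* body: 16 bytes (two 8-byte stores) per iteration */
	while (len >= 16) {
		((uint64_t *)p)[0] = 0;
		((uint64_t *)p)[1] = 0;
		p += 16;
		len -= 16;
	}

	/* tail: at most 15 bytes, never writing outside the buffer */
	if (len & 8) { *(uint64_t *)p = 0; p += 8; }
	if (len & 4) { *(uint32_t *)p = 0; p += 4; }
	if (len & 2) { *(uint16_t *)p = 0; p += 2; }
	if (len & 1) { *p = 0; }

	return 0;			/* nothing left uncleared */
}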
   349.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   349.2 +++ b/xen/arch/ia64/linux/lib/copy_page.S	Tue Aug 16 12:15:23 2005 +0800
   349.3 @@ -0,0 +1,98 @@
   349.4 +/*
   349.5 + *
   349.6 + * Optimized version of the standard copy_page() function
   349.7 + *
   349.8 + * Inputs:
   349.9 + *	in0:	address of target page
  349.10 + *	in1:	address of source page
  349.11 + * Output:
  349.12 + *	no return value
  349.13 + *
  349.14 + * Copyright (C) 1999, 2001 Hewlett-Packard Co
  349.15 + *	Stephane Eranian <eranian@hpl.hp.com>
  349.16 + *	David Mosberger <davidm@hpl.hp.com>
  349.17 + *
  349.18 + * 4/06/01 davidm	Tuned to make it perform well both for cached and uncached copies.
  349.19 + */
  349.20 +#include <asm/asmmacro.h>
  349.21 +#include <asm/page.h>
  349.22 +
  349.23 +#define PIPE_DEPTH	3
  349.24 +#define EPI		p[PIPE_DEPTH-1]
  349.25 +
  349.26 +#define lcount		r16
  349.27 +#define saved_pr	r17
  349.28 +#define saved_lc	r18
  349.29 +#define saved_pfs	r19
  349.30 +#define src1		r20
  349.31 +#define src2		r21
  349.32 +#define tgt1		r22
  349.33 +#define tgt2		r23
  349.34 +#define srcf		r24
  349.35 +#define tgtf		r25
  349.36 +#define tgt_last	r26
  349.37 +
  349.38 +#define Nrot		((8*PIPE_DEPTH+7)&~7)
  349.39 +
  349.40 +GLOBAL_ENTRY(copy_page)
  349.41 +	.prologue
  349.42 +	.save ar.pfs, saved_pfs
  349.43 +	alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
  349.44 +
  349.45 +	.rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
  349.46 +	      t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
  349.47 +	.rotp p[PIPE_DEPTH]
  349.48 +
  349.49 +	.save ar.lc, saved_lc
  349.50 +	mov saved_lc=ar.lc
  349.51 +	mov ar.ec=PIPE_DEPTH
  349.52 +
  349.53 +	mov lcount=PAGE_SIZE/64-1
  349.54 +	.save pr, saved_pr
  349.55 +	mov saved_pr=pr
  349.56 +	mov pr.rot=1<<16
  349.57 +
  349.58 +	.body
  349.59 +
  349.60 +	mov src1=in1
  349.61 +	adds src2=8,in1
  349.62 +	mov tgt_last = PAGE_SIZE
  349.63 +	;;
  349.64 +	adds tgt2=8,in0
  349.65 +	add srcf=512,in1
  349.66 +	mov ar.lc=lcount
  349.67 +	mov tgt1=in0
  349.68 +	add tgtf=512,in0
  349.69 +	add tgt_last = tgt_last, in0
  349.70 +	;;
  349.71 +1:
  349.72 +(p[0])	ld8 t1[0]=[src1],16
  349.73 +(EPI)	st8 [tgt1]=t1[PIPE_DEPTH-1],16
  349.74 +(p[0])	ld8 t2[0]=[src2],16
  349.75 +(EPI)	st8 [tgt2]=t2[PIPE_DEPTH-1],16
  349.76 +	cmp.ltu p6,p0 = tgtf, tgt_last
  349.77 +	;;
  349.78 +(p[0])	ld8 t3[0]=[src1],16
  349.79 +(EPI)	st8 [tgt1]=t3[PIPE_DEPTH-1],16
  349.80 +(p[0])	ld8 t4[0]=[src2],16
  349.81 +(EPI)	st8 [tgt2]=t4[PIPE_DEPTH-1],16
  349.82 +	;;
  349.83 +(p[0])	ld8 t5[0]=[src1],16
  349.84 +(EPI)	st8 [tgt1]=t5[PIPE_DEPTH-1],16
  349.85 +(p[0])	ld8 t6[0]=[src2],16
  349.86 +(EPI)	st8 [tgt2]=t6[PIPE_DEPTH-1],16
  349.87 +	;;
  349.88 +(p[0])	ld8 t7[0]=[src1],16
  349.89 +(EPI)	st8 [tgt1]=t7[PIPE_DEPTH-1],16
  349.90 +(p[0])	ld8 t8[0]=[src2],16
  349.91 +(EPI)	st8 [tgt2]=t8[PIPE_DEPTH-1],16
  349.92 +
  349.93 +(p6)	lfetch [srcf], 64
  349.94 +(p6)	lfetch [tgtf], 64
  349.95 +	br.ctop.sptk.few 1b
  349.96 +	;;
  349.97 +	mov pr=saved_pr,0xffffffffffff0000	// restore predicates
  349.98 +	mov ar.pfs=saved_pfs
  349.99 +	mov ar.lc=saved_lc
 349.100 +	br.ret.sptk.many rp
 349.101 +END(copy_page)
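Stripped of the rotating registers and the modulo-scheduled loop, the data movement in copy_page above is two interleaved 8-byte streams covering 64 bytes per iteration, with prefetches running 512 bytes ahead of the stores. The C sketch below only illustrates that structure; PAGE_SIZE_MODEL and __builtin_prefetch (standing in for lfetch) are assumptions, not what the assembly uses.

#include <stdint.h>

#define PAGE_SIZE_MODEL	16384UL		/* assumption: 16 KB pages */

static void copy_page_model(void *dst, const void *src)
{
	const uint64_t *s = src;
	uint64_t *d = dst;
	unsigned long i;

	for (i = 0; i < PAGE_SIZE_MODEL / 64; i++) {
		/* prefetch 512 bytes (8 iterations) ahead, like srcf/tgtf */
		if ((i + 8) * 64 < PAGE_SIZE_MODEL) {
			__builtin_prefetch(s + 64, 0);	/* read stream  */
			__builtin_prefetch(d + 64, 1);	/* write stream */
		}
		/* eight 8-byte moves = one 64-byte chunk per iteration */
		d[0] = s[0]; d[1] = s[1];
		d[2] = s[2]; d[3] = s[3];
		d[4] = s[4]; d[5] = s[5];
		d[6] = s[6]; d[7] = s[7];
		s += 8;
		d += 8;
	}
}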
   350.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   350.2 +++ b/xen/arch/ia64/linux/lib/copy_page_mck.S	Tue Aug 16 12:15:23 2005 +0800
   350.3 @@ -0,0 +1,185 @@
   350.4 +/*
   350.5 + * McKinley-optimized version of copy_page().
   350.6 + *
   350.7 + * Copyright (C) 2002 Hewlett-Packard Co
   350.8 + *	David Mosberger <davidm@hpl.hp.com>
   350.9 + *
  350.10 + * Inputs:
  350.11 + *	in0:	address of target page
  350.12 + *	in1:	address of source page
  350.13 + * Output:
  350.14 + *	no return value
  350.15 + *
  350.16 + * General idea:
  350.17 + *	- use regular loads and stores to prefetch data to avoid consuming M-slot just for
  350.18 + *	  lfetches => good for in-cache performance
  350.19 + *	- avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single
  350.20 + *	  cycle
  350.21 + *
  350.22 + * Principle of operation:
  350.23 + *	First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes.
  350.24 + *	To avoid secondary misses in L2, we prefetch both source and destination with a line-size
  350.25 + *	of 128 bytes.  When both of these lines are in the L2 and the first half of the
  350.26 + *	source line is in L1, we start copying the remaining words.  The second half of the
  350.27 + *	source line is prefetched in an earlier iteration, so that by the time we start
  350.28 + *	accessing it, it's also present in the L1.
  350.29 + *
  350.30 + *	We use a software-pipelined loop to control the overall operation.  The pipeline
  350.31 + *	has 2*PREFETCH_DIST+K stages.  The first PREFETCH_DIST stages are used for prefetching
  350.32 + *	source cache-lines.  The second PREFETCH_DIST stages are used for prefetching destination
  350.33 + *	cache-lines, the last K stages are used to copy the cache-line words not copied by
  350.34 + *	the prefetches.  The four relevant points in the pipeline are called A, B, C, D:
  350.35 + *	p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line
  350.36 + *	should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought
  350.37 + *	into L1D and p[D] is TRUE if a cacheline needs to be copied.
  350.38 + *
  350.39 + *	This all sounds very complicated, but thanks to the modulo-scheduled loop support,
  350.40 + *	the resulting code is very regular and quite easy to follow (once you get the idea).
  350.41 + *
  350.42 + *	As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented
  350.43 + *	as the separate .prefetch_loop.  Logically, this loop performs exactly like the
  350.44 + *	main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed,
  350.45 + *	so that each loop iteration is faster (again, good for cached case).
  350.46 + *
  350.47 + *	When reading the code, it helps to keep the following picture in mind:
  350.48 + *
  350.49 + *	       word 0 word 1
  350.50 + *            +------+------+---
  350.51 + *	      |	v[x] | 	t1  | ^
  350.52 + *	      |	t2   |	t3  | |
  350.53 + *	      |	t4   |	t5  | |
  350.54 + *	      |	t6   |	t7  | | 128 bytes
  350.55 + *     	      |	n[y] | 	t9  | |	(L2 cache line)
  350.56 + *	      |	t10  | 	t11 | |
  350.57 + *	      |	t12  | 	t13 | |
  350.58 + *	      |	t14  | 	t15 | v
  350.59 + *	      +------+------+---
  350.60 + *
  350.61 + *	Here, v[x] is copied by the (memory) prefetch.  n[y] is loaded at p[C]
  350.62 + *	to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
  350.63 + *	an order that avoids bank conflicts.
  350.64 + */
  350.65 +#include <asm/asmmacro.h>
  350.66 +#include <asm/page.h>
  350.67 +
  350.68 +#define PREFETCH_DIST	8		// McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
  350.69 +
  350.70 +#define src0		r2
  350.71 +#define src1		r3
  350.72 +#define dst0		r9
  350.73 +#define dst1		r10
  350.74 +#define src_pre_mem	r11
  350.75 +#define dst_pre_mem	r14
  350.76 +#define src_pre_l2	r15
  350.77 +#define dst_pre_l2	r16
  350.78 +#define t1		r17
  350.79 +#define t2		r18
  350.80 +#define t3		r19
  350.81 +#define t4		r20
  350.82 +#define t5		t1	// alias!
  350.83 +#define t6		t2	// alias!
  350.84 +#define t7		t3	// alias!
  350.85 +#define t9		t5	// alias!
  350.86 +#define t10		t4	// alias!
  350.87 +#define t11		t7	// alias!
  350.88 +#define t12		t6	// alias!
  350.89 +#define t14		t10	// alias!
  350.90 +#define t13		r21
  350.91 +#define t15		r22
  350.92 +
  350.93 +#define saved_lc	r23
  350.94 +#define saved_pr	r24
  350.95 +
  350.96 +#define	A	0
  350.97 +#define B	(PREFETCH_DIST)
  350.98 +#define C	(B + PREFETCH_DIST)
  350.99 +#define D	(C + 3)
 350.100 +#define N	(D + 1)
 350.101 +#define Nrot	((N + 7) & ~7)
 350.102 +
 350.103 +GLOBAL_ENTRY(copy_page)
 350.104 +	.prologue
 350.105 +	alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot
 350.106 +
 350.107 +	.rotr v[2*PREFETCH_DIST], n[D-C+1]
 350.108 +	.rotp p[N]
 350.109 +
 350.110 +	.save ar.lc, saved_lc
 350.111 +	mov saved_lc = ar.lc
 350.112 +	.save pr, saved_pr
 350.113 +	mov saved_pr = pr
 350.114 +	.body
 350.115 +
 350.116 +	mov src_pre_mem = in1
 350.117 +	mov pr.rot = 0x10000
 350.118 +	mov ar.ec = 1				// special unrolled loop
 350.119 +
 350.120 +	mov dst_pre_mem = in0
 350.121 +	mov ar.lc = 2*PREFETCH_DIST - 1
 350.122 +
 350.123 +	add src_pre_l2 = 8*8, in1
 350.124 +	add dst_pre_l2 = 8*8, in0
 350.125 +	add src0 = 8, in1			// first t1 src
 350.126 +	add src1 = 3*8, in1			// first t3 src
 350.127 +	add dst0 = 8, in0			// first t1 dst
 350.128 +	add dst1 = 3*8, in0			// first t3 dst
 350.129 +	mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1
 350.130 +	nop.m 0
 350.131 +	nop.i 0
 350.132 +	;;
 350.133 +	// same as .line_copy loop, but with all predicated-off instructions removed:
 350.134 +.prefetch_loop:
 350.135 +(p[A])	ld8 v[A] = [src_pre_mem], 128		// M0
 350.136 +(p[B])	st8 [dst_pre_mem] = v[B], 128		// M2
 350.137 +	br.ctop.sptk .prefetch_loop
 350.138 +	;;
 350.139 +	cmp.eq p16, p0 = r0, r0			// reset p16 to 1 (br.ctop cleared it to zero)
 350.140 +	mov ar.lc = t1				// with 64KB pages, t1 is too big to fit in 8 bits!
 350.141 +	mov ar.ec = N				// # of stages in pipeline
 350.142 +	;;
 350.143 +.line_copy:
 350.144 +(p[D])	ld8 t2 = [src0], 3*8			// M0
 350.145 +(p[D])	ld8 t4 = [src1], 3*8			// M1
 350.146 +(p[B])	st8 [dst_pre_mem] = v[B], 128		// M2 prefetch dst from memory
 350.147 +(p[D])	st8 [dst_pre_l2] = n[D-C], 128		// M3 prefetch dst from L2
 350.148 +	;;
 350.149 +(p[A])	ld8 v[A] = [src_pre_mem], 128		// M0 prefetch src from memory
 350.150 +(p[C])	ld8 n[0] = [src_pre_l2], 128		// M1 prefetch src from L2
 350.151 +(p[D])	st8 [dst0] =  t1, 8			// M2
 350.152 +(p[D])	st8 [dst1] =  t3, 8			// M3
 350.153 +	;;
 350.154 +(p[D])	ld8  t5 = [src0], 8
 350.155 +(p[D])	ld8  t7 = [src1], 3*8
 350.156 +(p[D])	st8 [dst0] =  t2, 3*8
 350.157 +(p[D])	st8 [dst1] =  t4, 3*8
 350.158 +	;;
 350.159 +(p[D])	ld8  t6 = [src0], 3*8
 350.160 +(p[D])	ld8 t10 = [src1], 8
 350.161 +(p[D])	st8 [dst0] =  t5, 8
 350.162 +(p[D])	st8 [dst1] =  t7, 3*8
 350.163 +	;;
 350.164 +(p[D])	ld8  t9 = [src0], 3*8
 350.165 +(p[D])	ld8 t11 = [src1], 3*8
 350.166 +(p[D])	st8 [dst0] =  t6, 3*8
 350.167 +(p[D])	st8 [dst1] = t10, 8
 350.168 +	;;
 350.169 +(p[D])	ld8 t12 = [src0], 8
 350.170 +(p[D])	ld8 t14 = [src1], 8
 350.171 +(p[D])	st8 [dst0] =  t9, 3*8
 350.172 +(p[D])	st8 [dst1] = t11, 3*8
 350.173 +	;;
 350.174 +(p[D])	ld8 t13 = [src0], 4*8
 350.175 +(p[D])	ld8 t15 = [src1], 4*8
 350.176 +(p[D])	st8 [dst0] = t12, 8
 350.177 +(p[D])	st8 [dst1] = t14, 8
 350.178 +	;;
 350.179 +(p[D-1])ld8  t1 = [src0], 8
 350.180 +(p[D-1])ld8  t3 = [src1], 8
 350.181 +(p[D])	st8 [dst0] = t13, 4*8
 350.182 +(p[D])	st8 [dst1] = t15, 4*8
 350.183 +	br.ctop.sptk .line_copy
 350.184 +	;;
 350.185 +	mov ar.lc = saved_lc
 350.186 +	mov pr = saved_pr, -1
 350.187 +	br.ret.sptk.many rp
 350.188 +END(copy_page)
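The PREFETCH_DIST scheme above boils down to keeping a fixed number of source and destination cache lines in flight ahead of the line currently being copied. The C fragment below is a rough, assumption-laden analogue: LINE, PREFETCH_DIST_MODEL, PAGE_SIZE_MODEL and copy_page_mck_model are illustrative names, and the real code interleaves the prefetches with the copies in one modulo-scheduled loop rather than calling memcpy.

#include <stdint.h>
#include <string.h>

#define LINE			128	/* L2 line size on McKinley */
#define PREFETCH_DIST_MODEL	8	/* lines kept in flight */
#define PAGE_SIZE_MODEL		16384UL	/* assumption: 16 KB pages */

static void copy_page_mck_model(void *dst, const void *src)
{
	const char *s = src;
	char *d = dst;
	unsigned long line, nlines = PAGE_SIZE_MODEL / LINE;

	for (line = 0; line < nlines; line++) {
		/* touch lines PREFETCH_DIST ahead so their L2 misses overlap the copy */
		if (line + PREFETCH_DIST_MODEL < nlines) {
			__builtin_prefetch(s + (line + PREFETCH_DIST_MODEL) * LINE, 0);
			__builtin_prefetch(d + (line + PREFETCH_DIST_MODEL) * LINE, 1);
		}
		/* copy the line whose prefetch was issued PREFETCH_DIST iterations ago */
		memcpy(d + line * LINE, s + line * LINE, LINE);
	}
}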
   351.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   351.2 +++ b/xen/arch/ia64/linux/lib/copy_user.S	Tue Aug 16 12:15:23 2005 +0800
   351.3 @@ -0,0 +1,610 @@
   351.4 +/*
   351.5 + *
   351.6 + * Optimized version of the copy_user() routine.
   351.7 + * It is used to copy data across the kernel/user boundary.
   351.8 + *
   351.9 + * The source and destination are always on opposite sides of
  351.10 + * the boundary. When reading from user space we must catch
  351.11 + * faults on loads. When writing to user space we must catch
  351.12 + * errors on stores. Note that because of the nature of the copy
  351.13 + * we don't need to worry about overlapping regions.
  351.14 + *
  351.15 + *
  351.16 + * Inputs:
  351.17 + *	in0	address of source buffer
  351.18 + *	in1	address of destination buffer
  351.19 + *	in2	number of bytes to copy
  351.20 + *
  351.21 + * Outputs:
  351.22 + *	ret0	0 in case of success. The number of bytes NOT copied in
  351.23 + *		case of error.
  351.24 + *
  351.25 + * Copyright (C) 2000-2001 Hewlett-Packard Co
  351.26 + *	Stephane Eranian <eranian@hpl.hp.com>
  351.27 + *
  351.28 + * Fixme:
  351.29 + *	- handle the case where we have more than 16 bytes and the alignments
  351.30 + *	  are different.
  351.31 + *	- more benchmarking
  351.32 + *	- fix extraneous stop bit introduced by the EX() macro.
  351.33 + */
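As a reading aid only, the calling contract described above can be modelled in C. The sketch assumes a fault-free environment and uses the hypothetical names copy_user_model and COPY_BREAK_MODEL; it shows the short/long split and the return convention, not the fault handling that the rest of this file implements.

#include <stddef.h>
#include <string.h>

#define COPY_BREAK_MODEL 16	/* at or below this, copy byte by byte */

static size_t copy_user_model(void *dst, const void *src, size_t len)
{
	if (len <= COPY_BREAK_MODEL) {
		/* short case: plain byte loop (software-pipelined in the .S) */
		char *d = dst;
		const char *s = src;
		while (len--)
			*d++ = *s++;
		return 0;
	}
	/* long case: the assembly aligns, then moves 8 bytes at a time */
	memcpy(dst, src, len);
	return 0;		/* on a fault the real routine returns bytes NOT copied */
}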
  351.34 +
  351.35 +#include <asm/asmmacro.h>
  351.36 +
  351.37 +//
  351.38 +// Tuneable parameters
  351.39 +//
  351.40 +#define COPY_BREAK	16	// we do byte copy below (must be >=16)
  351.41 +#define PIPE_DEPTH	21	// pipe depth
  351.42 +
  351.43 +#define EPI		p[PIPE_DEPTH-1]
  351.44 +
  351.45 +//
  351.46 +// arguments
  351.47 +//
  351.48 +#define dst		in0
  351.49 +#define src		in1
  351.50 +#define len		in2
  351.51 +
  351.52 +//
  351.53 +// local registers
  351.54 +//
  351.55 +#define t1		r2	// rshift in bytes
  351.56 +#define t2		r3	// lshift in bytes
  351.57 +#define rshift		r14	// right shift in bits
  351.58 +#define lshift		r15	// left shift in bits
  351.59 +#define word1		r16
  351.60 +#define word2		r17
  351.61 +#define cnt		r18
  351.62 +#define len2		r19
  351.63 +#define saved_lc	r20
  351.64 +#define saved_pr	r21
  351.65 +#define tmp		r22
  351.66 +#define val		r23
  351.67 +#define src1		r24
  351.68 +#define dst1		r25
  351.69 +#define src2		r26
  351.70 +#define dst2		r27
  351.71 +#define len1		r28
  351.72 +#define enddst		r29
  351.73 +#define endsrc		r30
  351.74 +#define saved_pfs	r31
  351.75 +
  351.76 +GLOBAL_ENTRY(__copy_user)
  351.77 +	.prologue
  351.78 +	.save ar.pfs, saved_pfs
  351.79 +	alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7)
  351.80 +
  351.81 +	.rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH]
  351.82 +	.rotp p[PIPE_DEPTH]
  351.83 +
  351.84 +	adds len2=-1,len	// br.ctop is repeat/until
  351.85 +	mov ret0=r0
  351.86 +
  351.87 +	;;			// RAW of cfm when len=0
  351.88 +	cmp.eq p8,p0=r0,len	// check for zero length
  351.89 +	.save ar.lc, saved_lc
  351.90 +	mov saved_lc=ar.lc	// preserve ar.lc (slow)
  351.91 +(p8)	br.ret.spnt.many rp	// empty memcpy()
  351.92 +	;;
  351.93 +	add enddst=dst,len	// first byte after end of destination
  351.94 +	add endsrc=src,len	// first byte after end of source
  351.95 +	.save pr, saved_pr
  351.96 +	mov saved_pr=pr		// preserve predicates
  351.97 +
  351.98 +	.body
  351.99 +
 351.100 +	mov dst1=dst		// copy because of rotation
 351.101 +	mov ar.ec=PIPE_DEPTH
 351.102 +	mov pr.rot=1<<16	// p16=true all others are false
 351.103 +
 351.104 +	mov src1=src		// copy because of rotation
 351.105 +	mov ar.lc=len2		// initialize lc for small count
 351.106 +	cmp.lt p10,p7=COPY_BREAK,len	// if len > COPY_BREAK then long copy
 351.107 +
 351.108 +	xor tmp=src,dst		// same alignment test prepare
 351.109 +(p10)	br.cond.dptk .long_copy_user
 351.110 +	;;			// RAW pr.rot/p16 ?
 351.111 +	//
 351.112 +	// Now we do the byte by byte loop with software pipeline
 351.113 +	//
 351.114 +	// p7 is necessarily false by now
 351.115 +1:
 351.116 +	EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
 351.117 +	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
 351.118 +	br.ctop.dptk.few 1b
 351.119 +	;;
 351.120 +	mov ar.lc=saved_lc
 351.121 +	mov pr=saved_pr,0xffffffffffff0000
 351.122 +	mov ar.pfs=saved_pfs		// restore ar.ec
 351.123 +	br.ret.sptk.many rp		// end of short memcpy
 351.124 +
 351.125 +	//
 351.126 +	// Not 8-byte aligned
 351.127 +	//
 351.128 +.diff_align_copy_user:
 351.129 +	// At this point we know we have more than 16 bytes to copy
 351.130 +	// and also that src and dest do _not_ have the same alignment.
 351.131 +	and src2=0x7,src1				// src offset
 351.132 +	and dst2=0x7,dst1				// dst offset
 351.133 +	;;
 351.134 +	// The basic idea is that we copy byte-by-byte at the head so
 351.135 +	// that we can reach 8-byte alignment for both src1 and dst1.
 351.136 +	// Then copy the body using software pipelined 8-byte copy,
 351.137 +	// shifting the two back-to-back words right and left, then copy
 351.138 +	// the tail by copying byte-by-byte.
 351.139 +	//
 351.140 +	// Fault handling. If the byte-by-byte at the head fails on the
 351.141 +	// load, then restart and finish the pipeline by copying zeros
 351.142 +	// to the dst1. Then copy zeros for the rest of dst1.
 351.143 +	// If 8-byte software pipeline fails on the load, do the same as
 351.144 +	// failure_in3 does. If the byte-by-byte at the tail fails, it is
 351.145 +	// handled simply by failure_in_pipe1.
 351.146 +	//
 351.147 +	// The case