ia64/xen-unstable

changeset 6530:e3d811cca4e1

merge.
author kevin@localhost.localdomain
date Tue Aug 16 12:15:23 2005 +0800 (2005-08-16)
parents b5196e075602 a6bb47919161
children 1ae656509f02
files .hgignore Config.mk Makefile buildconfigs/Rules.mk buildconfigs/mk.linux-2.6-xen buildconfigs/mk.linux-2.6-xen0 buildconfigs/mk.linux-2.6-xenU docs/misc/shype4xen_readme.txt docs/src/user.tex linux-2.4-xen-sparse/arch/xen/Makefile linux-2.4-xen-sparse/arch/xen/config.in linux-2.4-xen-sparse/arch/xen/kernel/time.c linux-2.4-xen-sparse/include/asm-xen/bugs.h linux-2.4-xen-sparse/include/asm-xen/fixmap.h linux-2.4-xen-sparse/include/asm-xen/highmem.h linux-2.4-xen-sparse/include/asm-xen/hw_irq.h linux-2.4-xen-sparse/include/asm-xen/io.h linux-2.4-xen-sparse/include/asm-xen/irq.h linux-2.4-xen-sparse/include/asm-xen/mmu_context.h linux-2.4-xen-sparse/include/asm-xen/page.h linux-2.4-xen-sparse/include/asm-xen/pci.h linux-2.4-xen-sparse/include/asm-xen/pgalloc.h linux-2.4-xen-sparse/include/asm-xen/pgtable.h linux-2.4-xen-sparse/include/asm-xen/processor.h linux-2.4-xen-sparse/include/asm-xen/segment.h linux-2.4-xen-sparse/include/asm-xen/smp.h linux-2.4-xen-sparse/include/asm-xen/system.h linux-2.4-xen-sparse/include/asm-xen/vga.h linux-2.4-xen-sparse/include/linux/blk.h linux-2.4-xen-sparse/include/linux/highmem.h linux-2.4-xen-sparse/include/linux/irq.h linux-2.4-xen-sparse/include/linux/mm.h linux-2.4-xen-sparse/include/linux/sched.h linux-2.4-xen-sparse/include/linux/skbuff.h linux-2.4-xen-sparse/include/linux/timer.h linux-2.4-xen-sparse/mkbuildtree linux-2.6-xen-sparse/arch/xen/Kconfig linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c 
linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c linux-2.6-xen-sparse/arch/xen/kernel/reboot.c linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig linux-2.6-xen-sparse/arch/xen/x86_64/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c linux-2.6-xen-sparse/drivers/xen/Makefile linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c linux-2.6-xen-sparse/drivers/xen/blkback/Makefile linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c 
linux-2.6-xen-sparse/drivers/xen/blkback/common.h linux-2.6-xen-sparse/drivers/xen/blkback/control.c linux-2.6-xen-sparse/drivers/xen/blkback/interface.c linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6-xen-sparse/drivers/xen/blkfront/block.h linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c linux-2.6-xen-sparse/drivers/xen/netback/common.h linux-2.6-xen-sparse/drivers/xen/netback/interface.c linux-2.6-xen-sparse/drivers/xen/netback/netback.c linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6-xen-sparse/drivers/xen/usbback/common.h linux-2.6-xen-sparse/drivers/xen/usbback/interface.c linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c linux-2.6-xen-sparse/include/asm-generic/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h linux-2.6-xen-sparse/include/asm-xen/balloon.h linux-2.6-xen-sparse/include/asm-xen/ctrl_if.h linux-2.6-xen-sparse/include/asm-xen/evtchn.h linux-2.6-xen-sparse/include/asm-xen/gnttab.h linux-2.6-xen-sparse/include/asm-xen/hypervisor.h linux-2.6-xen-sparse/include/asm-xen/xenbus.h linux-2.6-xen-sparse/include/linux/highmem.h linux-2.6-xen-sparse/include/linux/mm.h linux-2.6-xen-sparse/kernel/irq/manage.c linux-2.6-xen-sparse/mkbuildtree linux-2.6-xen-sparse/mm/highmem.c linux-2.6-xen-sparse/mm/memory.c patches/linux-2.6.12/smp-alts.patch tools/Makefile tools/blktap/blktaplib.c tools/debugger/gdb/gdbbuild tools/debugger/libxendebug/xendebug.c tools/debugger/pdb/Makefile tools/debugger/pdb/PDB.ml tools/debugger/pdb/Process.ml tools/debugger/pdb/Process.mli tools/debugger/pdb/Util.ml tools/debugger/pdb/Xen_domain.ml tools/debugger/pdb/Xen_domain.mli tools/debugger/pdb/debugger.ml tools/debugger/pdb/linux-2.6-module/Makefile tools/debugger/pdb/linux-2.6-module/debug.c tools/debugger/pdb/linux-2.6-module/module.c tools/debugger/pdb/linux-2.6-module/pdb_debug.h tools/debugger/pdb/linux-2.6-module/pdb_module.h tools/debugger/pdb/linux-2.6-patches/Makefile tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch tools/debugger/pdb/linux-2.6-patches/kdebug.patch tools/debugger/pdb/linux-2.6-patches/makefile.patch tools/debugger/pdb/linux-2.6-patches/ptrace.patch tools/debugger/pdb/linux-2.6-patches/traps.patch tools/debugger/pdb/pdb_caml_process.c tools/debugger/pdb/pdb_caml_xcs.c tools/debugger/pdb/pdb_caml_xen.h tools/debugger/pdb/readme tools/debugger/pdb/server.ml tools/examples/Makefile tools/examples/xend-config.sxp tools/examples/xmexample.vmx tools/firmware/rombios/rombios.c tools/ioemu/hw/ide.c tools/ioemu/hw/pckbd.c tools/ioemu/monitor.c tools/ioemu/target-i386-dm/Makefile 
tools/ioemu/target-i386-dm/helper2.c tools/ioemu/target-i386-dm/qemu-dm.debug tools/ioemu/vl.c tools/ioemu/vl.h tools/ioemu/vnc.c tools/libxc/Makefile tools/libxc/linux_boot_params.h tools/libxc/xc.h tools/libxc/xc_core.c tools/libxc/xc_domain.c tools/libxc/xc_gnttab.c tools/libxc/xc_linux_build.c tools/libxc/xc_linux_restore.c tools/libxc/xc_linux_save.c tools/libxc/xc_load_elf.c tools/libxc/xc_private.c tools/libxc/xc_private.h tools/libxc/xc_ptrace.c tools/libxc/xc_vmx_build.c tools/misc/Makefile tools/misc/policyprocessor/XmlToBinInterface.java tools/misc/xend tools/python/setup.py tools/python/xen/lowlevel/xc/xc.c tools/python/xen/lowlevel/xs/xs.c tools/python/xen/lowlevel/xu/xu.c tools/python/xen/sv/CreateDomain.py tools/python/xen/sv/DomInfo.py tools/python/xen/sv/GenTabbed.py tools/python/xen/sv/HTMLBase.py tools/python/xen/sv/Main.py tools/python/xen/sv/NodeInfo.py tools/python/xen/sv/RestoreDomain.py tools/python/xen/sv/Wizard.py tools/python/xen/sv/__init__.py tools/python/xen/sv/util.py tools/python/xen/web/SrvBase.py tools/python/xen/web/SrvDir.py tools/python/xen/web/__init__.py tools/python/xen/web/connection.py tools/python/xen/web/httpserver.py tools/python/xen/web/protocol.py tools/python/xen/web/reactor.py tools/python/xen/web/resource.py tools/python/xen/web/static.py tools/python/xen/web/tcp.py tools/python/xen/web/unix.py tools/python/xen/xend/Args.py tools/python/xen/xend/EventServer.py tools/python/xen/xend/PrettyPrint.py tools/python/xen/xend/Vifctl.py tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendClient.py tools/python/xen/xend/XendDB.py tools/python/xen/xend/XendDmesg.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/XendError.py tools/python/xen/xend/XendLogging.py tools/python/xen/xend/XendNode.py tools/python/xen/xend/XendProtocol.py tools/python/xen/xend/XendRoot.py tools/python/xen/xend/XendVnet.py tools/python/xen/xend/encode.py tools/python/xen/xend/image.py 
tools/python/xen/xend/scheduler.py tools/python/xen/xend/server/SrvDaemon.py tools/python/xen/xend/server/SrvDmesg.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xend/server/SrvDomainDir.py tools/python/xen/xend/server/SrvNode.py tools/python/xen/xend/server/SrvRoot.py tools/python/xen/xend/server/SrvServer.py tools/python/xen/xend/server/SrvVnetDir.py tools/python/xen/xend/server/SrvXendLog.py tools/python/xen/xend/server/blkif.py tools/python/xen/xend/server/channel.py tools/python/xen/xend/server/controller.py tools/python/xen/xend/server/event.py tools/python/xen/xend/server/messages.py tools/python/xen/xend/server/netif.py tools/python/xen/xend/server/params.py tools/python/xen/xend/server/pciif.py tools/python/xen/xend/server/relocate.py tools/python/xen/xend/sxp.py tools/python/xen/xend/uuid.py tools/python/xen/xend/xenstore/__init__.py tools/python/xen/xend/xenstore/xsnode.py tools/python/xen/xend/xenstore/xsobj.py tools/python/xen/xend/xenstore/xsresource.py tools/python/xen/xm/create.py tools/python/xen/xm/destroy.py tools/python/xen/xm/help.py tools/python/xen/xm/main.py tools/python/xen/xm/migrate.py tools/python/xen/xm/opts.py tools/python/xen/xm/shutdown.py tools/python/xen/xm/sysrq.py tools/security/secpol_tool.c tools/sv/Makefile tools/sv/images/destroy.png tools/sv/images/finish.png tools/sv/images/next.png tools/sv/images/pause.png tools/sv/images/previous.png tools/sv/images/reboot.png tools/sv/images/shutdown.png tools/sv/images/small-destroy.png tools/sv/images/small-pause.png tools/sv/images/small-unpause.png tools/sv/images/unpause.png tools/sv/images/xen.png tools/sv/inc/script.js tools/sv/inc/style.css tools/sv/index.psp tools/xcs/xcs.h tools/xcutils/xc_restore.c tools/xenstore/Makefile tools/xenstore/TODO tools/xenstore/testsuite/test.sh tools/xenstore/utils.c tools/xenstore/utils.h tools/xenstore/xenstored_core.c tools/xenstore/xenstored_core.h tools/xenstore/xenstored_domain.c tools/xenstore/xenstored_domain.h 
tools/xenstore/xenstored_transaction.c tools/xenstore/xenstored_transaction.h tools/xenstore/xenstored_watch.c tools/xenstore/xenstored_watch.h tools/xenstore/xs.c tools/xenstore/xs.h tools/xenstore/xs_lib.c tools/xenstore/xs_lib.h tools/xenstore/xs_random.c tools/xenstore/xs_test.c tools/xentrace/xentrace.c xen/Makefile xen/Rules.mk xen/acm/acm_core.c xen/acm/acm_policy.c xen/arch/ia64/Makefile xen/arch/ia64/Rules.mk xen/arch/ia64/asm-offsets.c xen/arch/ia64/asm-xsi-offsets.c xen/arch/ia64/dom_fw.c xen/arch/ia64/domain.c xen/arch/ia64/grant_table.c xen/arch/ia64/hypercall.c xen/arch/ia64/hyperprivop.S xen/arch/ia64/linux-xen/efi.c xen/arch/ia64/linux-xen/entry.S xen/arch/ia64/linux-xen/entry.h xen/arch/ia64/linux-xen/head.S xen/arch/ia64/linux-xen/irq_ia64.c xen/arch/ia64/linux-xen/mm_contig.c xen/arch/ia64/linux-xen/pal.S xen/arch/ia64/linux-xen/setup.c xen/arch/ia64/linux-xen/time.c xen/arch/ia64/linux-xen/tlb.c xen/arch/ia64/linux-xen/unaligned.c xen/arch/ia64/linux/cmdline.c xen/arch/ia64/linux/efi_stub.S xen/arch/ia64/linux/extable.c xen/arch/ia64/linux/hpsim.S xen/arch/ia64/linux/ia64_ksyms.c xen/arch/ia64/linux/irq_lsapic.c xen/arch/ia64/linux/lib/Makefile xen/arch/ia64/linux/lib/bitop.c xen/arch/ia64/linux/lib/carta_random.S xen/arch/ia64/linux/lib/checksum.c xen/arch/ia64/linux/lib/clear_page.S xen/arch/ia64/linux/lib/clear_user.S xen/arch/ia64/linux/lib/copy_page.S xen/arch/ia64/linux/lib/copy_page_mck.S xen/arch/ia64/linux/lib/copy_user.S xen/arch/ia64/linux/lib/csum_partial_copy.c xen/arch/ia64/linux/lib/dec_and_lock.c xen/arch/ia64/linux/lib/do_csum.S xen/arch/ia64/linux/lib/flush.S xen/arch/ia64/linux/lib/idiv32.S xen/arch/ia64/linux/lib/idiv64.S xen/arch/ia64/linux/lib/io.c xen/arch/ia64/linux/lib/ip_fast_csum.S xen/arch/ia64/linux/lib/memcpy.S xen/arch/ia64/linux/lib/memcpy_mck.S xen/arch/ia64/linux/lib/memset.S xen/arch/ia64/linux/lib/strlen.S xen/arch/ia64/linux/lib/strlen_user.S xen/arch/ia64/linux/lib/strncpy_from_user.S 
xen/arch/ia64/linux/lib/strnlen_user.S xen/arch/ia64/linux/lib/xor.S xen/arch/ia64/linux/linuxextable.c xen/arch/ia64/linux/machvec.c xen/arch/ia64/linux/minstate.h xen/arch/ia64/linux/patch.c xen/arch/ia64/linux/pcdp.h xen/arch/ia64/linux/sal.c xen/arch/ia64/mmio.c xen/arch/ia64/pal_emul.c xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c xen/arch/ia64/patch/linux-2.6.11/kregs.h xen/arch/ia64/pcdp.c xen/arch/ia64/process.c xen/arch/ia64/regionreg.c xen/arch/ia64/tools/mkbuildtree xen/arch/ia64/vcpu.c xen/arch/ia64/vlsapic.c xen/arch/ia64/vmmu.c xen/arch/ia64/vmx_hypercall.c xen/arch/ia64/vmx_ivt.S xen/arch/ia64/vmx_support.c xen/arch/ia64/vmx_vcpu.c xen/arch/ia64/vmx_virt.c xen/arch/ia64/vtlb.c xen/arch/ia64/xen.lds.S xen/arch/ia64/xenasm.S xen/arch/ia64/xenmem.c xen/arch/ia64/xenmisc.c xen/arch/ia64/xensetup.c xen/arch/ia64/xentime.c xen/arch/x86/Makefile xen/arch/x86/acpi/boot.c xen/arch/x86/apic.c xen/arch/x86/audit.c xen/arch/x86/cpu/common.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/genapic/es7000plat.c xen/arch/x86/i8259.c xen/arch/x86/io_apic.c xen/arch/x86/mm.c xen/arch/x86/mpparse.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/shadow32.c xen/arch/x86/shadow_public.c xen/arch/x86/smpboot.c xen/arch/x86/time.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/vmx_intercept.c xen/arch/x86/vmx_io.c xen/arch/x86/vmx_platform.c xen/arch/x86/vmx_vmcs.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c xen/common/ac_timer.c xen/common/dom0_ops.c xen/common/dom_mem_ops.c xen/common/domain.c xen/common/grant_table.c xen/common/page_alloc.c xen/common/perfc.c xen/common/sched_sedf.c xen/common/symbols.c xen/common/xmalloc.c xen/drivers/char/console.c xen/drivers/char/ns16550.c xen/include/acm/acm_core.h xen/include/acm/acm_hooks.h xen/include/asm-ia64/config.h xen/include/asm-ia64/domain.h 
xen/include/asm-ia64/event.h xen/include/asm-ia64/ia64_int.h xen/include/asm-ia64/linux-xen/asm/gcc_intrin.h xen/include/asm-ia64/linux-xen/asm/hpsim_ssc.h xen/include/asm-ia64/linux-xen/asm/ia64regs.h xen/include/asm-ia64/linux-xen/asm/io.h xen/include/asm-ia64/linux-xen/asm/kregs.h xen/include/asm-ia64/linux-xen/asm/mca_asm.h xen/include/asm-ia64/linux-xen/asm/page.h xen/include/asm-ia64/linux-xen/asm/pal.h xen/include/asm-ia64/linux-xen/asm/pgalloc.h xen/include/asm-ia64/linux-xen/asm/processor.h xen/include/asm-ia64/linux-xen/asm/ptrace.h xen/include/asm-ia64/linux-xen/asm/sn/sn_sal.h xen/include/asm-ia64/linux-xen/asm/system.h xen/include/asm-ia64/linux-xen/asm/types.h xen/include/asm-ia64/linux-xen/asm/uaccess.h xen/include/asm-ia64/linux-xen/linux/cpumask.h xen/include/asm-ia64/linux-xen/linux/hardirq.h xen/include/asm-ia64/linux-xen/linux/interrupt.h xen/include/asm-ia64/linux/asm-generic/bug.h xen/include/asm-ia64/linux/asm-generic/div64.h xen/include/asm-ia64/linux/asm-generic/errno-base.h xen/include/asm-ia64/linux/asm-generic/errno.h xen/include/asm-ia64/linux/asm-generic/ide_iops.h xen/include/asm-ia64/linux/asm-generic/iomap.h xen/include/asm-ia64/linux/asm-generic/pci-dma-compat.h xen/include/asm-ia64/linux/asm-generic/pci.h xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h xen/include/asm-ia64/linux/asm-generic/pgtable.h xen/include/asm-ia64/linux/asm-generic/sections.h xen/include/asm-ia64/linux/asm-generic/topology.h xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h xen/include/asm-ia64/linux/asm/acpi.h xen/include/asm-ia64/linux/asm/asmmacro.h xen/include/asm-ia64/linux/asm/atomic.h xen/include/asm-ia64/linux/asm/bitops.h xen/include/asm-ia64/linux/asm/break.h xen/include/asm-ia64/linux/asm/bug.h xen/include/asm-ia64/linux/asm/byteorder.h xen/include/asm-ia64/linux/asm/cache.h xen/include/asm-ia64/linux/asm/cacheflush.h xen/include/asm-ia64/linux/asm/checksum.h xen/include/asm-ia64/linux/asm/current.h 
xen/include/asm-ia64/linux/asm/delay.h xen/include/asm-ia64/linux/asm/desc.h xen/include/asm-ia64/linux/asm/div64.h xen/include/asm-ia64/linux/asm/dma-mapping.h xen/include/asm-ia64/linux/asm/dma.h xen/include/asm-ia64/linux/asm/errno.h xen/include/asm-ia64/linux/asm/fpu.h xen/include/asm-ia64/linux/asm/hardirq.h xen/include/asm-ia64/linux/asm/hdreg.h xen/include/asm-ia64/linux/asm/hw_irq.h xen/include/asm-ia64/linux/asm/ia32.h xen/include/asm-ia64/linux/asm/intrinsics.h xen/include/asm-ia64/linux/asm/ioctl.h xen/include/asm-ia64/linux/asm/irq.h xen/include/asm-ia64/linux/asm/linkage.h xen/include/asm-ia64/linux/asm/machvec.h xen/include/asm-ia64/linux/asm/machvec_hpsim.h xen/include/asm-ia64/linux/asm/mca.h xen/include/asm-ia64/linux/asm/meminit.h xen/include/asm-ia64/linux/asm/mman.h xen/include/asm-ia64/linux/asm/module.h xen/include/asm-ia64/linux/asm/numa.h xen/include/asm-ia64/linux/asm/param.h xen/include/asm-ia64/linux/asm/patch.h xen/include/asm-ia64/linux/asm/pci.h xen/include/asm-ia64/linux/asm/pdb.h xen/include/asm-ia64/linux/asm/percpu.h xen/include/asm-ia64/linux/asm/pgtable.h xen/include/asm-ia64/linux/asm/ptrace_offsets.h xen/include/asm-ia64/linux/asm/rse.h xen/include/asm-ia64/linux/asm/rwsem.h xen/include/asm-ia64/linux/asm/sal.h xen/include/asm-ia64/linux/asm/scatterlist.h xen/include/asm-ia64/linux/asm/sections.h xen/include/asm-ia64/linux/asm/semaphore.h xen/include/asm-ia64/linux/asm/setup.h xen/include/asm-ia64/linux/asm/sigcontext.h xen/include/asm-ia64/linux/asm/signal.h xen/include/asm-ia64/linux/asm/smp.h xen/include/asm-ia64/linux/asm/sn/arch.h xen/include/asm-ia64/linux/asm/sn/geo.h xen/include/asm-ia64/linux/asm/sn/nodepda.h xen/include/asm-ia64/linux/asm/sn/sn_cpuid.h xen/include/asm-ia64/linux/asm/spinlock.h xen/include/asm-ia64/linux/asm/string.h xen/include/asm-ia64/linux/asm/thread_info.h xen/include/asm-ia64/linux/asm/timex.h xen/include/asm-ia64/linux/asm/tlbflush.h xen/include/asm-ia64/linux/asm/topology.h 
xen/include/asm-ia64/linux/asm/unaligned.h xen/include/asm-ia64/linux/asm/unistd.h xen/include/asm-ia64/linux/asm/unwind.h xen/include/asm-ia64/linux/asm/ustack.h xen/include/asm-ia64/linux/bcd.h xen/include/asm-ia64/linux/bitmap.h xen/include/asm-ia64/linux/bitops.h xen/include/asm-ia64/linux/bootmem.h xen/include/asm-ia64/linux/byteorder/generic.h xen/include/asm-ia64/linux/byteorder/little_endian.h xen/include/asm-ia64/linux/byteorder/swab.h xen/include/asm-ia64/linux/cpu.h xen/include/asm-ia64/linux/device.h xen/include/asm-ia64/linux/dma-mapping.h xen/include/asm-ia64/linux/efi.h xen/include/asm-ia64/linux/err.h xen/include/asm-ia64/linux/file.h xen/include/asm-ia64/linux/gfp.h xen/include/asm-ia64/linux/initrd.h xen/include/asm-ia64/linux/ioport.h xen/include/asm-ia64/linux/jiffies.h xen/include/asm-ia64/linux/kernel_stat.h xen/include/asm-ia64/linux/kmalloc_sizes.h xen/include/asm-ia64/linux/linkage.h xen/include/asm-ia64/linux/linuxtime.h xen/include/asm-ia64/linux/mmzone.h xen/include/asm-ia64/linux/module.h xen/include/asm-ia64/linux/numa.h xen/include/asm-ia64/linux/page-flags.h xen/include/asm-ia64/linux/percpu.h xen/include/asm-ia64/linux/preempt.h xen/include/asm-ia64/linux/proc_fs.h xen/include/asm-ia64/linux/profile.h xen/include/asm-ia64/linux/ptrace.h xen/include/asm-ia64/linux/random.h xen/include/asm-ia64/linux/rbtree.h xen/include/asm-ia64/linux/rtc.h xen/include/asm-ia64/linux/rwsem.h xen/include/asm-ia64/linux/seq_file.h xen/include/asm-ia64/linux/seqlock.h xen/include/asm-ia64/linux/serial.h xen/include/asm-ia64/linux/serial_core.h xen/include/asm-ia64/linux/signal.h xen/include/asm-ia64/linux/slab.h xen/include/asm-ia64/linux/smp_lock.h xen/include/asm-ia64/linux/stddef.h xen/include/asm-ia64/linux/swap.h xen/include/asm-ia64/linux/thread_info.h xen/include/asm-ia64/linux/threads.h xen/include/asm-ia64/linux/timex.h xen/include/asm-ia64/linux/topology.h xen/include/asm-ia64/linux/tty.h xen/include/asm-ia64/linux/wait.h 
xen/include/asm-ia64/privop.h xen/include/asm-ia64/regionreg.h xen/include/asm-ia64/regs.h xen/include/asm-ia64/vcpu.h xen/include/asm-ia64/vmx.h xen/include/asm-ia64/vmx_uaccess.h xen/include/asm-ia64/vmx_vcpu.h xen/include/asm-ia64/vmx_vpd.h xen/include/asm-ia64/xensystem.h xen/include/asm-x86/apicdef.h xen/include/asm-x86/config.h xen/include/asm-x86/event.h xen/include/asm-x86/fixmap.h xen/include/asm-x86/genapic.h xen/include/asm-x86/hpet.h xen/include/asm-x86/io.h xen/include/asm-x86/mach-bigsmp/mach_apic.h xen/include/asm-x86/mach-default/mach_apic.h xen/include/asm-x86/mach-es7000/mach_apic.h xen/include/asm-x86/mach-generic/mach_apic.h xen/include/asm-x86/mach-summit/mach_apic.h xen/include/asm-x86/mach-summit/mach_mpparse.h xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/shadow.h xen/include/asm-x86/shadow_64.h xen/include/asm-x86/shadow_public.h xen/include/asm-x86/time.h xen/include/asm-x86/types.h xen/include/asm-x86/vmx.h xen/include/asm-x86/vmx_virpit.h xen/include/asm-x86/vmx_vmcs.h xen/include/asm-x86/x86_32/uaccess.h xen/include/asm-x86/x86_64/page.h xen/include/public/arch-ia64.h xen/include/public/dom0_ops.h xen/include/public/grant_table.h xen/include/public/io/blkif.h xen/include/public/io/domain_controller.h xen/include/public/io/netif.h xen/include/public/trace.h xen/include/public/xen.h xen/include/xen/ac_timer.h xen/include/xen/domain.h xen/include/xen/event.h xen/include/xen/grant_table.h xen/include/xen/mm.h xen/include/xen/perfc_defn.h xen/include/xen/sched.h xen/include/xen/serial.h xen/include/xen/symbols.h xen/include/xen/time.h xen/include/xen/trace.h xen/tools/Makefile xen/tools/symbols.c
line diff
     1.1 --- a/xen/arch/ia64/Makefile	Mon Aug 15 12:28:07 2005 -0800
     1.2 +++ b/xen/arch/ia64/Makefile	Tue Aug 16 12:15:23 2005 +0800
     1.3 @@ -1,5 +1,7 @@
     1.4  include $(BASEDIR)/Rules.mk
     1.5  
     1.6 +VPATH = linux linux-xen
     1.7 +
     1.8  # libs-y	+= arch/ia64/lib/lib.a
     1.9  
    1.10  OBJS = xensetup.o setup.o time.o irq.o ia64_ksyms.o process.o smp.o \
    1.11 @@ -75,7 +77,7 @@ xen.lds.s: xen.lds.S
    1.12  		-o xen.lds.s xen.lds.S
    1.13  
    1.14  ia64lib.o:
    1.15 -	$(MAKE) -C lib && cp lib/ia64lib.o .
    1.16 +	$(MAKE) -C linux/lib && cp linux/lib/ia64lib.o .
    1.17  
    1.18  clean:
    1.19  	rm -f *.o *~ core  xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s
     2.1 --- a/xen/arch/ia64/Rules.mk	Mon Aug 15 12:28:07 2005 -0800
     2.2 +++ b/xen/arch/ia64/Rules.mk	Tue Aug 16 12:15:23 2005 +0800
     2.3 @@ -6,14 +6,21 @@ ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
     2.4  CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
     2.5  endif
     2.6  AFLAGS  += -D__ASSEMBLY__
     2.7 -CPPFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64
     2.8 +CPPFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64 	\
     2.9 +             -I$(BASEDIR)/include/asm-ia64/linux 			\
    2.10 +	     -I$(BASEDIR)/include/asm-ia64/linux-xen 			\
    2.11 +             -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
    2.12 +
    2.13  CFLAGS  := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
    2.14  #CFLAGS  += -O3		# -O3 over-inlines making debugging tough!
    2.15  CFLAGS  += -O2		# but no optimization causes compile errors!
    2.16  #CFLAGS  += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE)
    2.17  CFLAGS  += -iwithprefix include -Wall
    2.18  CFLAGS  += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
    2.19 -CFLAGS  += -I$(BASEDIR)/include/asm-ia64
    2.20 +CFLAGS  += -I$(BASEDIR)/include/asm-ia64 -I$(BASEDIR)/include/asm-ia64/linux \
    2.21 +           -I$(BASEDIR)/include/asm-ia64/linux 				\
    2.22 +           -I$(BASEDIR)/include/asm-ia64/linux-xen 			\
    2.23 +           -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
    2.24  CFLAGS  += -Wno-pointer-arith -Wredundant-decls
    2.25  CFLAGS  += -DIA64 -DXEN -DLINUX_2_6
    2.26  CFLAGS	+= -ffixed-r13 -mfixed-range=f12-f15,f32-f127
     3.1 --- a/xen/arch/ia64/asm-offsets.c	Mon Aug 15 12:28:07 2005 -0800
     3.2 +++ b/xen/arch/ia64/asm-offsets.c	Tue Aug 16 12:15:23 2005 +0800
     3.3 @@ -139,14 +139,14 @@ void foo(void)
     3.4  	DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
     3.5  	DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
     3.6  #ifdef CONFIG_VTI
     3.7 -	DEFINE(IA64_PT_REGS_R4_OFFSET, offsetof (struct xen_regs, r4));
     3.8 -	DEFINE(IA64_PT_REGS_R5_OFFSET, offsetof (struct xen_regs, r5));
     3.9 -	DEFINE(IA64_PT_REGS_R6_OFFSET, offsetof (struct xen_regs, r6));
    3.10 -	DEFINE(IA64_PT_REGS_R7_OFFSET, offsetof (struct xen_regs, r7));
    3.11 -	DEFINE(IA64_PT_REGS_CR_IIPA_OFFSET, offsetof (struct xen_regs, cr_iipa));
    3.12 -	DEFINE(IA64_PT_REGS_CR_ISR_OFFSET, offsetof (struct xen_regs, cr_isr));
    3.13 -	DEFINE(IA64_PT_REGS_EML_UNAT_OFFSET, offsetof (struct xen_regs, eml_unat));
    3.14 -	DEFINE(IA64_PT_REGS_RFI_PFS_OFFSET, offsetof (struct xen_regs, rfi_pfs));
    3.15 +	DEFINE(IA64_PT_REGS_R4_OFFSET, offsetof (struct pt_regs, r4));
    3.16 +	DEFINE(IA64_PT_REGS_R5_OFFSET, offsetof (struct pt_regs, r5));
    3.17 +	DEFINE(IA64_PT_REGS_R6_OFFSET, offsetof (struct pt_regs, r6));
    3.18 +	DEFINE(IA64_PT_REGS_R7_OFFSET, offsetof (struct pt_regs, r7));
    3.19 +	DEFINE(IA64_PT_REGS_CR_IIPA_OFFSET, offsetof (struct pt_regs, cr_iipa));
    3.20 +	DEFINE(IA64_PT_REGS_CR_ISR_OFFSET, offsetof (struct pt_regs, cr_isr));
    3.21 +	DEFINE(IA64_PT_REGS_EML_UNAT_OFFSET, offsetof (struct pt_regs, eml_unat));
    3.22 +	DEFINE(IA64_PT_REGS_RFI_PFS_OFFSET, offsetof (struct pt_regs, rfi_pfs));
    3.23  	DEFINE(RFI_IIP_OFFSET, offsetof(struct vcpu, arch.arch_vmx.rfi_iip));
    3.24  	DEFINE(RFI_IPSR_OFFSET, offsetof(struct vcpu, arch.arch_vmx.rfi_ipsr));
    3.25  	DEFINE(RFI_IFS_OFFSET,offsetof(struct vcpu ,arch.arch_vmx.rfi_ifs));
     4.1 --- a/xen/arch/ia64/domain.c	Mon Aug 15 12:28:07 2005 -0800
     4.2 +++ b/xen/arch/ia64/domain.c	Tue Aug 16 12:15:23 2005 +0800
     4.3 @@ -450,7 +450,7 @@ void new_thread(struct vcpu *v,
     4.4                  unsigned long start_info)
     4.5  {
     4.6  	struct domain *d = v->domain;
     4.7 -	struct xen_regs *regs;
     4.8 +	struct pt_regs *regs;
     4.9  	struct ia64_boot_param *bp;
    4.10  	extern char saved_command_line[];
    4.11  	//char *dom0_cmdline = "BOOT_IMAGE=scsi0:\EFI\redhat\xenlinux nomca root=/dev/sdb1 ro";
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/xen/arch/ia64/linux-xen/efi.c	Tue Aug 16 12:15:23 2005 +0800
     5.3 @@ -0,0 +1,866 @@
     5.4 +/*
     5.5 + * Extensible Firmware Interface
     5.6 + *
     5.7 + * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
     5.8 + *
     5.9 + * Copyright (C) 1999 VA Linux Systems
    5.10 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
    5.11 + * Copyright (C) 1999-2003 Hewlett-Packard Co.
    5.12 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    5.13 + *	Stephane Eranian <eranian@hpl.hp.com>
    5.14 + *
    5.15 + * All EFI Runtime Services are not implemented yet as EFI only
    5.16 + * supports physical mode addressing on SoftSDV. This is to be fixed
    5.17 + * in a future version.  --drummond 1999-07-20
    5.18 + *
    5.19 + * Implemented EFI runtime services and virtual mode calls.  --davidm
    5.20 + *
    5.21 + * Goutham Rao: <goutham.rao@intel.com>
    5.22 + *	Skip non-WB memory and ignore empty memory ranges.
    5.23 + */
    5.24 +#include <linux/config.h>
    5.25 +#include <linux/module.h>
    5.26 +#include <linux/kernel.h>
    5.27 +#include <linux/init.h>
    5.28 +#include <linux/types.h>
    5.29 +#include <linux/time.h>
    5.30 +#include <linux/efi.h>
    5.31 +
    5.32 +#include <asm/io.h>
    5.33 +#include <asm/kregs.h>
    5.34 +#include <asm/meminit.h>
    5.35 +#include <asm/pgtable.h>
    5.36 +#include <asm/processor.h>
    5.37 +#include <asm/mca.h>
    5.38 +
    5.39 +#define EFI_DEBUG	0
    5.40 +
    5.41 +extern efi_status_t efi_call_phys (void *, ...);
    5.42 +
    5.43 +struct efi efi;
    5.44 +EXPORT_SYMBOL(efi);
    5.45 +static efi_runtime_services_t *runtime;
    5.46 +static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
    5.47 +
    5.48 +#define efi_call_virt(f, args...)	(*(f))(args)
    5.49 +
    5.50 +#define STUB_GET_TIME(prefix, adjust_arg)							  \
    5.51 +static efi_status_t										  \
    5.52 +prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc)						  \
    5.53 +{												  \
    5.54 +	struct ia64_fpreg fr[6];								  \
    5.55 +	efi_time_cap_t *atc = NULL;								  \
    5.56 +	efi_status_t ret;									  \
    5.57 +												  \
    5.58 +	if (tc)											  \
    5.59 +		atc = adjust_arg(tc);								  \
    5.60 +	ia64_save_scratch_fpregs(fr);								  \
    5.61 +	ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \
    5.62 +	ia64_load_scratch_fpregs(fr);								  \
    5.63 +	return ret;										  \
    5.64 +}
    5.65 +
    5.66 +#define STUB_SET_TIME(prefix, adjust_arg)							\
    5.67 +static efi_status_t										\
    5.68 +prefix##_set_time (efi_time_t *tm)								\
    5.69 +{												\
    5.70 +	struct ia64_fpreg fr[6];								\
    5.71 +	efi_status_t ret;									\
    5.72 +												\
    5.73 +	ia64_save_scratch_fpregs(fr);								\
    5.74 +	ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), adjust_arg(tm));	\
    5.75 +	ia64_load_scratch_fpregs(fr);								\
    5.76 +	return ret;										\
    5.77 +}
    5.78 +
    5.79 +#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg)						\
    5.80 +static efi_status_t										\
    5.81 +prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm)		\
    5.82 +{												\
    5.83 +	struct ia64_fpreg fr[6];								\
    5.84 +	efi_status_t ret;									\
    5.85 +												\
    5.86 +	ia64_save_scratch_fpregs(fr);								\
    5.87 +	ret = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time),	\
    5.88 +				adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm));	\
    5.89 +	ia64_load_scratch_fpregs(fr);								\
    5.90 +	return ret;										\
    5.91 +}
    5.92 +
    5.93 +#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg)						\
    5.94 +static efi_status_t										\
    5.95 +prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm)					\
    5.96 +{												\
    5.97 +	struct ia64_fpreg fr[6];								\
    5.98 +	efi_time_t *atm = NULL;									\
    5.99 +	efi_status_t ret;									\
   5.100 +												\
   5.101 +	if (tm)											\
   5.102 +		atm = adjust_arg(tm);								\
   5.103 +	ia64_save_scratch_fpregs(fr);								\
   5.104 +	ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time),	\
   5.105 +				enabled, atm);							\
   5.106 +	ia64_load_scratch_fpregs(fr);								\
   5.107 +	return ret;										\
   5.108 +}
   5.109 +
   5.110 +#define STUB_GET_VARIABLE(prefix, adjust_arg)						\
   5.111 +static efi_status_t									\
   5.112 +prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr,		\
   5.113 +		       unsigned long *data_size, void *data)				\
   5.114 +{											\
   5.115 +	struct ia64_fpreg fr[6];							\
   5.116 +	u32 *aattr = NULL;									\
   5.117 +	efi_status_t ret;								\
   5.118 +											\
   5.119 +	if (attr)									\
   5.120 +		aattr = adjust_arg(attr);						\
   5.121 +	ia64_save_scratch_fpregs(fr);							\
   5.122 +	ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable),	\
   5.123 +				adjust_arg(name), adjust_arg(vendor), aattr,		\
   5.124 +				adjust_arg(data_size), adjust_arg(data));		\
   5.125 +	ia64_load_scratch_fpregs(fr);							\
   5.126 +	return ret;									\
   5.127 +}
   5.128 +
   5.129 +#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg)						\
   5.130 +static efi_status_t										\
   5.131 +prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor)	\
   5.132 +{												\
   5.133 +	struct ia64_fpreg fr[6];								\
   5.134 +	efi_status_t ret;									\
   5.135 +												\
   5.136 +	ia64_save_scratch_fpregs(fr);								\
   5.137 +	ret = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable),	\
   5.138 +				adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor));	\
   5.139 +	ia64_load_scratch_fpregs(fr);								\
   5.140 +	return ret;										\
   5.141 +}
   5.142 +
   5.143 +#define STUB_SET_VARIABLE(prefix, adjust_arg)						\
   5.144 +static efi_status_t									\
   5.145 +prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr,	\
   5.146 +		       unsigned long data_size, void *data)				\
   5.147 +{											\
   5.148 +	struct ia64_fpreg fr[6];							\
   5.149 +	efi_status_t ret;								\
   5.150 +											\
   5.151 +	ia64_save_scratch_fpregs(fr);							\
   5.152 +	ret = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable),	\
   5.153 +				adjust_arg(name), adjust_arg(vendor), attr, data_size,	\
   5.154 +				adjust_arg(data));					\
   5.155 +	ia64_load_scratch_fpregs(fr);							\
   5.156 +	return ret;									\
   5.157 +}
   5.158 +
   5.159 +#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg)					\
   5.160 +static efi_status_t										\
   5.161 +prefix##_get_next_high_mono_count (u32 *count)							\
   5.162 +{												\
   5.163 +	struct ia64_fpreg fr[6];								\
   5.164 +	efi_status_t ret;									\
   5.165 +												\
   5.166 +	ia64_save_scratch_fpregs(fr);								\
   5.167 +	ret = efi_call_##prefix((efi_get_next_high_mono_count_t *)				\
   5.168 +				__va(runtime->get_next_high_mono_count), adjust_arg(count));	\
   5.169 +	ia64_load_scratch_fpregs(fr);								\
   5.170 +	return ret;										\
   5.171 +}
   5.172 +
   5.173 +#define STUB_RESET_SYSTEM(prefix, adjust_arg)					\
   5.174 +static void									\
   5.175 +prefix##_reset_system (int reset_type, efi_status_t status,			\
   5.176 +		       unsigned long data_size, efi_char16_t *data)		\
   5.177 +{										\
   5.178 +	struct ia64_fpreg fr[6];						\
   5.179 +	efi_char16_t *adata = NULL;						\
   5.180 +										\
   5.181 +	if (data)								\
   5.182 +		adata = adjust_arg(data);					\
   5.183 +										\
   5.184 +	ia64_save_scratch_fpregs(fr);						\
   5.185 +	efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system),	\
   5.186 +			  reset_type, status, data_size, adata);		\
   5.187 +	/* should not return, but just in case... */				\
   5.188 +	ia64_load_scratch_fpregs(fr);						\
   5.189 +}
   5.190 +
   5.191 +#define phys_ptr(arg)	((__typeof__(arg)) ia64_tpa(arg))
   5.192 +
   5.193 +STUB_GET_TIME(phys, phys_ptr)
   5.194 +STUB_SET_TIME(phys, phys_ptr)
   5.195 +STUB_GET_WAKEUP_TIME(phys, phys_ptr)
   5.196 +STUB_SET_WAKEUP_TIME(phys, phys_ptr)
   5.197 +STUB_GET_VARIABLE(phys, phys_ptr)
   5.198 +STUB_GET_NEXT_VARIABLE(phys, phys_ptr)
   5.199 +STUB_SET_VARIABLE(phys, phys_ptr)
   5.200 +STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr)
   5.201 +STUB_RESET_SYSTEM(phys, phys_ptr)
   5.202 +
   5.203 +#define id(arg)	arg
   5.204 +
   5.205 +STUB_GET_TIME(virt, id)
   5.206 +STUB_SET_TIME(virt, id)
   5.207 +STUB_GET_WAKEUP_TIME(virt, id)
   5.208 +STUB_SET_WAKEUP_TIME(virt, id)
   5.209 +STUB_GET_VARIABLE(virt, id)
   5.210 +STUB_GET_NEXT_VARIABLE(virt, id)
   5.211 +STUB_SET_VARIABLE(virt, id)
   5.212 +STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id)
   5.213 +STUB_RESET_SYSTEM(virt, id)
   5.214 +
   5.215 +void
   5.216 +efi_gettimeofday (struct timespec *ts)
   5.217 +{
   5.218 +	efi_time_t tm;
   5.219 +
   5.220 +	memset(ts, 0, sizeof(*ts));	/* clear the whole struct; sizeof(ts) is only the pointer size */
   5.221 +	if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS)
   5.222 +		return;			/* on failure *ts is left zeroed */
   5.223 +
   5.224 +	ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
   5.225 +	ts->tv_nsec = tm.nanosecond;
   5.226 +}
   5.227 +
   5.228 +static int
   5.229 +is_available_memory (efi_memory_desc_t *md)
   5.230 +{
   5.231 +	if (!(md->attribute & EFI_MEMORY_WB))
   5.232 +		return 0;
   5.233 +
   5.234 +	switch (md->type) {
   5.235 +	      case EFI_LOADER_CODE:
   5.236 +	      case EFI_LOADER_DATA:
   5.237 +	      case EFI_BOOT_SERVICES_CODE:
   5.238 +	      case EFI_BOOT_SERVICES_DATA:
   5.239 +	      case EFI_CONVENTIONAL_MEMORY:
   5.240 +		return 1;
   5.241 +	}
   5.242 +	return 0;
   5.243 +}
   5.244 +
   5.245 +/*
   5.246 + * Trim descriptor MD so it starts at address START_ADDR.  If the descriptor covers
   5.247 + * memory that is normally available to the kernel, issue a warning that some memory
   5.248 + * is being ignored.
   5.249 + */
   5.250 +static void
   5.251 +trim_bottom (efi_memory_desc_t *md, u64 start_addr)
   5.252 +{
   5.253 +	u64 num_skipped_pages;
   5.254 +
   5.255 +	if (md->phys_addr >= start_addr || !md->num_pages)
   5.256 +		return;
   5.257 +
   5.258 +	num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
   5.259 +	if (num_skipped_pages > md->num_pages)
   5.260 +		num_skipped_pages = md->num_pages;
   5.261 +
   5.262 +	if (is_available_memory(md))
   5.263 +		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
   5.264 +		       "at 0x%lx\n", __FUNCTION__,
   5.265 +		       (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
   5.266 +		       md->phys_addr, start_addr - IA64_GRANULE_SIZE);
   5.267 +	/*
   5.268 +	 * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
   5.269 +	 * descriptor list to become unsorted.  In such a case, md->num_pages will be
   5.270 +	 * zero, so the Right Thing will happen.
   5.271 +	 */
   5.272 +	md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
   5.273 +	md->num_pages -= num_skipped_pages;
   5.274 +}
   5.275 +
   5.276 +static void
   5.277 +trim_top (efi_memory_desc_t *md, u64 end_addr)
   5.278 +{
   5.279 +	u64 num_dropped_pages, md_end_addr;
   5.280 +
   5.281 +	md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
   5.282 +
   5.283 +	if (md_end_addr <= end_addr || !md->num_pages)
   5.284 +		return;
   5.285 +
   5.286 +	num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
   5.287 +	if (num_dropped_pages > md->num_pages)
   5.288 +		num_dropped_pages = md->num_pages;
   5.289 +
   5.290 +	if (is_available_memory(md))
   5.291 +		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
   5.292 +		       "at 0x%lx\n", __FUNCTION__,
   5.293 +		       (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
   5.294 +		       md->phys_addr, end_addr);
   5.295 +	md->num_pages -= num_dropped_pages;
   5.296 +}
   5.297 +
   5.298 +/*
   5.299 + * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
   5.300 + * has memory that is available for OS use.
   5.301 + */
   5.302 +void
   5.303 +efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
   5.304 +{
   5.305 +	int prev_valid = 0;
   5.306 +	struct range {
   5.307 +		u64 start;
   5.308 +		u64 end;
   5.309 +	} prev, curr;
   5.310 +	void *efi_map_start, *efi_map_end, *p, *q;
   5.311 +	efi_memory_desc_t *md, *check_md;
   5.312 +	u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
   5.313 +	unsigned long total_mem = 0;
   5.314 +
   5.315 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.316 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.317 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.318 +
   5.319 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   5.320 +		md = p;
   5.321 +
   5.322 +		/* skip over non-WB memory descriptors; that's all we're interested in... */
   5.323 +		if (!(md->attribute & EFI_MEMORY_WB))
   5.324 +			continue;
   5.325 +
   5.326 +#ifdef XEN
   5.327 +// this works around a problem in the ski bootloader
   5.328 +{
   5.329 +		extern long running_on_sim;
   5.330 +		if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY)
   5.331 +			continue;
   5.332 +}
   5.333 +// this is a temporary hack to avoid CONFIG_VIRTUAL_MEM_MAP
   5.334 +		if (md->phys_addr >= 0x100000000) continue;
   5.335 +#endif
   5.336 +		/*
   5.337 +		 * granule_addr is the base of md's first granule.
   5.338 +		 * [granule_addr - first_non_wb_addr) is guaranteed to
   5.339 +		 * be contiguous WB memory.
   5.340 +		 */
   5.341 +		granule_addr = GRANULEROUNDDOWN(md->phys_addr);
   5.342 +		first_non_wb_addr = max(first_non_wb_addr, granule_addr);
   5.343 +
   5.344 +		if (first_non_wb_addr < md->phys_addr) {
   5.345 +			trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
   5.346 +			granule_addr = GRANULEROUNDDOWN(md->phys_addr);
   5.347 +			first_non_wb_addr = max(first_non_wb_addr, granule_addr);
   5.348 +		}
   5.349 +
   5.350 +		for (q = p; q < efi_map_end; q += efi_desc_size) {
   5.351 +			check_md = q;
   5.352 +
   5.353 +			if ((check_md->attribute & EFI_MEMORY_WB) &&
   5.354 +			    (check_md->phys_addr == first_non_wb_addr))
   5.355 +				first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
   5.356 +			else
   5.357 +				break;		/* non-WB or hole */
   5.358 +		}
   5.359 +
   5.360 +		last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
   5.361 +		if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
   5.362 +			trim_top(md, last_granule_addr);
   5.363 +
   5.364 +		if (is_available_memory(md)) {
   5.365 +			if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
   5.366 +				if (md->phys_addr >= max_addr)
   5.367 +					continue;
   5.368 +				md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
   5.369 +				first_non_wb_addr = max_addr;
   5.370 +			}
   5.371 +
   5.372 +			if (total_mem >= mem_limit)
   5.373 +				continue;
   5.374 +
   5.375 +			if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
   5.376 +				unsigned long limit_addr = md->phys_addr;
   5.377 +
   5.378 +				limit_addr += mem_limit - total_mem;
   5.379 +				limit_addr = GRANULEROUNDDOWN(limit_addr);
   5.380 +
   5.381 +				if (md->phys_addr > limit_addr)
   5.382 +					continue;
   5.383 +
   5.384 +				md->num_pages = (limit_addr - md->phys_addr) >>
   5.385 +				                EFI_PAGE_SHIFT;
   5.386 +				first_non_wb_addr = max_addr = md->phys_addr +
   5.387 +				              (md->num_pages << EFI_PAGE_SHIFT);
   5.388 +			}
   5.389 +			total_mem += (md->num_pages << EFI_PAGE_SHIFT);
   5.390 +
   5.391 +			if (md->num_pages == 0)
   5.392 +				continue;
   5.393 +
   5.394 +			curr.start = PAGE_OFFSET + md->phys_addr;
   5.395 +			curr.end   = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
   5.396 +
   5.397 +			if (!prev_valid) {
   5.398 +				prev = curr;
   5.399 +				prev_valid = 1;
   5.400 +			} else {
   5.401 +				if (curr.start < prev.start)
   5.402 +					printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
   5.403 +
   5.404 +				if (prev.end == curr.start) {
   5.405 +					/* merge two consecutive memory ranges */
   5.406 +					prev.end = curr.end;
   5.407 +				} else {
   5.408 +					start = PAGE_ALIGN(prev.start);
   5.409 +					end = prev.end & PAGE_MASK;
   5.410 +					if ((end > start) && (*callback)(start, end, arg) < 0)
   5.411 +						return;
   5.412 +					prev = curr;
   5.413 +				}
   5.414 +			}
   5.415 +		}
   5.416 +	}
   5.417 +	if (prev_valid) {
   5.418 +		start = PAGE_ALIGN(prev.start);
   5.419 +		end = prev.end & PAGE_MASK;
   5.420 +		if (end > start)
   5.421 +			(*callback)(start, end, arg);
   5.422 +	}
   5.423 +}
   5.424 +
   5.425 +/*
   5.426 + * Look for the PAL_CODE region reported by EFI and map it using an
   5.427 + * ITR to enable safe PAL calls in virtual mode.  See IA-64 Processor
   5.428 + * Abstraction Layer chapter 11 in ADAG
   5.429 + */
   5.430 +
   5.431 +void *
   5.432 +efi_get_pal_addr (void)
   5.433 +{	/* returns __va() of the first usable EFI_PAL_CODE descriptor, or NULL if none qualifies */
   5.434 +	void *efi_map_start, *efi_map_end, *p;
   5.435 +	efi_memory_desc_t *md;
   5.436 +	u64 efi_desc_size;
   5.437 +	int pal_code_count = 0;
   5.438 +	u64 vaddr, mask;
   5.439 +
   5.440 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.441 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.442 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.443 +
   5.444 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   5.445 +		md = p;
   5.446 +		if (md->type != EFI_PAL_CODE)
   5.447 +			continue;
   5.448 +
   5.449 +		if (++pal_code_count > 1) {	/* only the first PAL code descriptor is considered */
   5.450 +			printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n",
   5.451 +			       md->phys_addr);
   5.452 +			continue;
   5.453 +		}
   5.454 +		/*
   5.455 +		 * The only ITLB entry in region 7 that is used is the one installed by
   5.456 +		 * __start().  That entry covers a 64MB range.
   5.457 +		 */
   5.458 +		mask  = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
   5.459 +		vaddr = PAGE_OFFSET + md->phys_addr;
   5.460 +
   5.461 +		/*
   5.462 +		 * We must check that the PAL mapping won't overlap with the kernel
   5.463 +		 * mapping.
   5.464 +		 *
   5.465 +		 * PAL code is guaranteed to be aligned on a power of 2 between 4k and
   5.466 +		 * 256KB and that only one ITR is needed to map it. This implies that the
   5.467 +		 * PAL code is always aligned on its size, i.e., the closest matching page
   5.468 +		 * size supported by the TLB. Therefore PAL code is guaranteed never to
   5.469 +		 * cross a 64MB unless it is bigger than 64MB (very unlikely!).  So for
   5.470 +		 * now the following test is enough to determine whether or not we need a
   5.471 +		 * dedicated ITR for the PAL code.
   5.472 +		 */
   5.473 +		if ((vaddr & mask) == (KERNEL_START & mask)) {
   5.474 +			printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
   5.475 +			       __FUNCTION__);
   5.476 +			continue;
   5.477 +		}
   5.478 +
   5.479 +		if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE)
   5.480 +			panic("Woah!  PAL code size bigger than a granule!");
   5.481 +
   5.482 +#if EFI_DEBUG
   5.483 +		mask  = ~((1 << IA64_GRANULE_SHIFT) - 1);
   5.484 +
   5.485 +		printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
   5.486 +			smp_processor_id(), md->phys_addr,
   5.487 +			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
   5.488 +			vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
   5.489 +#endif
   5.490 +		return __va(md->phys_addr);
   5.491 +	}
   5.492 +	printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n",	/* '\n' ends the log line */
   5.493 +	       __FUNCTION__);
   5.494 +	return NULL;
   5.495 +}
   5.496 +
   5.497 +void
   5.498 +efi_map_pal_code (void)
   5.499 +{
   5.500 +	void *pal_vaddr = efi_get_pal_addr ();
   5.501 +	u64 psr;
   5.502 +
   5.503 +	if (!pal_vaddr)
   5.504 +		return;
   5.505 +
   5.506 +	/*
   5.507 +	 * Cannot write to CRx with PSR.ic=1
   5.508 +	 */
   5.509 +	psr = ia64_clear_ic();
   5.510 +	ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr),
   5.511 +		 pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
   5.512 +		 IA64_GRANULE_SHIFT);
   5.513 +	ia64_set_psr(psr);		/* restore psr */
   5.514 +	ia64_srlz_i();
   5.515 +}
   5.516 +
   5.517 +void __init
   5.518 +efi_init (void)
   5.519 +{
   5.520 +	void *efi_map_start, *efi_map_end;
   5.521 +	efi_config_table_t *config_tables;
   5.522 +	efi_char16_t *c16;
   5.523 +	u64 efi_desc_size;
   5.524 +	char *cp, *end, vendor[100] = "unknown";
   5.525 +	extern char saved_command_line[];
   5.526 +	int i;
   5.527 +
   5.528 +	/* it's too early to be able to use the standard kernel command line support... */
   5.529 +	for (cp = saved_command_line; *cp; ) {
   5.530 +		if (memcmp(cp, "mem=", 4) == 0) {
   5.531 +			cp += 4;
   5.532 +			mem_limit = memparse(cp, &end);
   5.533 +			/* Do not stop scanning here: a "max_addr=" option */
   5.534 +			/* later on the command line must still be honoured. */
   5.535 +			cp = end;
   5.536 +		} else if (memcmp(cp, "max_addr=", 9) == 0) {
   5.537 +			cp += 9;
   5.538 +			max_addr = GRANULEROUNDDOWN(memparse(cp, &end));
   5.539 +			/* Do not stop scanning here: a "mem=" option */
   5.540 +			/* later on the command line must still be honoured. */
   5.541 +			cp = end;
   5.542 +		} else {
   5.543 +			while (*cp != ' ' && *cp)
   5.544 +				++cp;
   5.545 +			while (*cp == ' ')
   5.546 +				++cp;
   5.547 +		}
   5.548 +	}
   5.549 +	if (max_addr != ~0UL)
   5.550 +		printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
   5.551 +
   5.552 +	efi.systab = __va(ia64_boot_param->efi_systab);
   5.553 +
   5.554 +	/*
   5.555 +	 * Verify the EFI Table
   5.556 +	 */
   5.557 +	if (efi.systab == NULL)
   5.558 +		panic("Woah! Can't find EFI system table.\n");
   5.559 +	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
   5.560 +		panic("Woah! EFI system table signature incorrect\n");
   5.561 +	if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
   5.562 +		printk(KERN_WARNING "Warning: EFI system table major version mismatch: "
   5.563 +		       "got %d.%02d, expected %d.%02d\n",
   5.564 +		       efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
   5.565 +		       EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
   5.566 +
   5.567 +	config_tables = __va(efi.systab->tables);
   5.568 +
   5.569 +	/* Show what we know for posterity */
   5.570 +	c16 = __va(efi.systab->fw_vendor);
   5.571 +	if (c16) {
   5.572 +		for (i = 0;i < (int) sizeof(vendor) - 1 && *c16; ++i)	/* leave room for the NUL */
   5.573 +			vendor[i] = *c16++;
   5.574 +		vendor[i] = '\0';
   5.575 +	}
   5.576 +
   5.577 +	printk(KERN_INFO "EFI v%u.%.02u by %s:",
   5.578 +	       efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
   5.579 +
   5.580 +	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
   5.581 +		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
   5.582 +			efi.mps = __va(config_tables[i].table);
   5.583 +			printk(" MPS=0x%lx", config_tables[i].table);
   5.584 +		} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
   5.585 +			efi.acpi20 = __va(config_tables[i].table);
   5.586 +			printk(" ACPI 2.0=0x%lx", config_tables[i].table);
   5.587 +		} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
   5.588 +			efi.acpi = __va(config_tables[i].table);
   5.589 +			printk(" ACPI=0x%lx", config_tables[i].table);
   5.590 +		} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
   5.591 +			efi.smbios = __va(config_tables[i].table);
   5.592 +			printk(" SMBIOS=0x%lx", config_tables[i].table);
   5.593 +		} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
   5.594 +			efi.sal_systab = __va(config_tables[i].table);
   5.595 +			printk(" SALsystab=0x%lx", config_tables[i].table);
   5.596 +		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
   5.597 +			efi.hcdp = __va(config_tables[i].table);
   5.598 +			printk(" HCDP=0x%lx", config_tables[i].table);
   5.599 +		}
   5.600 +	}
   5.601 +	printk("\n");
   5.602 +
   5.603 +	runtime = __va(efi.systab->runtime);
   5.604 +	efi.get_time = phys_get_time;
   5.605 +	efi.set_time = phys_set_time;
   5.606 +	efi.get_wakeup_time = phys_get_wakeup_time;
   5.607 +	efi.set_wakeup_time = phys_set_wakeup_time;
   5.608 +	efi.get_variable = phys_get_variable;
   5.609 +	efi.get_next_variable = phys_get_next_variable;
   5.610 +	efi.set_variable = phys_set_variable;
   5.611 +	efi.get_next_high_mono_count = phys_get_next_high_mono_count;
   5.612 +	efi.reset_system = phys_reset_system;
   5.613 +
   5.614 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.615 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.616 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.617 +
   5.618 +#if EFI_DEBUG
   5.619 +	/* print EFI memory map: */
   5.620 +	{
   5.621 +		efi_memory_desc_t *md;
   5.622 +		void *p;
   5.623 +
   5.624 +		for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
   5.625 +			md = p;
   5.626 +			printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
   5.627 +			       i, md->type, md->attribute, md->phys_addr,
   5.628 +			       md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
   5.629 +			       md->num_pages >> (20 - EFI_PAGE_SHIFT));
   5.630 +		}
   5.631 +	}
   5.632 +#endif
   5.633 +
   5.634 +	efi_map_pal_code();
   5.635 +	efi_enter_virtual_mode();
   5.636 +}
   5.637 +
   5.638 +void
   5.639 +efi_enter_virtual_mode (void)
   5.640 +{
   5.641 +	void *efi_map_start, *efi_map_end, *p;
   5.642 +	efi_memory_desc_t *md;
   5.643 +	efi_status_t status;
   5.644 +	u64 efi_desc_size;
   5.645 +
   5.646 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.647 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.648 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.649 +
   5.650 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   5.651 +		md = p;
   5.652 +		if (md->attribute & EFI_MEMORY_RUNTIME) {
   5.653 +			/*
   5.654 +			 * Some descriptors have multiple bits set, so the order of
   5.655 +			 * the tests is relevant.
   5.656 +			 */
   5.657 +			if (md->attribute & EFI_MEMORY_WB) {
   5.658 +				md->virt_addr = (u64) __va(md->phys_addr);
   5.659 +			} else if (md->attribute & EFI_MEMORY_UC) {
   5.660 +				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
   5.661 +			} else if (md->attribute & EFI_MEMORY_WC) {
   5.662 +#if 0
   5.663 +				md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
   5.664 +									   | _PAGE_D
   5.665 +									   | _PAGE_MA_WC
   5.666 +									   | _PAGE_PL_0
   5.667 +									   | _PAGE_AR_RW));
   5.668 +#else
   5.669 +				printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
   5.670 +				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
   5.671 +#endif
   5.672 +			} else if (md->attribute & EFI_MEMORY_WT) {
   5.673 +#if 0
   5.674 +				md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
   5.675 +									   | _PAGE_D | _PAGE_MA_WT
   5.676 +									   | _PAGE_PL_0
   5.677 +									   | _PAGE_AR_RW));
   5.678 +#else
   5.679 +				printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
   5.680 +				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
   5.681 +#endif
   5.682 +			}
   5.683 +		}
   5.684 +	}
   5.685 +
   5.686 +	status = efi_call_phys(__va(runtime->set_virtual_address_map),
   5.687 +			       ia64_boot_param->efi_memmap_size,
   5.688 +			       efi_desc_size, ia64_boot_param->efi_memdesc_version,
   5.689 +			       ia64_boot_param->efi_memmap);
   5.690 +	if (status != EFI_SUCCESS) {
   5.691 +		printk(KERN_WARNING "warning: unable to switch EFI into virtual mode "
   5.692 +		       "(status=%lu)\n", status);
   5.693 +		return;
   5.694 +	}
   5.695 +
   5.696 +	/*
   5.697 +	 * Now that EFI is in virtual mode, we call the EFI functions more efficiently:
   5.698 +	 */
   5.699 +	efi.get_time = virt_get_time;
   5.700 +	efi.set_time = virt_set_time;
   5.701 +	efi.get_wakeup_time = virt_get_wakeup_time;
   5.702 +	efi.set_wakeup_time = virt_set_wakeup_time;
   5.703 +	efi.get_variable = virt_get_variable;
   5.704 +	efi.get_next_variable = virt_get_next_variable;
   5.705 +	efi.set_variable = virt_set_variable;
   5.706 +	efi.get_next_high_mono_count = virt_get_next_high_mono_count;
   5.707 +	efi.reset_system = virt_reset_system;
   5.708 +}
   5.709 +
   5.710 +/*
   5.711 + * Walk the EFI memory map looking for the I/O port range.  There can only be one entry of
   5.712 + * this type, other I/O port ranges should be described via ACPI.
   5.713 + */
   5.714 +u64
   5.715 +efi_get_iobase (void)
   5.716 +{
   5.717 +	void *efi_map_start, *efi_map_end, *p;
   5.718 +	efi_memory_desc_t *md;
   5.719 +	u64 efi_desc_size;
   5.720 +
   5.721 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.722 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.723 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.724 +
   5.725 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   5.726 +		md = p;
   5.727 +		if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
   5.728 +			if (md->attribute & EFI_MEMORY_UC)
   5.729 +				return md->phys_addr;
   5.730 +		}
   5.731 +	}
   5.732 +	return 0;
   5.733 +}
   5.734 +
   5.735 +#ifdef XEN
   5.736 +// variation of efi_get_iobase which returns entire memory descriptor
   5.737 +efi_memory_desc_t *
   5.738 +efi_get_io_md (void)
   5.739 +{
   5.740 +	void *efi_map_start, *efi_map_end, *p;
   5.741 +	efi_memory_desc_t *md;
   5.742 +	u64 efi_desc_size;
   5.743 +
   5.744 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.745 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.746 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.747 +
   5.748 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   5.749 +		md = p;
   5.750 +		if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
   5.751 +			if (md->attribute & EFI_MEMORY_UC)
   5.752 +				return md;
   5.753 +		}
   5.754 +	}
   5.755 +	return 0;
   5.756 +}
   5.757 +#endif
   5.758 +
   5.759 +u32
   5.760 +efi_mem_type (unsigned long phys_addr)
   5.761 +{
   5.762 +	void *efi_map_start, *efi_map_end, *p;
   5.763 +	efi_memory_desc_t *md;
   5.764 +	u64 efi_desc_size;
   5.765 +
   5.766 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.767 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.768 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.769 +
   5.770 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   5.771 +		md = p;
   5.772 +
   5.773 +		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
   5.774 +			 return md->type;
   5.775 +	}
   5.776 +	return 0;
   5.777 +}
   5.778 +
   5.779 +u64
   5.780 +efi_mem_attributes (unsigned long phys_addr)
   5.781 +{
   5.782 +	void *efi_map_start, *efi_map_end, *p;
   5.783 +	efi_memory_desc_t *md;
   5.784 +	u64 efi_desc_size;
   5.785 +
   5.786 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.787 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.788 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.789 +
   5.790 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   5.791 +		md = p;
   5.792 +
   5.793 +		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
   5.794 +			return md->attribute;
   5.795 +	}
   5.796 +	return 0;
   5.797 +}
   5.798 +EXPORT_SYMBOL(efi_mem_attributes);
   5.799 +
   5.800 +int
   5.801 +valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
   5.802 +{
   5.803 +	void *efi_map_start, *efi_map_end, *p;
   5.804 +	efi_memory_desc_t *md;
   5.805 +	u64 efi_desc_size;
   5.806 +
   5.807 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   5.808 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   5.809 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   5.810 +
   5.811 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   5.812 +		md = p;
   5.813 +
   5.814 +		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) {
   5.815 +			if (!(md->attribute & EFI_MEMORY_WB))
   5.816 +				return 0;
   5.817 +
   5.818 +			if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr)
   5.819 +				*size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr;
   5.820 +			return 1;
   5.821 +		}
   5.822 +	}
   5.823 +	return 0;
   5.824 +}
   5.825 +
   5.826 +int __init
   5.827 +efi_uart_console_only(void)
   5.828 +{	/* returns 1 only if every device path in the EFI "ConOut" variable contains a UART node */
   5.829 +	efi_status_t status;
   5.830 +	char *s, name[] = "ConOut";
   5.831 +	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
   5.832 +	efi_char16_t *utf16, name_utf16[32];
   5.833 +	unsigned char data[1024];
   5.834 +	unsigned long size = sizeof(data);
   5.835 +	struct efi_generic_dev_path *hdr, *end_addr;
   5.836 +	int uart = 0;
   5.837 +
   5.838 +	/* Convert to UTF-16 */
   5.839 +	utf16 = name_utf16;
   5.840 +	s = name;
   5.841 +	while (*s)
   5.842 +		*utf16++ = *s++ & 0x7f;
   5.843 +	*utf16 = 0;
   5.844 +
   5.845 +	status = efi.get_variable(name_utf16, &guid, NULL, &size, data);
   5.846 +	if (status != EFI_SUCCESS) {
   5.847 +		printk(KERN_ERR "No EFI %s variable?\n", name);
   5.848 +		return 0;
   5.849 +	}
   5.850 +
   5.851 +	hdr = (struct efi_generic_dev_path *) data;
   5.852 +	end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size);
   5.853 +	while (hdr < end_addr && hdr->length >= sizeof(*hdr)) {	/* length 0 would never advance hdr */
   5.854 +		if (hdr->type == EFI_DEV_MSG &&
   5.855 +		    hdr->sub_type == EFI_DEV_MSG_UART)
   5.856 +			uart = 1;
   5.857 +		else if (hdr->type == EFI_DEV_END_PATH ||
   5.858 +			  hdr->type == EFI_DEV_END_PATH2) {
   5.859 +			if (!uart)
   5.860 +				return 0;	/* this path ended without a UART node */
   5.861 +			if (hdr->sub_type == EFI_DEV_END_ENTIRE)
   5.862 +				return 1;	/* end of all paths, each one had a UART node */
   5.863 +			uart = 0;		/* start checking the next path instance */
   5.864 +		}
   5.865 +		hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length);
   5.866 +	}
   5.867 +	printk(KERN_ERR "Malformed %s value\n", name);
   5.868 +	return 0;
   5.869 +}
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/xen/arch/ia64/linux-xen/entry.S	Tue Aug 16 12:15:23 2005 +0800
     6.3 @@ -0,0 +1,1653 @@
     6.4 +/*
     6.5 + * ia64/kernel/entry.S
     6.6 + *
     6.7 + * Kernel entry points.
     6.8 + *
     6.9 + * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
    6.10 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    6.11 + * Copyright (C) 1999, 2002-2003
    6.12 + *	Asit Mallick <Asit.K.Mallick@intel.com>
    6.13 + * 	Don Dugger <Don.Dugger@intel.com>
    6.14 + *	Suresh Siddha <suresh.b.siddha@intel.com>
    6.15 + *	Fenghua Yu <fenghua.yu@intel.com>
    6.16 + * Copyright (C) 1999 VA Linux Systems
    6.17 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
    6.18 + */
    6.19 +/*
    6.20 + * ia64_switch_to now places correct virtual mapping in TR2 for
    6.21 + * kernel stack. This allows us to handle interrupts without changing
    6.22 + * to physical mode.
    6.23 + *
    6.24 + * Jonathan Nicklin	<nicklin@missioncriticallinux.com>
    6.25 + * Patrick O'Rourke	<orourke@missioncriticallinux.com>
    6.26 + * 11/07/2000
    6.27 + */
    6.28 +/*
    6.29 + * Global (preserved) predicate usage on syscall entry/exit path:
    6.30 + *
    6.31 + *	pKStk:		See entry.h.
    6.32 + *	pUStk:		See entry.h.
    6.33 + *	pSys:		See entry.h.
    6.34 + *	pNonSys:	!pSys
    6.35 + */
    6.36 +
    6.37 +#include <linux/config.h>
    6.38 +
    6.39 +#include <asm/asmmacro.h>
    6.40 +#include <asm/cache.h>
    6.41 +#include <asm/errno.h>
    6.42 +#include <asm/kregs.h>
    6.43 +#include <asm/offsets.h>
    6.44 +#include <asm/pgtable.h>
    6.45 +#include <asm/percpu.h>
    6.46 +#include <asm/processor.h>
    6.47 +#include <asm/thread_info.h>
    6.48 +#include <asm/unistd.h>
    6.49 +
    6.50 +#include "minstate.h"
    6.51 +
    6.52 +#ifndef XEN
    6.53 +	/*
    6.54 +	 * execve() is special because in case of success, we need to
    6.55 +	 * setup a null register window frame.
    6.56 +	 */
    6.57 +ENTRY(ia64_execve)
    6.58 +	/*
    6.59 +	 * Allocate 8 input registers since ptrace() may clobber them
    6.60 +	 */
    6.61 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
    6.62 +	alloc loc1=ar.pfs,8,2,4,0	// 8 in, 2 local, 4 out; loc1 = caller's ar.pfs
    6.63 +	mov loc0=rp			// loc0 = return address, put back into rp after the call
    6.64 +	.body
    6.65 +	mov out0=in0			// filename
    6.66 +	;;				// stop bit between alloc and call
    6.67 +	mov out1=in1			// argv
    6.68 +	mov out2=in2			// envp
    6.69 +	add out3=16,sp			// regs
    6.70 +	br.call.sptk.many rp=sys_execve	// result is examined in r8 below
    6.71 +.ret0:
    6.72 +#ifdef CONFIG_IA32_SUPPORT
    6.73 +	/*
    6.74 +	 * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
    6.75 +	 * from pt_regs.
    6.76 +	 */
    6.77 +	adds r16=PT(CR_IPSR)+16,sp	// r16 = &pt_regs.cr_ipsr
    6.78 +	;;
    6.79 +	ld8 r16=[r16]			// r16 = saved cr.ipsr; its IS bit is tested further down
    6.80 +#endif
    6.81 +	cmp4.ge p6,p7=r8,r0		// p6 = success (32-bit r8 >= 0), p7 = failure
    6.82 +	mov ar.pfs=loc1			// restore ar.pfs
    6.83 +	sxt4 r8=r8			// return 64-bit result
    6.84 +	;;
    6.85 +	stf.spill [sp]=f0		// park f0 at [sp]; the ldf.fill's below reload from here
    6.86 +(p6)	cmp.ne pKStk,pUStk=r0,r0	// a successful execve() lands us in user-mode...
    6.87 +	mov rp=loc0			// restore return address
    6.88 +(p6)	mov ar.pfs=r0			// clear ar.pfs on success
    6.89 +(p7)	br.ret.sptk.many rp		// failure: return now, skipping the register zapping
    6.90 +
    6.91 +	/*
    6.92 +	 * In theory, we'd have to zap this state only to prevent leaking of
    6.93 +	 * security sensitive state (e.g., if current->mm->dumpable is zero).  However,
    6.94 +	 * this executes in less than 20 cycles even on Itanium, so it's not worth
    6.95 +	 * optimizing for...).
    6.96 +	 */
    6.97 +	mov ar.unat=0; 		mov ar.lc=0
    6.98 +	mov r4=0;		mov f2=f0;		mov b1=r0
    6.99 +	mov r5=0;		mov f3=f0;		mov b2=r0
   6.100 +	mov r6=0;		mov f4=f0;		mov b3=r0
   6.101 +	mov r7=0;		mov f5=f0;		mov b4=r0
   6.102 +	ldf.fill f12=[sp];	mov f13=f0;		mov b5=r0
   6.103 +	ldf.fill f14=[sp];	ldf.fill f15=[sp];	mov f16=f0
   6.104 +	ldf.fill f17=[sp];	ldf.fill f18=[sp];	mov f19=f0
   6.105 +	ldf.fill f20=[sp];	ldf.fill f21=[sp];	mov f22=f0
   6.106 +	ldf.fill f23=[sp];	ldf.fill f24=[sp];	mov f25=f0
   6.107 +	ldf.fill f26=[sp];	ldf.fill f27=[sp];	mov f28=f0
   6.108 +	ldf.fill f29=[sp];	ldf.fill f30=[sp];	mov f31=f0
   6.109 +#ifdef CONFIG_IA32_SUPPORT
   6.110 +	tbit.nz p6,p0=r16, IA64_PSR_IS_BIT	// p6 = IS bit set in saved ipsr (returning to ia32 mode)
   6.111 +	movl loc0=ia64_ret_from_ia32_execve
   6.112 +	;;
   6.113 +(p6)	mov rp=loc0			// ia32: return through ia64_ret_from_ia32_execve instead
   6.114 +#endif
   6.115 +	br.ret.sptk.many rp
   6.116 +END(ia64_execve)
   6.117 +
   6.118 +/*
   6.119 + * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
   6.120 + *	      u64 tls)
   6.121 + */
   6.122 +GLOBAL_ENTRY(sys_clone2)
   6.123 +	/*
   6.124 +	 * Allocate 8 input registers since ptrace() may clobber them
   6.125 +	 */
   6.126 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
   6.127 +	alloc r16=ar.pfs,8,2,6,0		// 8 in, 2 local, 6 out; r16 = caller's ar.pfs
   6.128 +	DO_SAVE_SWITCH_STACK
   6.129 +	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp	// r2 = &pt_regs.r16 (pt_regs sits above the switch stack)
   6.130 +	mov loc0=rp				// save return address across do_fork
   6.131 +	mov loc1=r16				// save ar.pfs across do_fork
   6.132 +	.body
   6.133 +	mov out1=in1				// out1 = ustack_base
   6.134 +	mov out3=in2				// out3 = ustack_size
   6.135 +	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT	// p6 = CLONE_SETTLS bit set in flags
   6.136 +	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
   6.137 +	;;
   6.138 +(p6)	st8 [r2]=in5				// store TLS in pt_regs.r16 for copy_thread()
   6.139 +	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
   6.140 +	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
   6.141 +	mov out0=in0				// out0 = clone_flags
   6.142 +	br.call.sptk.many rp=do_fork
   6.143 +.ret1:	.restore sp
   6.144 +	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
   6.145 +	mov ar.pfs=loc1				// restore caller's ar.pfs
   6.146 +	mov rp=loc0				// restore return address
   6.147 +	br.ret.sptk.many rp
   6.148 +END(sys_clone2)
   6.149 +
   6.150 +/*
   6.151 + * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
   6.152 + *	Deprecated.  Use sys_clone2() instead.
   6.153 + */
   6.154 +GLOBAL_ENTRY(sys_clone)
   6.155 +	/*
   6.156 +	 * Allocate 8 input registers since ptrace() may clobber them
   6.157 +	 */
   6.158 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
   6.159 +	alloc r16=ar.pfs,8,2,6,0		// 8 in, 2 local, 6 out; r16 = caller's ar.pfs
   6.160 +	DO_SAVE_SWITCH_STACK
   6.161 +	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp	// r2 = &pt_regs.r16 (pt_regs sits above the switch stack)
   6.162 +	mov loc0=rp				// save return address across do_fork
   6.163 +	mov loc1=r16				// save ar.pfs across do_fork
   6.164 +	.body
   6.165 +	mov out1=in1				// out1 = ustack_base
   6.166 +	mov out3=16				// stacksize (compensates for 16-byte scratch area)
   6.167 +	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT	// p6 = CLONE_SETTLS bit set in flags
   6.168 +	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
   6.169 +	;;
   6.170 +(p6)	st8 [r2]=in4				// store TLS in pt_regs.r16 slot (r2, set above); NOTE(review): presumably becomes the child's tp later - confirm in copy_thread()
   6.171 +	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
   6.172 +	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
   6.173 +	mov out0=in0				// out0 = clone_flags
   6.174 +	br.call.sptk.many rp=do_fork
   6.175 +.ret2:	.restore sp
   6.176 +	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
   6.177 +	mov ar.pfs=loc1				// restore caller's ar.pfs
   6.178 +	mov rp=loc0				// restore return address
   6.179 +	br.ret.sptk.many rp
   6.180 +END(sys_clone)
   6.181 +#endif /* !XEN */
   6.182 +
   6.183 +/*
   6.184 + * prev_task <- ia64_switch_to(struct task_struct *next)
   6.185 + *	With Ingo's new scheduler, interrupts are disabled when this routine gets
   6.186 + *	called.  The code starting at .map relies on this.  The rest of the code
   6.187 + *	doesn't care about the interrupt masking status.
   6.188 + */
   6.189 +GLOBAL_ENTRY(ia64_switch_to)
   6.190 +	.prologue
   6.191 +	alloc r16=ar.pfs,1,0,0,0	// 1 input (in0 = next task); r16 = ar.pfs
   6.192 +	DO_SAVE_SWITCH_STACK
   6.193 +	.body
   6.194 +
   6.195 +	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13	// r22 = address of the old task's saved ksp (r13 = current)
   6.196 +	movl r25=init_task
   6.197 +	mov r27=IA64_KR(CURRENT_STACK)	// r27 = last granule recorded as mapped (see .map)
   6.198 +	adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0	// r21 = address of the new task's saved ksp
   6.199 +#ifdef XEN
   6.200 +	dep r20=0,in0,60,4		// physical address of "next"
   6.201 +#else
   6.202 +	dep r20=0,in0,61,3		// physical address of "next"
   6.203 +#endif
   6.204 +	;;
   6.205 +	st8 [r22]=sp			// save kernel stack pointer of old task
   6.206 +	shr.u r26=r20,IA64_GRANULE_SHIFT	// r26 = granule number of "next"
   6.207 +	cmp.eq p7,p6=r25,in0		// next == init_task?  then p6=0 and no mapping is attempted
   6.208 +	;;
   6.209 +	/*
   6.210 +	 * If we've already mapped this task's page, we can skip doing it again.
   6.211 +	 */
   6.212 +(p6)	cmp.eq p7,p6=r26,r27		// p6 stays set only if the granule differs from the recorded one
   6.213 +(p6)	br.cond.dpnt .map
   6.214 +	;;
   6.215 +.done:
   6.216 +(p6)	ssm psr.ic			// if we had to map, reenable the psr.ic bit FIRST!!!
   6.217 +	;;
   6.218 +(p6)	srlz.d
   6.219 +	ld8 sp=[r21]			// load kernel stack pointer of new task
   6.220 +	mov IA64_KR(CURRENT)=in0	// update "current" application register
   6.221 +	mov r8=r13			// return pointer to previously running task
   6.222 +	mov r13=in0			// set "current" pointer
   6.223 +	;;
   6.224 +	DO_LOAD_SWITCH_STACK
   6.225 +
   6.226 +#ifdef CONFIG_SMP
   6.227 +	sync.i				// ensure "fc"s done by this CPU are visible on other CPUs
   6.228 +#endif
   6.229 +	br.ret.sptk.many rp		// boogie on out in new context
   6.230 +
   6.231 +.map:
   6.232 +#ifdef XEN
   6.233 +	// avoid overlapping with kernel TR
   6.234 +	movl r25=KERNEL_START
   6.235 +	dep  r23=0,in0,0,KERNEL_TR_PAGE_SHIFT	// r23 = in0 with its low KERNEL_TR_PAGE_SHIFT bits cleared
   6.236 +	;;
   6.237 +	cmp.eq p7,p0=r25,r23		// p7 = "next" lies in the page starting at KERNEL_START
   6.238 +	;;
   6.239 +(p7)	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
   6.240 +(p7)	br.cond.sptk .done		// ...and skip inserting a new translation
   6.241 +#endif
   6.242 +	rsm psr.ic			// interrupts (psr.i) are already disabled here
   6.243 +	movl r25=PAGE_KERNEL
   6.244 +	;;
   6.245 +	srlz.d
   6.246 +	or r23=r25,r20			// construct PA | page properties
   6.247 +	mov r25=IA64_GRANULE_SHIFT<<2	// page-size value for cr.itir
   6.248 +	;;
   6.249 +	mov cr.itir=r25
   6.250 +	mov cr.ifa=in0			// VA of next task...
   6.251 +	;;
   6.252 +	mov r25=IA64_TR_CURRENT_STACK	// translation register slot to use
   6.253 +	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
   6.254 +	;;
   6.255 +	itr.d dtr[r25]=r23		// wire in new mapping...
   6.256 +	br.cond.sptk .done
   6.257 +END(ia64_switch_to)
   6.258 +
   6.259 +/*
   6.260 + * Note that interrupts are enabled during save_switch_stack and load_switch_stack.  This
   6.261 + * means that we may get an interrupt with "sp" pointing to the new kernel stack while
   6.262 + * ar.bspstore is still pointing to the old kernel backing store area.  Since ar.rsc,
   6.263 + * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
   6.264 + * problem.  Also, we don't need to specify unwind information for preserved registers
   6.265 + * that are not modified in save_switch_stack as the right unwind information is already
   6.266 + * specified at the call-site of save_switch_stack.
   6.267 + */
   6.268 +
   6.269 +/*
   6.270 + * save_switch_stack:
   6.271 + *	- r16 holds ar.pfs
   6.272 + *	- b7 holds address to return to
   6.273 + *	- rp (b0) holds return address to save
   6.274 + */
   6.275 +GLOBAL_ENTRY(save_switch_stack)
   6.276 +	.prologue
   6.277 +	.altrp b7
   6.278 +	flushrs			// flush dirty regs to backing store (must be first in insn group)
   6.279 +	.save @priunat,r17
   6.280 +	mov r17=ar.unat		// preserve caller's
   6.281 +	.body
   6.282 +#ifdef CONFIG_ITANIUM
   6.283 +	adds r2=16+128,sp
   6.284 +	adds r3=16+64,sp
   6.285 +	adds r14=SW(R4)+16,sp		// r14 = &sw->r4
   6.286 +	;;
   6.287 +	st8.spill [r14]=r4,16		// spill r4
   6.288 +	lfetch.fault.excl.nt1 [r3],128
   6.289 +	;;
   6.290 +	lfetch.fault.excl.nt1 [r2],128
   6.291 +	lfetch.fault.excl.nt1 [r3],128
   6.292 +	;;
   6.293 +	lfetch.fault.excl [r2]
   6.294 +	lfetch.fault.excl [r3]
   6.295 +	adds r15=SW(R5)+16,sp		// r15 = &sw->r5
   6.296 +#else
   6.297 +	add r2=16+3*128,sp
   6.298 +	add r3=16,sp
   6.299 +	add r14=SW(R4)+16,sp		// r14 = &sw->r4
   6.300 +	;;
   6.301 +	st8.spill [r14]=r4,SW(R6)-SW(R4)	// spill r4 and prefetch offset 0x1c0
   6.302 +	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x010
   6.303 +	;;
   6.304 +	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x090
   6.305 +	lfetch.fault.excl.nt1 [r2],128	//		prefetch offset 0x190
   6.306 +	;;
   6.307 +	lfetch.fault.excl.nt1 [r3]	//		prefetch offset 0x110
   6.308 +	lfetch.fault.excl.nt1 [r2]	//		prefetch offset 0x210
   6.309 +	adds r15=SW(R5)+16,sp		// r15 = &sw->r5
   6.310 +#endif
   6.311 +	;;
   6.312 +	st8.spill [r15]=r5,SW(R7)-SW(R5)	// spill r5
   6.313 +	mov.m ar.rsc=0			// put RSE in mode: enforced lazy, little endian, pl 0
   6.314 +	add r2=SW(F2)+16,sp		// r2 = &sw->f2
   6.315 +	;;
   6.316 +	st8.spill [r14]=r6,SW(B0)-SW(R6)	// spill r6
   6.317 +	mov.m r18=ar.fpsr		// preserve fpsr
   6.318 +	add r3=SW(F3)+16,sp		// r3 = &sw->f3
   6.319 +	;;
   6.320 +	stf.spill [r2]=f2,32
   6.321 +	mov.m r19=ar.rnat		// r19 = ar.rnat (stored below)
   6.322 +	mov r21=b0			// r21 = b0 (stored below)
   6.323 +
   6.324 +	stf.spill [r3]=f3,32
   6.325 +	st8.spill [r15]=r7,SW(B2)-SW(R7)	// spill r7
   6.326 +	mov r22=b1			// r22 = b1 (stored below)
   6.327 +	;;
   6.328 +	// since we're done with the spills, read and save ar.unat:
   6.329 +	mov.m r29=ar.unat
   6.330 +	mov.m r20=ar.bspstore		// r20 = ar.bspstore (stored below)
   6.331 +	mov r23=b2			// r23 = b2 (stored below)
   6.332 +	stf.spill [r2]=f4,32
   6.333 +	stf.spill [r3]=f5,32
   6.334 +	mov r24=b3			// r24 = b3 (stored below)
   6.335 +	;;
   6.336 +	st8 [r14]=r21,SW(B1)-SW(B0)		// save b0
   6.337 +	st8 [r15]=r23,SW(B3)-SW(B2)		// save b2
   6.338 +	mov r25=b4			// r25 = b4 (stored below)
   6.339 +	mov r26=b5			// r26 = b5 (stored below)
   6.340 +	;;
   6.341 +	st8 [r14]=r22,SW(B4)-SW(B1)		// save b1
   6.342 +	st8 [r15]=r24,SW(AR_PFS)-SW(B3)		// save b3
   6.343 +	mov r21=ar.lc		// I-unit
   6.344 +	stf.spill [r2]=f12,32
   6.345 +	stf.spill [r3]=f13,32
   6.346 +	;;
   6.347 +	st8 [r14]=r25,SW(B5)-SW(B4)		// save b4
   6.348 +	st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS)	// save ar.pfs
   6.349 +	stf.spill [r2]=f14,32
   6.350 +	stf.spill [r3]=f15,32
   6.351 +	;;
   6.352 +	st8 [r14]=r26				// save b5
   6.353 +	st8 [r15]=r21				// save ar.lc
   6.354 +	stf.spill [r2]=f16,32
   6.355 +	stf.spill [r3]=f17,32
   6.356 +	;;
   6.357 +	stf.spill [r2]=f18,32
   6.358 +	stf.spill [r3]=f19,32
   6.359 +	;;
   6.360 +	stf.spill [r2]=f20,32
   6.361 +	stf.spill [r3]=f21,32
   6.362 +	;;
   6.363 +	stf.spill [r2]=f22,32
   6.364 +	stf.spill [r3]=f23,32
   6.365 +	;;
   6.366 +	stf.spill [r2]=f24,32
   6.367 +	stf.spill [r3]=f25,32
   6.368 +	;;
   6.369 +	stf.spill [r2]=f26,32
   6.370 +	stf.spill [r3]=f27,32
   6.371 +	;;
   6.372 +	stf.spill [r2]=f28,32
   6.373 +	stf.spill [r3]=f29,32
   6.374 +	;;
   6.375 +	stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)
   6.376 +	stf.spill [r3]=f31,SW(PR)-SW(F31)
   6.377 +	add r14=SW(CALLER_UNAT)+16,sp		// r14 = &sw->caller_unat
   6.378 +	;;
   6.379 +	st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT)	// save ar.unat
   6.380 +	st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
   6.381 +	mov r21=pr				// r21 = predicate registers (stored below)
   6.382 +	;;
   6.383 +	st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
   6.384 +	st8 [r3]=r21				// save predicate registers
   6.385 +	;;
   6.386 +	st8 [r2]=r20				// save ar.bspstore
   6.387 +	st8 [r14]=r18				// save fpsr
   6.388 +	mov ar.rsc=3		// put RSE back into eager mode, pl 0
   6.389 +	br.cond.sptk.many b7	// return through b7 (rp/b0 was saved above, not used for the return)
   6.390 +END(save_switch_stack)
   6.391 +
   6.392 +/*
   6.393 + * load_switch_stack:
   6.394 + *	- "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
   6.395 + *	- b7 holds address to return to
   6.396 + *	- must not touch r8-r11
   6.397 + */
   6.398 +#ifdef XEN
   6.399 +GLOBAL_ENTRY(load_switch_stack)
   6.400 +#else
   6.401 +ENTRY(load_switch_stack)
   6.402 +#endif
   6.403 +	.prologue
   6.404 +	.altrp b7
   6.405 +
   6.406 +	.body
   6.407 +	lfetch.fault.nt1 [sp]
   6.408 +	adds r2=SW(AR_BSPSTORE)+16,sp		// r2 = &sw->ar_bspstore
   6.409 +	adds r3=SW(AR_UNAT)+16,sp		// r3 = &sw->ar_unat
   6.410 +	mov ar.rsc=0						// put RSE into enforced lazy mode
   6.411 +	adds r14=SW(CALLER_UNAT)+16,sp		// r14 = &sw->caller_unat
   6.412 +	adds r15=SW(AR_FPSR)+16,sp		// r15 = &sw->ar_fpsr
   6.413 +	;;
   6.414 +	ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE))	// bspstore
   6.415 +	ld8 r29=[r3],(SW(B1)-SW(AR_UNAT))	// unat
   6.416 +	;;
   6.417 +	ld8 r21=[r2],16		// load saved b0 (moved into b0 below)
   6.418 +	ld8 r22=[r3],16		// load saved b1 (moved into b1 below)
   6.419 +	;;
   6.420 +	ld8 r23=[r2],16		// load saved b2 (moved into b2 below)
   6.421 +	ld8 r24=[r3],16		// load saved b3 (moved into b3 below)
   6.422 +	;;
   6.423 +	ld8 r25=[r2],16		// load saved b4 (moved into b4 below)
   6.424 +	ld8 r26=[r3],16		// load saved b5 (moved into b5 below)
   6.425 +	;;
   6.426 +	ld8 r16=[r2],(SW(PR)-SW(AR_PFS))	// ar.pfs
   6.427 +	ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC))	// ar.lc
   6.428 +	;;
   6.429 +	ld8 r28=[r2]		// load saved pr (applied by "mov pr=r28,-1" below)
   6.430 +	ld8 r30=[r3]		// load saved rnat (written to ar.rnat at the end)
   6.431 +	;;
   6.432 +	ld8 r18=[r14],16	// load caller's unat (written to ar.unat at the end)
   6.433 +	ld8 r19=[r15],24	// load saved fpsr (written to ar.fpsr at the end)
   6.434 +	;;
   6.435 +	ldf.fill f2=[r14],32
   6.436 +	ldf.fill f3=[r15],32
   6.437 +	;;
   6.438 +	ldf.fill f4=[r14],32
   6.439 +	ldf.fill f5=[r15],32
   6.440 +	;;
   6.441 +	ldf.fill f12=[r14],32
   6.442 +	ldf.fill f13=[r15],32
   6.443 +	;;
   6.444 +	ldf.fill f14=[r14],32
   6.445 +	ldf.fill f15=[r15],32
   6.446 +	;;
   6.447 +	ldf.fill f16=[r14],32
   6.448 +	ldf.fill f17=[r15],32
   6.449 +	;;
   6.450 +	ldf.fill f18=[r14],32
   6.451 +	ldf.fill f19=[r15],32
   6.452 +	mov b0=r21
   6.453 +	;;
   6.454 +	ldf.fill f20=[r14],32
   6.455 +	ldf.fill f21=[r15],32
   6.456 +	mov b1=r22
   6.457 +	;;
   6.458 +	ldf.fill f22=[r14],32
   6.459 +	ldf.fill f23=[r15],32
   6.460 +	mov b2=r23
   6.461 +	;;
   6.462 +	mov ar.bspstore=r27	// install the saved backing store pointer
   6.463 +	mov ar.unat=r29		// establish unat holding the NaT bits for r4-r7
   6.464 +	mov b3=r24
   6.465 +	;;
   6.466 +	ldf.fill f24=[r14],32
   6.467 +	ldf.fill f25=[r15],32
   6.468 +	mov b4=r25
   6.469 +	;;
   6.470 +	ldf.fill f26=[r14],32
   6.471 +	ldf.fill f27=[r15],32
   6.472 +	mov b5=r26
   6.473 +	;;
   6.474 +	ldf.fill f28=[r14],32
   6.475 +	ldf.fill f29=[r15],32
   6.476 +	mov ar.pfs=r16
   6.477 +	;;
   6.478 +	ldf.fill f30=[r14],32
   6.479 +	ldf.fill f31=[r15],24
   6.480 +	mov ar.lc=r17
   6.481 +	;;
   6.482 +	ld8.fill r4=[r14],16
   6.483 +	ld8.fill r5=[r15],16
   6.484 +	mov pr=r28,-1		// restore all predicates (mask -1)
   6.485 +	;;
   6.486 +	ld8.fill r6=[r14],16
   6.487 +	ld8.fill r7=[r15],16
   6.488 +
   6.489 +	mov ar.unat=r18				// restore caller's unat
   6.490 +	mov ar.rnat=r30				// must restore after bspstore but before rsc!
   6.491 +	mov ar.fpsr=r19				// restore fpsr
   6.492 +	mov ar.rsc=3				// put RSE back into eager mode, pl 0
   6.493 +	br.cond.sptk.many b7			// return through b7 (see header comment)
   6.494 +END(load_switch_stack)
   6.495 +
   6.496 +#ifndef XEN
   6.497 +GLOBAL_ENTRY(__ia64_syscall)
   6.498 +	.regstk 6,0,0,0				// 6 inputs; in5 carries the syscall number
   6.499 +	mov r15=in5				// put syscall number in place
   6.500 +	break __BREAK_SYSCALL			// issue the syscall break
   6.501 +	movl r2=errno				// r2 = &errno
   6.502 +	cmp.eq p6,p7=-1,r10			// p6 = (r10 == -1); NOTE(review): r10 == -1 appears to be the error indication - confirm
   6.503 +	;;
   6.504 +(p6)	st4 [r2]=r8				// when p6: errno = r8 (32-bit store)
   6.505 +(p6)	mov r8=-1				// when p6: return -1
   6.506 +	br.ret.sptk.many rp
   6.507 +END(__ia64_syscall)
   6.508 +
   6.509 +GLOBAL_ENTRY(execve)
   6.510 +	mov r15=__NR_execve			// put syscall number in place
   6.511 +	break __BREAK_SYSCALL			// issue the syscall break; arguments are left untouched
   6.512 +	br.ret.sptk.many rp			// return to the caller without touching r8
   6.513 +END(execve)
   6.514 +
   6.515 +GLOBAL_ENTRY(clone)
   6.516 +	mov r15=__NR_clone			// put syscall number in place
   6.517 +	break __BREAK_SYSCALL			// issue the syscall break; arguments are left untouched
   6.518 +	br.ret.sptk.many rp			// return to the caller without touching r8
   6.519 +END(clone)
   6.520 +
   6.521 +	/*
   6.522 +	 * Invoke a system call, but do some tracing before and after the call.
   6.523 +	 * We MUST preserve the current register frame throughout this routine
   6.524 +	 * because some system calls (such as ia64_execve) directly
   6.525 +	 * manipulate ar.pfs.
   6.526 +	 */
   6.527 +GLOBAL_ENTRY(ia64_trace_syscall)
   6.528 +	PT_REGS_UNWIND_INFO(0)
   6.529 +	/*
   6.530 +	 * We need to preserve the scratch registers f6-f11 in case the system
   6.531 +	 * call is sigreturn.
   6.532 +	 */
   6.533 +	adds r16=PT(F6)+16,sp			// r16 = &pt_regs.f6
   6.534 +	adds r17=PT(F7)+16,sp			// r17 = &pt_regs.f7
   6.535 +	;;
   6.536 + 	stf.spill [r16]=f6,32
   6.537 + 	stf.spill [r17]=f7,32
   6.538 +	;;
   6.539 + 	stf.spill [r16]=f8,32
   6.540 + 	stf.spill [r17]=f9,32
   6.541 +	;;
   6.542 + 	stf.spill [r16]=f10
   6.543 + 	stf.spill [r17]=f11
   6.544 +	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
   6.545 +	adds r16=PT(F6)+16,sp			// recompute r16 = &pt_regs.f6 after the call
   6.546 +	adds r17=PT(F7)+16,sp			// recompute r17 = &pt_regs.f7 after the call
   6.547 +	;;
   6.548 +	ldf.fill f6=[r16],32
   6.549 +	ldf.fill f7=[r17],32
   6.550 +	;;
   6.551 +	ldf.fill f8=[r16],32
   6.552 +	ldf.fill f9=[r17],32
   6.553 +	;;
   6.554 +	ldf.fill f10=[r16]
   6.555 +	ldf.fill f11=[r17]
   6.556 +	// the syscall number may have changed, so re-load it and re-calculate the
   6.557 +	// syscall entry-point:
   6.558 +	adds r15=PT(R15)+16,sp			// r15 = &pt_regs.r15 (syscall #)
   6.559 +	;;
   6.560 +	ld8 r15=[r15]				// r15 = syscall # from pt_regs
   6.561 +	mov r3=NR_syscalls - 1			// r3 = highest valid table index
   6.562 +	;;
   6.563 +	adds r15=-1024,r15			// r15 = syscall # - 1024 (table index)
   6.564 +	movl r16=sys_call_table
   6.565 +	;;
   6.566 +	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
   6.567 +	cmp.leu p6,p7=r15,r3			// p6 = index in range (unsigned compare), p7 = out of range
   6.568 +	;;
   6.569 +(p6)	ld8 r20=[r20]				// load address of syscall entry point
   6.570 +(p7)	movl r20=sys_ni_syscall			// out of range: use sys_ni_syscall instead
   6.571 +	;;
   6.572 +	mov b6=r20
   6.573 +	br.call.sptk.many rp=b6			// do the syscall
   6.574 +.strace_check_retval:
   6.575 +	cmp.lt p6,p0=r8,r0			// syscall failed?
   6.576 +	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
   6.577 +	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
   6.578 +	mov r10=0				// assume success: no error indication
   6.579 +(p6)	br.cond.sptk strace_error		// syscall failed ->
   6.580 +	;;					// avoid RAW on r10
   6.581 +.strace_save_retval:
   6.582 +.mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
   6.583 +.mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
   6.584 +	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
   6.585 +.ret3:	br.cond.sptk .work_pending_syscall_end
   6.586 +
   6.587 +strace_error:
   6.588 +	ld8 r3=[r2]				// load pt_regs.r8
   6.589 +	sub r9=0,r8				// negate return value to get errno value
   6.590 +	;;
   6.591 +	cmp.ne p6,p0=r3,r0			// is pt_regs.r8!=0?
   6.592 +	adds r3=16,r2				// r3=&pt_regs.r10
   6.593 +	;;
   6.594 +(p6)	mov r10=-1				// pt_regs.r8 != 0: flag the error in r10
   6.595 +(p6)	mov r8=r9				// ...and return the positive errno value in r8
   6.596 +	br.cond.sptk .strace_save_retval
   6.597 +END(ia64_trace_syscall)
   6.598 +
   6.599 +	/*
   6.600 +	 * When traced and returning from sigreturn, we invoke syscall_trace but then
   6.601 +	 * go straight to ia64_leave_kernel rather than ia64_leave_syscall.
   6.602 +	 */
   6.603 +GLOBAL_ENTRY(ia64_strace_leave_kernel)
   6.604 +	PT_REGS_UNWIND_INFO(0)
   6.605 +{	/*
   6.606 +	 * Some versions of gas generate bad unwind info if the first instruction of a
   6.607 +	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
   6.608 +	 */
   6.609 +	nop.m 0					// filler so the bundle starts in slot 0
   6.610 +	nop.i 0					// filler
   6.611 +	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
   6.612 +}
   6.613 +.ret4:	br.cond.sptk ia64_leave_kernel		// then exit via ia64_leave_kernel, not ia64_leave_syscall
   6.614 +END(ia64_strace_leave_kernel)
   6.615 +#endif
   6.616 +
   6.617 +GLOBAL_ENTRY(ia64_ret_from_clone)
   6.618 +	PT_REGS_UNWIND_INFO(0)
   6.619 +{	/*
   6.620 +	 * Some versions of gas generate bad unwind info if the first instruction of a
   6.621 +	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
   6.622 +	 */
   6.623 +	nop.m 0					// filler so the bundle starts in slot 0
   6.624 +	nop.i 0					// filler
   6.625 +	/*
   6.626 +	 * We need to call schedule_tail() to complete the scheduling process.
   6.627 +	 * Called by ia64_switch_to() after do_fork()->copy_thread().  r8 contains the
   6.628 +	 * address of the previously executing task.
   6.629 +	 */
   6.630 +	br.call.sptk.many rp=ia64_invoke_schedule_tail
   6.631 +}
   6.632 +#ifdef XEN
   6.633 +	// new domains are cloned but not exec'ed so switch to user mode here
   6.634 +	cmp.ne pKStk,pUStk=r0,r0		// r0 != r0 is false: pKStk=0, pUStk=1
   6.635 +#ifdef CONFIG_VTI
   6.636 +	br.cond.spnt ia64_leave_hypervisor
   6.637 +#else // CONFIG_VTI
   6.638 +	br.cond.spnt ia64_leave_kernel
   6.639 +#endif // CONFIG_VTI
   6.640 +#else
   6.641 +.ret8:
   6.642 +	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13	// r2 = &current_thread_info()->flags
   6.643 +	;;
   6.644 +	ld4 r2=[r2]				// r2 = thread flags
   6.645 +	;;
   6.646 +	mov r8=0				// r8 = 0; NOTE(review): presumably the child's syscall return value - confirm
   6.647 +	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// keep only the _TIF_SYSCALL_TRACEAUDIT bits
   6.648 +	;;
   6.649 +	cmp.ne p6,p0=r2,r0			// p6 = any of those bits set
   6.650 +(p6)	br.cond.spnt .strace_check_retval	// traced: go through the strace return path
   6.651 +#endif
   6.652 +	;;					// added stop bits to prevent r8 dependency
   6.653 +END(ia64_ret_from_clone)
   6.654 +	// fall through
   6.655 +GLOBAL_ENTRY(ia64_ret_from_syscall)
   6.656 +	PT_REGS_UNWIND_INFO(0)
   6.657 +	cmp.ge p6,p7=r8,r0			// p6 = success (r8 >= 0), p7 = possible failure (r8 < 0)
   6.658 +	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
   6.659 +	mov r10=r0				// clear error indication in r10
   6.660 +(p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
   6.661 +END(ia64_ret_from_syscall)
   6.662 +	// fall through
   6.663 +/*
   6.664 + * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
   6.665 + *	need to switch to bank 0 and doesn't restore the scratch registers.
   6.666 + *	To avoid leaking kernel bits, the scratch registers are set to
   6.667 + *	the following known-to-be-safe values:
   6.668 + *
   6.669 + *		  r1: restored (global pointer)
   6.670 + *		  r2: cleared
   6.671 + *		  r3: 1 (when returning to user-level)
   6.672 + *	      r8-r11: restored (syscall return value(s))
   6.673 + *		 r12: restored (user-level stack pointer)
   6.674 + *		 r13: restored (user-level thread pointer)
   6.675 + *		 r14: cleared
   6.676 + *		 r15: restored (syscall #)
   6.677 + *	     r16-r17: cleared
   6.678 + *		 r18: user-level b6
   6.679 + *		 r19: cleared
   6.680 + *		 r20: user-level ar.fpsr
   6.681 + *		 r21: user-level b0
   6.682 + *		 r22: cleared
   6.683 + *		 r23: user-level ar.bspstore
   6.684 + *		 r24: user-level ar.rnat
   6.685 + *		 r25: user-level ar.unat
   6.686 + *		 r26: user-level ar.pfs
   6.687 + *		 r27: user-level ar.rsc
   6.688 + *		 r28: user-level ip
   6.689 + *		 r29: user-level psr
   6.690 + *		 r30: user-level cfm
   6.691 + *		 r31: user-level pr
   6.692 + *	      f6-f11: cleared
   6.693 + *		  pr: restored (user-level pr)
   6.694 + *		  b0: restored (user-level rp)
   6.695 + *	          b6: restored
   6.696 + *		  b7: cleared
   6.697 + *	     ar.unat: restored (user-level ar.unat)
   6.698 + *	      ar.pfs: restored (user-level ar.pfs)
   6.699 + *	      ar.rsc: restored (user-level ar.rsc)
   6.700 + *	     ar.rnat: restored (user-level ar.rnat)
   6.701 + *	 ar.bspstore: restored (user-level ar.bspstore)
   6.702 + *	     ar.fpsr: restored (user-level ar.fpsr)
   6.703 + *	      ar.ccv: cleared
   6.704 + *	      ar.csd: cleared
   6.705 + *	      ar.ssd: cleared
   6.706 + */
   6.707 +ENTRY(ia64_leave_syscall)
   6.708 +	PT_REGS_UNWIND_INFO(0)
   6.709 +	/*
   6.710 +	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
   6.711 +	 * user- or fsys-mode, hence we disable interrupts early on.
   6.712 +	 *
   6.713 +	 * p6 controls whether current_thread_info()->flags needs to be checked for
   6.714 +	 * extra work.  We always check for extra work when returning to user-level.
   6.715 +	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
   6.716 +	 * is 0.  After extra work processing has been completed, execution
   6.717 +	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
   6.718 +	 * needs to be redone.
   6.719 +	 */
   6.720 +#ifdef CONFIG_PREEMPT
   6.721 +	rsm psr.i				// disable interrupts
   6.722 +	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
   6.723 +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
   6.724 +	;;
   6.725 +	.pred.rel.mutex pUStk,pKStk
   6.726 +(pKStk) ld4 r21=[r20]			// r21 <- preempt_count
   6.727 +(pUStk)	mov r21=0			// r21 <- 0
   6.728 +	;;
   6.729 +	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
   6.730 +#else /* !CONFIG_PREEMPT */
   6.731 +(pUStk)	rsm psr.i
   6.732 +	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
   6.733 +(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
   6.734 +#endif
   6.735 +.work_processed_syscall:
   6.736 +	adds r2=PT(LOADRS)+16,r12		// r2 = &pt_regs.loadrs
   6.737 +	adds r3=PT(AR_BSPSTORE)+16,r12		// r3 = &pt_regs.ar_bspstore
   6.738 +#ifdef XEN
   6.739 +	;;
   6.740 +#else
   6.741 +	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13	// r18 = &current_thread_info()->flags
   6.742 +	;;
   6.743 +(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
   6.744 +#endif
   6.745 +	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
   6.746 +	mov b7=r0		// clear b7
   6.747 +	;;
   6.748 +	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
   6.749 +	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
   6.750 +#ifndef XEN
   6.751 +(p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
   6.752 +#endif
   6.753 +	;;
   6.754 +	mov r16=ar.bsp				// M2  get existing backing store pointer
   6.755 +#ifndef XEN
   6.756 +(p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
   6.757 +(p6)	br.cond.spnt .work_pending_syscall
   6.758 +#endif
   6.759 +	;;
   6.760 +	// start restoring the state saved on the kernel stack (struct pt_regs):
   6.761 +	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
   6.762 +	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
   6.763 +	mov f6=f0		// clear f6
   6.764 +	;;
   6.765 +	invala			// M0|1 invalidate ALAT
   6.766 +	rsm psr.i | psr.ic	// M2 initiate turning off of interrupt and interruption collection
   6.767 +	mov f9=f0		// clear f9
   6.768 +
   6.769 +	ld8 r29=[r2],16		// load cr.ipsr
   6.770 +	ld8 r28=[r3],16			// load cr.iip
   6.771 +	mov f8=f0		// clear f8
   6.772 +	;;
   6.773 +	ld8 r30=[r2],16		// M0|1 load cr.ifs
   6.774 +	mov.m ar.ssd=r0		// M2 clear ar.ssd
   6.775 +	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
   6.776 +	;;
   6.777 +	ld8 r25=[r3],16		// M0|1 load ar.unat
   6.778 +	mov.m ar.csd=r0		// M2 clear ar.csd
   6.779 +	mov r22=r0		// clear r22
   6.780 +	;;
   6.781 +	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
   6.782 +(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
   6.783 +	mov f10=f0		// clear f10
   6.784 +	;;
   6.785 +	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
   6.786 +	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// load ar.rsc
   6.787 +	mov f11=f0		// clear f11
   6.788 +	;;
   6.789 +	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// load ar.rnat (may be garbage)
   6.790 +	ld8 r31=[r3],PT(R1)-PT(PR)		// load predicates
   6.791 +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
   6.792 +	;;
   6.793 +	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// load ar.fpsr
   6.794 +	ld8.fill r1=[r3],16	// load r1
   6.795 +(pUStk) mov r17=1
   6.796 +	;;
   6.797 +	srlz.d			// M0  ensure interruption collection is off
   6.798 +	ld8.fill r13=[r3],16	// restore r13
   6.799 +	mov f7=f0		// clear f7
   6.800 +	;;
   6.801 +	ld8.fill r12=[r2]	// restore r12 (sp)
   6.802 +	ld8.fill r15=[r3]	// restore r15
   6.803 +#ifdef XEN
   6.804 +	movl r3=THIS_CPU(ia64_phys_stacked_size_p8)
   6.805 +#else
   6.806 +	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
   6.807 +#endif
   6.808 +	;;
   6.809 +(pUStk)	ld4 r3=[r3]		// r3 = cpu_data->phys_stacked_size_p8
   6.810 +(pUStk) st1 [r14]=r17		// store 1 at the thread's on_ustack offset (r14/r17 set above)
   6.811 +	mov b6=r18		// I0  restore b6
   6.812 +	;;
   6.813 +	mov r14=r0		// clear r14
   6.814 +	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
   6.815 +(pKStk) br.cond.dpnt.many skip_rbs_switch
   6.816 +
   6.817 +	mov.m ar.ccv=r0		// clear ar.ccv
   6.818 +(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
   6.819 +	br.cond.sptk.many rbs_switch
   6.820 +END(ia64_leave_syscall)
   6.821 +
   6.822 +#ifdef CONFIG_IA32_SUPPORT
   6.823 +GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
   6.824 +	PT_REGS_UNWIND_INFO(0)
   6.825 +	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
   6.826 +	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
   6.827 +	;;
   6.828 +	.mem.offset 0,0
   6.829 +	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
   6.830 +	.mem.offset 8,0
   6.831 +	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
   6.832 +END(ia64_ret_from_ia32_execve)	// name must match the GLOBAL_ENTRY above
   6.833 +	// fall through
   6.834 +#endif /* CONFIG_IA32_SUPPORT */
   6.835 +GLOBAL_ENTRY(ia64_leave_kernel)
   6.836 +	PT_REGS_UNWIND_INFO(0)
   6.837 +	/*
   6.838 +	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
   6.839 +	 * user- or fsys-mode, hence we disable interrupts early on.
   6.840 +	 *
   6.841 +	 * p6 controls whether current_thread_info()->flags needs to be checked for
   6.842 +	 * extra work.  We always check for extra work when returning to user-level.
   6.843 +	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
   6.844 +	 * is 0.  After extra work processing has been completed, execution
   6.845 +	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
   6.846 +	 * needs to be redone.
   6.847 +	 */
   6.848 +#ifdef CONFIG_PREEMPT
   6.849 +	rsm psr.i				// disable interrupts
   6.850 +	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
   6.851 +(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
   6.852 +	;;
   6.853 +	.pred.rel.mutex pUStk,pKStk
   6.854 +(pKStk)	ld4 r21=[r20]			// r21 <- preempt_count
   6.855 +(pUStk)	mov r21=0			// r21 <- 0
   6.856 +	;;
   6.857 +	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
   6.858 +#else
   6.859 +(pUStk)	rsm psr.i
   6.860 +	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
   6.861 +(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
   6.862 +#endif
   6.863 +.work_processed_kernel:
   6.864 +#ifdef XEN
   6.865 +	alloc loc0=ar.pfs,0,1,1,0	// 0 in, 1 local, 1 out; loc0 = ar.pfs
   6.866 +	adds out0=16,r12		// out0 = r12+16 (same base as the PT() accesses below)
   6.867 +	;;
   6.868 +(p6)	br.call.sptk.many b0=deliver_pending_interrupt	// only when the extra-work check is requested
   6.869 +	mov ar.pfs=loc0			// restore ar.pfs saved by the alloc above
   6.870 +	mov r31=r0			// r31 = 0: the TIF_WORK_MASK test below then finds no work
   6.871 +#else
   6.872 +	adds r17=TI_FLAGS+IA64_TASK_SIZE,r13	// r17 = &current_thread_info()->flags
   6.873 +	;;
   6.874 +(p6)	ld4 r31=[r17]				// load current_thread_info()->flags
   6.875 +#endif
   6.876 +	adds r21=PT(PR)+16,r12		// r21 = &pt_regs.pr (prefetch pointer)
   6.877 +	;;
   6.878 +
   6.879 +	lfetch [r21],PT(CR_IPSR)-PT(PR)
   6.880 +	adds r2=PT(B6)+16,r12		// r2 = &pt_regs.b6
   6.881 +	adds r3=PT(R16)+16,r12		// r3 = &pt_regs.r16
   6.882 +	;;
   6.883 +	lfetch [r21]
   6.884 +	ld8 r28=[r2],8		// load b6
   6.885 +	adds r29=PT(R24)+16,r12		// r29 = &pt_regs.r24
   6.886 +
   6.887 +	ld8.fill r16=[r3]
   6.888 +	adds r30=PT(AR_CCV)+16,r12	// r30 = &pt_regs.ar_ccv
   6.889 +(p6)	and r19=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
   6.890 +	;;
   6.891 +	adds r3=PT(AR_CSD)-PT(R16),r3	// r3 = &pt_regs.ar_csd
   6.892 +	ld8.fill r24=[r29]
   6.893 +	ld8 r15=[r30]		// load ar.ccv
   6.894 +(p6)	cmp4.ne.unc p6,p0=r19, r0		// any special work pending?
   6.895 +	;;
   6.896 +	ld8 r29=[r2],16		// load b7
   6.897 +	ld8 r30=[r3],16		// load ar.csd
   6.898 +#ifndef XEN
   6.899 +(p6)	br.cond.spnt .work_pending
   6.900 +#endif
   6.901 +	;;
   6.902 +	ld8 r31=[r2],16		// load ar.ssd
   6.903 +	ld8.fill r8=[r3],16
   6.904 +	;;
   6.905 +	ld8.fill r9=[r2],16
   6.906 +	ld8.fill r10=[r3],PT(R17)-PT(R10)
   6.907 +	;;
   6.908 +	ld8.fill r11=[r2],PT(R18)-PT(R11)
   6.909 +	ld8.fill r17=[r3],16
   6.910 +	;;
   6.911 +	ld8.fill r18=[r2],16
   6.912 +	ld8.fill r19=[r3],16
   6.913 +	;;
   6.914 +	ld8.fill r20=[r2],16
   6.915 +	ld8.fill r21=[r3],16
   6.916 +	mov ar.csd=r30
   6.917 +	mov ar.ssd=r31
   6.918 +	;;
   6.919 +	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
   6.920 +	invala			// invalidate ALAT
   6.921 +	;;
   6.922 +	ld8.fill r22=[r2],24
   6.923 +	ld8.fill r23=[r3],24
   6.924 +	mov b6=r28		// restore b6 (loaded above)
   6.925 +	;;
   6.926 +	ld8.fill r25=[r2],16
   6.927 +	ld8.fill r26=[r3],16
   6.928 +	mov b7=r29		// restore b7 (loaded above)
   6.929 +	;;
   6.930 +	ld8.fill r27=[r2],16
   6.931 +	ld8.fill r28=[r3],16
   6.932 +	;;
   6.933 +	ld8.fill r29=[r2],16
   6.934 +	ld8.fill r30=[r3],24
   6.935 +	;;
   6.936 +	ld8.fill r31=[r2],PT(F9)-PT(R31)
   6.937 +	adds r3=PT(F10)-PT(F6),r3
   6.938 +	;;
   6.939 +	ldf.fill f9=[r2],PT(F6)-PT(F9)
   6.940 +	ldf.fill f10=[r3],PT(F8)-PT(F10)
   6.941 +	;;
   6.942 +	ldf.fill f6=[r2],PT(F7)-PT(F6)
   6.943 +	;;
   6.944 +	ldf.fill f7=[r2],PT(F11)-PT(F7)
   6.945 +	ldf.fill f8=[r3],32
   6.946 +	;;
   6.947 +	srlz.i			// ensure interruption collection is off
   6.948 +	mov ar.ccv=r15		// restore ar.ccv (loaded above)
   6.949 +	;;
   6.950 +	ldf.fill f11=[r2]
   6.951 +	bsw.0			// switch back to bank 0 (no stop bit required beforehand...)
   6.952 +	;;
   6.953 +(pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
   6.954 +	adds r16=PT(CR_IPSR)+16,r12
   6.955 +	adds r17=PT(CR_IIP)+16,r12
   6.956 +
   6.957 +(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
   6.958 +	nop.i 0
   6.959 +	nop.i 0
   6.960 +	;;
   6.961 +	ld8 r29=[r16],16	// load cr.ipsr
   6.962 +	ld8 r28=[r17],16	// load cr.iip
   6.963 +	;;
   6.964 +	ld8 r30=[r16],16	// load cr.ifs
   6.965 +	ld8 r25=[r17],16	// load ar.unat
   6.966 +	;;
   6.967 +	ld8 r26=[r16],16	// load ar.pfs
   6.968 +	ld8 r27=[r17],16	// load ar.rsc
   6.969 +	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
   6.970 +	;;
   6.971 +	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
   6.972 +	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
   6.973 +	;;
   6.974 +	ld8 r31=[r16],16	// load predicates
   6.975 +	ld8 r21=[r17],16	// load b0
   6.976 +	;;
   6.977 +	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
   6.978 +	ld8.fill r1=[r17],16	// load r1
   6.979 +	;;
   6.980 +	ld8.fill r12=[r16],16
   6.981 +	ld8.fill r13=[r17],16
   6.982 +(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
   6.983 +	;;
   6.984 +	ld8 r20=[r16],16	// ar.fpsr
   6.985 +	ld8.fill r15=[r17],16
   6.986 +	;;
   6.987 +	ld8.fill r14=[r16],16
   6.988 +	ld8.fill r2=[r17]
   6.989 +(pUStk)	mov r17=1
   6.990 +	;;
   6.991 +	ld8.fill r3=[r16]
   6.992 +(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
   6.993 +	shr.u r18=r19,16	// get byte size of existing "dirty" partition
   6.994 +	;;
   6.995 +	mov r16=ar.bsp		// get existing backing store pointer
   6.996 +#ifdef XEN
   6.997 +	movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
   6.998 +#else
   6.999 +	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
  6.1000 +#endif
  6.1001 +	;;
  6.1002 +	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
  6.1003 +(pKStk)	br.cond.dpnt skip_rbs_switch	// returning to a kernel stack: skip the RBS switch
  6.1004 +
  6.1005 +	/*
  6.1006 +	 * Restore user backing store.
  6.1007 +	 *
  6.1008 +	 * NOTE: alloc, loadrs, and cover can't be predicated.
  6.1009 +	 */
  6.1010 +(pNonSys) br.cond.dpnt dont_preserve_current_frame
  6.1011 +
  6.1012 +rbs_switch:
  6.1013 +	cover				// add current frame into dirty partition and set cr.ifs
  6.1014 +	;;
  6.1015 +	mov r19=ar.bsp			// get new backing store pointer
  6.1016 +	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
  6.1017 +	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
  6.1018 +	;;
  6.1019 +	sub r19=r19,r16			// calculate total byte size of dirty partition
  6.1020 +	add r18=64,r18			// don't force in0-in7 into memory...
  6.1021 +	;;
  6.1022 +	shl r19=r19,16			// shift size of dirty partition into loadrs position
  6.1023 +	;;
  6.1024 +dont_preserve_current_frame:
  6.1025 +	/*
  6.1026 +	 * To prevent leaking bits between the kernel and user-space,
  6.1027 +	 * we must clear the stacked registers in the "invalid" partition here.
  6.1028 +	 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
  6.1029 +	 * 5 registers/cycle on McKinley).
  6.1030 +	 */
  6.1031 +#	define pRecurse	p6
  6.1032 +#	define pReturn	p7
  6.1033 +#ifdef CONFIG_ITANIUM
  6.1034 +#	define Nregs	10
  6.1035 +#else
  6.1036 +#	define Nregs	14
  6.1037 +#endif
  6.1038 +	alloc loc0=ar.pfs,2,Nregs-2,2,0	// 2 in, Nregs-2 locals, 2 out
  6.1039 +	shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
  6.1040 +	sub r17=r17,r18			// r17 = (physStackedSize + 8) - dirtySize
  6.1041 +	;;
  6.1042 +	mov ar.rsc=r19			// load ar.rsc to be used for "loadrs"
  6.1043 +	shladd in0=loc1,3,r17		// in0 = r17 + 8*RNaTslots: byte count left to clear
  6.1044 +	mov in1=0			// in1 = recursion count, starts at 0
  6.1045 +	;;
  6.1046 +	TEXT_ALIGN(32)
  6.1047 +rse_clear_invalid:
  6.1048 +#ifdef CONFIG_ITANIUM
  6.1049 +	// cycle 0
  6.1050 + { .mii
  6.1051 +	alloc loc0=ar.pfs,2,Nregs-2,2,0
  6.1052 +	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
  6.1053 +	add out0=-Nregs*8,in0
  6.1054 +}{ .mfb
  6.1055 +	add out1=1,in1			// increment recursion count
  6.1056 +	nop.f 0
  6.1057 +	nop.b 0				// can't do br.call here because of alloc (WAW on CFM)
  6.1058 +	;;
  6.1059 +}{ .mfi	// cycle 1
  6.1060 +	mov loc1=0
  6.1061 +	nop.f 0
  6.1062 +	mov loc2=0
  6.1063 +}{ .mib
  6.1064 +	mov loc3=0
  6.1065 +	mov loc4=0
  6.1066 +(pRecurse) br.call.sptk.many b0=rse_clear_invalid
  6.1067 +
  6.1068 +}{ .mfi	// cycle 2
  6.1069 +	mov loc5=0
  6.1070 +	nop.f 0
  6.1071 +	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
  6.1072 +}{ .mib
  6.1073 +	mov loc6=0
  6.1074 +	mov loc7=0
  6.1075 +(pReturn) br.ret.sptk.many b0
  6.1076 +}
  6.1077 +#else /* !CONFIG_ITANIUM */
  6.1078 +	alloc loc0=ar.pfs,2,Nregs-2,2,0
  6.1079 +	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
  6.1080 +	add out0=-Nregs*8,in0		// out0 = bytes left for the next recursion level
  6.1081 +	add out1=1,in1			// increment recursion count
  6.1082 +	mov loc1=0
  6.1083 +	mov loc2=0
  6.1084 +	;;
  6.1085 +	mov loc3=0
  6.1086 +	mov loc4=0
  6.1087 +	mov loc5=0
  6.1088 +	mov loc6=0
  6.1089 +	mov loc7=0
  6.1090 +(pRecurse) br.call.sptk.few b0=rse_clear_invalid
  6.1091 +	;;
  6.1092 +	mov loc8=0
  6.1093 +	mov loc9=0
  6.1094 +	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
  6.1095 +	mov loc10=0
  6.1096 +	mov loc11=0
  6.1097 +(pReturn) br.ret.sptk.many b0
  6.1098 +#endif /* !CONFIG_ITANIUM */
  6.1099 +#	undef pRecurse
  6.1100 +#	undef pReturn
  6.1101 +	;;
  6.1102 +	alloc r17=ar.pfs,0,0,0,0	// drop current register frame
  6.1103 +	;;
  6.1104 +	loadrs				// uses the loadrs size placed in ar.rsc above
  6.1105 +	;;
  6.1106 +skip_rbs_switch:
  6.1107 +	mov ar.unat=r25		// M2
  6.1108 +(pKStk)	extr.u r22=r22,21,1	// I0 extract current value of psr.pp from r22
  6.1109 +(pLvSys)mov r19=r0		// A  clear r19 for leave_syscall, no-op otherwise
  6.1110 +	;;
  6.1111 +(pUStk)	mov ar.bspstore=r23	// M2
  6.1112 +(pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
  6.1113 +(pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
  6.1114 +	;;
  6.1115 +	mov cr.ipsr=r29		// M2
  6.1116 +	mov ar.pfs=r26		// I0
  6.1117 +(pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
  6.1118 +
  6.1119 +(p9)	mov cr.ifs=r30		// M2 only when p9 is still set (rbs_switch clears it)
  6.1120 +	mov b0=r21		// I0
  6.1121 +(pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
  6.1122 +
  6.1123 +	mov ar.fpsr=r20		// M2
  6.1124 +	mov cr.iip=r28		// M2
  6.1125 +	nop 0
  6.1126 +	;;
  6.1127 +(pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
  6.1128 +	nop 0
  6.1129 +(pLvSys)mov r2=r0		// A  clear r2 for leave_syscall, no-op otherwise
  6.1130 +
  6.1131 +	mov ar.rsc=r27		// M2
  6.1132 +	mov pr=r31,-1		// I0 restore all predicates (mask -1) from r31
  6.1133 +	rfi			// B  return from interruption
  6.1134 +
  6.1135 +#ifndef XEN
  6.1136 +	/*
  6.1137 +	 * On entry:
  6.1138 +	 *	r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
  6.1139 +	 *	r31 = current->thread_info->flags
  6.1140 +	 * On exit:
  6.1141 +	 *	p6 = TRUE if work-pending-check needs to be redone
  6.1142 +	 */
  6.1143 +.work_pending_syscall:
  6.1144 +	add r2=-8,r2
  6.1145 +	add r3=-8,r3
  6.1146 +	;;
  6.1147 +	st8 [r2]=r8		// stash r8; reloaded from pt_regs.r8 at .work_pending_syscall_end
  6.1148 +	st8 [r3]=r10		// stash r10; reloaded from pt_regs.r10 at .work_pending_syscall_end
  6.1149 +.work_pending:
  6.1150 +	tbit.nz p6,p0=r31,TIF_SIGDELAYED		// signal delayed from MCA/INIT/NMI/PMI context?
  6.1151 +(p6)	br.cond.sptk.few .sigdelayed
  6.1152 +	;;
  6.1153 +	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
  6.1154 +(p6)	br.cond.sptk.few .notify
  6.1155 +#ifdef CONFIG_PREEMPT
  6.1156 +(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1	// r21 = value with only PREEMPT_ACTIVE_BIT set
  6.1157 +	;;
  6.1158 +(pKStk) st4 [r20]=r21				// preempt_count <- r21
  6.1159 +	ssm psr.i		// enable interrupts
  6.1160 +#endif
  6.1161 +	br.call.spnt.many rp=schedule
  6.1162 +.ret9:	cmp.eq p6,p0=r0,r0				// p6 <- 1
  6.1163 +	rsm psr.i		// disable interrupts
  6.1164 +	;;
  6.1165 +#ifdef CONFIG_PREEMPT
  6.1166 +(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
  6.1167 +	;;
  6.1168 +(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
  6.1169 +#endif
  6.1170 +(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
  6.1171 +	br.cond.sptk.many .work_processed_kernel	// re-check
  6.1172 +
  6.1173 +.notify:
  6.1174 +(pUStk)	br.call.spnt.many rp=notify_resume_user
  6.1175 +.ret10:	cmp.ne p6,p0=r0,r0				// p6 <- 0
  6.1176 +(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
  6.1177 +	br.cond.sptk.many .work_processed_kernel	// don't re-check
  6.1178 +
  6.1179 +// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
  6.1180 +// it could not be delivered.  Deliver it now.  The signal might be for us and
  6.1181 +// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
  6.1182 +// signal.
  6.1183 +
  6.1184 +.sigdelayed:
  6.1185 +	br.call.sptk.many rp=do_sigdelayed
  6.1186 +	cmp.eq p6,p0=r0,r0				// p6 <- 1, always re-check
  6.1187 +(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
  6.1188 +	br.cond.sptk.many .work_processed_kernel	// re-check
  6.1189 +
  6.1190 +.work_pending_syscall_end:
  6.1191 +	adds r2=PT(R8)+16,r12		// r2 = &pt_regs.r8
  6.1192 +	adds r3=PT(R10)+16,r12		// r3 = &pt_regs.r10
  6.1193 +	;;
  6.1194 +	ld8 r8=[r2]
  6.1195 +	ld8 r10=[r3]
  6.1196 +	br.cond.sptk.many .work_processed_syscall	// re-check
  6.1197 +#endif
  6.1198 +
  6.1199 +END(ia64_leave_kernel)
  6.1200 +
  6.1201 +ENTRY(handle_syscall_error)
  6.1202 +	/*
  6.1203 +	 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
  6.1204 +	 * lead us to mistake a negative return value for a failed syscall.  Those syscalls
  6.1205 +	 * must deposit a non-zero value in pt_regs.r8 to indicate an error.  If
  6.1206 +	 * pt_regs.r8 is zero, we assume that the call completed successfully.
  6.1207 +	 */
  6.1208 +	PT_REGS_UNWIND_INFO(0)
  6.1209 +	ld8 r3=[r2]		// load pt_regs.r8 (r2 must already point at it on entry)
  6.1210 +	;;
  6.1211 +	cmp.eq p6,p7=r3,r0	// is pt_regs.r8==0?
  6.1212 +	;;
  6.1213 +(p7)	mov r10=-1		// pt_regs.r8 != 0: set error indication in r10
  6.1214 +(p7)	sub r8=0,r8		// negate return value to get errno
  6.1215 +	br.cond.sptk ia64_leave_syscall
  6.1216 +END(handle_syscall_error)
  6.1217 +
  6.1218 +	/*
  6.1219 +	 * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
  6.1220 +	 * in case a system call gets restarted.
  6.1221 +	 */
  6.1222 +GLOBAL_ENTRY(ia64_invoke_schedule_tail)
  6.1223 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
  6.1224 +	alloc loc1=ar.pfs,8,2,1,0		// 8 in, 2 locals, 1 out; loc1 = ar.pfs
  6.1225 +	mov loc0=rp				// save return address
  6.1226 +	mov out0=r8				// Address of previous task
  6.1227 +	;;
  6.1228 +	br.call.sptk.many rp=schedule_tail
  6.1229 +.ret11:	mov ar.pfs=loc1			// restore ar.pfs
  6.1230 +	mov rp=loc0				// restore return address
  6.1231 +	br.ret.sptk.many rp
  6.1232 +END(ia64_invoke_schedule_tail)
  6.1233 +
  6.1234 +#ifndef XEN
  6.1235 +	/*
  6.1236 +	 * Set up the stack and call do_notify_resume_user().  Note that pSys and pNonSys need to
  6.1237 +	 * be set up by the caller.  We declare 8 input registers so the system call
  6.1238 +	 * args get preserved, in case we need to restart a system call.
  6.1239 +	 */
  6.1240 +ENTRY(notify_resume_user)
  6.1241 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
  6.1242 +	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
  6.1243 +	mov r9=ar.unat
  6.1244 +	mov loc0=rp				// save return address
  6.1245 +	mov out0=0				// there is no "oldset"
  6.1246 +	adds out1=8,sp				// out1=&sigscratch->ar_pfs
  6.1247 +(pSys)	mov out2=1				// out2==1 => we're in a syscall
  6.1248 +	;;
  6.1249 +(pNonSys) mov out2=0				// out2==0 => not a syscall
  6.1250 +	.fframe 16
  6.1251 +	.spillpsp ar.unat, 16			// (note that offset is relative to psp+0x10!)
  6.1252 +	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
  6.1253 +	st8 [out1]=loc1,-8			// save ar.pfs, out1=&sigscratch
  6.1254 +	.body
  6.1255 +	br.call.sptk.many rp=do_notify_resume_user
  6.1256 +.ret15:	.restore sp
  6.1257 +	adds sp=16,sp				// pop scratch stack space
  6.1258 +	;;
  6.1259 +	ld8 r9=[sp]				// load new unat from sigscratch->scratch_unat
  6.1260 +	mov rp=loc0				// restore return address
  6.1261 +	;;
  6.1262 +	mov ar.unat=r9
  6.1263 +	mov ar.pfs=loc1				// restore ar.pfs saved by the alloc above
  6.1264 +	br.ret.sptk.many rp
  6.1265 +END(notify_resume_user)
  6.1266 +
  6.1267 +GLOBAL_ENTRY(sys_rt_sigsuspend)
  6.1268 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
  6.1269 +	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
  6.1270 +	mov r9=ar.unat
  6.1271 +	mov loc0=rp				// save return address
  6.1272 +	mov out0=in0				// mask
  6.1273 +	mov out1=in1				// sigsetsize
  6.1274 +	adds out2=8,sp				// out2=&sigscratch->ar_pfs
  6.1275 +	;;
  6.1276 +	.fframe 16
  6.1277 +	.spillpsp ar.unat, 16			// (note that offset is relative to psp+0x10!)
  6.1278 +	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
  6.1279 +	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
  6.1280 +	.body
  6.1281 +	br.call.sptk.many rp=ia64_rt_sigsuspend
  6.1282 +.ret17:	.restore sp
  6.1283 +	adds sp=16,sp				// pop scratch stack space
  6.1284 +	;;
  6.1285 +	ld8 r9=[sp]				// load new unat from the sigscratch slot written above
  6.1286 +	mov rp=loc0				// restore return address
  6.1287 +	;;
  6.1288 +	mov ar.unat=r9
  6.1289 +	mov ar.pfs=loc1				// restore ar.pfs saved by the alloc above
  6.1290 +	br.ret.sptk.many rp
  6.1291 +END(sys_rt_sigsuspend)
  6.1292 +
  6.1293 +ENTRY(sys_rt_sigreturn)
  6.1294 +	PT_REGS_UNWIND_INFO(0)
  6.1295 +	/*
  6.1296 +	 * Allocate 8 input registers since ptrace() may clobber them
  6.1297 +	 */
  6.1298 +	alloc r2=ar.pfs,8,0,1,0
  6.1299 +	.prologue
  6.1300 +	PT_REGS_SAVES(16)
  6.1301 +	adds sp=-16,sp				// make room for 16 bytes of scratch space
  6.1302 +	.body
  6.1303 +	cmp.eq pNonSys,pSys=r0,r0		// sigreturn isn't a normal syscall...
  6.1304 +	;;
  6.1305 +	/*
  6.1306 +	 * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
  6.1307 +	 * syscall-entry path does not save them we save them here instead.  Note: we
  6.1308 +	 * don't need to save any other registers that are not saved by the streamlined
  6.1309 +	 * syscall path, because restore_sigcontext() restores them.
  6.1310 +	 */
  6.1311 +	adds r16=PT(F6)+32,sp			// r16 = &pt_regs.f6 (sp was lowered by 16 above)
  6.1312 +	adds r17=PT(F7)+32,sp			// r17 = &pt_regs.f7
  6.1313 +	;;
  6.1314 + 	stf.spill [r16]=f6,32
  6.1315 + 	stf.spill [r17]=f7,32
  6.1316 +	;;
  6.1317 + 	stf.spill [r16]=f8,32
  6.1318 + 	stf.spill [r17]=f9,32
  6.1319 +	;;
  6.1320 + 	stf.spill [r16]=f10
  6.1321 + 	stf.spill [r17]=f11
  6.1322 +	adds out0=16,sp				// out0 = &sigscratch
  6.1323 +	br.call.sptk.many rp=ia64_rt_sigreturn
  6.1324 +.ret19:	.restore sp 0
  6.1325 +	adds sp=16,sp				// pop scratch stack space
  6.1326 +	;;
  6.1327 +	ld8 r9=[sp]				// load new ar.unat
  6.1328 +	mov.sptk b7=r8,ia64_leave_kernel	// b7 = r8 from ia64_rt_sigreturn; hint target is ia64_leave_kernel
  6.1329 +	;;
  6.1330 +	mov ar.unat=r9
  6.1331 +	br.many b7				// leave via the address in b7
  6.1332 +END(sys_rt_sigreturn)
  6.1333 +#endif
  6.1334 +
  6.1335 +GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
  6.1336 +	.prologue
  6.1337 +	/*
  6.1338 +	 * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
  6.1339 +	 */
  6.1340 +	mov r16=r0
  6.1341 +	DO_SAVE_SWITCH_STACK				// push a switch_stack frame (see entry.h)
  6.1342 +	br.call.sptk.many rp=ia64_handle_unaligned	// stack frame setup in ivt
  6.1343 +.ret21:	.body
  6.1344 +	DO_LOAD_SWITCH_STACK				// reload and pop the switch_stack frame
  6.1345 +	br.cond.sptk.many rp				// goes to ia64_leave_kernel
  6.1346 +END(ia64_prepare_handle_unaligned)
  6.1347 +
  6.1348 +#ifndef XEN
  6.1349 +	//
  6.1350 +	// unw_init_running(void (*callback)(info, arg), void *arg)
  6.1351 +	//
  6.1352 +#	define EXTRA_FRAME_SIZE	((UNW_FRAME_INFO_SIZE+15)&~15)
  6.1353 +
  6.1354 +GLOBAL_ENTRY(unw_init_running)
  6.1355 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
  6.1356 +	alloc loc1=ar.pfs,2,3,3,0		// 2 in, 3 locals, 3 out; loc1 = ar.pfs
  6.1357 +	;;
  6.1358 +	ld8 loc2=[in0],8			// loc2 = first word at callback (branch target, see b6 below); in0 += 8
  6.1359 +	mov loc0=rp				// save return address
  6.1360 +	mov r16=loc1				// r16 = ar.pfs, presumably consumed by save_switch_stack -- confirm
  6.1361 +	DO_SAVE_SWITCH_STACK
  6.1362 +	.body
  6.1363 +
  6.1364 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
  6.1365 +	.fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
  6.1366 +	SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
  6.1367 +	adds sp=-EXTRA_FRAME_SIZE,sp		// make room for the unwind frame info
  6.1368 +	.body
  6.1369 +	;;
  6.1370 +	adds out0=16,sp				// &info
  6.1371 +	mov out1=r13				// current
  6.1372 +	adds out2=16+EXTRA_FRAME_SIZE,sp	// &switch_stack
  6.1373 +	br.call.sptk.many rp=unw_init_frame_info
  6.1374 +1:	adds out0=16,sp				// &info
  6.1375 +	mov b6=loc2				// b6 = callback branch target loaded above
  6.1376 +	mov loc2=gp				// save gp across indirect function call
  6.1377 +	;;
  6.1378 +	ld8 gp=[in0]				// gp = second word at callback
  6.1379 +	mov out1=in1				// arg
  6.1380 +	br.call.sptk.many rp=b6			// invoke the callback function
  6.1381 +1:	mov gp=loc2				// restore gp
  6.1382 +
  6.1383 +	// For now, we don't allow changing registers from within
  6.1384 +	// unw_init_running; if we ever want to allow that, we'd
  6.1385 +	// have to do a load_switch_stack here:
  6.1386 +	.restore sp
  6.1387 +	adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp	// pop switch_stack and info in one step
  6.1388 +
  6.1389 +	mov ar.pfs=loc1
  6.1390 +	mov rp=loc0
  6.1391 +	br.ret.sptk.many rp
  6.1392 +END(unw_init_running)
  6.1393 +
  6.1394 +	.rodata
  6.1395 +	.align 8
  6.1396 +	.globl sys_call_table
  6.1397 +sys_call_table:
  6.1398 +	data8 sys_ni_syscall		//  This must be sys_ni_syscall!  See ivt.S.
  6.1399 +	data8 sys_exit				// 1025
  6.1400 +	data8 sys_read
  6.1401 +	data8 sys_write
  6.1402 +	data8 sys_open
  6.1403 +	data8 sys_close
  6.1404 +	data8 sys_creat				// 1030
  6.1405 +	data8 sys_link
  6.1406 +	data8 sys_unlink
  6.1407 +	data8 ia64_execve
  6.1408 +	data8 sys_chdir
  6.1409 +	data8 sys_fchdir			// 1035
  6.1410 +	data8 sys_utimes
  6.1411 +	data8 sys_mknod
  6.1412 +	data8 sys_chmod
  6.1413 +	data8 sys_chown
  6.1414 +	data8 sys_lseek				// 1040
  6.1415 +	data8 sys_getpid
  6.1416 +	data8 sys_getppid
  6.1417 +	data8 sys_mount
  6.1418 +	data8 sys_umount
  6.1419 +	data8 sys_setuid			// 1045
  6.1420 +	data8 sys_getuid
  6.1421 +	data8 sys_geteuid
  6.1422 +	data8 sys_ptrace
  6.1423 +	data8 sys_access
  6.1424 +	data8 sys_sync				// 1050
  6.1425 +	data8 sys_fsync
  6.1426 +	data8 sys_fdatasync
  6.1427 +	data8 sys_kill
  6.1428 +	data8 sys_rename
  6.1429 +	data8 sys_mkdir				// 1055
  6.1430 +	data8 sys_rmdir
  6.1431 +	data8 sys_dup
  6.1432 +	data8 sys_pipe
  6.1433 +	data8 sys_times
  6.1434 +	data8 ia64_brk				// 1060
  6.1435 +	data8 sys_setgid
  6.1436 +	data8 sys_getgid
  6.1437 +	data8 sys_getegid
  6.1438 +	data8 sys_acct
  6.1439 +	data8 sys_ioctl				// 1065
  6.1440 +	data8 sys_fcntl
  6.1441 +	data8 sys_umask
  6.1442 +	data8 sys_chroot
  6.1443 +	data8 sys_ustat
  6.1444 +	data8 sys_dup2				// 1070
  6.1445 +	data8 sys_setreuid
  6.1446 +	data8 sys_setregid
  6.1447 +	data8 sys_getresuid
  6.1448 +	data8 sys_setresuid
  6.1449 +	data8 sys_getresgid			// 1075
  6.1450 +	data8 sys_setresgid
  6.1451 +	data8 sys_getgroups
  6.1452 +	data8 sys_setgroups
  6.1453 +	data8 sys_getpgid
  6.1454 +	data8 sys_setpgid			// 1080
  6.1455 +	data8 sys_setsid
  6.1456 +	data8 sys_getsid
  6.1457 +	data8 sys_sethostname
  6.1458 +	data8 sys_setrlimit
  6.1459 +	data8 sys_getrlimit			// 1085
  6.1460 +	data8 sys_getrusage
  6.1461 +	data8 sys_gettimeofday
  6.1462 +	data8 sys_settimeofday
  6.1463 +	data8 sys_select
  6.1464 +	data8 sys_poll				// 1090
  6.1465 +	data8 sys_symlink
  6.1466 +	data8 sys_readlink
  6.1467 +	data8 sys_uselib
  6.1468 +	data8 sys_swapon
  6.1469 +	data8 sys_swapoff			// 1095
  6.1470 +	data8 sys_reboot
  6.1471 +	data8 sys_truncate
  6.1472 +	data8 sys_ftruncate
  6.1473 +	data8 sys_fchmod
  6.1474 +	data8 sys_fchown			// 1100
  6.1475 +	data8 ia64_getpriority
  6.1476 +	data8 sys_setpriority
  6.1477 +	data8 sys_statfs
  6.1478 +	data8 sys_fstatfs
  6.1479 +	data8 sys_gettid			// 1105
  6.1480 +	data8 sys_semget
  6.1481 +	data8 sys_semop
  6.1482 +	data8 sys_semctl
  6.1483 +	data8 sys_msgget
  6.1484 +	data8 sys_msgsnd			// 1110
  6.1485 +	data8 sys_msgrcv
  6.1486 +	data8 sys_msgctl
  6.1487 +	data8 sys_shmget
  6.1488 +	data8 ia64_shmat
  6.1489 +	data8 sys_shmdt				// 1115
  6.1490 +	data8 sys_shmctl
  6.1491 +	data8 sys_syslog
  6.1492 +	data8 sys_setitimer
  6.1493 +	data8 sys_getitimer
  6.1494 +	data8 sys_ni_syscall			// 1120		/* was: ia64_oldstat */
  6.1495 +	data8 sys_ni_syscall					/* was: ia64_oldlstat */
  6.1496 +	data8 sys_ni_syscall					/* was: ia64_oldfstat */
  6.1497 +	data8 sys_vhangup
  6.1498 +	data8 sys_lchown
  6.1499 +	data8 sys_remap_file_pages		// 1125
  6.1500 +	data8 sys_wait4
  6.1501 +	data8 sys_sysinfo
  6.1502 +	data8 sys_clone
  6.1503 +	data8 sys_setdomainname
  6.1504 +	data8 sys_newuname			// 1130
  6.1505 +	data8 sys_adjtimex
  6.1506 +	data8 sys_ni_syscall					/* was: ia64_create_module */
  6.1507 +	data8 sys_init_module
  6.1508 +	data8 sys_delete_module
  6.1509 +	data8 sys_ni_syscall			// 1135		/* was: sys_get_kernel_syms */
  6.1510 +	data8 sys_ni_syscall					/* was: sys_query_module */
  6.1511 +	data8 sys_quotactl
  6.1512 +	data8 sys_bdflush
  6.1513 +	data8 sys_sysfs
  6.1514 +	data8 sys_personality			// 1140
  6.1515 +	data8 sys_ni_syscall		// sys_afs_syscall
  6.1516 +	data8 sys_setfsuid
  6.1517 +	data8 sys_setfsgid
  6.1518 +	data8 sys_getdents
  6.1519 +	data8 sys_flock				// 1145
  6.1520 +	data8 sys_readv
  6.1521 +	data8 sys_writev
  6.1522 +	data8 sys_pread64
  6.1523 +	data8 sys_pwrite64
  6.1524 +	data8 sys_sysctl			// 1150
  6.1525 +	data8 sys_mmap
  6.1526 +	data8 sys_munmap
  6.1527 +	data8 sys_mlock
  6.1528 +	data8 sys_mlockall
  6.1529 +	data8 sys_mprotect			// 1155
  6.1530 +	data8 ia64_mremap
  6.1531 +	data8 sys_msync
  6.1532 +	data8 sys_munlock
  6.1533 +	data8 sys_munlockall
  6.1534 +	data8 sys_sched_getparam		// 1160
  6.1535 +	data8 sys_sched_setparam
  6.1536 +	data8 sys_sched_getscheduler
  6.1537 +	data8 sys_sched_setscheduler
  6.1538 +	data8 sys_sched_yield
  6.1539 +	data8 sys_sched_get_priority_max	// 1165
  6.1540 +	data8 sys_sched_get_priority_min
  6.1541 +	data8 sys_sched_rr_get_interval
  6.1542 +	data8 sys_nanosleep
  6.1543 +	data8 sys_nfsservctl
  6.1544 +	data8 sys_prctl				// 1170
  6.1545 +	data8 sys_getpagesize
  6.1546 +	data8 sys_mmap2
  6.1547 +	data8 sys_pciconfig_read
  6.1548 +	data8 sys_pciconfig_write
  6.1549 +	data8 sys_perfmonctl			// 1175
  6.1550 +	data8 sys_sigaltstack
  6.1551 +	data8 sys_rt_sigaction
  6.1552 +	data8 sys_rt_sigpending
  6.1553 +	data8 sys_rt_sigprocmask
  6.1554 +	data8 sys_rt_sigqueueinfo		// 1180
  6.1555 +	data8 sys_rt_sigreturn
  6.1556 +	data8 sys_rt_sigsuspend
  6.1557 +	data8 sys_rt_sigtimedwait
  6.1558 +	data8 sys_getcwd
  6.1559 +	data8 sys_capget			// 1185
  6.1560 +	data8 sys_capset
  6.1561 +	data8 sys_sendfile64
  6.1562 +	data8 sys_ni_syscall		// sys_getpmsg (STREAMS)
  6.1563 +	data8 sys_ni_syscall		// sys_putpmsg (STREAMS)
  6.1564 +	data8 sys_socket			// 1190
  6.1565 +	data8 sys_bind
  6.1566 +	data8 sys_connect
  6.1567 +	data8 sys_listen
  6.1568 +	data8 sys_accept
  6.1569 +	data8 sys_getsockname			// 1195
  6.1570 +	data8 sys_getpeername
  6.1571 +	data8 sys_socketpair
  6.1572 +	data8 sys_send
  6.1573 +	data8 sys_sendto
  6.1574 +	data8 sys_recv				// 1200
  6.1575 +	data8 sys_recvfrom
  6.1576 +	data8 sys_shutdown
  6.1577 +	data8 sys_setsockopt
  6.1578 +	data8 sys_getsockopt
  6.1579 +	data8 sys_sendmsg			// 1205
  6.1580 +	data8 sys_recvmsg
  6.1581 +	data8 sys_pivot_root
  6.1582 +	data8 sys_mincore
  6.1583 +	data8 sys_madvise
  6.1584 +	data8 sys_newstat			// 1210
  6.1585 +	data8 sys_newlstat
  6.1586 +	data8 sys_newfstat
  6.1587 +	data8 sys_clone2
  6.1588 +	data8 sys_getdents64
  6.1589 +	data8 sys_getunwind			// 1215
  6.1590 +	data8 sys_readahead
  6.1591 +	data8 sys_setxattr
  6.1592 +	data8 sys_lsetxattr
  6.1593 +	data8 sys_fsetxattr
  6.1594 +	data8 sys_getxattr			// 1220
  6.1595 +	data8 sys_lgetxattr
  6.1596 +	data8 sys_fgetxattr
  6.1597 +	data8 sys_listxattr
  6.1598 +	data8 sys_llistxattr
  6.1599 +	data8 sys_flistxattr			// 1225
  6.1600 +	data8 sys_removexattr
  6.1601 +	data8 sys_lremovexattr
  6.1602 +	data8 sys_fremovexattr
  6.1603 +	data8 sys_tkill
  6.1604 +	data8 sys_futex				// 1230
  6.1605 +	data8 sys_sched_setaffinity
  6.1606 +	data8 sys_sched_getaffinity
  6.1607 +	data8 sys_set_tid_address
  6.1608 +	data8 sys_fadvise64_64
  6.1609 +	data8 sys_tgkill 			// 1235
  6.1610 +	data8 sys_exit_group
  6.1611 +	data8 sys_lookup_dcookie
  6.1612 +	data8 sys_io_setup
  6.1613 +	data8 sys_io_destroy
  6.1614 +	data8 sys_io_getevents			// 1240
  6.1615 +	data8 sys_io_submit
  6.1616 +	data8 sys_io_cancel
  6.1617 +	data8 sys_epoll_create
  6.1618 +	data8 sys_epoll_ctl
  6.1619 +	data8 sys_epoll_wait			// 1245
  6.1620 +	data8 sys_restart_syscall
  6.1621 +	data8 sys_semtimedop
  6.1622 +	data8 sys_timer_create
  6.1623 +	data8 sys_timer_settime
  6.1624 +	data8 sys_timer_gettime			// 1250
  6.1625 +	data8 sys_timer_getoverrun
  6.1626 +	data8 sys_timer_delete
  6.1627 +	data8 sys_clock_settime
  6.1628 +	data8 sys_clock_gettime
  6.1629 +	data8 sys_clock_getres			// 1255
  6.1630 +	data8 sys_clock_nanosleep
  6.1631 +	data8 sys_fstatfs64
  6.1632 +	data8 sys_statfs64
  6.1633 +	data8 sys_mbind
  6.1634 +	data8 sys_get_mempolicy			// 1260
  6.1635 +	data8 sys_set_mempolicy
  6.1636 +	data8 sys_mq_open
  6.1637 +	data8 sys_mq_unlink
  6.1638 +	data8 sys_mq_timedsend
  6.1639 +	data8 sys_mq_timedreceive		// 1265
  6.1640 +	data8 sys_mq_notify
  6.1641 +	data8 sys_mq_getsetattr
  6.1642 +	data8 sys_ni_syscall			// reserved for kexec_load
  6.1643 +	data8 sys_ni_syscall			// reserved for vserver
  6.1644 +	data8 sys_waitid			// 1270
  6.1645 +	data8 sys_add_key
  6.1646 +	data8 sys_request_key
  6.1647 +	data8 sys_keyctl
  6.1648 +	data8 sys_ni_syscall
  6.1649 +	data8 sys_ni_syscall			// 1275
  6.1650 +	data8 sys_ni_syscall
  6.1651 +	data8 sys_ni_syscall
  6.1652 +	data8 sys_ni_syscall
  6.1653 +	data8 sys_ni_syscall
  6.1654 +
  6.1655 +	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
  6.1656 +#endif
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xen/arch/ia64/linux-xen/entry.h	Tue Aug 16 12:15:23 2005 +0800
     7.3 @@ -0,0 +1,97 @@
     7.4 +#include <linux/config.h>
     7.5 +
     7.6 +/*
     7.7 + * Preserved registers that are shared between code in ivt.S and
     7.8 + * entry.S.  Be careful not to step on these!
     7.9 + */
    7.10 +#define PRED_LEAVE_SYSCALL	1 /* TRUE iff leave from syscall */
    7.11 +#define PRED_KERNEL_STACK	2 /* returning to kernel-stacks? */
    7.12 +#define PRED_USER_STACK		3 /* returning to user-stacks? */
    7.13 +#ifdef CONFIG_VTI
    7.14 +#define PRED_EMUL		2 /* Need to save r4-r7 for inst emulation (same number as PRED_KERNEL_STACK) */
    7.15 +#define PRED_NON_EMUL		3 /* No need to save r4-r7 for normal path (same number as PRED_USER_STACK) */
    7.16 +#define PRED_BN0		6 /* Guest is in bank 0 */
    7.17 +#define PRED_BN1		7 /* Guest is in bank 1 */
    7.18 +#endif // CONFIG_VTI
    7.19 +#define PRED_SYSCALL		4 /* inside a system call? */
    7.20 +#define PRED_NON_SYSCALL	5 /* complement of PRED_SYSCALL */
    7.21 +
    7.22 +#ifdef __ASSEMBLY__
    7.23 +# define PASTE2(x,y)	x##y		/* paste the two tokens as given */
    7.24 +# define PASTE(x,y)	PASTE2(x,y)	/* expand x and y first, then paste */
    7.25 +
    7.26 +# define pLvSys		PASTE(p,PRED_LEAVE_SYSCALL)	/* predicate register p<PRED_LEAVE_SYSCALL> */
    7.27 +# define pKStk		PASTE(p,PRED_KERNEL_STACK)
    7.28 +# define pUStk		PASTE(p,PRED_USER_STACK)
    7.29 +#ifdef CONFIG_VTI
    7.30 +# define pEml		PASTE(p,PRED_EMUL)
    7.31 +# define pNonEml	PASTE(p,PRED_NON_EMUL)
    7.32 +# define pBN0		PASTE(p,PRED_BN0)
    7.33 +# define pBN1		PASTE(p,PRED_BN1)
    7.34 +#endif // CONFIG_VTI
    7.35 +# define pSys		PASTE(p,PRED_SYSCALL)
    7.36 +# define pNonSys	PASTE(p,PRED_NON_SYSCALL)
    7.37 +#endif
    7.38 +
    7.39 +#define PT(f)		(IA64_PT_REGS_##f##_OFFSET)	/* expands to the IA64_PT_REGS_<f>_OFFSET constant */
    7.40 +#define SW(f)		(IA64_SWITCH_STACK_##f##_OFFSET)	/* expands to the IA64_SWITCH_STACK_<f>_OFFSET constant */
    7.41 +#ifdef CONFIG_VTI
    7.42 +#define VPD(f)      (VPD_##f##_START_OFFSET)	/* expands to the VPD_<f>_START_OFFSET constant */
    7.43 +#endif // CONFIG_VTI
    7.44 +
    7.45 +#define PT_REGS_SAVES(off)	/* unwind directives: rp, ar.pfs, ar.unat, ar.fpsr, pr at PT(x)+16+(off) from sp */	\
    7.46 +	.unwabi 3, 'i';				\
    7.47 +	.fframe IA64_PT_REGS_SIZE+16+(off);	\
    7.48 +	.spillsp rp, PT(CR_IIP)+16+(off);	\
    7.49 +	.spillsp ar.pfs, PT(CR_IFS)+16+(off);	\
    7.50 +	.spillsp ar.unat, PT(AR_UNAT)+16+(off);	\
    7.51 +	.spillsp ar.fpsr, PT(AR_FPSR)+16+(off);	\
    7.52 +	.spillsp pr, PT(PR)+16+(off);
    7.53 +
    7.54 +#define PT_REGS_UNWIND_INFO(off)	/* a prologue holding only PT_REGS_SAVES(off), then .body */	\
    7.55 +	.prologue;				\
    7.56 +	PT_REGS_SAVES(off);			\
    7.57 +	.body
    7.58 +
    7.59 +#define SWITCH_STACK_SAVES(off)	/* unwind directives: save locations at SW(x)+16+(off) from sp */	\
    7.60 +	.savesp ar.unat,SW(CALLER_UNAT)+16+(off);				\
    7.61 +	.savesp ar.fpsr,SW(AR_FPSR)+16+(off);					\
    7.62 +	.spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off);		\
    7.63 +	.spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off);		\
    7.64 +	.spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off);		\
    7.65 +	.spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off);		\
    7.66 +	.spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off);		\
    7.67 +	.spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off);		\
    7.68 +	.spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off);		\
    7.69 +	.spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off);		\
    7.70 +	.spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off);		\
    7.71 +	.spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off);		\
    7.72 +	.spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off);		\
    7.73 +	.spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off);		\
    7.74 +	.spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off);		\
    7.75 +	.spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off);		\
    7.76 +	.spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off);		\
    7.77 +	.spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off);	\
    7.78 +	.spillsp @priunat,SW(AR_UNAT)+16+(off);					\
    7.79 +	.spillsp ar.rnat,SW(AR_RNAT)+16+(off);					\
    7.80 +	.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off);				\
    7.81 +	.spillsp pr,SW(PR)+16+(off)
    7.82 +
    7.83 +#define DO_SAVE_SWITCH_STACK	/* clobbers r28 and b7; lowers sp by IA64_SWITCH_STACK_SIZE */	\
    7.84 +	movl r28=1f;	/* r28 = address of local label 1 at the end of this macro */	\
    7.85 +	;;					\
    7.86 +	.fframe IA64_SWITCH_STACK_SIZE;		\
    7.87 +	adds sp=-IA64_SWITCH_STACK_SIZE,sp;	/* make room for the frame */	\
    7.88 +	mov.ret.sptk b7=r28,1f;	/* b7 = label 1; presumably save_switch_stack returns via b7 -- defined elsewhere, confirm */	\
    7.89 +	SWITCH_STACK_SAVES(0);			\
    7.90 +	br.cond.sptk.many save_switch_stack;	\
    7.91 +1:
    7.92 +
    7.93 +#define DO_LOAD_SWITCH_STACK	/* clobbers r28 and b7; raises sp by IA64_SWITCH_STACK_SIZE */	\
    7.94 +	movl r28=1f;	/* r28 = address of local label 1 below */	\
    7.95 +	;;					\
    7.96 +	invala;					\
    7.97 +	mov.ret.sptk b7=r28,1f;	/* b7 = label 1; presumably load_switch_stack returns via b7 -- defined elsewhere, confirm */	\
    7.98 +	br.cond.sptk.many load_switch_stack;	\
    7.99 +1:	.restore sp;				\
   7.100 +	adds sp=IA64_SWITCH_STACK_SIZE,sp	/* pop the frame */
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xen/arch/ia64/linux-xen/head.S	Tue Aug 16 12:15:23 2005 +0800
     8.3 @@ -0,0 +1,1026 @@
     8.4 +/*
     8.5 + * Here is where the ball gets rolling as far as the kernel is concerned.
     8.6 + * When control is transferred to _start, the bootloader has already
     8.7 + * loaded us to the correct address.  All that's left to do here is
     8.8 + * to set up the kernel's global pointer and jump to the kernel
     8.9 + * entry point.
    8.10 + *
    8.11 + * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
    8.12 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    8.13 + *	Stephane Eranian <eranian@hpl.hp.com>
    8.14 + * Copyright (C) 1999 VA Linux Systems
    8.15 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
    8.16 + * Copyright (C) 1999 Intel Corp.
    8.17 + * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
    8.18 + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
    8.19 + * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
    8.20 + *   -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
    8.21 + */
    8.22 +
    8.23 +#include <linux/config.h>
    8.24 +
    8.25 +#include <asm/asmmacro.h>
    8.26 +#include <asm/fpu.h>
    8.27 +#include <asm/kregs.h>
    8.28 +#include <asm/mmu_context.h>
    8.29 +#include <asm/offsets.h>
    8.30 +#include <asm/pal.h>
    8.31 +#include <asm/pgtable.h>
    8.32 +#include <asm/processor.h>
    8.33 +#include <asm/ptrace.h>
    8.34 +#include <asm/system.h>
    8.35 +
    8.36 +	.section __special_page_section,"ax"
    8.37 +
    8.38 +	.global empty_zero_page
    8.39 +empty_zero_page:
    8.40 +	.skip PAGE_SIZE			// reserve PAGE_SIZE bytes
    8.41 +
    8.42 +	.global swapper_pg_dir
    8.43 +swapper_pg_dir:
    8.44 +	.skip PAGE_SIZE			// reserve PAGE_SIZE bytes
    8.45 +
    8.46 +	.rodata
    8.47 +halt_msg:				// passed to console_print by _start if start_kernel returns
    8.48 +	stringz "Halting kernel\n"
    8.49 +
    8.50 +	.text
    8.51 +
    8.52 +	.global start_ap
    8.53 +
    8.54 +	/*
    8.55 +	 * Start the kernel.  When the bootloader passes control to _start(), r28
    8.56 +	 * points to the address of the boot parameter area.  Execution reaches
    8.57 +	 * here in physical mode.
    8.58 +	 */
    8.59 +GLOBAL_ENTRY(_start)
    8.60 +start_ap:
    8.61 +	.prologue
    8.62 +	.save rp, r0		// terminate unwind chain with a NULL rp
    8.63 +	.body
    8.64 +
    8.65 +	rsm psr.i | psr.ic	// disable interrupts and interrupt collection
    8.66 +	;;
    8.67 +	srlz.i
    8.68 +	;;
    8.69 +	/*
    8.70 +	 * Initialize kernel region registers:
    8.71 +	 *	rr[0]: VHPT enabled, page size = PAGE_SHIFT
    8.72 +	 *	rr[1]: VHPT enabled, page size = PAGE_SHIFT
    8.73 +	 *	rr[2]: VHPT enabled, page size = PAGE_SHIFT
    8.74 +	 *	rr[3]: VHPT enabled, page size = PAGE_SHIFT
    8.75 +	 *	rr[4]: VHPT enabled, page size = PAGE_SHIFT
    8.76 +	 *	rr[5]: VHPT enabled, page size = PAGE_SHIFT
    8.77 +	 *	rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
    8.78 +	 *	rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
    8.79 +	 * We initialize all of them to prevent inadvertently assuming
    8.80 +	 * something about the state of address translation early in boot.
    8.81 +	 */
    8.82 +	movl r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
    8.83 +	movl r7=(0<<61)
    8.84 +	movl r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
    8.85 +	movl r9=(1<<61)
    8.86 +	movl r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
    8.87 +	movl r11=(2<<61)
    8.88 +	movl r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
    8.89 +	movl r13=(3<<61)
    8.90 +	movl r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
    8.91 +	movl r15=(4<<61)
    8.92 +	movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
    8.93 +	movl r17=(5<<61)
    8.94 +	movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
    8.95 +	movl r19=(6<<61)
    8.96 +	movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
    8.97 +	movl r21=(7<<61)
    8.98 +	;;
    8.99 +	mov rr[r7]=r6		// each pair: odd register = region base address, even register = value
   8.100 +	mov rr[r9]=r8
   8.101 +	mov rr[r11]=r10
   8.102 +	mov rr[r13]=r12
   8.103 +	mov rr[r15]=r14
   8.104 +	mov rr[r17]=r16
   8.105 +	mov rr[r19]=r18
   8.106 +	mov rr[r21]=r20
   8.107 +	;;
   8.108 +	/*
   8.109 +	 * Now pin mappings into the TLB for kernel text and data
   8.110 +	 */
   8.111 +	mov r18=KERNEL_TR_PAGE_SHIFT<<2	// value written to cr.itir below
   8.112 +	movl r17=KERNEL_START		// value written to cr.ifa below
   8.113 +	;;
   8.114 +	mov cr.itir=r18
   8.115 +	mov cr.ifa=r17
   8.116 +	mov r16=IA64_TR_KERNEL		// translation-register slot used for both itr and dtr
   8.117 +	mov r3=ip			// address we are executing at (physical mode here)
   8.118 +	movl r18=PAGE_KERNEL
   8.119 +	;;
   8.120 +	dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT	// r2 = ip with the low KERNEL_TR_PAGE_SHIFT bits cleared
   8.121 +	;;
   8.122 +	or r18=r2,r18			// r18 = that base address | PAGE_KERNEL
   8.123 +	;;
   8.124 +	srlz.i
   8.125 +	;;
   8.126 +	itr.i itr[r16]=r18
   8.127 +	;;
   8.128 +	itr.d dtr[r16]=r18
   8.129 +	;;
   8.130 +	srlz.i
   8.131 +
   8.132 +	/*
   8.133 +	 * Switch into virtual mode:
   8.134 +	 */
   8.135 +#ifdef CONFIG_VTI
   8.136 +	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH \
   8.137 +		  |IA64_PSR_DI)
   8.138 +#else // CONFIG_VTI
   8.139 +	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
   8.140 +		  |IA64_PSR_DI)
   8.141 +#endif // CONFIG_VTI
   8.142 +	;;
   8.143 +	mov cr.ipsr=r16			// PSR to be established by the rfi below
   8.144 +	movl r17=1f
   8.145 +	;;
   8.146 +	mov cr.iip=r17			// rfi resumes at label 1
   8.147 +	mov cr.ifs=r0
   8.148 +	;;
   8.149 +	rfi
   8.150 +	;;
   8.151 +1:	// now we are in virtual mode
   8.152 +
   8.153 +	// set IVT entry point---can't access I/O ports without it
   8.154 +#ifdef CONFIG_VTI
   8.155 +    movl r3=vmx_ia64_ivt
   8.156 +#else // CONFIG_VTI
   8.157 +	movl r3=ia64_ivt
   8.158 +#endif // CONFIG_VTI
   8.159 +	;;
   8.160 +	mov cr.iva=r3
   8.161 +	movl r2=FPSR_DEFAULT
   8.162 +	;;
   8.163 +	srlz.i
   8.164 +	movl gp=__gp
   8.165 +
   8.166 +	mov ar.fpsr=r2
   8.167 +	;;
   8.168 +
   8.169 +#define isAP	p2	// are we an Application Processor?
   8.170 +#define isBP	p3	// are we the Bootstrap Processor?
   8.171 +
   8.172 +#ifdef CONFIG_SMP
   8.173 +	/*
   8.174 +	 * Find the init_task for the currently booting CPU.  At poweron, and in
   8.175 +	 * UP mode, task_for_booting_cpu is NULL.
   8.176 +	 */
   8.177 +	movl r3=task_for_booting_cpu
   8.178 + 	;;
   8.179 +	ld8 r3=[r3]
   8.180 +	movl r2=init_task
   8.181 +	;;
   8.182 +	cmp.eq isBP,isAP=r3,r0		// NULL task_for_booting_cpu => BP, otherwise AP
   8.183 +	;;
   8.184 +(isAP)	mov r2=r3			// an AP uses the task it was handed instead of init_task
   8.185 +#else
   8.186 +	movl r2=init_task
   8.187 +	cmp.eq isBP,isAP=r0,r0		// always true: isBP=1, isAP=0
   8.188 +#endif
   8.189 +	;;
   8.190 +	tpa r3=r2		// r3 == phys addr of task struct
   8.191 +	mov r16=-1		// value stored to IA64_KR(CURRENT_STACK) when the BP skips the mapping below
   8.192 +(isBP)	br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
   8.193 +
   8.194 +	// load mapping for stack (virtaddr in r2, physaddr in r3)
   8.195 +	rsm psr.ic
   8.196 +	movl r17=PAGE_KERNEL
   8.197 +	;;
   8.198 +	srlz.d
   8.199 +	dep r18=0,r3,0,12		// r18 = physical address with the low 12 bits cleared
   8.200 +	;;
   8.201 +	or r18=r17,r18			// r18 = PAGE_KERNEL | that address
   8.202 +#ifdef XEN
   8.203 +	dep r2=-1,r3,60,4	// IMVA of task
   8.204 +#else
   8.205 +	dep r2=-1,r3,61,3	// IMVA of task
   8.206 +#endif
   8.207 +	;;
   8.208 +	mov r17=rr[r2]			// region register covering r2
   8.209 +	shr.u r16=r3,IA64_GRANULE_SHIFT	// r16 = physical address >> IA64_GRANULE_SHIFT
   8.210 +	;;
   8.211 +	dep r17=0,r17,8,24		// clear bits 8..31 of the region-register value
   8.212 +	;;
   8.213 +	mov cr.itir=r17
   8.214 +	mov cr.ifa=r2
   8.215 +
   8.216 +	mov r19=IA64_TR_CURRENT_STACK
   8.217 +	;;
   8.218 +	itr.d dtr[r19]=r18
   8.219 +	;;
   8.220 +	ssm psr.ic
   8.221 +	srlz.d
   8.222 +  	;;
   8.223 +
   8.224 +.load_current:
   8.225 +	// load the "current" pointer (r13) and ar.k6 with the current task
   8.226 +#ifdef CONFIG_VTI
   8.227 +	mov r21=r2		// virtual address
   8.228 +	;;
   8.229 +	bsw.1
   8.230 +	;;
   8.231 +#else // CONFIG_VTI
   8.232 +	mov IA64_KR(CURRENT)=r2		// virtual address
   8.233 +	mov IA64_KR(CURRENT_STACK)=r16
   8.234 +#endif // CONFIG_VTI
   8.235 +	mov r13=r2
   8.236 +	/*
   8.237 +	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel threads
   8.238 +	 * don't store interesting values in that structure, but the space still needs
   8.239 +	 * to be there because time-critical stuff such as the context switching can
   8.240 +	 * be implemented more efficiently (for example, __switch_to()
   8.241 +	 * always sets the psr.dfh bit of the task it is switching to).
   8.242 +	 */
   8.243 +	addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
   8.244 +	addl r2=IA64_RBS_OFFSET,r2	// initialize the RSE
   8.245 +	mov ar.rsc=0		// place RSE in enforced lazy mode
   8.246 +	;;
   8.247 +	loadrs			// clear the dirty partition
   8.248 +	;;
   8.249 +	mov ar.bspstore=r2	// establish the new RSE stack
   8.250 +	;;
   8.251 +	mov ar.rsc=0x3		// place RSE in eager mode
   8.252 +
   8.253 +#ifdef XEN
   8.254 +(isBP)	dep r28=-1,r28,60,4	// make address virtual
   8.255 +#else
   8.256 +(isBP)	dep r28=-1,r28,61,3	// make address virtual
   8.257 +#endif
   8.258 +(isBP)	movl r2=ia64_boot_param
   8.259 +	;;
   8.260 +(isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader
   8.261 +
   8.262 +#ifdef CONFIG_SMP
   8.263 +(isAP)	br.call.sptk.many rp=start_secondary
   8.264 +.ret0:
   8.265 +(isAP)	br.cond.sptk self	// AP only: spin in the endless loop at "self" if start_secondary returns
   8.266 +#endif
   8.267 +
   8.268 +	// This is executed by the bootstrap processor (bsp) only:
   8.269 +
   8.270 +#ifdef CONFIG_IA64_FW_EMU
   8.271 +	// initialize PAL & SAL emulator:
   8.272 +	br.call.sptk.many rp=sys_fw_init
   8.273 +.ret1:
   8.274 +#endif
   8.275 +	br.call.sptk.many rp=start_kernel
   8.276 +.ret2:	addl r3=@ltoff(halt_msg),gp	// reached only if start_kernel returns
   8.277 +	;;
   8.278 +	alloc r2=ar.pfs,8,0,2,0
   8.279 +	;;
   8.280 +	ld8 out0=[r3]			// out0 = address of halt_msg
   8.281 +	br.call.sptk.many b0=console_print
   8.282 +
   8.283 +self:	hint @pause
   8.284 +	;;
   8.285 +	br.sptk.many self		// endless loop
   8.286 +	;;
   8.287 +END(_start)
   8.288 +
   8.289 +GLOBAL_ENTRY(ia64_save_debug_regs)	// in0 = buffer: dbr values first, ibr values IA64_NUM_DBG_REGS*8 bytes later
   8.290 +	alloc r16=ar.pfs,1,0,0,0
   8.291 +	mov r20=ar.lc			// preserve ar.lc
   8.292 +	mov ar.lc=IA64_NUM_DBG_REGS-1	// br.cloop below then runs the body IA64_NUM_DBG_REGS times
   8.293 +	mov r18=0			// r18 = debug register index
   8.294 +	add r19=IA64_NUM_DBG_REGS*8,in0	// r19 = where the ibr values go
   8.295 +	;;
   8.296 +1:	mov r16=dbr[r18]
   8.297 +#ifdef CONFIG_ITANIUM
   8.298 +	;;
   8.299 +	srlz.d
   8.300 +#endif
   8.301 +	mov r17=ibr[r18]
   8.302 +	add r18=1,r18
   8.303 +	;;
   8.304 +	st8.nta [in0]=r16,8		// store dbr value, advance by 8
   8.305 +	st8.nta [r19]=r17,8		// store ibr value, advance by 8
   8.306 +	br.cloop.sptk.many 1b
   8.307 +	;;
   8.308 +	mov ar.lc=r20			// restore ar.lc
   8.309 +	br.ret.sptk.many rp
   8.310 +END(ia64_save_debug_regs)
   8.311 +
   8.312 +GLOBAL_ENTRY(ia64_load_debug_regs)	// in0 = buffer: dbr values first, ibr values IA64_NUM_DBG_REGS*8 bytes later
   8.313 +	alloc r16=ar.pfs,1,0,0,0
   8.314 +	lfetch.nta [in0]
   8.315 +	mov r20=ar.lc			// preserve ar.lc
   8.316 +	add r19=IA64_NUM_DBG_REGS*8,in0	// r19 = where the ibr values are read from
   8.317 +	mov ar.lc=IA64_NUM_DBG_REGS-1	// br.cloop below then runs the body IA64_NUM_DBG_REGS times
   8.318 +	mov r18=-1			// index; incremented before first use, so it starts at 0
   8.319 +	;;
   8.320 +1:	ld8.nta r16=[in0],8		// next dbr value, advance by 8
   8.321 +	ld8.nta r17=[r19],8		// next ibr value, advance by 8
   8.322 +	add r18=1,r18
   8.323 +	;;
   8.324 +	mov dbr[r18]=r16
   8.325 +#ifdef CONFIG_ITANIUM
   8.326 +	;;
   8.327 +	srlz.d				// Errata 132 (NoFix status)
   8.328 +#endif
   8.329 +	mov ibr[r18]=r17
   8.330 +	br.cloop.sptk.many 1b
   8.331 +	;;
   8.332 +	mov ar.lc=r20			// restore ar.lc
   8.333 +	br.ret.sptk.many rp
   8.334 +END(ia64_load_debug_regs)
   8.335 +
   8.336 +GLOBAL_ENTRY(__ia64_save_fpu)	// spill f32..f127 to in0; register fN lands at byte offset (N-32)*16
   8.337 +	alloc r2=ar.pfs,1,4,0,0
   8.338 +	adds loc0=96*16-16,in0		// slot of f127 (last 16-byte slot)
   8.339 +	adds loc1=96*16-16-128,in0	// slot of f119 (8 slots lower)
   8.340 +	;;
   8.341 +	stf.spill.nta [loc0]=f127,-256	// each pointer steps down 256 bytes = 16 registers per store
   8.342 +	stf.spill.nta [loc1]=f119,-256
   8.343 +	;;
   8.344 +	stf.spill.nta [loc0]=f111,-256
   8.345 +	stf.spill.nta [loc1]=f103,-256
   8.346 +	;;
   8.347 +	stf.spill.nta [loc0]=f95,-256
   8.348 +	stf.spill.nta [loc1]=f87,-256
   8.349 +	;;
   8.350 +	stf.spill.nta [loc0]=f79,-256
   8.351 +	stf.spill.nta [loc1]=f71,-256
   8.352 +	;;
   8.353 +	stf.spill.nta [loc0]=f63,-256
   8.354 +	stf.spill.nta [loc1]=f55,-256
   8.355 +	adds loc2=96*16-32,in0		// set up the next pointer pair while this pass finishes
   8.356 +	;;
   8.357 +	stf.spill.nta [loc0]=f47,-256
   8.358 +	stf.spill.nta [loc1]=f39,-256
   8.359 +	adds loc3=96*16-32-128,in0
   8.360 +	;;
   8.361 +	stf.spill.nta [loc2]=f126,-256
   8.362 +	stf.spill.nta [loc3]=f118,-256
   8.363 +	;;
   8.364 +	stf.spill.nta [loc2]=f110,-256
   8.365 +	stf.spill.nta [loc3]=f102,-256
   8.366 +	;;
   8.367 +	stf.spill.nta [loc2]=f94,-256
   8.368 +	stf.spill.nta [loc3]=f86,-256
   8.369 +	;;
   8.370 +	stf.spill.nta [loc2]=f78,-256
   8.371 +	stf.spill.nta [loc3]=f70,-256
   8.372 +	;;
   8.373 +	stf.spill.nta [loc2]=f62,-256
   8.374 +	stf.spill.nta [loc3]=f54,-256
   8.375 +	adds loc0=96*16-48,in0
   8.376 +	;;
   8.377 +	stf.spill.nta [loc2]=f46,-256
   8.378 +	stf.spill.nta [loc3]=f38,-256
   8.379 +	adds loc1=96*16-48-128,in0
   8.380 +	;;
   8.381 +	stf.spill.nta [loc0]=f125,-256
   8.382 +	stf.spill.nta [loc1]=f117,-256
   8.383 +	;;
   8.384 +	stf.spill.nta [loc0]=f109,-256
   8.385 +	stf.spill.nta [loc1]=f101,-256
   8.386 +	;;
   8.387 +	stf.spill.nta [loc0]=f93,-256
   8.388 +	stf.spill.nta [loc1]=f85,-256
   8.389 +	;;
   8.390 +	stf.spill.nta [loc0]=f77,-256
   8.391 +	stf.spill.nta [loc1]=f69,-256
   8.392 +	;;
   8.393 +	stf.spill.nta [loc0]=f61,-256
   8.394 +	stf.spill.nta [loc1]=f53,-256
   8.395 +	adds loc2=96*16-64,in0
   8.396 +	;;
   8.397 +	stf.spill.nta [loc0]=f45,-256
   8.398 +	stf.spill.nta [loc1]=f37,-256
   8.399 +	adds loc3=96*16-64-128,in0
   8.400 +	;;
   8.401 +	stf.spill.nta [loc2]=f124,-256
   8.402 +	stf.spill.nta [loc3]=f116,-256
   8.403 +	;;
   8.404 +	stf.spill.nta [loc2]=f108,-256
   8.405 +	stf.spill.nta [loc3]=f100,-256
   8.406 +	;;
   8.407 +	stf.spill.nta [loc2]=f92,-256
   8.408 +	stf.spill.nta [loc3]=f84,-256
   8.409 +	;;
   8.410 +	stf.spill.nta [loc2]=f76,-256
   8.411 +	stf.spill.nta [loc3]=f68,-256
   8.412 +	;;
   8.413 +	stf.spill.nta [loc2]=f60,-256
   8.414 +	stf.spill.nta [loc3]=f52,-256
   8.415 +	adds loc0=96*16-80,in0
   8.416 +	;;
   8.417 +	stf.spill.nta [loc2]=f44,-256
   8.418 +	stf.spill.nta [loc3]=f36,-256
   8.419 +	adds loc1=96*16-80-128,in0
   8.420 +	;;
   8.421 +	stf.spill.nta [loc0]=f123,-256
   8.422 +	stf.spill.nta [loc1]=f115,-256
   8.423 +	;;
   8.424 +	stf.spill.nta [loc0]=f107,-256
   8.425 +	stf.spill.nta [loc1]=f99,-256
   8.426 +	;;
   8.427 +	stf.spill.nta [loc0]=f91,-256
   8.428 +	stf.spill.nta [loc1]=f83,-256
   8.429 +	;;
   8.430 +	stf.spill.nta [loc0]=f75,-256
   8.431 +	stf.spill.nta [loc1]=f67,-256
   8.432 +	;;
   8.433 +	stf.spill.nta [loc0]=f59,-256
   8.434 +	stf.spill.nta [loc1]=f51,-256
   8.435 +	adds loc2=96*16-96,in0
   8.436 +	;;
   8.437 +	stf.spill.nta [loc0]=f43,-256
   8.438 +	stf.spill.nta [loc1]=f35,-256
   8.439 +	adds loc3=96*16-96-128,in0
   8.440 +	;;
   8.441 +	stf.spill.nta [loc2]=f122,-256
   8.442 +	stf.spill.nta [loc3]=f114,-256
   8.443 +	;;
   8.444 +	stf.spill.nta [loc2]=f106,-256
   8.445 +	stf.spill.nta [loc3]=f98,-256
   8.446 +	;;
   8.447 +	stf.spill.nta [loc2]=f90,-256
   8.448 +	stf.spill.nta [loc3]=f82,-256
   8.449 +	;;
   8.450 +	stf.spill.nta [loc2]=f74,-256
   8.451 +	stf.spill.nta [loc3]=f66,-256
   8.452 +	;;
   8.453 +	stf.spill.nta [loc2]=f58,-256
   8.454 +	stf.spill.nta [loc3]=f50,-256
   8.455 +	adds loc0=96*16-112,in0
   8.456 +	;;
   8.457 +	stf.spill.nta [loc2]=f42,-256
   8.458 +	stf.spill.nta [loc3]=f34,-256
   8.459 +	adds loc1=96*16-112-128,in0
   8.460 +	;;
   8.461 +	stf.spill.nta [loc0]=f121,-256
   8.462 +	stf.spill.nta [loc1]=f113,-256
   8.463 +	;;
   8.464 +	stf.spill.nta [loc0]=f105,-256
   8.465 +	stf.spill.nta [loc1]=f97,-256
   8.466 +	;;
   8.467 +	stf.spill.nta [loc0]=f89,-256
   8.468 +	stf.spill.nta [loc1]=f81,-256
   8.469 +	;;
   8.470 +	stf.spill.nta [loc0]=f73,-256
   8.471 +	stf.spill.nta [loc1]=f65,-256
   8.472 +	;;
   8.473 +	stf.spill.nta [loc0]=f57,-256
   8.474 +	stf.spill.nta [loc1]=f49,-256
   8.475 +	adds loc2=96*16-128,in0
   8.476 +	;;
   8.477 +	stf.spill.nta [loc0]=f41,-256
   8.478 +	stf.spill.nta [loc1]=f33,-256
   8.479 +	adds loc3=96*16-128-128,in0
   8.480 +	;;
   8.481 +	stf.spill.nta [loc2]=f120,-256
   8.482 +	stf.spill.nta [loc3]=f112,-256
   8.483 +	;;
   8.484 +	stf.spill.nta [loc2]=f104,-256
   8.485 +	stf.spill.nta [loc3]=f96,-256
   8.486 +	;;
   8.487 +	stf.spill.nta [loc2]=f88,-256
   8.488 +	stf.spill.nta [loc3]=f80,-256
   8.489 +	;;
   8.490 +	stf.spill.nta [loc2]=f72,-256
   8.491 +	stf.spill.nta [loc3]=f64,-256
   8.492 +	;;
   8.493 +	stf.spill.nta [loc2]=f56,-256
   8.494 +	stf.spill.nta [loc3]=f48,-256
   8.495 +	;;
   8.496 +	stf.spill.nta [loc2]=f40
   8.497 +	stf.spill.nta [loc3]=f32	// f32 ends up at offset 0 of the buffer
   8.498 +	br.ret.sptk.many rp
   8.499 +END(__ia64_save_fpu)
   8.500 +
   8.501 +GLOBAL_ENTRY(__ia64_load_fpu)	// fill f32..f127 from in0; register fN is read from byte offset (N-32)*16
   8.502 +	alloc r2=ar.pfs,1,2,0,0
   8.503 +	adds r3=128,in0			// four pointers, 128 bytes (8 registers) apart
   8.504 +	adds r14=256,in0
   8.505 +	adds r15=384,in0
   8.506 +	mov loc0=512			// forward step: 32 registers
   8.507 +	mov loc1=-1024+16		// step back two forward steps, then on to the next register
   8.508 +	;;
   8.509 +	ldf.fill.nta f32=[in0],loc0
   8.510 +	ldf.fill.nta f40=[ r3],loc0
   8.511 +	ldf.fill.nta f48=[r14],loc0
   8.512 +	ldf.fill.nta f56=[r15],loc0
   8.513 +	;;
   8.514 +	ldf.fill.nta f64=[in0],loc0
   8.515 +	ldf.fill.nta f72=[ r3],loc0
   8.516 +	ldf.fill.nta f80=[r14],loc0
   8.517 +	ldf.fill.nta f88=[r15],loc0
   8.518 +	;;
   8.519 +	ldf.fill.nta f96=[in0],loc1
   8.520 +	ldf.fill.nta f104=[ r3],loc1
   8.521 +	ldf.fill.nta f112=[r14],loc1
   8.522 +	ldf.fill.nta f120=[r15],loc1
   8.523 +	;;
   8.524 +	ldf.fill.nta f33=[in0],loc0
   8.525 +	ldf.fill.nta f41=[ r3],loc0
   8.526 +	ldf.fill.nta f49=[r14],loc0
   8.527 +	ldf.fill.nta f57=[r15],loc0
   8.528 +	;;
   8.529 +	ldf.fill.nta f65=[in0],loc0
   8.530 +	ldf.fill.nta f73=[ r3],loc0
   8.531 +	ldf.fill.nta f81=[r14],loc0
   8.532 +	ldf.fill.nta f89=[r15],loc0
   8.533 +	;;
   8.534 +	ldf.fill.nta f97=[in0],loc1
   8.535 +	ldf.fill.nta f105=[ r3],loc1
   8.536 +	ldf.fill.nta f113=[r14],loc1
   8.537 +	ldf.fill.nta f121=[r15],loc1
   8.538 +	;;
   8.539 +	ldf.fill.nta f34=[in0],loc0
   8.540 +	ldf.fill.nta f42=[ r3],loc0
   8.541 +	ldf.fill.nta f50=[r14],loc0
   8.542 +	ldf.fill.nta f58=[r15],loc0
   8.543 +	;;
   8.544 +	ldf.fill.nta f66=[in0],loc0
   8.545 +	ldf.fill.nta f74=[ r3],loc0
   8.546 +	ldf.fill.nta f82=[r14],loc0
   8.547 +	ldf.fill.nta f90=[r15],loc0
   8.548 +	;;
   8.549 +	ldf.fill.nta f98=[in0],loc1
   8.550 +	ldf.fill.nta f106=[ r3],loc1
   8.551 +	ldf.fill.nta f114=[r14],loc1
   8.552 +	ldf.fill.nta f122=[r15],loc1
   8.553 +	;;
   8.554 +	ldf.fill.nta f35=[in0],loc0
   8.555 +	ldf.fill.nta f43=[ r3],loc0
   8.556 +	ldf.fill.nta f51=[r14],loc0
   8.557 +	ldf.fill.nta f59=[r15],loc0
   8.558 +	;;
   8.559 +	ldf.fill.nta f67=[in0],loc0
   8.560 +	ldf.fill.nta f75=[ r3],loc0
   8.561 +	ldf.fill.nta f83=[r14],loc0
   8.562 +	ldf.fill.nta f91=[r15],loc0
   8.563 +	;;
   8.564 +	ldf.fill.nta f99=[in0],loc1
   8.565 +	ldf.fill.nta f107=[ r3],loc1
   8.566 +	ldf.fill.nta f115=[r14],loc1
   8.567 +	ldf.fill.nta f123=[r15],loc1
   8.568 +	;;
   8.569 +	ldf.fill.nta f36=[in0],loc0
   8.570 +	ldf.fill.nta f44=[ r3],loc0
   8.571 +	ldf.fill.nta f52=[r14],loc0
   8.572 +	ldf.fill.nta f60=[r15],loc0
   8.573 +	;;
   8.574 +	ldf.fill.nta f68=[in0],loc0
   8.575 +	ldf.fill.nta f76=[ r3],loc0
   8.576 +	ldf.fill.nta f84=[r14],loc0
   8.577 +	ldf.fill.nta f92=[r15],loc0
   8.578 +	;;
   8.579 +	ldf.fill.nta f100=[in0],loc1
   8.580 +	ldf.fill.nta f108=[ r3],loc1
   8.581 +	ldf.fill.nta f116=[r14],loc1
   8.582 +	ldf.fill.nta f124=[r15],loc1
   8.583 +	;;
   8.584 +	ldf.fill.nta f37=[in0],loc0
   8.585 +	ldf.fill.nta f45=[ r3],loc0
   8.586 +	ldf.fill.nta f53=[r14],loc0
   8.587 +	ldf.fill.nta f61=[r15],loc0
   8.588 +	;;
   8.589 +	ldf.fill.nta f69=[in0],loc0
   8.590 +	ldf.fill.nta f77=[ r3],loc0
   8.591 +	ldf.fill.nta f85=[r14],loc0
   8.592 +	ldf.fill.nta f93=[r15],loc0
   8.593 +	;;
   8.594 +	ldf.fill.nta f101=[in0],loc1
   8.595 +	ldf.fill.nta f109=[ r3],loc1
   8.596 +	ldf.fill.nta f117=[r14],loc1
   8.597 +	ldf.fill.nta f125=[r15],loc1
   8.598 +	;;
   8.599 +	ldf.fill.nta f38 =[in0],loc0
   8.600 +	ldf.fill.nta f46 =[ r3],loc0
   8.601 +	ldf.fill.nta f54 =[r14],loc0
   8.602 +	ldf.fill.nta f62 =[r15],loc0
   8.603 +	;;
   8.604 +	ldf.fill.nta f70 =[in0],loc0
   8.605 +	ldf.fill.nta f78 =[ r3],loc0
   8.606 +	ldf.fill.nta f86 =[r14],loc0
   8.607 +	ldf.fill.nta f94 =[r15],loc0
   8.608 +	;;
   8.609 +	ldf.fill.nta f102=[in0],loc1
   8.610 +	ldf.fill.nta f110=[ r3],loc1
   8.611 +	ldf.fill.nta f118=[r14],loc1
   8.612 +	ldf.fill.nta f126=[r15],loc1
   8.613 +	;;
   8.614 +	ldf.fill.nta f39 =[in0],loc0
   8.615 +	ldf.fill.nta f47 =[ r3],loc0
   8.616 +	ldf.fill.nta f55 =[r14],loc0
   8.617 +	ldf.fill.nta f63 =[r15],loc0
   8.618 +	;;
   8.619 +	ldf.fill.nta f71 =[in0],loc0
   8.620 +	ldf.fill.nta f79 =[ r3],loc0
   8.621 +	ldf.fill.nta f87 =[r14],loc0
   8.622 +	ldf.fill.nta f95 =[r15],loc0
   8.623 +	;;
   8.624 +	ldf.fill.nta f103=[in0]
   8.625 +	ldf.fill.nta f111=[ r3]
   8.626 +	ldf.fill.nta f119=[r14]
   8.627 +	ldf.fill.nta f127=[r15]
   8.628 +	br.ret.sptk.many rp
   8.629 +END(__ia64_load_fpu)
   8.630 +
   8.631 +GLOBAL_ENTRY(__ia64_init_fpu)	// set f32..f127 to zero, using the 16 bytes at [sp] as scratch
   8.632 +	stf.spill [sp]=f0		// M3: write f0 to [sp]; the ldfps loads below read it back
   8.633 +	mov	 f32=f0			// F
   8.634 +	nop.b	 0
   8.635 +
   8.636 +	ldfps	 f33,f34=[sp]		// M0
   8.637 +	ldfps	 f35,f36=[sp]		// M1
   8.638 +	mov      f37=f0			// F
   8.639 +	;;
   8.640 +
   8.641 +	setf.s	 f38=r0			// M2
   8.642 +	setf.s	 f39=r0			// M3
   8.643 +	mov      f40=f0			// F
   8.644 +
   8.645 +	ldfps	 f41,f42=[sp]		// M0
   8.646 +	ldfps	 f43,f44=[sp]		// M1
   8.647 +	mov      f45=f0			// F
   8.648 +
   8.649 +	setf.s	 f46=r0			// M2
   8.650 +	setf.s	 f47=r0			// M3
   8.651 +	mov      f48=f0			// F
   8.652 +
   8.653 +	ldfps	 f49,f50=[sp]		// M0
   8.654 +	ldfps	 f51,f52=[sp]		// M1
   8.655 +	mov      f53=f0			// F
   8.656 +
   8.657 +	setf.s	 f54=r0			// M2
   8.658 +	setf.s	 f55=r0			// M3
   8.659 +	mov      f56=f0			// F
   8.660 +
   8.661 +	ldfps	 f57,f58=[sp]		// M0
   8.662 +	ldfps	 f59,f60=[sp]		// M1
   8.663 +	mov      f61=f0			// F
   8.664 +
   8.665 +	setf.s	 f62=r0			// M2
   8.666 +	setf.s	 f63=r0			// M3
   8.667 +	mov      f64=f0			// F
   8.668 +
   8.669 +	ldfps	 f65,f66=[sp]		// M0
   8.670 +	ldfps	 f67,f68=[sp]		// M1
   8.671 +	mov      f69=f0			// F
   8.672 +
   8.673 +	setf.s	 f70=r0			// M2
   8.674 +	setf.s	 f71=r0			// M3
   8.675 +	mov      f72=f0			// F
   8.676 +
   8.677 +	ldfps	 f73,f74=[sp]		// M0
   8.678 +	ldfps	 f75,f76=[sp]		// M1
   8.679 +	mov      f77=f0			// F
   8.680 +
   8.681 +	setf.s	 f78=r0			// M2
   8.682 +	setf.s	 f79=r0			// M3
   8.683 +	mov      f80=f0			// F
   8.684 +
   8.685 +	ldfps	 f81,f82=[sp]		// M0
   8.686 +	ldfps	 f83,f84=[sp]		// M1
   8.687 +	mov      f85=f0			// F
   8.688 +
   8.689 +	setf.s	 f86=r0			// M2
   8.690 +	setf.s	 f87=r0			// M3
   8.691 +	mov      f88=f0			// F
   8.692 +
   8.693 +	/*
   8.694 +	 * When the instructions are cached, it would be faster to initialize
   8.695 +	 * the remaining registers with simply mov instructions (F-unit).
   8.696 +	 * This gets the time down to ~29 cycles.  However, this would use up
   8.697 +	 * 33 bundles, whereas continuing with the above pattern yields
   8.698 +	 * 10 bundles and ~30 cycles.
   8.699 +	 */
   8.700 +
   8.701 +	ldfps	 f89,f90=[sp]		// M0
   8.702 +	ldfps	 f91,f92=[sp]		// M1
   8.703 +	mov      f93=f0			// F
   8.704 +
   8.705 +	setf.s	 f94=r0			// M2
   8.706 +	setf.s	 f95=r0			// M3
   8.707 +	mov      f96=f0			// F
   8.708 +
   8.709 +	ldfps	 f97,f98=[sp]		// M0
   8.710 +	ldfps	 f99,f100=[sp]		// M1
   8.711 +	mov      f101=f0		// F
   8.712 +
   8.713 +	setf.s	 f102=r0		// M2
   8.714 +	setf.s	 f103=r0		// M3
   8.715 +	mov      f104=f0		// F
   8.716 +
   8.717 +	ldfps	 f105,f106=[sp]		// M0
   8.718 +	ldfps	 f107,f108=[sp]		// M1
   8.719 +	mov      f109=f0		// F
   8.720 +
   8.721 +	setf.s	 f110=r0		// M2
   8.722 +	setf.s	 f111=r0		// M3
   8.723 +	mov      f112=f0		// F
   8.724 +
   8.725 +	ldfps	 f113,f114=[sp]		// M0
   8.726 +	ldfps	 f115,f116=[sp]		// M1
   8.727 +	mov      f117=f0		// F
   8.728 +
   8.729 +	setf.s	 f118=r0		// M2
   8.730 +	setf.s	 f119=r0		// M3
   8.731 +	mov      f120=f0		// F
   8.732 +
   8.733 +	ldfps	 f121,f122=[sp]		// M0
   8.734 +	ldfps	 f123,f124=[sp]		// M1
   8.735 +	mov      f125=f0		// F
   8.736 +
   8.737 +	setf.s	 f126=r0		// M2
   8.738 +	setf.s	 f127=r0		// M3
   8.739 +	br.ret.sptk.many rp		// B
   8.740 +END(__ia64_init_fpu)
   8.741 +
   8.742 +/*
   8.743 + * Switch execution mode from virtual to physical
   8.744 + *
   8.745 + * Inputs:
   8.746 + *	r16 = new psr to establish
   8.747 + * Output:
   8.748 + *	r19 = old virtual address of ar.bsp
   8.749 + *	r20 = old virtual address of sp
   8.750 + *
   8.751 + * Note: RSE must already be in enforced lazy mode
   8.752 + */
   8.753 +GLOBAL_ENTRY(ia64_switch_mode_phys)	// clobbers r2, r3, r14, r15, r17, r18 in addition to the documented outputs
   8.754 + {
   8.755 +	alloc r2=ar.pfs,0,0,0,0
   8.756 +	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
   8.757 +	mov r15=ip			// r15 = address of this first bundle, i.e. the function entry
   8.758 + }
   8.759 +	;;
   8.760 + {
   8.761 +	flushrs				// must be first insn in group
   8.762 +	srlz.i
   8.763 + }
   8.764 +	;;
   8.765 +	mov cr.ipsr=r16			// set new PSR
   8.766 +	add r3=1f-ia64_switch_mode_phys,r15	// r3 = run-time address of label 1
   8.767 +
   8.768 +	mov r19=ar.bsp			// output: old ar.bsp
   8.769 +	mov r20=sp			// output: old sp
   8.770 +	mov r14=rp			// get return address into a general register
   8.771 +	;;
   8.772 +
   8.773 +	// going to physical mode, use tpa to translate virt->phys
   8.774 +	tpa r17=r19
   8.775 +	tpa r3=r3
   8.776 +	tpa sp=sp
   8.777 +	tpa r14=r14
   8.778 +	;;
   8.779 +
   8.780 +	mov r18=ar.rnat			// save ar.rnat
   8.781 +	mov ar.bspstore=r17		// this steps on ar.rnat
   8.782 +	mov cr.iip=r3			// rfi resumes at label 1 (physical address)
   8.783 +	mov cr.ifs=r0
   8.784 +	;;
   8.785 +	mov ar.rnat=r18			// restore ar.rnat
   8.786 +	rfi				// must be last insn in group
   8.787 +	;;
   8.788 +1:	mov rp=r14			// return to the caller at its translated (physical) address
   8.789 +	br.ret.sptk.many rp
   8.790 +END(ia64_switch_mode_phys)
   8.791 +
   8.792 +/*
   8.793 + * Switch execution mode from physical to virtual
   8.794 + *
   8.795 + * Inputs:
   8.796 + *	r16 = new psr to establish
   8.797 + *	r19 = new bspstore to establish
   8.798 + *	r20 = new sp to establish
   8.799 + *
   8.800 + * Note: RSE must already be in enforced lazy mode
   8.801 + */
   8.802 +GLOBAL_ENTRY(ia64_switch_mode_virt)	// clobbers r2, r3, r14, r15, r18
   8.803 + {
   8.804 +	alloc r2=ar.pfs,0,0,0,0
   8.805 +	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
   8.806 +	mov r15=ip			// r15 = address of this first bundle, i.e. the function entry
   8.807 + }
   8.808 +	;;
   8.809 + {
   8.810 +	flushrs				// must be first insn in group
   8.811 +	srlz.i
   8.812 + }
   8.813 +	;;
   8.814 +	mov cr.ipsr=r16			// set new PSR
   8.815 +	add r3=1f-ia64_switch_mode_virt,r15	// r3 = run-time address of label 1
   8.816 +
   8.817 +	mov r14=rp			// get return address into a general register
   8.818 +	;;
   8.819 +
   8.820 +	// going to virtual
   8.821 +	//   - for code addresses, set upper bits of addr to KERNEL_START
   8.822 +	//   - for stack addresses, copy from input argument
   8.823 +	movl r18=KERNEL_START
   8.824 +	dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT	// keep only the low KERNEL_TR_PAGE_SHIFT bits
   8.825 +	dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT	// same for the return address
   8.826 +	mov sp=r20
   8.827 +	;;
   8.828 +	or r3=r3,r18
   8.829 +	or r14=r14,r18
   8.830 +	;;
   8.831 +
   8.832 +	mov r18=ar.rnat			// save ar.rnat
   8.833 +	mov ar.bspstore=r19		// this steps on ar.rnat
   8.834 +	mov cr.iip=r3			// rfi resumes at label 1 (virtual address)
   8.835 +	mov cr.ifs=r0
   8.836 +	;;
   8.837 +	mov ar.rnat=r18			// restore ar.rnat
   8.838 +	rfi				// must be last insn in group
   8.839 +	;;
   8.840 +1:	mov rp=r14			// return to the caller at its virtual address
   8.841 +	br.ret.sptk.many rp
   8.842 +END(ia64_switch_mode_virt)
   8.843 +
   8.844 +GLOBAL_ENTRY(ia64_delay_loop)	// spin on a counted branch; r32 supplies the loop count
   8.845 +	.prologue
   8.846 +{	nop 0			// work around GAS unwind info generation bug...
   8.847 +	.save ar.lc,r2
   8.848 +	mov r2=ar.lc		// preserve ar.lc in r2
   8.849 +	.body
   8.850 +	;;
   8.851 +	mov ar.lc=r32		// loop counter = caller's argument
   8.852 +}
   8.853 +	;;
   8.854 +	// force loop to be 32-byte aligned (GAS bug means we cannot use .align
   8.855 +	// inside function body without corrupting unwind info).
   8.856 +{	nop 0 }
   8.857 +1:	br.cloop.sptk.few 1b	// branch to itself until ar.lc counts down to 0
   8.858 +	;;
   8.859 +	mov ar.lc=r2		// restore ar.lc
   8.860 +	br.ret.sptk.many rp
   8.861 +END(ia64_delay_loop)
   8.862 +
   8.863 +/*
   8.864 + * Return a CPU-local timestamp in nano-seconds.  This timestamp is
   8.865 + * NOT synchronized across CPUs; its return value must never be
   8.866 + * compared against the values returned on another CPU.  The usage in
   8.867 + * kernel/sched.c ensures that.
   8.868 + *
   8.869 + * The return-value of sched_clock() is NOT supposed to wrap-around.
   8.870 + * If it did, it would cause some scheduling hiccups (at the worst).
   8.871 + * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
   8.872 + * that would happen only once every 5+ years.
   8.873 + *
   8.874 + * The code below basically calculates:
   8.875 + *
   8.876 + *   (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
   8.877 + *
   8.878 + * except that the multiplication and the shift are done with 128-bit
   8.879 + * intermediate precision so that we can produce a full 64-bit result.
   8.880 + */
   8.881 +GLOBAL_ENTRY(sched_clock)	// result in r8; uses r9 and f8-f11 as scratch
   8.882 +#ifdef XEN
   8.883 +	movl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET
   8.884 +#else
   8.885 +	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
   8.886 +#endif
   8.887 +	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
   8.888 +	;;
   8.889 +	ldf8 f8=[r8]		// f8 = nsec_per_cyc
   8.890 +	;;
   8.891 +	setf.sig f9=r9		// certain to stall, so issue it _after_ ldf8...
   8.892 +	;;
   8.893 +	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
   8.894 +	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
   8.895 +	;;
   8.896 +	getf.sig r8=f10		//						(5 cyc)
   8.897 +	getf.sig r9=f11
   8.898 +	;;
   8.899 +	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT	// r8 = 64 bits of the r9:r8 pair, shifted right by the constant
   8.900 +	br.ret.sptk.many rp
   8.901 +END(sched_clock)
   8.902 +
   8.903 +GLOBAL_ENTRY(start_kernel_thread)	// entered with r9 and r11 holding the two arguments for kernel_thread_helper
   8.904 +	.prologue
   8.905 +	.save rp, r0				// this is the end of the call-chain
   8.906 +	.body
   8.907 +	alloc r2 = ar.pfs, 0, 0, 2, 0		// frame with two output registers only
   8.908 +	mov out0 = r9
   8.909 +	mov out1 = r11;;
   8.910 +	br.call.sptk.many rp = kernel_thread_helper;;
   8.911 +	mov out0 = r8				// hand r8 (set by the call above) to sys_exit
   8.912 +	br.call.sptk.many rp = sys_exit;;
   8.913 +1:	br.sptk.few 1b				// not reached
   8.914 +END(start_kernel_thread)
   8.915 +
   8.916 +#ifdef CONFIG_IA64_BRL_EMU
   8.917 +
   8.918 +/*
   8.919 + *  Assembly routines used by brl_emu.c to set preserved register state.
   8.920 + */
   8.921 +
   8.922 +#define SET_REG(reg)	/* defines global ia64_set_<reg>, which copies its first argument (r32) into <reg> */	\
   8.923 + GLOBAL_ENTRY(ia64_set_##reg);			\
   8.924 +	alloc r16=ar.pfs,1,0,0,0;		\
   8.925 +	mov reg=r32;				\
   8.926 +	;;					\
   8.927 +	br.ret.sptk.many rp;			\
   8.928 + END(ia64_set_##reg)
   8.929 +
   8.930 +SET_REG(b1);
   8.931 +SET_REG(b2);
   8.932 +SET_REG(b3);
   8.933 +SET_REG(b4);
   8.934 +SET_REG(b5);
   8.935 +
   8.936 +#endif /* CONFIG_IA64_BRL_EMU */
   8.937 +
   8.938 +#ifdef CONFIG_SMP
   8.939 +	/*
   8.940 +	 * This routine handles spinlock contention.  It uses a non-standard calling
   8.941 +	 * convention to avoid converting leaf routines into interior routines.  Because
   8.942 +	 * of this special convention, there are several restrictions:
   8.943 +	 *
   8.944 +	 * - do not use gp relative variables, this code is called from the kernel
   8.945 +	 *   and from modules, r1 is undefined.
   8.946 +	 * - do not use stacked registers, the caller owns them.
   8.947 +	 * - do not use the scratch stack space, the caller owns it.
   8.948 +	 * - do not use any registers other than the ones listed below
   8.949 +	 *
   8.950 +	 * Inputs:
   8.951 +	 *   ar.pfs - saved CFM of caller
   8.952 +	 *   ar.ccv - 0 (and available for use)
   8.953 +	 *   r27    - flags from spin_lock_irqsave or 0.  Must be preserved.
   8.954 +	 *   r28    - available for use.
   8.955 +	 *   r29    - available for use.
   8.956 +	 *   r30    - available for use.
   8.957 +	 *   r31    - address of lock, available for use.
   8.958 +	 *   b6     - return address
   8.959 +	 *   p14    - available for use.
   8.960 +	 *   p15    - used to track flag status.
   8.961 +	 *
   8.962 +	 * If you patch this code to use more registers, do not forget to update
   8.963 +	 * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
   8.964 +	 */
   8.965 +
   8.966 +#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
   8.967 +
   8.968 +GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
   8.969 +	.prologue
   8.970 +	.save ar.pfs, r0	// this code effectively has a zero frame size
   8.971 +	.save rp, r28		// unwind info: the return address lives in r28 at this point
   8.972 +	.body
   8.973 +	nop 0
   8.974 +	tbit.nz p15,p0=r27,IA64_PSR_I_BIT	// p15 = the flags in r27 have the psr.i bit set
   8.975 +	.restore sp		// pop existing prologue after next insn
   8.976 +	mov b6 = r28		// move the return address from r28 into b6
   8.977 +	.prologue
   8.978 +	.save ar.pfs, r0
   8.979 +	.altrp b6		// unwind info: from here on the return address is in b6
   8.980 +	.body
   8.981 +	;;
   8.982 +(p15)	ssm psr.i		// reenable interrupts if they were on
   8.983 +				// DavidM says that srlz.d is slow and is not required in this case
   8.984 +.wait:
   8.985 +	// exponential backoff, kdb, lockmeter etc. go in here
   8.986 +	hint @pause
   8.987 +	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
   8.988 +	nop 0
   8.989 +	;;
   8.990 +	cmp4.ne p14,p0=r30,r0	// p14 = lock word is non-zero, i.e. still held
   8.991 +(p14)	br.cond.sptk.few .wait	// keep polling while the lock is held
   8.992 +(p15)	rsm psr.i		// disable interrupts if we reenabled them
   8.993 +	br.cond.sptk.few b6	// lock is now free, try to acquire
   8.994 +	.global ia64_spinlock_contention_pre3_4_end	// for kernprof
   8.995 +ia64_spinlock_contention_pre3_4_end:
   8.996 +END(ia64_spinlock_contention_pre3_4)
   8.997 +
   8.998 +#else
   8.999 +
  8.1000 +GLOBAL_ENTRY(ia64_spinlock_contention)
  8.1001 +	.prologue
  8.1002 +	.altrp b6		// unwind info: the return address is in b6
  8.1003 +	.body
  8.1004 +	tbit.nz p15,p0=r27,IA64_PSR_I_BIT	// p15 = the flags in r27 have the psr.i bit set
  8.1005 +	;;
  8.1006 +.wait:				// re-entered whenever the cmpxchg below loses
  8.1007 +(p15)	ssm psr.i		// reenable interrupts if they were on
  8.1008 +				// DavidM says that srlz.d is slow and is not required in this case
  8.1009 +.wait2:
  8.1010 +	// exponential backoff, kdb, lockmeter etc. go in here
  8.1011 +	hint @pause
  8.1012 +	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
  8.1013 +	;;
  8.1014 +	cmp4.ne p14,p0=r30,r0	// p14 = lock word is non-zero, i.e. still held
  8.1015 +	mov r30 = 1		// value to store into the lock word if the cmpxchg succeeds
  8.1016 +(p14)	br.cond.sptk.few .wait2	// still held: keep polling without touching psr.i
  8.1017 +(p15)	rsm psr.i		// disable interrupts if we reenabled them
  8.1018 +	;;
  8.1019 +	cmpxchg4.acq r30=[r31], r30, ar.ccv	// store 1 if [r31] equals ar.ccv (0 on entry); r30 = previous lock word
  8.1020 +	;;
  8.1021 +	cmp4.ne p14,p0=r0,r30	// p14 = previous value was non-zero, so the lock was not obtained
  8.1022 +(p14)	br.cond.sptk.few .wait	// lost: go back, re-enabling interrupts if required
  8.1023 +
  8.1024 +	br.ret.sptk.many b6	// lock is now taken
  8.1025 +END(ia64_spinlock_contention)
  8.1026 +
  8.1027 +#endif
  8.1028 +
  8.1029 +#endif /* CONFIG_SMP */
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/xen/arch/ia64/linux-xen/irq_ia64.c	Tue Aug 16 12:15:23 2005 +0800
     9.3 @@ -0,0 +1,381 @@
     9.4 +/*
     9.5 + * linux/arch/ia64/kernel/irq.c
     9.6 + *
     9.7 + * Copyright (C) 1998-2001 Hewlett-Packard Co
     9.8 + *	Stephane Eranian <eranian@hpl.hp.com>
     9.9 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    9.10 + *
    9.11 + *  6/10/99: Updated to bring in sync with x86 version to facilitate
    9.12 + *	     support for SMP and different interrupt controllers.
    9.13 + *
    9.14 + * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
    9.15 + *                      PCI to vector allocation routine.
    9.16 + * 04/14/2004 Ashok Raj <ashok.raj@intel.com>
    9.17 + *						Added CPU Hotplug handling for IPF.
    9.18 + */
    9.19 +
    9.20 +#include <linux/config.h>
    9.21 +#include <linux/module.h>
    9.22 +
    9.23 +#include <linux/jiffies.h>
    9.24 +#include <linux/errno.h>
    9.25 +#include <linux/init.h>
    9.26 +#include <linux/interrupt.h>
    9.27 +#include <linux/ioport.h>
    9.28 +#include <linux/kernel_stat.h>
    9.29 +#include <linux/slab.h>
    9.30 +#include <linux/ptrace.h>
    9.31 +#include <linux/random.h>	/* for rand_initialize_irq() */
    9.32 +#include <linux/signal.h>
    9.33 +#include <linux/smp.h>
    9.34 +#include <linux/smp_lock.h>
    9.35 +#include <linux/threads.h>
    9.36 +#include <linux/bitops.h>
    9.37 +
    9.38 +#include <asm/delay.h>
    9.39 +#include <asm/intrinsics.h>
    9.40 +#include <asm/io.h>
    9.41 +#include <asm/hw_irq.h>
    9.42 +#include <asm/machvec.h>
    9.43 +#include <asm/pgtable.h>
    9.44 +#include <asm/system.h>
    9.45 +
    9.46 +#ifdef CONFIG_PERFMON
    9.47 +# include <asm/perfmon.h>
    9.48 +#endif
    9.49 +
    9.50 +#define IRQ_DEBUG	0
    9.51 +
    9.52 +/* default base addr of IPI table */
    9.53 +void __iomem *ipi_base_addr = ((void __iomem *)
    9.54 +			       (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
    9.55 +
    9.56 +/*
    9.57 + * Legacy IRQ to IA-64 vector translation table.
    9.58 + */
    9.59 +__u8 isa_irq_to_vector_map[16] = {
    9.60 +	/* 8259 IRQ translation, first 16 entries */
    9.61 +	0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
    9.62 +	0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
    9.63 +};
    9.64 +EXPORT_SYMBOL(isa_irq_to_vector_map);
    9.65 +
    9.66 +static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];	/* one bit per device vector; set by assign_irq_vector(), cleared by free_irq_vector() */
    9.67 +
    9.68 +int	/* returns the newly reserved vector number */
    9.69 +assign_irq_vector (int irq)	/* irq is not used by this implementation */
    9.70 +{
    9.71 +	int pos, vector;
    9.72 + again:
    9.73 +	pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);	/* lowest bitmap slot that is still clear */
    9.74 +	vector = IA64_FIRST_DEVICE_VECTOR + pos;
    9.75 +	if (vector > IA64_LAST_DEVICE_VECTOR)
    9.76 +		/* XXX could look for sharable vectors instead of panic'ing... */
    9.77 +		panic("assign_irq_vector: out of interrupt vectors!");
    9.78 +	if (test_and_set_bit(pos, ia64_vector_mask))	/* bit was already set when we tried to claim it: search again */
    9.79 +		goto again;
    9.80 +	return vector;
    9.81 +}
    9.82 +
    9.83 +void	/* release a vector previously handed out by assign_irq_vector() */
    9.84 +free_irq_vector (int vector)
    9.85 +{
    9.86 +	int pos;
    9.87 +
    9.88 +	if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR)	/* outside the device-vector range: silently ignored */
    9.89 +		return;
    9.90 +
    9.91 +	pos = vector - IA64_FIRST_DEVICE_VECTOR;	/* bit index inside ia64_vector_mask */
    9.92 +	if (!test_and_clear_bit(pos, ia64_vector_mask))	/* bit was already clear */
    9.93 +		printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
    9.94 +}
    9.95 +
    9.96 +#ifdef CONFIG_SMP
    9.97 +#	define IS_RESCHEDULE(vec)	(vec == IA64_IPI_RESCHEDULE)
    9.98 +#else
    9.99 +#	define IS_RESCHEDULE(vec)	(0)
   9.100 +#endif
   9.101 +/*
   9.102 + * That's where the IVT branches when we get an external
   9.103 + * interrupt. This branches to the correct hardware IRQ handler via
   9.104 + * function ptr.  It keeps draining vectors until the spurious vector is read.
   9.105 + */
   9.106 +void
   9.107 +ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
   9.108 +{
   9.109 +	unsigned long saved_tpr;
   9.110 +
   9.111 +#if IRQ_DEBUG
   9.112 +#ifdef XEN
   9.113 +	xen_debug_irq(vector, regs);
   9.114 +#endif
   9.115 +	{
   9.116 +		unsigned long bsp, sp;
   9.117 +
   9.118 +		/*
   9.119 +		 * Note: if the interrupt happened while executing in
   9.120 +		 * the context switch routine (ia64_switch_to), we may
   9.121 +		 * get a spurious stack overflow here.  This is
   9.122 +		 * because the register and the memory stack are not
   9.123 +		 * switched atomically.
   9.124 +		 */
   9.125 +		bsp = ia64_getreg(_IA64_REG_AR_BSP);
   9.126 +		sp = ia64_getreg(_IA64_REG_SP);
   9.127 +
   9.128 +		if ((sp - bsp) < 1024) {
   9.129 +			static unsigned char count;
   9.130 +			static long last_time;
   9.131 +
   9.132 +			if (jiffies - last_time > 5*HZ)	/* more than 5*HZ jiffies since the last warning: reset the counter */
   9.133 +				count = 0;
   9.134 +			if (++count < 5) {
   9.135 +				last_time = jiffies;
   9.136 +				printk("ia64_handle_irq: DANGER: less than "
   9.137 +				       "1KB of free stack space!!\n"
   9.138 +				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
   9.139 +			}
   9.140 +		}
   9.141 +	}
   9.142 +#endif /* IRQ_DEBUG */
   9.143 +
   9.144 +	/*
   9.145 +	 * Always set TPR to limit maximum interrupt nesting depth to
   9.146 +	 * 16 (without this, it would be ~240, which could easily lead
   9.147 +	 * to kernel stack overflows).
   9.148 +	 */
   9.149 +	irq_enter();
   9.150 +	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
   9.151 +	ia64_srlz_d();
   9.152 +	while (vector != IA64_SPURIOUS_INT_VECTOR) {
   9.153 +		if (!IS_RESCHEDULE(vector)) {	/* reschedule IPIs get only the EOI below */
   9.154 +			ia64_setreg(_IA64_REG_CR_TPR, vector);
   9.155 +			ia64_srlz_d();
   9.156 +
   9.157 +#ifdef XEN
   9.158 +			if (!xen_do_IRQ(vector))	/* __do_IRQ() runs only when xen_do_IRQ() returns 0 */
   9.159 +#endif
   9.160 +			__do_IRQ(local_vector_to_irq(vector), regs);
   9.161 +
   9.162 +			/*
   9.163 +			 * Disable interrupts and send EOI:
   9.164 +			 */
   9.165 +			local_irq_disable();
   9.166 +			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);	/* put back the TPR value read on entry */
   9.167 +		}
   9.168 +		ia64_eoi();
   9.169 +		vector = ia64_get_ivr();	/* next vector to service; the loop ends on IA64_SPURIOUS_INT_VECTOR */
   9.170 +	}
   9.171 +	/*
   9.172 +	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
   9.173 +	 * handler needs to be able to wait for further keyboard interrupts, which can't
   9.174 +	 * come through until ia64_eoi() has been done.
   9.175 +	 */
   9.176 +	irq_exit();
   9.177 +}
   9.178 +
   9.179 +#ifdef  CONFIG_VTI
   9.180 +#define vmx_irq_enter()		\
   9.181 +	add_preempt_count(HARDIRQ_OFFSET)
   9.182 +
   9.183 +/* vmx_irq_exit() only undoes vmx_irq_enter()'s hardirq count.  Now softirq
   9.184 + * will be checked when leaving hypervisor, or else scheduler irq will be
   9.185 + * executed too early.  Neither macro ends in ';' - the caller supplies it. */
   9.186 +#define vmx_irq_exit()	\
   9.187 +	sub_preempt_count(HARDIRQ_OFFSET)
   9.188 +/*
   9.189 + * VTI counterpart of ia64_handle_irq(): the timer vector is serviced here
   9.190 + * through __do_IRQ(); every other non-reschedule vector is pended to dom0's
   9.191 + * vcpu 0, which is woken once all pending vectors have been drained.
   9.192 + */
   9.193 +void
   9.194 +vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
   9.195 +{
   9.196 +	unsigned long saved_tpr;
   9.197 +	int	wake_dom0 = 0;	/* set once at least one vector has been pended to dom0 */
   9.198 +
   9.199 +
   9.200 +#if IRQ_DEBUG
   9.201 +	{
   9.202 +		unsigned long bsp, sp;
   9.203 +
   9.204 +		/*
   9.205 +		 * Note: if the interrupt happened while executing in
   9.206 +		 * the context switch routine (ia64_switch_to), we may
   9.207 +		 * get a spurious stack overflow here.  This is
   9.208 +		 * because the register and the memory stack are not
   9.209 +		 * switched atomically.
   9.210 +		 */
   9.211 +		bsp = ia64_getreg(_IA64_REG_AR_BSP);
   9.212 +		sp = ia64_getreg(_IA64_REG_SP);	/* sp is a general register: same index ia64_handle_irq() uses */
   9.213 +
   9.214 +		if ((sp - bsp) < 1024) {
   9.215 +			static unsigned char count;
   9.216 +			static long last_time;
   9.217 +
   9.218 +			if (jiffies - last_time > 5*HZ)	/* more than 5*HZ jiffies since the last warning: reset the counter */
   9.219 +				count = 0;
   9.220 +			if (++count < 5) {
   9.221 +				last_time = jiffies;
   9.222 +				printk("ia64_handle_irq: DANGER: less than "
   9.223 +				       "1KB of free stack space!!\n"
   9.224 +				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
   9.225 +			}
   9.226 +		}
   9.227 +	}
   9.228 +#endif /* IRQ_DEBUG */
   9.229 +
   9.230 +	/*
   9.231 +	 * Always set TPR to limit maximum interrupt nesting depth to
   9.232 +	 * 16 (without this, it would be ~240, which could easily lead
   9.233 +	 * to kernel stack overflows).
   9.234 +	 */
   9.235 +	vmx_irq_enter();
   9.236 +	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
   9.237 +	ia64_srlz_d();
   9.238 +	while (vector != IA64_SPURIOUS_INT_VECTOR) {
   9.239 +	    if (!IS_RESCHEDULE(vector)) {
   9.240 +		ia64_setreg(_IA64_REG_CR_TPR, vector);
   9.241 +		ia64_srlz_d();
   9.242 +
   9.243 +		if (vector != IA64_TIMER_VECTOR) {
   9.244 +			/* FIXME: Leave IRQ re-route later */
   9.245 +			vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector);
   9.246 +			wake_dom0 = 1;
   9.247 +		}
   9.248 +		else {	// FIXME: Handle Timer only now
   9.249 +			__do_IRQ(local_vector_to_irq(vector), regs);
   9.250 +		}
   9.251 +		
   9.252 +		/*
   9.253 +		 * Disable interrupts and send EOI:
   9.254 +		 */
   9.255 +		local_irq_disable();
   9.256 +		ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);	/* put back the TPR value read on entry */
   9.257 +	    }
   9.258 +	    else {
   9.259 +                printf("Oops: RESCHEDULE IPI absorbed by HV\n");
   9.260 +            }
   9.261 +	    ia64_eoi();
   9.262 +	    vector = ia64_get_ivr();	/* next vector to service; the loop ends on IA64_SPURIOUS_INT_VECTOR */
   9.263 +	}
   9.264 +	/*
   9.265 +	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
   9.266 +	 * handler needs to be able to wait for further keyboard interrupts, which can't
   9.267 +	 * come through until ia64_eoi() has been done.
   9.268 +	 */
   9.269 +	vmx_irq_exit();
   9.270 +	if ( wake_dom0 && current->domain != dom0 )	/* compare domains; assumes current is the running vcpu - TODO confirm */
   9.271 +		domain_wake(dom0->vcpu[0]);
   9.272 +}
   9.273 +#endif
   9.274 +
   9.275 +
   9.276 +#ifdef CONFIG_HOTPLUG_CPU
   9.277 +/*
   9.278 + * This function emulates interrupt processing when a cpu is about to be
   9.279 + * brought down: it drains every vector that ia64_get_ivr() still reports.
   9.280 + */
   9.281 +void ia64_process_pending_intr(void)
   9.282 +{
   9.283 +	ia64_vector vector;
   9.284 +	unsigned long saved_tpr;
   9.285 +	extern unsigned int vectors_in_migration[NR_IRQS];
   9.286 +
   9.287 +	vector = ia64_get_ivr();
   9.288 +
   9.289 +	 irq_enter();
   9.290 +	 saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
   9.291 +	 ia64_srlz_d();
   9.292 +
   9.293 +	 /*
   9.294 +	  * Perform normal interrupt style processing
   9.295 +	  */
   9.296 +	while (vector != IA64_SPURIOUS_INT_VECTOR) {
   9.297 +		if (!IS_RESCHEDULE(vector)) {
   9.298 +			ia64_setreg(_IA64_REG_CR_TPR, vector);
   9.299 +			ia64_srlz_d();
   9.300 +
   9.301 +			/*
   9.302 +			 * Do what ia64_handle_irq would have done had it been called
   9.303 +			 * from a real interrupt handler.  There is no interrupted context
   9.304 +			 * here, so NULL is passed for pt_regs in the hope that it works.
   9.305 +			 * This could probably share code with ia64_handle_irq.
   9.306 +			 */
   9.307 +			vectors_in_migration[local_vector_to_irq(vector)]=0;	/* clear the migration marker for this irq */
   9.308 +			__do_IRQ(local_vector_to_irq(vector), NULL);
   9.309 +
   9.310 +			/*
   9.311 +			 * Disable interrupts and send EOI
   9.312 +			 */
   9.313 +			local_irq_disable();
   9.314 +			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);	/* put back the TPR value read on entry */
   9.315 +		}
   9.316 +		ia64_eoi();
   9.317 +		vector = ia64_get_ivr();	/* next pending vector, or the spurious vector when none is left */
   9.318 +	}
   9.319 +	irq_exit();
   9.320 +}
   9.321 +#endif
   9.322 +
   9.323 +
   9.324 +#ifdef CONFIG_SMP
   9.325 +extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
   9.326 +
   9.327 +static struct irqaction ipi_irqaction = {	/* registered for IA64_IPI_VECTOR by init_IRQ() */
   9.328 +	.handler =	handle_IPI,
   9.329 +	.flags =	SA_INTERRUPT,
   9.330 +	.name =		"IPI"
   9.331 +};
   9.332 +#endif
   9.333 +
   9.334 +void	/* mark every irq that maps to vector vec as per-CPU and optionally attach action */
   9.335 +register_percpu_irq (ia64_vector vec, struct irqaction *action)
   9.336 +{
   9.337 +	irq_desc_t *desc;
   9.338 +	unsigned int irq;
   9.339 +
   9.340 +	for (irq = 0; irq < NR_IRQS; ++irq)	/* scan all irqs; more than one may map to vec */
   9.341 +		if (irq_to_vector(irq) == vec) {
   9.342 +			desc = irq_descp(irq);
   9.343 +			desc->status |= IRQ_PER_CPU;
   9.344 +			desc->handler = &irq_type_ia64_lsapic;
   9.345 +			if (action)	/* a NULL action only updates the descriptor */
   9.346 +				setup_irq(irq, action);
   9.347 +		}
   9.348 +}
   9.349 +
   9.350 +void __init	/* register the built-in per-CPU vectors, then run the platform's irq init hook */
   9.351 +init_IRQ (void)
   9.352 +{
   9.353 +	register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);	/* no action is attached to the spurious vector */
   9.354 +#ifdef CONFIG_SMP
   9.355 +	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
   9.356 +#endif
   9.357 +#ifdef CONFIG_PERFMON
   9.358 +	pfm_init_percpu();
   9.359 +#endif
   9.360 +	platform_irq_init();
   9.361 +}
   9.362 +
   9.363 +void	/* send an IPI by writing one 64-bit word into the IPI block at ipi_base_addr */
   9.364 +ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
   9.365 +{
   9.366 +	void __iomem *ipi_addr;
   9.367 +	unsigned long ipi_data;
   9.368 +	unsigned long phys_cpu_id;
   9.369 +
   9.370 +#ifdef CONFIG_SMP
   9.371 +	phys_cpu_id = cpu_physical_id(cpu);
   9.372 +#else
   9.373 +	phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff;	/* UP build: cpu is ignored, the id is bits 16..31 of cr.lid */
   9.374 +#endif
   9.375 +
   9.376 +	/*
   9.377 +	 * cpu number is in 8bit ID and 8bit EID
   9.378 +	 */
   9.379 +
   9.380 +	ipi_data = (delivery_mode << 8) | (vector & 0xff);	/* vector in bits 0..7, delivery mode from bit 8 up */
   9.381 +	ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));	/* 16-byte slot per physical id; redirect selects offset 8 within it */
   9.382 +
   9.383 +	writeq(ipi_data, ipi_addr);
   9.384 +}
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/xen/arch/ia64/linux-xen/mm_contig.c	Tue Aug 16 12:15:23 2005 +0800
    10.3 @@ -0,0 +1,305 @@
    10.4 +/*
    10.5 + * This file is subject to the terms and conditions of the GNU General Public
    10.6 + * License.  See the file "COPYING" in the main directory of this archive
    10.7 + * for more details.
    10.8 + *
    10.9 + * Copyright (C) 1998-2003 Hewlett-Packard Co
   10.10 + *	David Mosberger-Tang <davidm@hpl.hp.com>
   10.11 + *	Stephane Eranian <eranian@hpl.hp.com>
   10.12 + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
   10.13 + * Copyright (C) 1999 VA Linux Systems
   10.14 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
   10.15 + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
   10.16 + *
   10.17 + * Routines used by ia64 machines with contiguous (or virtually contiguous)
   10.18 + * memory.
   10.19 + */
   10.20 +#include <linux/config.h>
   10.21 +#include <linux/bootmem.h>
   10.22 +#include <linux/efi.h>
   10.23 +#include <linux/mm.h>
   10.24 +#include <linux/swap.h>
   10.25 +
   10.26 +#include <asm/meminit.h>
   10.27 +#include <asm/pgalloc.h>
   10.28 +#include <asm/pgtable.h>
   10.29 +#include <asm/sections.h>
   10.30 +#include <asm/mca.h>
   10.31 +
   10.32 +#ifdef CONFIG_VIRTUAL_MEM_MAP
   10.33 +static unsigned long num_dma_physpages;
   10.34 +#endif
   10.35 +
   10.36 +/**
   10.37 + * show_mem - display a memory statistics summary
   10.38 + *
   10.39 + * Just walks the pages in the system and describes where they're allocated.
   10.40 + */
   10.41 +#ifndef XEN
   10.42 +void
   10.43 +show_mem (void)
   10.44 +{
   10.45 +	int i, total = 0, reserved = 0;
   10.46 +	int shared = 0, cached = 0;
   10.47 +
   10.48 +	printk("Mem-info:\n");
   10.49 +	show_free_areas();
   10.50 +
   10.51 +	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
   10.52 +	i = max_mapnr;
   10.53 +	while (i-- > 0) {	/* visits indices max_mapnr-1 down to 0 */
   10.54 +		if (!pfn_valid(i))	/* indices rejected by pfn_valid() are not counted at all */
   10.55 +			continue;
   10.56 +		total++;
   10.57 +		if (PageReserved(mem_map+i))
   10.58 +			reserved++;
   10.59 +		else if (PageSwapCache(mem_map+i))
   10.60 +			cached++;
   10.61 +		else if (page_count(mem_map + i))
   10.62 +			shared += page_count(mem_map + i) - 1;	/* every reference beyond the first counts as shared */
   10.63 +	}
   10.64 +	printk("%d pages of RAM\n", total);
   10.65 +	printk("%d reserved pages\n", reserved);
   10.66 +	printk("%d pages shared\n", shared);
   10.67 +	printk("%d pages swap cached\n", cached);
   10.68 +	printk("%ld pages in page table cache\n", pgtable_cache_size);
   10.69 +}
   10.70 +#endif
   10.71 +
   10.72 +/* physical address where the bootmem map is located */
   10.73 +unsigned long bootmap_start;
   10.74 +
   10.75 +/**
   10.76 + * find_max_pfn - adjust the maximum page number callback
   10.77 + * @start: start of range
   10.78 + * @end: end of range
   10.79 + * @arg: pointer to the unsigned long holding the largest pfn seen so far
   10.80 + *
   10.81 + * Passed as a callback function to efi_memmap_walk() to determine the highest
   10.82 + * available page frame number in the system.  Always returns 0.
   10.83 + */
   10.84 +int
   10.85 +find_max_pfn (unsigned long start, unsigned long end, void *arg)
   10.86 +{
   10.87 +	unsigned long *max_pfnp = arg, pfn;
   10.88 +
   10.89 +	pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;	/* page number of the rounded-up range end, relative to PAGE_OFFSET */
   10.90 +	if (pfn > *max_pfnp)	/* only ever raises the stored maximum */
   10.91 +		*max_pfnp = pfn;
   10.92 +	return 0;
   10.93 +}
   10.94 +
   10.95 +/**
   10.96 + * find_bootmap_location - callback to find a memory area for the bootmap
   10.97 + * @start: start of region
   10.98 + * @end: end of region
   10.99 + * @arg: pointer to an unsigned long holding the number of bytes needed
  10.100 + *
  10.101 + * Find a place to put the bootmap and return its starting address in
  10.102 + * bootmap_start.  This address must be page-aligned.  Returns -1 once found, else 0.
  10.103 + */
  10.104 +int
  10.105 +find_bootmap_location (unsigned long start, unsigned long end, void *arg)
  10.106 +{
  10.107 +	unsigned long needed = *(unsigned long *)arg;
  10.108 +	unsigned long range_start, range_end, free_start;
  10.109 +	int i;
  10.110 +
  10.111 +#if IGNORE_PFN0
  10.112 +	if (start == PAGE_OFFSET) {	/* region begins at the very first page: step over that page */
  10.113 +		start += PAGE_SIZE;
  10.114 +		if (start >= end)
  10.115 +			return 0;
  10.116 +	}
  10.117 +#endif
  10.118 +
  10.119 +	free_start = PAGE_OFFSET;
  10.120 +
  10.121 +	for (i = 0; i < num_rsvd_regions; i++) {	/* examine the gap in front of each reserved region */
  10.122 +		range_start = max(start, free_start);
  10.123 +		range_end   = min(end, rsvd_region[i].start & PAGE_MASK);
  10.124 +
  10.125 +		free_start = PAGE_ALIGN(rsvd_region[i].end);	/* the next gap starts after this reserved region */
  10.126 +
  10.127 +		if (range_end <= range_start)
  10.128 +			continue; /* skip over empty range */
  10.129 +
  10.130 +		if (range_end - range_start >= needed) {
  10.131 +			bootmap_start = __pa(range_start);
  10.132 +			return -1;	/* done */
  10.133 +		}
  10.134 +
  10.135 +		/* nothing more available in this segment */
  10.136 +		if (range_end == end)
  10.137 +			return 0;
  10.138 +	}
  10.139 +	return 0;
  10.140 +}
  10.141 +
  10.142 +/**
  10.143 + * find_memory - setup memory map
  10.144 + *
  10.145 + * Walk the EFI memory map and find usable memory for the system, taking
  10.146 + * into account reserved areas.
  10.147 + */
  10.148 +#ifndef XEN
  10.149 +void
  10.150 +find_memory (void)
  10.151 +{
  10.152 +	unsigned long bootmap_size;
  10.153 +
  10.154 +	reserve_memory();
  10.155 +
  10.156 +	/* first find highest page frame number */
  10.157 +	max_pfn = 0;
  10.158 +	efi_memmap_walk(find_max_pfn, &max_pfn);
  10.159 +
  10.160 +	/* how many bytes to cover all the pages */
  10.161 +	bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
  10.162 +
  10.163 +	/* look for a location to hold the bootmap */
  10.164 +	bootmap_start = ~0UL;	/* sentinel: stays ~0UL if no walk callback finds room */
  10.165 +	efi_memmap_walk(find_bootmap_location, &bootmap_size);
  10.166 +	if (bootmap_start == ~0UL)
  10.167 +		panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
  10.168 +
  10.169 +	bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);	/* bootmap_size is replaced by init_bootmem()'s return value */
  10.170 +
  10.171 +	/* Free all available memory, then mark bootmem-map as being in use. */
  10.172 +	efi_memmap_walk(filter_rsvd_memory, free_bootmem);
  10.173 +	reserve_bootmem(bootmap_start, bootmap_size);
  10.174 +
  10.175 +	find_initrd();
  10.176 +}
  10.177 +#endif
  10.178 +
  10.179 +#ifdef CONFIG_SMP
  10.180 +/**
  10.181 + * per_cpu_init - setup per-cpu variables
  10.182 + *
  10.183 + * Allocate and setup per-cpu data areas.  Returns the calling CPU's area.
  10.184 + */
  10.185 +void *
  10.186 +per_cpu_init (void)
  10.187 +{
  10.188 +	void *cpu_data;
  10.189 +	int cpu;
  10.190 +
  10.191 +	/*
  10.192 +	 * get_free_pages() cannot be used before cpu_init() is done.  The BSP
  10.193 +	 * allocates "NR_CPUS" pages for all CPUs so that APs need not call
  10.194 +	 * get_zeroed_page().
  10.195 +	 */
  10.196 +	if (smp_processor_id() == 0) {	/* only CPU 0 allocates and fills the areas */
  10.197 +		cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
  10.198 +					   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
  10.199 +		for (cpu = 0; cpu < NR_CPUS; cpu++) {
  10.200 +			memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);	/* seed this CPU's area from the image at __phys_per_cpu_start */
  10.201 +			__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;	/* distance from __per_cpu_start to this CPU's copy */
  10.202 +			cpu_data += PERCPU_PAGE_SIZE;	/* cpu_data now points at the next CPU's area */
  10.203 +			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
  10.204 +		}
  10.205 +	}
  10.206 +	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
  10.207 +}
  10.208 +#endif /* CONFIG_SMP */
  10.209 +
  10.210 +static int	/* efi_memmap_walk() callback; always returns 0 */
  10.211 +count_pages (u64 start, u64 end, void *arg)
  10.212 +{
  10.213 +	unsigned long *count = arg;	/* running page total owned by the caller */
  10.214 +
  10.215 +	*count += (end - start) >> PAGE_SHIFT;	/* add the number of whole pages in [start, end) */
  10.216 +	return 0;
  10.217 +}
  10.218 +
  10.219 +#ifdef CONFIG_VIRTUAL_MEM_MAP
  10.220 +static int	/* efi_memmap_walk() callback; always returns 0 */
  10.221 +count_dma_pages (u64 start, u64 end, void *arg)
  10.222 +{
  10.223 +	unsigned long *count = arg;	/* running page total owned by the caller */
  10.224 +
  10.225 +	if (start < MAX_DMA_ADDRESS)	/* ranges starting at or above MAX_DMA_ADDRESS add nothing */
  10.226 +		*count += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT;	/* count only the part below MAX_DMA_ADDRESS */
  10.227 +	return 0;
  10.228 +}
  10.229 +#endif
  10.230 +
  10.231 +/*
  10.232 + * Set up the page tables.
  10.233 + */
  10.234 +
  10.235 +#ifndef XEN
  10.236 +void
  10.237 +paging_init (void)
  10.238 +{
  10.239 +	unsigned long max_dma;
  10.240 +	unsigned long zones_size[MAX_NR_ZONES];
  10.241 +#ifdef CONFIG_VIRTUAL_MEM_MAP
  10.242 +	unsigned long zholes_size[MAX_NR_ZONES];
  10.243 +	unsigned long max_gap;
  10.244 +#endif
  10.245 +
  10.246 +	/* initialize mem_map[] */
  10.247 +
  10.248 +	memset(zones_size, 0, sizeof(zones_size));
  10.249 +
  10.250 +	num_physpages = 0;
  10.251 +	efi_memmap_walk(count_pages, &num_physpages);
  10.252 +
  10.253 +	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;	/* MAX_DMA_ADDRESS expressed as a page frame number */
  10.254 +
  10.255 +#ifdef CONFIG_VIRTUAL_MEM_MAP
  10.256 +	memset(zholes_size, 0, sizeof(zholes_size));
  10.257 +
  10.258 +	num_dma_physpages = 0;
  10.259 +	efi_memmap_walk(count_dma_pages, &num_dma_physpages);
  10.260 +
  10.261 +	if (max_low_pfn < max_dma) {	/* all memory lies below the DMA limit: a single zone */
  10.262 +		zones_size[ZONE_DMA] = max_low_pfn;
  10.263 +		zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
  10.264 +	} else {
  10.265 +		zones_size[ZONE_DMA] = max_dma;
  10.266 +		zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
  10.267 +		if (num_physpages > num_dma_physpages) {	/* some pages were counted above the DMA limit */
  10.268 +			zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
  10.269 +			zholes_size[ZONE_NORMAL] =
  10.270 +				((max_low_pfn - max_dma) -
  10.271 +				 (num_physpages - num_dma_physpages));
  10.272 +		}
  10.273 +	}
  10.274 +
  10.275 +	max_gap = 0;
  10.276 +	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
  10.277 +	if (max_gap < LARGE_GAP) {	/* largest hole is below LARGE_GAP: no virtual mem_map is set up */
  10.278 +		vmem_map = (struct page *) 0;
  10.279 +		free_area_init_node(0, &contig_page_data, zones_size, 0,
  10.280 +				    zholes_size);
  10.281 +	} else {
  10.282 +		unsigned long map_size;
  10.283 +
  10.284 +		/* allocate virtual_mem_map */
  10.285 +
  10.286 +		map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
  10.287 +		vmalloc_end -= map_size;	/* carve the map out of the top of the vmalloc range */
  10.288 +		vmem_map = (struct page *) vmalloc_end;
  10.289 +		efi_memmap_walk(create_mem_map_page_table, NULL);
  10.290 +
  10.291 +		mem_map = contig_page_data.node_mem_map = vmem_map;
  10.292 +		free_area_init_node(0, &contig_page_data, zones_size,
  10.293 +				    0, zholes_size);
  10.294 +
  10.295 +		printk("Virtual mem_map starts at 0x%p\n", mem_map);
  10.296 +	}
  10.297 +#else /* !CONFIG_VIRTUAL_MEM_MAP */
  10.298 +	if (max_low_pfn < max_dma)
  10.299 +		zones_size[ZONE_DMA] = max_low_pfn;
  10.300 +	else {
  10.301 +		zones_size[ZONE_DMA] = max_dma;
  10.302 +		zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
  10.303 +	}
  10.304 +	free_area_init(zones_size);
  10.305 +#endif /* !CONFIG_VIRTUAL_MEM_MAP */
  10.306 +	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
  10.307 +}
  10.308 +#endif /* !XEN */
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xen/arch/ia64/linux-xen/pal.S	Tue Aug 16 12:15:23 2005 +0800
    11.3 @@ -0,0 +1,310 @@
    11.4 +/*
    11.5 + * PAL Firmware support
    11.6 + * IA-64 Processor Programmers Reference Vol 2
    11.7 + *
    11.8 + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
    11.9 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
   11.10 + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
   11.11 + *	David Mosberger <davidm@hpl.hp.com>
   11.12 + *	Stephane Eranian <eranian@hpl.hp.com>
   11.13 + *
   11.14 + * 05/22/2000 eranian Added support for stacked register calls
   11.15 + * 05/24/2000 eranian Added support for physical mode static calls
   11.16 + */
   11.17 +
   11.18 +#include <asm/asmmacro.h>
   11.19 +#include <asm/processor.h>
   11.20 +
   11.21 +	.data
   11.22 +pal_entry_point:
   11.23 +	data8 ia64_pal_default_handler
   11.24 +	.text
   11.25 +
   11.26 +/*
   11.27 + * Set the PAL entry point address.  This could be written in C code, but we do it here
   11.28 + * to keep it all in one module (besides, it's so trivial that it's
   11.29 + * not a big deal).
   11.30 + *
   11.31 + * in0		Address of the PAL entry point (text address, NOT a function descriptor).
   11.32 + */
   11.33 +GLOBAL_ENTRY(ia64_pal_handler_init)
   11.34 +	alloc r3=ar.pfs,1,0,0,0		// frame with a single input register (in0)
   11.35 +	movl r2=pal_entry_point		// r2 = address of the pal_entry_point variable
   11.36 +	;;
   11.37 +	st8 [r2]=in0			// overwrite the stored entry point with in0
   11.38 +	br.ret.sptk.many rp
   11.39 +END(ia64_pal_handler_init)
   11.40 +
   11.41 +/*
   11.42 + * Default PAL call handler.  This needs to be coded in assembly because it uses
   11.43 + * the static calling convention, i.e., the RSE may not be used and calls are
   11.44 + * done via "br.cond" (not "br.call").  It is the initial value of pal_entry_point.
   11.45 + */
   11.46 +GLOBAL_ENTRY(ia64_pal_default_handler)
   11.47 +	mov r8=-1			// r8 = -1 is the only result this handler produces
   11.48 +	br.cond.sptk.many rp		// go back to the address the caller placed in rp
   11.49 +END(ia64_pal_default_handler)
   11.51 +/*
   11.52 + * Make a PAL call using the static calling convention.
   11.53 + *
   11.54 + * in0         Index of PAL service
   11.55 + * in1 - in3   Remaining PAL arguments
   11.56 + * in4	       1 ==> clear psr.ic,  0 ==> don't clear psr.ic
   11.57 + *
   11.58 + */
   11.59 +GLOBAL_ENTRY(ia64_pal_call_static)
   11.60 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
   11.61 +	alloc loc1 = ar.pfs,5,5,0,0	// 5 inputs, 5 locals; loc1 keeps the caller's ar.pfs
   11.62 +	movl loc2 = pal_entry_point	// loc2 = address of the pal_entry_point variable
   11.63 +1:	{
   11.64 +	  mov r28 = in0			// r28 = PAL service index
   11.65 +	  mov r29 = in1			// r29 = first PAL argument
   11.66 +	  mov r8 = ip			// r8 = address of this bundle (label 1b)
   11.67 +	}
   11.68 +	;;
   11.69 +	ld8 loc2 = [loc2]		// loc2 <- entry point
   11.70 +	tbit.nz p6,p7 = in4, 0		// p6 = bit 0 of in4 set (clear psr.ic too), p7 = not set
   11.71 +	adds r8 = 1f-1b,r8		// r8 = address of the return label 1f below
   11.72 +	mov loc4=ar.rsc			// save RSE configuration
   11.73 +	;;
   11.74 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
   11.75 +	mov loc3 = psr			// keep psr so it can be restored after the call
   11.76 +	mov loc0 = rp			// keep our own return address
   11.77 +	.body
   11.78 +	mov r30 = in2			// r30 = second PAL argument
   11.79 +
   11.80 +(p6)	rsm psr.i | psr.ic		// in4 bit 0 set: mask interrupts and clear psr.ic
   11.81 +	mov r31 = in3			// r31 = third PAL argument
   11.82 +	mov b7 = loc2			// b7 = PAL entry point
   11.83 +
   11.84 +(p7)	rsm psr.i			// in4 bit 0 clear: mask interrupts only
   11.85 +	;;
   11.86 +(p6)	srlz.i
   11.87 +	mov rp = r8			// rp = 1f, where execution is expected to resume after the call
   11.88 +	br.cond.sptk.many b7		// enter PAL with br.cond (static convention)
   11.89 +1:	mov psr.l = loc3		// put back the psr bits saved before the call
   11.90 +	mov ar.rsc = loc4		// restore RSE configuration
   11.91 +	mov ar.pfs = loc1		// restore the caller's ar.pfs
   11.92 +	mov rp = loc0			// restore our return address
   11.93 +	;;
   11.94 +	srlz.d				// serialize restoration of psr.l
   11.95 +	br.ret.sptk.many b0
   11.96 +END(ia64_pal_call_static)
   11.97 +
   11.98 +/*
   11.99 + * Make a PAL call using the stacked registers calling convention.
  11.100 + *
  11.101 + * Inputs:
  11.102 + * 	in0         Index of PAL service
  11.103 + * 	in1 - in3   Remaining PAL arguments
  11.104 + */
  11.105 +GLOBAL_ENTRY(ia64_pal_call_stacked)
  11.106 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
  11.107 +	alloc loc1 = ar.pfs,4,4,4,0	// 4 inputs, 4 locals, 4 outputs; loc1 keeps the caller's ar.pfs
  11.108 +	movl loc2 = pal_entry_point	// loc2 = address of the pal_entry_point variable
  11.109 +
  11.110 +	mov r28  = in0			// Index MUST be copied to r28
  11.111 +	mov out0 = in0			// AND in0 of PAL function
  11.112 +	mov loc0 = rp			// keep our own return address
  11.113 +	.body
  11.114 +	;;
  11.115 +	ld8 loc2 = [loc2]		// loc2 <- entry point
  11.116 +	mov out1 = in1			// forward the remaining arguments unchanged
  11.117 +	mov out2 = in2
  11.118 +	mov out3 = in3
  11.119 +	mov loc3 = psr			// keep psr so it can be restored after the call
  11.120 +	;;
  11.121 +	rsm psr.i			// mask interrupts for the duration of the call
  11.122 +	mov b7 = loc2			// b7 = PAL entry point
  11.123 +	;;
  11.124 +	br.call.sptk.many rp=b7		// now make the call
  11.125 +.ret0:	mov psr.l  = loc3		// put back the psr bits saved before the call
  11.126 +	mov ar.pfs = loc1		// restore the caller's ar.pfs
  11.127 +	mov rp = loc0			// restore our return address
  11.128 +	;;
  11.129 +	srlz.d				// serialize restoration of psr.l
  11.130 +	br.ret.sptk.many b0
  11.131 +END(ia64_pal_call_stacked)
  11.132 +
  11.133 +/*
  11.134 + * Make a physical mode PAL call using the static registers calling convention.
  11.135 + *
  11.136 + * Inputs:
  11.137 + * 	in0         Index of PAL service (copied to r28)
  11.138 + * 	in1 - in3   Remaining PAL arguments (copied to r29 - r31)
  11.139 + *
  11.140 + * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
  11.141 + * So we don't need to clear them.
  11.142 + */
  11.143 +#define PAL_PSR_BITS_TO_CLEAR							\
  11.144 +	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT  | IA64_PSR_DB | IA64_PSR_RT |	\
  11.145 +	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |		\
  11.146 +	 IA64_PSR_DFL | IA64_PSR_DFH)
  11.147 +
  11.148 +#define PAL_PSR_BITS_TO_SET							\
  11.149 +	(IA64_PSR_BN)
  11.150 +
  11.151 +
  11.152 +GLOBAL_ENTRY(ia64_pal_call_phys_static)
  11.153 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
  11.154 +	alloc loc1 = ar.pfs,4,7,0,0
  11.155 +	movl loc2 = pal_entry_point
  11.156 +1:	{
  11.157 +	  mov r28  = in0		// copy procedure index
  11.158 +	  mov r8   = ip			// save ip to compute branch
  11.159 +	  mov loc0 = rp			// save rp
  11.160 +	}
  11.161 +	.body
  11.162 +	;;
  11.163 +	ld8 loc2 = [loc2]		// loc2 <- entry point
  11.164 +	mov r29  = in1			// first argument
  11.165 +	mov r30  = in2			// copy arg2
  11.166 +	mov r31  = in3			// copy arg3
  11.167 +	;;
  11.168 +	mov loc3 = psr			// save psr
  11.169 +	adds r8  = 1f-1b,r8		// calculate return address for call
  11.170 +	;;
  11.171 +	mov loc4=ar.rsc			// save RSE configuration
  11.172 +#ifdef XEN
  11.173 +	dep.z loc2=loc2,0,60		// convert pal entry point to physical (Xen keeps the low 60 bits)
  11.174 +#else // XEN
  11.175 +	dep.z loc2=loc2,0,61		// convert pal entry point to physical (non-Xen keeps the low 61 bits)
  11.176 +#endif // XEN
  11.177 +	tpa r8=r8			// convert rp to physical
  11.178 +	;;
  11.179 +	mov b7 = loc2			// install target to branch reg
  11.180 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
  11.181 +	movl r16=PAL_PSR_BITS_TO_CLEAR
  11.182 +	movl r17=PAL_PSR_BITS_TO_SET
  11.183 +	;;
  11.184 +	or loc3=loc3,r17		// add in psr the bits to set
  11.185 +	;;
  11.186 +	andcm r16=loc3,r16		// removes bits to clear from psr
  11.187 +	br.call.sptk.many rp=ia64_switch_mode_phys
  11.188 +.ret1:	mov rp = r8			// install return address (physical)
  11.189 +	mov loc5 = r19			// keep r19/r20 across the PAL call; presumably set by
  11.190 +	mov loc6 = r20			// ia64_switch_mode_phys -- TODO confirm against that routine
  11.191 +	br.cond.sptk.many b7		// plain branch to PAL; it returns to label 1f via rp
  11.192 +1:
  11.193 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
  11.194 +	mov r16=loc3			// r16= original psr
  11.195 +	mov r19=loc5			// hand the saved r19/r20 back before switching to
  11.196 +	mov r20=loc6			// virtual mode
  11.197 +	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
  11.198 +.ret2:
  11.199 +	mov psr.l = loc3		// restore init PSR
  11.200 +
  11.201 +	mov ar.pfs = loc1
  11.202 +	mov rp = loc0
  11.203 +	;;
  11.204 +	mov ar.rsc=loc4			// restore RSE configuration
  11.205 +	srlz.d				// serialize restoration of psr.l
  11.206 +	br.ret.sptk.many b0
  11.207 +END(ia64_pal_call_phys_static)
  11.208 +
  11.209 +/*
  11.210 + * Make a PAL call using the stacked registers in physical mode.
  11.211 + *
  11.212 + * Inputs:
  11.213 + * 	in0         Index of PAL service (copied to both r28 and out0)
  11.214 + * 	in1 - in3   Remaining PAL arguments (copied to out1 - out3)
  11.215 + */
  11.216 +GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
  11.217 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
  11.218 +	alloc	loc1 = ar.pfs,5,7,4,0
  11.219 +	movl	loc2 = pal_entry_point
  11.220 +1:	{
  11.221 +	  mov r28  = in0		// copy procedure index
  11.222 +	  mov loc0 = rp		// save rp
  11.223 +	}
  11.224 +	.body
  11.225 +	;;
  11.226 +	ld8 loc2 = [loc2]		// loc2 <- entry point
  11.227 +	mov out0 = in0		// procedure index again, as PAL's in0
  11.228 +	mov out1 = in1		// copy arg1
  11.229 +	mov out2 = in2		// copy arg2
  11.230 +	mov out3 = in3		// copy arg3
  11.231 +	;;
  11.232 +	mov loc3 = psr		// save psr
  11.233 +	;;
  11.234 +	mov loc4=ar.rsc			// save RSE configuration
  11.235 +#ifdef XEN
  11.236 +	dep.z loc2=loc2,0,60		// convert pal entry point to physical (Xen keeps the low 60 bits)
  11.237 +#else // XEN
  11.238 +	dep.z loc2=loc2,0,61		// convert pal entry point to physical (non-Xen keeps the low 61 bits)
  11.239 +#endif // XEN
  11.240 +	;;
  11.241 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
  11.242 +	movl r16=PAL_PSR_BITS_TO_CLEAR
  11.243 +	movl r17=PAL_PSR_BITS_TO_SET
  11.244 +	;;
  11.245 +	or loc3=loc3,r17		// add in psr the bits to set
  11.246 +	mov b7 = loc2			// install target to branch reg
  11.247 +	;;
  11.248 +	andcm r16=loc3,r16		// removes bits to clear from psr
  11.249 +	br.call.sptk.many rp=ia64_switch_mode_phys
  11.250 +.ret6:
  11.251 +	mov loc5 = r19			// keep r19/r20 across the PAL call; presumably set by
  11.252 +	mov loc6 = r20			// ia64_switch_mode_phys -- TODO confirm against that routine
  11.253 +	br.call.sptk.many rp=b7		// now make the call
  11.254 +.ret7:
  11.255 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
  11.256 +	mov r16=loc3			// r16= original psr
  11.257 +	mov r19=loc5			// hand the saved r19/r20 back before switching to
  11.258 +	mov r20=loc6			// virtual mode
  11.259 +	br.call.sptk.many rp=ia64_switch_mode_virt	// return to virtual mode
  11.260 +
  11.261 +.ret8:	mov psr.l  = loc3		// restore init PSR
  11.262 +	mov ar.pfs = loc1
  11.263 +	mov rp = loc0
  11.264 +	;;
  11.265 +	mov ar.rsc=loc4			// restore RSE configuration
  11.266 +	srlz.d				// serialize restoration of psr.l
  11.267 +	br.ret.sptk.many b0
  11.268 +END(ia64_pal_call_phys_stacked)
  11.269 +
  11.270 +/*
  11.271 + * Save the scratch fp regs which aren't saved in pt_regs already (f10-f15).
  11.272 + *
  11.273 + * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
  11.274 + * regs in the fp-low partition.
  11.275 + *
  11.276 + * Inputs:
  11.277 + *      in0	Address of stack storage for fp regs (six 16-byte slots, f10 first)
  11.278 + */
  11.279 +GLOBAL_ENTRY(ia64_save_scratch_fpregs)
  11.280 +	alloc r3=ar.pfs,1,0,0,0
  11.281 +	add r2=16,in0			// r2 <- second slot; in0 and r2 then advance by 32 in step
  11.282 +	;;
  11.283 +	stf.spill [in0] = f10,32	// offset  0
  11.284 +	stf.spill [r2]  = f11,32	// offset 16
  11.285 +	;;
  11.286 +	stf.spill [in0] = f12,32	// offset 32
  11.287 +	stf.spill [r2]  = f13,32	// offset 48
  11.288 +	;;
  11.289 +	stf.spill [in0] = f14,32	// offset 64
  11.290 +	stf.spill [r2]  = f15,32	// offset 80
  11.291 +	br.ret.sptk.many rp
  11.292 +END(ia64_save_scratch_fpregs)
  11.293 +
  11.294 +/*
  11.295 + * Load the scratch fp regs (f10-f15), reading the same slot layout that ia64_save_scratch_fpregs writes.
  11.296 + *
  11.297 + * Inputs:
  11.298 + *      in0	Address of stack storage for fp regs (six 16-byte slots, f10 first)
  11.299 + */
  11.300 +GLOBAL_ENTRY(ia64_load_scratch_fpregs)
  11.301 +	alloc r3=ar.pfs,1,0,0,0
  11.302 +	add r2=16,in0			// r2 <- second slot; in0 and r2 then advance by 32 in step
  11.303 +	;;
  11.304 +	ldf.fill  f10 = [in0],32	// offset  0
  11.305 +	ldf.fill  f11 = [r2],32		// offset 16
  11.306 +	;;
  11.307 +	ldf.fill  f12 = [in0],32	// offset 32
  11.308 +	ldf.fill  f13 = [r2],32		// offset 48
  11.309 +	;;
  11.310 +	ldf.fill  f14 = [in0],32	// offset 64
  11.311 +	ldf.fill  f15 = [r2],32		// offset 80
  11.312 +	br.ret.sptk.many rp
  11.313 +END(ia64_load_scratch_fpregs)
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/xen/arch/ia64/linux-xen/setup.c	Tue Aug 16 12:15:23 2005 +0800
    12.3 @@ -0,0 +1,773 @@
    12.4 +/*
    12.5 + * Architecture-specific setup.
    12.6 + *
    12.7 + * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
    12.8 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    12.9 + *	Stephane Eranian <eranian@hpl.hp.com>
   12.10 + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
   12.11 + * Copyright (C) 1999 VA Linux Systems
   12.12 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
   12.13 + *
   12.14 + * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
   12.15 + * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
   12.16 + * 03/31/00 R.Seth	cpu_initialized and current->processor fixes
   12.17 + * 02/04/00 D.Mosberger	some more get_cpuinfo fixes...
   12.18 + * 02/01/00 R.Seth	fixed get_cpuinfo for SMP
   12.19 + * 01/07/99 S.Eranian	added the support for command line argument
   12.20 + * 06/24/99 W.Drummond	added boot_cpu_data.
   12.21 + */
   12.22 +#include <linux/config.h>
   12.23 +#include <linux/module.h>
   12.24 +#include <linux/init.h>
   12.25 +
   12.26 +#include <linux/acpi.h>
   12.27 +#include <linux/bootmem.h>
   12.28 +#include <linux/console.h>
   12.29 +#include <linux/delay.h>
   12.30 +#include <linux/kernel.h>
   12.31 +#include <linux/reboot.h>
   12.32 +#include <linux/sched.h>
   12.33 +#include <linux/seq_file.h>
   12.34 +#include <linux/string.h>
   12.35 +#include <linux/threads.h>
   12.36 +#include <linux/tty.h>
   12.37 +#include <linux/serial.h>
   12.38 +#include <linux/serial_core.h>
   12.39 +#include <linux/efi.h>
   12.40 +#include <linux/initrd.h>
   12.41 +
   12.42 +#include <asm/ia32.h>
   12.43 +#include <asm/machvec.h>
   12.44 +#include <asm/mca.h>
   12.45 +#include <asm/meminit.h>
   12.46 +#include <asm/page.h>
   12.47 +#include <asm/patch.h>
   12.48 +#include <asm/pgtable.h>
   12.49 +#include <asm/processor.h>
   12.50 +#include <asm/sal.h>
   12.51 +#include <asm/sections.h>
   12.52 +#include <asm/serial.h>
   12.53 +#include <asm/setup.h>
   12.54 +#include <asm/smp.h>
   12.55 +#include <asm/system.h>
   12.56 +#include <asm/unistd.h>
   12.57 +#ifdef CONFIG_VTI
   12.58 +#include <asm/vmx.h>
   12.59 +#endif // CONFIG_VTI
   12.60 +#include <asm/io.h>
   12.61 +
   12.62 +#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
   12.63 +# error "struct cpuinfo_ia64 too big!"
   12.64 +#endif
   12.65 +
   12.66 +#ifdef CONFIG_SMP
   12.67 +unsigned long __per_cpu_offset[NR_CPUS];
   12.68 +EXPORT_SYMBOL(__per_cpu_offset);
   12.69 +#endif
   12.70 +
   12.71 +DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
   12.72 +DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
   12.73 +DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
   12.74 +unsigned long ia64_cycles_per_usec;
   12.75 +struct ia64_boot_param *ia64_boot_param;
   12.76 +struct screen_info screen_info;
   12.77 +
   12.78 +unsigned long ia64_max_cacheline_size;
   12.79 +unsigned long ia64_iobase;	/* virtual address for I/O accesses */
   12.80 +EXPORT_SYMBOL(ia64_iobase);
   12.81 +struct io_space io_space[MAX_IO_SPACES];
   12.82 +EXPORT_SYMBOL(io_space);
   12.83 +unsigned int num_io_spaces;
   12.84 +
   12.85 +unsigned char aux_device_present = 0xaa;        /* XXX remove this when legacy I/O is gone */
   12.86 +
   12.87 +/*
   12.88 + * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
   12.89 + * mask specifies a mask of address bits that must be 0 in order for two buffers to be
   12.90 + * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
   12.91 + * address of the second buffer must be aligned to (merge_mask+1) in order to be
   12.92 + * mergeable).  By default, we assume there is no I/O MMU which can merge physically
    12.93 + * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to an iommu
   12.94 + * page-size of 2^64.
   12.95 + */
   12.96 +unsigned long ia64_max_iommu_merge_mask = ~0UL;
   12.97 +EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
   12.98 +
   12.99 +/*
  12.100 + * We use a special marker for the end of memory and it uses the extra (+1) slot
  12.101 + */
  12.102 +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
  12.103 +int num_rsvd_regions;
  12.104 +
  12.105 +
   12.106 +/*
   12.107 + * Filter incoming memory segments based on the primitive map created from the boot
   12.108 + * parameters. Segments contained in the map are removed from the memory ranges. The
   12.109 + * caller-specified function passed in arg is called with the memory ranges that remain.
   12.110 + * This routine does not assume the incoming segments are sorted.  Always returns 0.
   12.111 + */
   12.112 +int
   12.113 +filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
   12.114 +{
   12.115 +	unsigned long range_start, range_end, prev_start;
   12.116 +	void (*func)(unsigned long, unsigned long, int);
   12.117 +	int i;
   12.118 +
   12.119 +#if IGNORE_PFN0
   12.120 +	if (start == PAGE_OFFSET) {
   12.121 +		printk(KERN_WARNING "warning: skipping physical page 0\n");
   12.122 +		start += PAGE_SIZE;
   12.123 +		if (start >= end) return 0;
   12.124 +	}
   12.125 +#endif
   12.126 +	/*
   12.127 +	 * lowest possible address(walker uses virtual)
   12.128 +	 */
   12.129 +	prev_start = PAGE_OFFSET;
   12.130 +	func = arg;	/* arg carries the callback */
   12.131 +
   12.132 +	for (i = 0; i < num_rsvd_regions; ++i) {
   12.133 +		range_start = max(start, prev_start);	/* clip to the gap before region i */
   12.134 +		range_end   = min(end, rsvd_region[i].start);
   12.135 +
   12.136 +		if (range_start < range_end)
   12.137 +#ifdef XEN
   12.138 +		{
   12.139 +		/* init_boot_pages requires "ps, pe" */
   12.140 +			printk("Init boot pages: 0x%lx -> 0x%lx.\n",
   12.141 +				__pa(range_start), __pa(range_end));
   12.142 +			(*func)(__pa(range_start), __pa(range_end), 0);
   12.143 +		}
   12.144 +#else
   12.145 +			call_pernode_memory(__pa(range_start), range_end - range_start, func);
   12.146 +#endif
   12.147 +
   12.148 +		/* nothing more available in this segment */
   12.149 +		if (range_end == end) return 0;
   12.150 +
   12.151 +		prev_start = rsvd_region[i].end;	/* the next gap starts after region i */
   12.152 +	}
   12.153 +	/* end of memory marker allows full processing inside loop body */
   12.154 +	return 0;
   12.155 +}
  12.156 +
   12.157 +static void
   12.158 +sort_regions (struct rsvd_region *rsvd_region, int max)
   12.159 +{
   12.160 +	int j;
   12.161 +
   12.162 +	/* simple bubble sort of the first max entries, ascending by .start */
   12.163 +	while (max--) {
   12.164 +		for (j = 0; j < max; ++j) {
   12.165 +			if (rsvd_region[j].start > rsvd_region[j+1].start) {
   12.166 +				struct rsvd_region tmp;
   12.167 +				tmp = rsvd_region[j];
   12.168 +				rsvd_region[j] = rsvd_region[j + 1];
   12.169 +				rsvd_region[j + 1] = tmp;
   12.170 +			}
   12.171 +		}
   12.172 +	}
   12.173 +}
  12.174 +
   12.175 +/**
   12.176 + * reserve_memory - setup reserved memory areas
   12.177 + *
   12.178 + * Setup the reserved memory areas set aside for the boot parameters,
   12.179 + * initrd, etc.  There are currently %IA64_MAX_RSVD_REGIONS defined,
   12.180 + * see include/asm-ia64/meminit.h if you need to define more.
   12.181 + */
   12.182 +void
   12.183 +reserve_memory (void)
   12.184 +{
   12.185 +	int n = 0;
   12.186 +
   12.187 +	/*
   12.188 +	 * none of the entries in this table overlap
   12.189 +	 */
   12.190 +	rsvd_region[n].start = (unsigned long) ia64_boot_param;
   12.191 +	rsvd_region[n].end   = rsvd_region[n].start + sizeof(*ia64_boot_param);
   12.192 +	n++;	/* entry above: the boot parameter structure itself */
   12.193 +
   12.194 +	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap);
   12.195 +	rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->efi_memmap_size;
   12.196 +	n++;	/* entry above: the EFI memory map */
   12.197 +
   12.198 +	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line);
   12.199 +	rsvd_region[n].end   = (rsvd_region[n].start
   12.200 +				+ strlen(__va(ia64_boot_param->command_line)) + 1);
   12.201 +	n++;	/* entry above: the command line, including its terminating NUL */
   12.202 +
   12.203 +	rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
   12.204 +#ifdef XEN
   12.205 +	/* Reserve xen image/bitmap/xen-heap */
   12.206 +	rsvd_region[n].end   = rsvd_region[n].start + xenheap_size;
   12.207 +#else
   12.208 +	rsvd_region[n].end   = (unsigned long) ia64_imva(_end);
   12.209 +#endif
   12.210 +	n++;	/* entry above: from KERNEL_START to xenheap_size bytes on (Xen) or to _end */
   12.211 +
   12.212 +#ifdef CONFIG_BLK_DEV_INITRD
   12.213 +	if (ia64_boot_param->initrd_start) {
   12.214 +		rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
   12.215 +		rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->initrd_size;
   12.216 +		n++;
   12.217 +	}
   12.218 +#endif
   12.219 +
   12.220 +	/* end of memory marker */
   12.221 +	rsvd_region[n].start = ~0UL;
   12.222 +	rsvd_region[n].end   = ~0UL;
   12.223 +	n++;
   12.224 +
   12.225 +	num_rsvd_regions = n;	/* count includes the end-of-memory marker */
   12.226 +
   12.227 +	sort_regions(rsvd_region, num_rsvd_regions);
   12.228 +}
  12.229 +
   12.230 +/**
   12.231 + * find_initrd - get initrd parameters from the boot parameter structure
   12.232 + *
   12.233 + * Grab the initrd start and end from the boot parameter struct given us by
   12.234 + * the boot loader.  Does nothing without CONFIG_BLK_DEV_INITRD or when initrd_start is 0.
   12.235 + */
   12.236 +void
   12.237 +find_initrd (void)
   12.238 +{
   12.239 +#ifdef CONFIG_BLK_DEV_INITRD
   12.240 +	if (ia64_boot_param->initrd_start) {
   12.241 +		initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
   12.242 +		initrd_end   = initrd_start+ia64_boot_param->initrd_size;
   12.243 +
   12.244 +		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
   12.245 +		       initrd_start, ia64_boot_param->initrd_size);
   12.246 +	}
   12.247 +#endif
   12.248 +}
  12.249 +
   12.250 +static void __init
   12.251 +io_port_init (void)
   12.252 +{
   12.253 +	extern unsigned long ia64_iobase;
   12.254 +	unsigned long phys_iobase;
   12.255 +
   12.256 +	/*
   12.257 +	 *  Set `iobase' to the appropriate address in region 6 (uncached access range).
   12.258 +	 *
   12.259 +	 *  The EFI memory map is the "preferred" location to get the I/O port space base,
   12.260 +	 *  rather than relying on AR.KR0. This should become clearer in future SAL
   12.261 +	 *  specs. We'll fall back to getting it out of AR.KR0 if no appropriate entry is
   12.262 +	 *  found in the memory map.
   12.263 +	 */
   12.264 +	phys_iobase = efi_get_iobase();
   12.265 +	if (phys_iobase)
   12.266 +		/* set AR.KR0 since this is all we use it for anyway */
   12.267 +		ia64_set_kr(IA64_KR_IO_BASE, phys_iobase);
   12.268 +	else {
   12.269 +		phys_iobase = ia64_get_kr(IA64_KR_IO_BASE);
   12.270 +		printk(KERN_INFO "No I/O port range found in EFI memory map, falling back "
   12.271 +		       "to AR.KR0\n");
   12.272 +		printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase);
   12.273 +	}
   12.274 +	ia64_iobase = (unsigned long) ioremap(phys_iobase, 0);
   12.275 +
   12.276 +	/* setup legacy IO port space: it becomes io_space[0], the only entry so far */
   12.277 +	io_space[0].mmio_base = ia64_iobase;
   12.278 +	io_space[0].sparse = 1;
   12.279 +	num_io_spaces = 1;
   12.280 +}
  12.281 +
   12.282 +/**
   12.283 + * early_console_setup - setup debugging console
   12.284 + *
   12.285 + * Consoles started here require little enough setup that we can start using
   12.286 + * them very early in the boot process, either right after the machine
   12.287 + * vector initialization, or even before if the drivers can detect their hw.
   12.288 + *
   12.289 + * Returns 0 as soon as one configured console is set up, -1 if none could be.
   12.290 + */
   12.291 +static inline int __init
   12.292 +early_console_setup (char *cmdline)
   12.293 +{
   12.294 +#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
   12.295 +	{
   12.296 +		extern int sn_serial_console_early_setup(void);
   12.297 +		if (!sn_serial_console_early_setup())
   12.298 +			return 0;
   12.299 +	}
   12.300 +#endif
   12.301 +#ifdef CONFIG_EFI_PCDP
   12.302 +	if (!efi_setup_pcdp_console(cmdline))
   12.303 +		return 0;
   12.304 +#endif
   12.305 +#ifdef CONFIG_SERIAL_8250_CONSOLE
   12.306 +	if (!early_serial_console_init(cmdline))
   12.307 +		return 0;
   12.308 +#endif
   12.309 +
   12.310 +	return -1;	/* no console option compiled in, or every attempt failed */
   12.311 +}
  12.312 +
   12.313 +static inline void
   12.314 +mark_bsp_online (void)
   12.315 +{
   12.316 +#ifdef CONFIG_SMP
   12.317 +	/* If we register an early console, mark the calling CPU online so it may printk */
   12.318 +	cpu_set(smp_processor_id(), cpu_online_map);
   12.319 +#endif
   12.320 +}
  12.321 +
   12.322 +void __init
   12.323 +#ifdef XEN	/* Xen splits setup_arch() into early_setup_arch() and late_setup_arch() */
   12.324 +early_setup_arch (char **cmdline_p)
   12.325 +#else
   12.326 +setup_arch (char **cmdline_p)
   12.327 +#endif
   12.328 +{
   12.329 +	unw_init();
   12.330 +
   12.331 +	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
   12.332 +
   12.333 +	*cmdline_p = __va(ia64_boot_param->command_line);
   12.334 +#ifdef XEN
   12.335 +	efi_init();
   12.336 +#else
   12.337 +	strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
   12.338 +
   12.339 +	efi_init();
   12.340 +	io_port_init();
   12.341 +#endif
   12.342 +
   12.343 +#ifdef CONFIG_IA64_GENERIC
   12.344 +	{
   12.345 +		const char *mvec_name = strstr (*cmdline_p, "machvec=");
   12.346 +		char str[64];
   12.347 +
   12.348 +		if (mvec_name) {
   12.349 +			const char *end;
   12.350 +			size_t len;
   12.351 +
   12.352 +			mvec_name += 8;	/* skip past "machvec=" */
   12.353 +			end = strchr (mvec_name, ' ');
   12.354 +			if (end)
   12.355 +				len = end - mvec_name;
   12.356 +			else
   12.357 +				len = strlen (mvec_name);
   12.358 +			len = min(len, sizeof (str) - 1);	/* truncate to fit str, leaving room for the NUL */
   12.359 +			strncpy (str, mvec_name, len);
   12.360 +			str[len] = '\0';
   12.361 +			mvec_name = str;
   12.362 +		} else
   12.363 +			mvec_name = acpi_get_sysname();
   12.364 +		machvec_init(mvec_name);
   12.365 +	}
   12.366 +#endif
   12.367 +
   12.368 +#ifdef XEN
   12.369 +	early_cmdline_parse(cmdline_p);
   12.370 +	cmdline_parse(*cmdline_p);
   12.371 +#undef CONFIG_ACPI_BOOT	/* Xen: compiles out the CONFIG_ACPI_BOOT sections below */
   12.372 +#endif
   12.373 +	if (early_console_setup(*cmdline_p) == 0)
   12.374 +		mark_bsp_online();
   12.375 +
   12.376 +#ifdef CONFIG_ACPI_BOOT
   12.377 +	/* Initialize the ACPI boot-time table parser */
   12.378 +	acpi_table_init();
   12.379 +# ifdef CONFIG_ACPI_NUMA
   12.380 +	acpi_numa_init();
   12.381 +# endif
   12.382 +#else
   12.383 +# ifdef CONFIG_SMP
   12.384 +	smp_build_cpu_map();	/* happens, e.g., with the Ski simulator */
   12.385 +# endif
   12.386 +#endif /* CONFIG_ACPI_BOOT */
   12.387 +
   12.388 +#ifndef XEN
   12.389 +	find_memory();
   12.390 +#else	/* XEN: early_setup_arch() ends here and late_setup_arch() begins */
   12.391 +	io_port_init();
   12.392 +}
   12.393 +
   12.394 +void __init
   12.395 +late_setup_arch (char **cmdline_p)
   12.396 +{
   12.397 +#undef CONFIG_ACPI_BOOT
   12.398 +	acpi_table_init();	/* Xen calls this unconditionally, here rather than in early_setup_arch() */
   12.399 +#endif /* !XEN */
   12.400 +	/* process SAL system table: */
   12.401 +	ia64_sal_init(efi.sal_systab);
   12.402 +
   12.403 +#ifdef CONFIG_SMP
   12.404 +	cpu_physical_id(0) = hard_smp_processor_id();
   12.405 +#endif
   12.406 +
   12.407 +#ifdef CONFIG_VTI
   12.408 +	identify_vmx_feature();
   12.409 +#endif // CONFIG_VTI
   12.410 +
   12.411 +	cpu_init();	/* initialize the bootstrap CPU */
   12.412 +
   12.413 +#ifdef CONFIG_ACPI_BOOT
   12.414 +	acpi_boot_init();
   12.415 +#endif
   12.416 +
   12.417 +#ifdef CONFIG_VT
   12.418 +	if (!conswitchp) {
   12.419 +# if defined(CONFIG_DUMMY_CONSOLE)
   12.420 +		conswitchp = &dummy_con;
   12.421 +# endif
   12.422 +# if defined(CONFIG_VGA_CONSOLE)
   12.423 +		/*
   12.424 +		 * Non-legacy systems may route legacy VGA MMIO range to system
   12.425 +		 * memory.  vga_con probes the MMIO hole, so memory looks like
   12.426 +		 * a VGA device to it.  The EFI memory map can tell us if it's
   12.427 +		 * memory so we can avoid this problem.
   12.428 +		 */
   12.429 +		if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
   12.430 +			conswitchp = &vga_con;
   12.431 +# endif
   12.432 +	}
   12.433 +#endif
   12.434 +
   12.435 +	/* enable IA-64 MCA handling unless "nomca" is given; NOTE(review): the strlcpy() into saved_command_line above is compiled out under XEN -- confirm it is filled elsewhere */
   12.436 +	if (!strstr(saved_command_line, "nomca"))
   12.437 +		ia64_mca_init();
   12.438 +
   12.439 +	platform_setup(cmdline_p);
   12.440 +	paging_init();
   12.441 +}
  12.442 +
   12.443 +/*
   12.444 + * Display cpu info for one cpu: v is its struct cpuinfo_ia64, m the seq_file to print to.  Returns 0.
   12.445 + */
   12.446 +static int
   12.447 +show_cpuinfo (struct seq_file *m, void *v)
   12.448 +{
   12.449 +#ifdef CONFIG_SMP
   12.450 +#	define lpj	c->loops_per_jiffy
   12.451 +#	define cpunum	c->cpu
   12.452 +#else
   12.453 +#	define lpj	loops_per_jiffy
   12.454 +#	define cpunum	0
   12.455 +#endif
   12.456 +	static struct {
   12.457 +		unsigned long mask;
   12.458 +		const char *feature_name;
   12.459 +	} feature_bits[] = {
   12.460 +		{ 1UL << 0, "branchlong" },
   12.461 +		{ 1UL << 1, "spontaneous deferral"},
   12.462 +		{ 1UL << 2, "16-byte atomic ops" }
   12.463 +	};
   12.464 +	char family[32], features[128], *cp, sep;
   12.465 +	struct cpuinfo_ia64 *c = v;
   12.466 +	unsigned long mask;
   12.467 +	int i;
   12.468 +
   12.469 +	mask = c->features;
   12.470 +
   12.471 +	switch (c->family) {
   12.472 +	      case 0x07:	memcpy(family, "Itanium", 8); break;
   12.473 +	      case 0x1f:	memcpy(family, "Itanium 2", 10); break;
   12.474 +	      default:		sprintf(family, "%u", c->family); break;
   12.475 +	}
   12.476 +
   12.477 +	/* build the feature string: */
   12.478 +	memcpy(features, " standard", 10);	/* stays as-is only when no feature bit is set */
   12.479 +	cp = features;
   12.480 +	sep = 0;
   12.481 +	for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
   12.482 +		if (mask & feature_bits[i].mask) {
   12.483 +			if (sep)
   12.484 +				*cp++ = sep;
   12.485 +			sep = ',';
   12.486 +			*cp++ = ' ';
   12.487 +			strcpy(cp, feature_bits[i].feature_name);
   12.488 +			cp += strlen(feature_bits[i].feature_name);
   12.489 +			mask &= ~feature_bits[i].mask;	/* whatever remains in mask has no name */
   12.490 +		}
   12.491 +	}
   12.492 +	if (mask) {
   12.493 +		/* print unknown features as a hex value: */
   12.494 +		if (sep)
   12.495 +			*cp++ = sep;
   12.496 +		sprintf(cp, " 0x%lx", mask);
   12.497 +	}
   12.498 +
   12.499 +	seq_printf(m,
   12.500 +		   "processor  : %d\n"
   12.501 +		   "vendor     : %s\n"
   12.502 +		   "arch       : IA-64\n"
   12.503 +		   "family     : %s\n"
   12.504 +		   "model      : %u\n"
   12.505 +		   "revision   : %u\n"
   12.506 +		   "archrev    : %u\n"
   12.507 +		   "features   :%s\n"	/* don't change this---it _is_ right! */
   12.508 +		   "cpu number : %lu\n"
   12.509 +		   "cpu regs   : %u\n"
   12.510 +		   "cpu MHz    : %lu.%06lu\n"
   12.511 +		   "itc MHz    : %lu.%06lu\n"
   12.512 +		   "BogoMIPS   : %lu.%02lu\n\n",
   12.513 +		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
   12.514 +		   features, c->ppn, c->number,	/* "cpu number" prints ppn, "cpu regs" prints number */
   12.515 +		   c->proc_freq / 1000000, c->proc_freq % 1000000,
   12.516 +		   c->itc_freq / 1000000, c->itc_freq % 1000000,
   12.517 +		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
   12.518 +	return 0;
   12.519 +}
  12.520 +
   12.521 +static void *
   12.522 +c_start (struct seq_file *m, loff_t *pos)
   12.523 +{
   12.524 +#ifdef CONFIG_SMP
   12.525 +	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))	/* skip CPUs that are not online */
   12.526 +		++*pos;
   12.527 +#endif
   12.528 +	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;	/* cpu_data for *pos, or NULL once *pos reaches NR_CPUS */
   12.529 +}
  12.530 +
   12.531 +static void *
   12.532 +c_next (struct seq_file *m, void *v, loff_t *pos)
   12.533 +{
   12.534 +	++*pos;	/* step past the current entry, then let c_start() find the next one */
   12.535 +	return c_start(m, pos);
   12.536 +}
  12.537 +
   12.538 +static void
   12.539 +c_stop (struct seq_file *m, void *v)
   12.540 +{	/* intentionally empty */
   12.541 +}
  12.542 +
   12.543 +#ifndef XEN	/* the seq_file operations table is only built for non-Xen */
   12.544 +struct seq_operations cpuinfo_op = {
   12.545 +	.start =	c_start,
   12.546 +	.next =		c_next,
   12.547 +	.stop =		c_stop,
   12.548 +	.show =		show_cpuinfo
   12.549 +};
   12.550 +#endif
  12.551 +
   12.552 +void
   12.553 +identify_cpu (struct cpuinfo_ia64 *c)
   12.554 +{
   12.555 +	union {
   12.556 +		unsigned long bits[5];
   12.557 +		struct {
   12.558 +			/* id 0 & 1: */
   12.559 +			char vendor[16];
   12.560 +
   12.561 +			/* id 2 */
   12.562 +			u64 ppn;		/* processor serial number */
   12.563 +
   12.564 +			/* id 3: */
   12.565 +			unsigned number		:  8;
   12.566 +			unsigned revision	:  8;
   12.567 +			unsigned model		:  8;
   12.568 +			unsigned family		:  8;
   12.569 +			unsigned archrev	:  8;
   12.570 +			unsigned reserved	: 24;
   12.571 +
   12.572 +			/* id 4: */
   12.573 +			u64 features;
   12.574 +		} field;
   12.575 +	} cpuid;
   12.576 +	pal_vm_info_1_u_t vm1;
   12.577 +	pal_vm_info_2_u_t vm2;
   12.578 +	pal_status_t status;
   12.579 +	unsigned long impl_va_msb = 50, phys_addr_size = 44;	/* Itanium defaults */
   12.580 +	int i;
   12.581 +
   12.582 +	for (i = 0; i < 5; ++i)	/* read ids 0-4 into the union so the fields above decode them */
   12.583 +		cpuid.bits[i] = ia64_get_cpuid(i);
   12.584 +
   12.585 +	memcpy(c->vendor, cpuid.field.vendor, 16);
   12.586 +#ifdef CONFIG_SMP
   12.587 +	c->cpu = smp_processor_id();
   12.588 +#endif
   12.589 +	c->ppn = cpuid.field.ppn;
   12.590 +	c->number = cpuid.field.number;
   12.591 +	c->revision = cpuid.field.revision;
   12.592 +	c->model = cpuid.field.model;
   12.593 +	c->family = cpuid.field.family;
   12.594 +	c->archrev = cpuid.field.archrev;
   12.595 +	c->features = cpuid.field.features;
   12.596 +	/* take the address sizes from PAL when the call succeeds; otherwise keep the defaults */
   12.597 +	status = ia64_pal_vm_summary(&vm1, &vm2);
   12.598 +	if (status == PAL_STATUS_SUCCESS) {
   12.599 +		impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
   12.600 +		phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
   12.601 +	}
   12.602 +	c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));	/* bits above impl_va_msb, below bit 61 */
   12.603 +	c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));	/* bits from phys_addr_size up to bit 62 */
   12.604 +
   12.605 +#ifdef CONFIG_VTI
   12.606 +	/* If vmx feature is on, do necessary initialization for vmx */
   12.607 +	if (vmx_enabled)
   12.608 +		vmx_init_env();
   12.609 +#endif
   12.610 +}
  12.611 +
   12.612 +void
   12.613 +setup_per_cpu_areas (void)
   12.614 +{
   12.615 +	/* intentionally empty: start_kernel() requires this function to exist */
   12.616 +}
  12.617 +
  12.618 +static void
  12.619 +get_max_cacheline_size (void)
  12.620 +{
  12.621 +	unsigned long line_size, max = 1;
  12.622 +	u64 l, levels, unique_caches;
  12.623 +        pal_cache_config_info_t cci;
  12.624 +        s64 status;
  12.625 +
  12.626 +        status = ia64_pal_cache_summary(&levels, &unique_caches);
  12.627 +        if (status != 0) {
  12.628 +                printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
  12.629 +                       __FUNCTION__, status);
  12.630 +                max = SMP_CACHE_BYTES;
  12.631 +		goto out;
  12.632 +        }
  12.633 +
  12.634 +	for (l = 0; l < levels; ++l) {
  12.635 +		status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
  12.636 +						    &cci);
  12.637 +		if (status != 0) {
  12.638 +			printk(KERN_ERR
  12.639 +			       "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
  12.640 +			       __FUNCTION__, l, status);
  12.641 +			max = SMP_CACHE_BYTES;
  12.642 +		}
  12.643 +		line_size = 1 << cci.pcci_line_size;
  12.644 +		if (line_size > max)
  12.645 +			max = line_size;
  12.646 +        }
  12.647 +  out:
  12.648 +	if (max > ia64_max_cacheline_size)
  12.649 +		ia64_max_cacheline_size = max;
  12.650 +}
  12.651 +
  12.652 +/*
  12.653 + * cpu_init() initializes state that is per-CPU.  This function acts
  12.654 + * as a 'CPU state barrier', nothing should get across.
  12.655 + */
  12.656 +void
  12.657 +cpu_init (void)
  12.658 +{
  12.659 +	extern void __devinit ia64_mmu_init (void *);
  12.660 +	unsigned long num_phys_stacked;
  12.661 +	pal_vm_info_2_u_t vmi;
  12.662 +	unsigned int max_ctx;
  12.663 +	struct cpuinfo_ia64 *cpu_info;
  12.664 +	void *cpu_data;
  12.665 +
  12.666 +	cpu_data = per_cpu_init();
  12.667 +
  12.668 +	/*
  12.669 +	 * We set ar.k3 so that assembly code in MCA handler can compute
  12.670 +	 * physical addresses of per cpu variables with a simple:
  12.671 +	 *   phys = ar.k3 + &per_cpu_var
  12.672 +	 */
  12.673 +	ia64_set_kr(IA64_KR_PER_CPU_DATA,
  12.674 +		    ia64_tpa(cpu_data) - (long) __per_cpu_start);
  12.675 +
  12.676 +	get_max_cacheline_size();
  12.677 +
  12.678 +	/*
  12.679 +	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
  12.680 +	 * ia64_mmu_init() yet.  And we can't call ia64_mmu_init() first because it
  12.681 +	 * depends on the data returned by identify_cpu().  We break the dependency by
  12.682 +	 * accessing cpu_data() through the canonical per-CPU address.
  12.683 +	 */
  12.684 +	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
  12.685 +	identify_cpu(cpu_info);
  12.686 +
  12.687 +#ifdef CONFIG_MCKINLEY
  12.688 +	{
  12.689 +#		define FEATURE_SET 16
  12.690 +		struct ia64_pal_retval iprv;
  12.691 +
  12.692 +		if (cpu_info->family == 0x1f) {
  12.693 +			PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
  12.694 +			if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
  12.695 +				PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
  12.696 +				              (iprv.v1 | 0x80), FEATURE_SET, 0);
  12.697 +		}
  12.698 +	}
  12.699 +#endif
  12.700 +
  12.701 +	/* Clear the stack memory reserved for pt_regs: */
  12.702 +	memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
  12.703 +
  12.704 +	ia64_set_kr(IA64_KR_FPU_OWNER, 0);
  12.705 +
  12.706 +	/*
  12.707 +	 * Initialize default control register to defer all speculative faults.  The
  12.708 +	 * kernel MUST NOT depend on a particular setting of these bits (in other words,
  12.709 +	 * the kernel must have recovery code for all speculative accesses).  Turn on
  12.710 +	 * dcr.lc as per recommendation by the architecture team.  Most IA-32 apps
  12.711 +	 * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
  12.712 +	 * be fine).
  12.713 +	 */
  12.714 +	ia64_setreg(_IA64_REG_CR_DCR,  (  IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
  12.715 +					| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
  12.716 +	atomic_inc(&init_mm.mm_count);
  12.717 +	current->active_mm = &init_mm;
  12.718 +#ifdef XEN
  12.719 +	if (current->domain->arch.mm)
  12.720 +#else
  12.721 +	if (current->mm)
  12.722 +#endif
  12.723 +		BUG();
  12.724 +
  12.725 +	ia64_mmu_init(ia64_imva(cpu_data));
  12.726 +	ia64_mca_cpu_init(ia64_imva(cpu_data));
  12.727 +
  12.728 +#ifdef CONFIG_IA32_SUPPORT
  12.729 +	ia32_cpu_init();
  12.730 +#endif
  12.731 +
  12.732 +	/* Clear ITC to eliminate sched_clock() overflows in human time.  */
  12.733 +	ia64_set_itc(0);
  12.734 +
  12.735 +	/* disable all local interrupt sources: */
  12.736 +	ia64_set_itv(1 << 16);
  12.737 +	ia64_set_lrr0(1 << 16);
  12.738 +	ia64_set_lrr1(1 << 16);
  12.739 +	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
  12.740 +	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
  12.741 +
  12.742 +	/* clear TPR & XTP to enable all interrupt classes: */
  12.743 +	ia64_setreg(_IA64_REG_CR_TPR, 0);
  12.744 +#ifdef CONFIG_SMP
  12.745 +	normal_xtp();
  12.746 +#endif
  12.747 +
  12.748 +	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
  12.749 +	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
  12.750 +		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
  12.751 +	else {
  12.752 +		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
  12.753 +		max_ctx = (1U << 15) - 1;	/* use architected minimum */
  12.754 +	}
  12.755 +	while (max_ctx < ia64_ctx.max_ctx) {
  12.756 +		unsigned int old = ia64_ctx.max_ctx;
  12.757 +		if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
  12.758 +			break;
  12.759 +	}
  12.760 +
  12.761 +	if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
  12.762 +		printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
  12.763 +		       "stacked regs\n");
  12.764 +		num_phys_stacked = 96;
  12.765 +	}
  12.766 +	/* size of physical stacked register partition plus 8 bytes: */
  12.767 +	__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
  12.768 +	platform_cpu_init();
  12.769 +}
  12.770 +
  12.771 +void
  12.772 +check_bugs (void)
  12.773 +{
  12.774 +	ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
  12.775 +			       (unsigned long) __end___mckinley_e9_bundles);
  12.776 +}
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xen/arch/ia64/linux-xen/time.c	Tue Aug 16 12:15:23 2005 +0800
    13.3 @@ -0,0 +1,264 @@
    13.4 +/*
    13.5 + * linux/arch/ia64/kernel/time.c
    13.6 + *
    13.7 + * Copyright (C) 1998-2003 Hewlett-Packard Co
    13.8 + *	Stephane Eranian <eranian@hpl.hp.com>
    13.9 + *	David Mosberger <davidm@hpl.hp.com>
   13.10 + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
   13.11 + * Copyright (C) 1999-2000 VA Linux Systems
   13.12 + * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
   13.13 + */
   13.14 +#include <linux/config.h>
   13.15 +
   13.16 +#include <linux/cpu.h>
   13.17 +#include <linux/init.h>
   13.18 +#include <linux/kernel.h>
   13.19 +#include <linux/module.h>
   13.20 +#include <linux/profile.h>
   13.21 +#include <linux/sched.h>
   13.22 +#include <linux/time.h>
   13.23 +#include <linux/interrupt.h>
   13.24 +#include <linux/efi.h>
   13.25 +#include <linux/profile.h>
   13.26 +#include <linux/timex.h>
   13.27 +
   13.28 +#include <asm/machvec.h>
   13.29 +#include <asm/delay.h>
   13.30 +#include <asm/hw_irq.h>
   13.31 +#include <asm/ptrace.h>
   13.32 +#include <asm/sal.h>
   13.33 +#include <asm/sections.h>
   13.34 +#include <asm/system.h>
   13.35 +#ifdef XEN
   13.36 +#include <linux/jiffies.h>	// not included by xen/sched.h
   13.37 +#endif
   13.38 +
   13.39 +extern unsigned long wall_jiffies;
   13.40 +
   13.41 +u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
   13.42 +
   13.43 +EXPORT_SYMBOL(jiffies_64);
   13.44 +
   13.45 +#define TIME_KEEPER_ID	0	/* smp_processor_id() of time-keeper */
   13.46 +
   13.47 +#ifdef CONFIG_IA64_DEBUG_IRQ
   13.48 +
   13.49 +unsigned long last_cli_ip;
   13.50 +EXPORT_SYMBOL(last_cli_ip);
   13.51 +
   13.52 +#endif
   13.53 +
   13.54 +#ifndef XEN
   13.55 +static struct time_interpolator itc_interpolator = {
   13.56 +	.shift = 16,
   13.57 +	.mask = 0xffffffffffffffffLL,
   13.58 +	.source = TIME_SOURCE_CPU
   13.59 +};
   13.60 +
   13.61 +static irqreturn_t
   13.62 +timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
   13.63 +{
   13.64 +	unsigned long new_itm;
   13.65 +
   13.66 +	if (unlikely(cpu_is_offline(smp_processor_id()))) {
   13.67 +		return IRQ_HANDLED;
   13.68 +	}
   13.69 +
   13.70 +	platform_timer_interrupt(irq, dev_id, regs);
   13.71 +
   13.72 +	new_itm = local_cpu_data->itm_next;
   13.73 +
   13.74 +	if (!time_after(ia64_get_itc(), new_itm))
   13.75 +		printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
   13.76 +		       ia64_get_itc(), new_itm);
   13.77 +
   13.78 +	profile_tick(CPU_PROFILING, regs);
   13.79 +
   13.80 +	while (1) {
   13.81 +		update_process_times(user_mode(regs));
   13.82 +
   13.83 +		new_itm += local_cpu_data->itm_delta;
   13.84 +
   13.85 +		if (smp_processor_id() == TIME_KEEPER_ID) {
   13.86 +			/*
   13.87 +			 * Here we are in the timer irq handler. We have irqs locally
   13.88 +			 * disabled, but we don't know if the timer_bh is running on
   13.89 +			 * another CPU. We need to avoid an SMP race by acquiring the
   13.90 +			 * xtime_lock.
   13.91 +			 */
   13.92 +			write_seqlock(&xtime_lock);
   13.93 +			do_timer(regs);
   13.94 +			local_cpu_data->itm_next = new_itm;
   13.95 +			write_sequnlock(&xtime_lock);
   13.96 +		} else
   13.97 +			local_cpu_data->itm_next = new_itm;
   13.98 +
   13.99 +		if (time_after(new_itm, ia64_get_itc()))
  13.100 +			break;
  13.101 +	}
  13.102 +
  13.103 +	do {
  13.104 +		/*
  13.105 +		 * If we're too close to the next clock tick for
  13.106 +		 * comfort, we increase the safety margin by
  13.107 +		 * intentionally dropping the next tick(s).  We do NOT
  13.108 +		 * update itm.next because that would force us to call
  13.109 +		 * do_timer() which in turn would let our clock run
  13.110 +		 * too fast (with the potentially devastating effect
  13.111 +		 * of losing monotonicity of time).
  13.112 +		 */
  13.113 +		while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
  13.114 +			new_itm += local_cpu_data->itm_delta;
  13.115 +		ia64_set_itm(new_itm);
  13.116 +		/* double check, in case we got hit by a (slow) PMI: */
  13.117 +	} while (time_after_eq(ia64_get_itc(), new_itm));
  13.118 +	return IRQ_HANDLED;
  13.119 +}
  13.120 +#endif
  13.121 +
  13.122 +/*
  13.123 + * Encapsulate access to the itm structure for SMP.
  13.124 + */
  13.125 +void
  13.126 +ia64_cpu_local_tick (void)
  13.127 +{
  13.128 +	int cpu = smp_processor_id();
  13.129 +	unsigned long shift = 0, delta;
  13.130 +
  13.131 +	/* arrange for the cycle counter to generate a timer interrupt: */
  13.132 +	ia64_set_itv(IA64_TIMER_VECTOR);
  13.133 +
  13.134 +	delta = local_cpu_data->itm_delta;
  13.135 +	/*
  13.136 +	 * Stagger the timer tick for each CPU so they don't occur all at (almost) the
  13.137 +	 * same time:
  13.138 +	 */
  13.139 +	if (cpu) {
  13.140 +		unsigned long hi = 1UL << ia64_fls(cpu);
  13.141 +		shift = (2*(cpu - hi) + 1) * delta/hi/2;
  13.142 +	}
  13.143 +	local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
  13.144 +	ia64_set_itm(local_cpu_data->itm_next);
  13.145 +}
  13.146 +
  13.147 +static int nojitter;
  13.148 +
  13.149 +static int __init nojitter_setup(char *str)
  13.150 +{
  13.151 +	nojitter = 1;
  13.152 +	printk("Jitter checking for ITC timers disabled\n");
  13.153 +	return 1;
  13.154 +}
  13.155 +
  13.156 +__setup("nojitter", nojitter_setup);
  13.157 +
  13.158 +
  13.159 +void __devinit
  13.160 +ia64_init_itm (void)
  13.161 +{
  13.162 +	unsigned long platform_base_freq, itc_freq;
  13.163 +	struct pal_freq_ratio itc_ratio, proc_ratio;
  13.164 +	long status, platform_base_drift, itc_drift;
  13.165 +
  13.166 +	/*
  13.167 +	 * According to SAL v2.6, we need to use a SAL call to determine the platform base
  13.168 +	 * frequency and then a PAL call to determine the frequency ratio between the ITC
  13.169 +	 * and the base frequency.
  13.170 +	 */
  13.171 +	status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
  13.172 +				    &platform_base_freq, &platform_base_drift);
  13.173 +	if (status != 0) {
  13.174 +		printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
  13.175 +	} else {
  13.176 +		status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
  13.177 +		if (status != 0)
  13.178 +			printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
  13.179 +	}
  13.180 +	if (status != 0) {
  13.181 +		/* invent "random" values */
  13.182 +		printk(KERN_ERR
  13.183 +		       "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
  13.184 +		platform_base_freq = 100000000;
  13.185 +		platform_base_drift = -1;	/* no drift info */
  13.186 +		itc_ratio.num = 3;
  13.187 +		itc_ratio.den = 1;
  13.188 +	}
  13.189 +	if (platform_base_freq < 40000000) {
  13.190 +		printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
  13.191 +		       platform_base_freq);
  13.192 +		platform_base_freq = 75000000;
  13.193 +		platform_base_drift = -1;
  13.194 +	}
  13.195 +	if (!proc_ratio.den)
  13.196 +		proc_ratio.den = 1;	/* avoid division by zero */
  13.197 +	if (!itc_ratio.den)
  13.198 +		itc_ratio.den = 1;	/* avoid division by zero */
  13.199 +
  13.200 +	itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
  13.201 +
  13.202 +	local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
  13.203 +	printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, "
  13.204 +	       "ITC freq=%lu.%03luMHz", smp_processor_id(),
  13.205 +	       platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
  13.206 +	       itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
  13.207 +
  13.208 +	if (platform_base_drift != -1) {
  13.209 +		itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
  13.210 +		printk("+/-%ldppm\n", itc_drift);
  13.211 +	} else {
  13.212 +		itc_drift = -1;
  13.213 +		printk("\n");
  13.214 +	}
  13.215 +
  13.216 +	local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
  13.217 +	local_cpu_data->itc_freq = itc_freq;
  13.218 +	local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
  13.219 +	local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
  13.220 +					+ itc_freq/2)/itc_freq;
  13.221 +
  13.222 +	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
  13.223 +#ifndef XEN
  13.224 +		itc_interpolator.frequency = local_cpu_data->itc_freq;
  13.225 +		itc_interpolator.drift = itc_drift;
  13.226 +#ifdef CONFIG_SMP
  13.227 +		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
  13.228 +		 * Jitter compensation requires a cmpxchg which may limit
  13.229 +		 * the scalability of the syscalls for retrieving time.
  13.230 +		 * The ITC synchronization is usually successful to within a few
  13.231 +		 * ITC ticks but this is not a sure thing. If you need to improve
  13.232 +		 * timer performance in SMP situations then boot the kernel with the
  13.233 +		 * "nojitter" option. However, doing so may result in time fluctuating (maybe
  13.234 +		 * even going backward) if the ITC offsets between the individual CPUs
  13.235 +		 * are too large.
  13.236 +		 */
  13.237 +		if (!nojitter) itc_interpolator.jitter = 1;
  13.238 +#endif
  13.239 +		register_time_interpolator(&itc_interpolator);
  13.240 +#endif
  13.241 +	}
  13.242 +
  13.243 +	/* Setup the CPU local timer tick */
  13.244 +	ia64_cpu_local_tick();
  13.245 +}
  13.246 +
  13.247 +#ifndef XEN
  13.248 +static struct irqaction timer_irqaction = {
  13.249 +	.handler =	timer_interrupt,
  13.250 +	.flags =	SA_INTERRUPT,
  13.251 +	.name =		"timer"
  13.252 +};
  13.253 +
  13.254 +void __init
  13.255 +time_init (void)
  13.256 +{
  13.257 +	register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
  13.258 +	efi_gettimeofday(&xtime);
  13.259 +	ia64_init_itm();
  13.260 +
  13.261 +	/*
  13.262 +	 * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
  13.263 +	 * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
  13.264 +	 */
  13.265 +	set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
  13.266 +}
  13.267 +#endif
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/xen/arch/ia64/linux-xen/tlb.c	Tue Aug 16 12:15:23 2005 +0800
    14.3 @@ -0,0 +1,199 @@
    14.4 +/*
    14.5 + * TLB support routines.
    14.6 + *
    14.7 + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
    14.8 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    14.9 + *
   14.10 + * 08/02/00 A. Mallick <asit.k.mallick@intel.com>
   14.11 + *		Modified RID allocation for SMP
   14.12 + *          Goutham Rao <goutham.rao@intel.com>
   14.13 + *              IPI based ptc implementation and A-step IPI implementation.
   14.14 + */
   14.15 +#include <linux/config.h>
   14.16 +#include <linux/module.h>
   14.17 +#include <linux/init.h>
   14.18 +#include <linux/kernel.h>
   14.19 +#include <linux/sched.h>
   14.20 +#include <linux/smp.h>
   14.21 +#include <linux/mm.h>
   14.22 +
   14.23 +#include <asm/delay.h>
   14.24 +#include <asm/mmu_context.h>
   14.25 +#include <asm/pgalloc.h>
   14.26 +#include <asm/pal.h>
   14.27 +#include <asm/tlbflush.h>
   14.28 +
   14.29 +static struct {
   14.30 +	unsigned long mask;	/* mask of supported purge page-sizes */
   14.31 +	unsigned long max_bits;	/* log2() of largest supported purge page-size */
   14.32 +} purge;
   14.33 +
   14.34 +struct ia64_ctx ia64_ctx = {
   14.35 +	.lock =		SPIN_LOCK_UNLOCKED,
   14.36 +	.next =		1,
   14.37 +	.limit =	(1 << 15) - 1,		/* start out with the safe (architected) limit */
   14.38 +	.max_ctx =	~0U
   14.39 +};
   14.40 +
   14.41 +DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
   14.42 +
   14.43 +/*
   14.44 + * Acquire the ia64_ctx.lock before calling this function!
   14.45 + */
   14.46 +void
   14.47 +wrap_mmu_context (struct mm_struct *mm)
   14.48 +{
   14.49 +#ifdef XEN
   14.50 +printf("wrap_mmu_context: called, not implemented\n");
   14.51 +#else
   14.52 +	unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
   14.53 +	struct task_struct *tsk;
   14.54 +	int i;
   14.55 +
   14.56 +	if (ia64_ctx.next > max_ctx)
   14.57 +		ia64_ctx.next = 300;	/* skip daemons */
   14.58 +	ia64_ctx.limit = max_ctx + 1;
   14.59 +
   14.60 +	/*
   14.61 +	 * Scan every task's mm->context and set the proper safe range
   14.62 +	 */
   14.63 +
   14.64 +	read_lock(&tasklist_lock);
   14.65 +  repeat:
   14.66 +	for_each_process(tsk) {
   14.67 +		if (!tsk->mm)
   14.68 +			continue;
   14.69 +		tsk_context = tsk->mm->context;
   14.70 +		if (tsk_context == ia64_ctx.next) {
   14.71 +			if (++ia64_ctx.next >= ia64_ctx.limit) {
   14.72 +				/* empty range: reset the range limit and start over */
   14.73 +				if (ia64_ctx.next > max_ctx)
   14.74 +					ia64_ctx.next = 300;
   14.75 +				ia64_ctx.limit = max_ctx + 1;
   14.76 +				goto repeat;
   14.77 +			}
   14.78 +		}
   14.79 +		if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
   14.80 +			ia64_ctx.limit = tsk_context;
   14.81 +	}
   14.82 +	read_unlock(&tasklist_lock);
   14.83 +	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
   14.84 +	{
   14.85 +		int cpu = get_cpu(); /* prevent preemption/migration */
   14.86 +		for (i = 0; i < NR_CPUS; ++i)
   14.87 +			if (cpu_online(i) && (i != cpu))
   14.88 +				per_cpu(ia64_need_tlb_flush, i) = 1;
   14.89 +		put_cpu();
   14.90 +	}
   14.91 +	local_flush_tlb_all();
   14.92 +#endif
   14.93 +}
   14.94 +
   14.95 +void
   14.96 +ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
   14.97 +{
   14.98 +	static DEFINE_SPINLOCK(ptcg_lock);
   14.99 +
  14.100 +	/* HW requires global serialization of ptc.ga.  */
  14.101 +	spin_lock(&ptcg_lock);
  14.102 +	{
  14.103 +		do {
  14.104 +			/*
  14.105 +			 * Flush ALAT entries also.
  14.106 +			 */
  14.107 +			ia64_ptcga(start, (nbits<<2));
  14.108 +			ia64_srlz_i();
  14.109 +			start += (1UL << nbits);
  14.110 +		} while (start < end);
  14.111 +	}
  14.112 +	spin_unlock(&ptcg_lock);
  14.113 +}
  14.114 +
  14.115 +void
  14.116 +local_flush_tlb_all (void)
  14.117 +{
  14.118 +	unsigned long i, j, flags, count0, count1, stride0, stride1, addr;
  14.119 +
  14.120 +	addr    = local_cpu_data->ptce_base;
  14.121 +	count0  = local_cpu_data->ptce_count[0];
  14.122 +	count1  = local_cpu_data->ptce_count[1];
  14.123 +	stride0 = local_cpu_data->ptce_stride[0];
  14.124 +	stride1 = local_cpu_data->ptce_stride[1];
  14.125 +
  14.126 +	local_irq_save(flags);
  14.127 +	for (i = 0; i < count0; ++i) {
  14.128 +		for (j = 0; j < count1; ++j) {
  14.129 +			ia64_ptce(addr);
  14.130 +			addr += stride1;
  14.131 +		}
  14.132 +		addr += stride0;
  14.133 +	}
  14.134 +	local_irq_restore(flags);
  14.135 +	ia64_srlz_i();			/* srlz.i implies srlz.d */
  14.136 +}
  14.137 +EXPORT_SYMBOL(local_flush_tlb_all);
  14.138 +
  14.139 +void
  14.140 +flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
  14.141 +{
  14.142 +#ifdef XEN
  14.143 +printf("flush_tlb_range: called, not implemented\n");
  14.144 +#else
  14.145 +	struct mm_struct *mm = vma->vm_mm;
  14.146 +	unsigned long size = end - start;
  14.147 +	unsigned long nbits;
  14.148 +
  14.149 +	if (mm != current->active_mm) {
  14.150 +		/* this does happen, but perhaps it's not worth optimizing for? */
  14.151 +#ifdef CONFIG_SMP
  14.152 +		flush_tlb_all();
  14.153 +#else
  14.154 +		mm->context = 0;
  14.155 +#endif
  14.156 +		return;
  14.157 +	}
  14.158 +
  14.159 +	nbits = ia64_fls(size + 0xfff);
  14.160 +	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
  14.161 +		++nbits;
  14.162 +	if (nbits > purge.max_bits)
  14.163 +		nbits = purge.max_bits;
  14.164 +	start &= ~((1UL << nbits) - 1);
  14.165 +
  14.166 +# ifdef CONFIG_SMP
  14.167 +	platform_global_tlb_purge(start, end, nbits);
  14.168 +# else
  14.169 +	do {
  14.170 +		ia64_ptcl(start, (nbits<<2));
  14.171 +		start += (1UL << nbits);
  14.172 +	} while (start < end);
  14.173 +# endif
  14.174 +
  14.175 +	ia64_srlz_i();			/* srlz.i implies srlz.d */
  14.176 +#endif
  14.177 +}
  14.178 +EXPORT_SYMBOL(flush_tlb_range);
  14.179 +
  14.180 +void __devinit
  14.181 +ia64_tlb_init (void)
  14.182 +{
  14.183 +	ia64_ptce_info_t ptce_info;
  14.184 +	unsigned long tr_pgbits;
  14.185 +	long status;
  14.186 +
  14.187 +	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
  14.188 +		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld;"
  14.189 +		       "defaulting to architected purge page-sizes.\n", status);
  14.190 +		purge.mask = 0x115557000UL;
  14.191 +	}
  14.192 +	purge.max_bits = ia64_fls(purge.mask);
  14.193 +
  14.194 +	ia64_get_ptce(&ptce_info);
  14.195 +	local_cpu_data->ptce_base = ptce_info.base;
  14.196 +	local_cpu_data->ptce_count[0] = ptce_info.count[0];
  14.197 +	local_cpu_data->ptce_count[1] = ptce_info.count[1];
  14.198 +	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
  14.199 +	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
  14.200 +
  14.201 +	local_flush_tlb_all();		/* nuke left overs from bootstrapping... */
  14.202 +}
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/xen/arch/ia64/linux-xen/unaligned.c	Tue Aug 16 12:15:23 2005 +0800
    15.3 @@ -0,0 +1,1653 @@
    15.4 +/*
    15.5 + * Architecture-specific unaligned trap handling.
    15.6 + *
    15.7 + * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
    15.8 + *	Stephane Eranian <eranian@hpl.hp.com>
    15.9 + *	David Mosberger-Tang <davidm@hpl.hp.com>
   15.10 + *
   15.11 + * 2002/12/09   Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
   15.12 + *		get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
   15.13 + *		stacked register returns an undefined value; it does NOT trigger a
   15.14 + *		"rsvd register fault").
   15.15 + * 2001/10/11	Fix unaligned access to rotating registers in s/w pipelined loops.
   15.16 + * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
   15.17 + * 2001/01/17	Add support for emulation of unaligned kernel accesses.
   15.18 + */
   15.19 +#include <linux/kernel.h>
   15.20 +#include <linux/sched.h>
   15.21 +#include <linux/smp_lock.h>
   15.22 +#include <linux/tty.h>
   15.23 +
   15.24 +#include <asm/intrinsics.h>
   15.25 +#include <asm/processor.h>
   15.26 +#include <asm/rse.h>
   15.27 +#include <asm/uaccess.h>
   15.28 +#include <asm/unaligned.h>
   15.29 +
   15.30 +extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
   15.31 +
   15.32 +#undef DEBUG_UNALIGNED_TRAP
   15.33 +
   15.34 +#ifdef DEBUG_UNALIGNED_TRAP
   15.35 +# define DPRINT(a...)	do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
   15.36 +# define DDUMP(str,vp,len)	dump(str, vp, len)
   15.37 +
   15.38 +static void
   15.39 +dump (const char *str, void *vp, size_t len)
   15.40 +{
   15.41 +	unsigned char *cp = vp;
   15.42 +	int i;
   15.43 +
   15.44 +	printk("%s", str);
   15.45 +	for (i = 0; i < len; ++i)
   15.46 +		printk (" %02x", *cp++);
   15.47 +	printk("\n");
   15.48 +}
   15.49 +#else
   15.50 +# define DPRINT(a...)
   15.51 +# define DDUMP(str,vp,len)
   15.52 +#endif
   15.53 +
   15.54 +#define IA64_FIRST_STACKED_GR	32
   15.55 +#define IA64_FIRST_ROTATING_FR	32
   15.56 +#define SIGN_EXT9		0xffffffffffffff00ul
   15.57 +
   15.58 +/*
   15.59 + * For M-unit:
   15.60 + *
   15.61 + *  opcode |   m  |   x6    |
   15.62 + * --------|------|---------|
   15.63 + * [40-37] | [36] | [35:30] |
   15.64 + * --------|------|---------|
   15.65 + *     4   |   1  |    6    | = 11 bits
   15.66 + * --------------------------
   15.67 + * However bits [31:30] are not directly useful to distinguish between
   15.68 + * load/store so we can use [35:32] instead, which gives the following
   15.69 + * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
   15.70 + * checking the m-bit until later in the load/store emulation.
   15.71 + */
   15.72 +#define IA64_OPCODE_MASK	0x1ef
   15.73 +#define IA64_OPCODE_SHIFT	32
   15.74 +
   15.75 +/*
   15.76 + * Table C-28 Integer Load/Store
   15.77 + *
   15.78 + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
   15.79 + *
   15.80 + * ld8.fill, st8.fill  MUST be aligned because the RNATs are based on
   15.81 + * the address (bits [8:3]), so we must failed.
   15.82 + * the address (bits [8:3]), so we must fail.
   15.83 +#define LD_OP            0x080
   15.84 +#define LDS_OP           0x081
   15.85 +#define LDA_OP           0x082
   15.86 +#define LDSA_OP          0x083
   15.87 +#define LDBIAS_OP        0x084
   15.88 +#define LDACQ_OP         0x085
   15.89 +/* 0x086, 0x087 are not relevant */
   15.90 +#define LDCCLR_OP        0x088
   15.91 +#define LDCNC_OP         0x089
   15.92 +#define LDCCLRACQ_OP     0x08a
   15.93 +#define ST_OP            0x08c
   15.94 +#define STREL_OP         0x08d
   15.95 +/* 0x08e,0x8f are not relevant */
   15.96 +
   15.97 +/*
   15.98 + * Table C-29 Integer Load +Reg
   15.99 + *
  15.100 + * we use the ld->m (bit [36:36]) field to determine whether or not we have
  15.101 + * a load/store of this form.
  15.102 + */
  15.103 +
  15.104 +/*
  15.105 + * Table C-30 Integer Load/Store +Imm
  15.106 + *
  15.107 + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
  15.108 + *
  15.109 + * ld8.fill, st8.fill  must be aligned because the NaT registers are based on
  15.110 + * the address, so we must fail and the program must be fixed.
  15.111 + */
  15.112 +#define LD_IMM_OP            0x0a0
  15.113 +#define LDS_IMM_OP           0x0a1
  15.114 +#define LDA_IMM_OP           0x0a2
  15.115 +#define LDSA_IMM_OP          0x0a3
  15.116 +#define LDBIAS_IMM_OP        0x0a4
  15.117 +#define LDACQ_IMM_OP         0x0a5
  15.118 +/* 0x0a6, 0xa7 are not relevant */
  15.119 +#define LDCCLR_IMM_OP        0x0a8
  15.120 +#define LDCNC_IMM_OP         0x0a9
  15.121 +#define LDCCLRACQ_IMM_OP     0x0aa
  15.122 +#define ST_IMM_OP            0x0ac
  15.123 +#define STREL_IMM_OP         0x0ad
  15.124 +/* 0x0ae,0xaf are not relevant */
  15.125 +
  15.126 +/*
  15.127 + * Table C-32 Floating-point Load/Store
  15.128 + */
  15.129 +#define LDF_OP           0x0c0
  15.130 +#define LDFS_OP          0x0c1
  15.131 +#define LDFA_OP          0x0c2
  15.132 +#define LDFSA_OP         0x0c3
  15.133 +/* 0x0c6 is irrelevant */
  15.134 +#define LDFCCLR_OP       0x0c8
  15.135 +#define LDFCNC_OP        0x0c9
  15.136 +/* 0x0cb is irrelevant  */
  15.137 +#define STF_OP           0x0cc
  15.138 +
  15.139 +/*
  15.140 + * Table C-33 Floating-point Load +Reg
  15.141 + *
  15.142 + * we use the ld->m (bit [36:36]) field to determine whether or not we have
  15.143 + * a load/store of this form.
  15.144 + */
  15.145 +
  15.146 +/*
  15.147 + * Table C-34 Floating-point Load/Store +Imm
  15.148 + */
  15.149 +#define LDF_IMM_OP       0x0e0
  15.150 +#define LDFS_IMM_OP      0x0e1
  15.151 +#define LDFA_IMM_OP      0x0e2
  15.152 +#define LDFSA_IMM_OP     0x0e3
  15.153 +/* 0x0e6 is irrelevant */
  15.154 +#define LDFCCLR_IMM_OP   0x0e8
  15.155 +#define LDFCNC_IMM_OP    0x0e9
  15.156 +#define STF_IMM_OP       0x0ec
  15.157 +
  15.158 +typedef struct {
  15.159 +	unsigned long	 qp:6;	/* [0:5]   */
  15.160 +	unsigned long    r1:7;	/* [6:12]  */
  15.161 +	unsigned long   imm:7;	/* [13:19] */
  15.162 +	unsigned long    r3:7;	/* [20:26] */
  15.163 +	unsigned long     x:1;  /* [27:27] */
  15.164 +	unsigned long  hint:2;	/* [28:29] */
  15.165 +	unsigned long x6_sz:2;	/* [30:31] */
  15.166 +	unsigned long x6_op:4;	/* [32:35], x6 = x6_sz|x6_op */
  15.167 +	unsigned long     m:1;	/* [36:36] */
  15.168 +	unsigned long    op:4;	/* [37:40] */
  15.169 +	unsigned long   pad:23; /* [41:63] */
  15.170 +} load_store_t;
  15.171 +
  15.172 +
  15.173 +typedef enum {
  15.174 +	UPD_IMMEDIATE,	/* ldXZ r1=[r3],imm(9) */
  15.175 +	UPD_REG		/* ldXZ r1=[r3],r2     */
  15.176 +} update_t;
  15.177 +
  15.178 +/*
  15.179 + * We use tables to keep track of the offsets of registers in the saved state.
  15.180 + * This way we save having big switch/case statements.
  15.181 + *
  15.182 + * We use bit 0 to indicate switch_stack or pt_regs.
  15.183 + * The offset is simply shifted by 1 bit.
  15.184 + * A 2-byte value should be enough to hold any kind of offset
  15.185 + *
  15.186 + * In case the calling convention changes (and thus pt_regs/switch_stack)
  15.187 + * simply use RSW instead of RPT or vice-versa.
  15.188 + */
  15.189 +
  15.190 +#define RPO(x)	((size_t) &((struct pt_regs *)0)->x)
  15.191 +#define RSO(x)	((size_t) &((struct switch_stack *)0)->x)
  15.192 +
  15.193 +#define RPT(x)		(RPO(x) << 1)
  15.194 +#define RSW(x)		(1| RSO(x)<<1)
  15.195 +
  15.196 +#define GR_OFFS(x)	(gr_info[x]>>1)
  15.197 +#define GR_IN_SW(x)	(gr_info[x] & 0x1)
  15.198 +
  15.199 +#define FR_OFFS(x)	(fr_info[x]>>1)
  15.200 +#define FR_IN_SW(x)	(fr_info[x] & 0x1)
  15.201 +
  15.202 +static u16 gr_info[32]={
  15.203 +	0,			/* r0 is read-only : WE SHOULD NEVER GET THIS */
  15.204 +
  15.205 +	RPT(r1), RPT(r2), RPT(r3),
  15.206 +
  15.207 +#ifdef  CONFIG_VTI
  15.208 +	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
  15.209 +#else   //CONFIG_VTI
  15.210 +	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
  15.211 +#endif  //CONFIG_VTI
  15.212 +
  15.213 +	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
  15.214 +	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
  15.215 +
  15.216 +	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
  15.217 +	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
  15.218 +	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
  15.219 +	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
  15.220 +};
  15.221 +
  15.222 +static u16 fr_info[32]={
  15.223 +	0,			/* constant : WE SHOULD NEVER GET THIS */
  15.224 +	0,			/* constant : WE SHOULD NEVER GET THIS */
  15.225 +
  15.226 +	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
  15.227 +
  15.228 +	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
  15.229 +	RPT(f10), RPT(f11),
  15.230 +
  15.231 +	RSW(f12), RSW(f13), RSW(f14),
  15.232 +	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
  15.233 +	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
  15.234 +	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
  15.235 +	RSW(f30), RSW(f31)
  15.236 +};
  15.237 +
  15.238 +/* Invalidate ALAT entry for integer register REGNO.  */
  15.239 +static void
  15.240 +invala_gr (int regno)
  15.241 +{
  15.242 +#	define F(reg)	case reg: ia64_invala_gr(reg); break
  15.243 +
  15.244 +	switch (regno) {
  15.245 +		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
  15.246 +		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
  15.247 +		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
  15.248 +		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
  15.249 +		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
  15.250 +		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
  15.251 +		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
  15.252 +		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
  15.253 +		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
  15.254 +		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
  15.255 +		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
  15.256 +		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
  15.257 +		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
  15.258 +		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
  15.259 +		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
  15.260 +		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
  15.261 +	}
  15.262 +#	undef F
  15.263 +}
  15.264 +
  15.265 +/* Invalidate the ALAT entry for floating-point register REGNO (0-127); other values match no case and do nothing.  */
  15.266 +static void
  15.267 +invala_fr (int regno)
  15.268 +{
  15.269 +#	define F(reg)	case reg: ia64_invala_fr(reg); break
  15.270 +	/* One case per register, so ia64_invala_fr() is always handed a literal register number. */
  15.271 +	switch (regno) {
  15.272 +		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
  15.273 +		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
  15.274 +		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
  15.275 +		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
  15.276 +		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
  15.277 +		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
  15.278 +		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
  15.279 +		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
  15.280 +		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
  15.281 +		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
  15.282 +		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
  15.283 +		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
  15.284 +		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
  15.285 +		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
  15.286 +		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
  15.287 +		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
  15.288 +	}
  15.289 +#	undef F
  15.290 +}
  15.291 +
  15.292 +static inline unsigned long
  15.293 +rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
  15.294 +{
  15.295 +	/* Offset REG by the rotation base RRB, wrapping around once at SOR. */
  15.296 +	unsigned long rotated = reg + rrb;
  15.297 +
  15.298 +	return (rotated >= sor) ? rotated - sor : rotated;
  15.299 +}
  15.300 +
  15.301 +#ifdef CONFIG_VTI
  15.302 +static void
  15.303 +set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, unsigned long nat)
  15.304 +{
  15.305 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
  15.306 +	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
  15.307 +	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
  15.308 +	unsigned long rnats, nat_mask;
  15.309 +    unsigned long old_rsc,new_rsc;
  15.310 +	unsigned long on_kbs,rnat;
  15.311 +	long sof = (regs->cr_ifs) & 0x7f;	/* cr_ifs bits 0-6 */
  15.312 +	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);	/* cr_ifs bits 14-17, times 8 */
  15.313 +	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;	/* cr_ifs bits 18-24 */
  15.314 +	long ridx = r1 - 32;	/* index of r1 relative to r32 */
  15.315 +	/* CONFIG_VTI variant: store VAL into stacked register r<R1> of the frame described by regs->cr_ifs. */
  15.316 +	if (ridx >= sof) {
  15.317 +		/* this should never happen, as the "rsvd register fault" has higher priority */
  15.318 +		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
  15.319 +		return;
  15.320 +	}
  15.321 +	/* indices below SOR are remapped through rotate_reg() using the rrb_gr base */
  15.322 +	if (ridx < sor)
  15.323 +		ridx = rotate_reg(sor, rrb_gr, ridx);
  15.324 +	/* save RSC and clear its two low bits while the backing store is touched; restored on exit */
  15.325 +    old_rsc=ia64_get_rsc();
  15.326 +    new_rsc=old_rsc&(~0x3);
  15.327 +    ia64_set_rsc(new_rsc);
  15.328 +	/* bsp is taken as kbs advanced by (regs->loadrs >> 19) 8-byte slots */
  15.329 +    bspstore = ia64_get_bspstore();
  15.330 +    bsp =kbs + (regs->loadrs >> 19);//16+3
  15.331 +	/* addr: slot of the register; nat_mask/rnat_addr: its bit and word as given by the ia64_rse_* helpers */
  15.332 +	addr = ia64_rse_skip_regs(bsp, -sof + ridx);
  15.333 +    nat_mask = 1UL << ia64_rse_slot_num(addr);
  15.334 +	rnat_addr = ia64_rse_rnat_addr(addr);
  15.335 +	/* NOTE(review): NAT is never read; both branches below only ever clear the NaT bit -- confirm intended. */
  15.336 +    if(addr >= bspstore){
  15.337 +	/* slot is at or above bspstore: flushrs first, then store, then loadrs */
  15.338 +        ia64_flushrs ();
  15.339 +        ia64_mf ();
  15.340 +		*addr = val;
  15.341 +        bspstore = ia64_get_bspstore();
  15.342 +    	rnat = ia64_get_rnat ();
  15.343 +        if(bspstore < rnat_addr){
  15.344 +            rnat=rnat&(~nat_mask);	/* clear the bit in the value later written back with ia64_set_rnat() */
  15.345 +        }else{
  15.346 +            *rnat_addr = (*rnat_addr)&(~nat_mask);	/* clear the bit in the in-memory word */
  15.347 +        }
  15.348 +        ia64_mf();
  15.349 +        ia64_loadrs();
  15.350 +        ia64_set_rnat(rnat);
  15.351 +    }else{
  15.352 +	/* slot is below bspstore: store directly, no flush */
  15.353 +    	rnat = ia64_get_rnat ();
  15.354 +		*addr = val;
  15.355 +        if(bspstore < rnat_addr){
  15.356 +            rnat=rnat&(~nat_mask);
  15.357 +        }else{
  15.358 +            *rnat_addr = (*rnat_addr)&(~nat_mask);
  15.359 +        }
  15.360 +        ia64_set_bspstore (bspstore);	/* writes back the bspstore value read above */
  15.361 +        ia64_set_rnat(rnat);
  15.362 +    }
  15.363 +    ia64_set_rsc(old_rsc);
  15.364 +}
  15.365 +
  15.366 +
  15.367 +static void
  15.368 +get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, unsigned long *nat)
  15.369 +{
  15.370 +	/* CONFIG_VTI variant: read stacked register r<R1> into *VAL and, if NAT is non-NULL, its NaT bit into *NAT. */
  15.371 +	unsigned long *bsp, *addr, *rnat_addr, *bspstore;
  15.372 +	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
  15.373 +	unsigned long rnats, nat_mask;
  15.374 +	/* NOTE(review): getreg() forwards an `int *' as NAT while this takes `unsigned long *' -- confirm. */
  15.375 +	unsigned long old_rsc, new_rsc;
  15.376 +	long sof = (regs->cr_ifs) & 0x7f;
  15.377 +	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
  15.378 +	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
  15.379 +	long ridx = r1 - 32;
  15.380 +
  15.381 +	if (ridx >= sof) {
  15.382 +		/* a read outside the current frame is treated as fatal in this variant */
  15.383 +		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
  15.384 +		panic("wrong stack register number");
  15.385 +	}
  15.386 +
  15.387 +	if (ridx < sor)
  15.388 +		ridx = rotate_reg(sor, rrb_gr, ridx);
  15.389 +	/* clear the two low RSC bits while the backing store is read; restored before returning */
  15.390 +	old_rsc = ia64_get_rsc();
  15.391 +	new_rsc = old_rsc & (~(0x3));
  15.392 +	ia64_set_rsc(new_rsc);
  15.393 +
  15.394 +	bspstore = ia64_get_bspstore();
  15.395 +	bsp = kbs + (regs->loadrs >> 19);	/* shift by 16+3 */
  15.396 +
  15.397 +	addr = ia64_rse_skip_regs(bsp, -sof + ridx);
  15.398 +	nat_mask = 1UL << ia64_rse_slot_num(addr);
  15.399 +	rnat_addr = ia64_rse_rnat_addr(addr);
  15.400 +
  15.401 +	if (addr >= bspstore) {
  15.402 +		/* slot is not below bspstore yet: flush the register stack and re-read bspstore */
  15.403 +		ia64_flushrs ();
  15.404 +		ia64_mf ();
  15.405 +		bspstore = ia64_get_bspstore();
  15.406 +	}
  15.407 +	*val = *addr;
  15.408 +	/* NAT may be NULL: emulate_store_int() calls getreg() with a NULL nat pointer */
  15.409 +	if (nat) {
  15.410 +		rnats = (bspstore < rnat_addr) ? ia64_get_rnat() : *rnat_addr;
  15.411 +		*nat = !!(rnats & nat_mask);
  15.412 +	}
  15.413 +	ia64_set_rsc(old_rsc);
  15.414 +}
  15.415 +#else // CONFIG_VTI
  15.416 +static void
  15.417 +set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
  15.418 +{
  15.419 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
  15.420 +	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
  15.421 +	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
  15.422 +	unsigned long rnats, nat_mask;
  15.423 +	unsigned long on_kbs;
  15.424 +	long sof = (regs->cr_ifs) & 0x7f;
  15.425 +	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
  15.426 +	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
  15.427 +	long ridx = r1 - 32;
  15.428 +	/* Non-VTI variant: write VAL and NaT bit NAT into stacked register r<R1>; out-of-frame writes are ignored. */
  15.429 +	if (ridx >= sof) {
  15.430 +		/* this should never happen, as the "rsvd register fault" has higher priority */
  15.431 +		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
  15.432 +		return;
  15.433 +	}
  15.434 +	/* indices below SOR are remapped through rotate_reg() using the rrb_gr base */
  15.435 +	if (ridx < sor)
  15.436 +		ridx = rotate_reg(sor, rrb_gr, ridx);
  15.437 +
  15.438 +	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
  15.439 +	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
  15.440 +	/* on_kbs: ia64_rse_num_regs() between kbs and sw->ar_bspstore; addr: slot (-sof + ridx) from sw->ar_bspstore */
  15.441 +	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
  15.442 +	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
  15.443 +	if (addr >= kbs) {
  15.444 +		/* the register is on the kernel backing store: easy... */
  15.445 +		rnat_addr = ia64_rse_rnat_addr(addr);
  15.446 +		if ((unsigned long) rnat_addr >= sw->ar_bspstore)
  15.447 +			rnat_addr = &sw->ar_rnat;	/* RNAT word at/after ar_bspstore: use the saved ar_rnat instead */
  15.448 +		nat_mask = 1UL << ia64_rse_slot_num(addr);
  15.449 +		/* store the value, then set or clear its NaT bit according to NAT */
  15.450 +		*addr = val;
  15.451 +		if (nat)
  15.452 +			*rnat_addr |=  nat_mask;
  15.453 +		else
  15.454 +			*rnat_addr &= ~nat_mask;
  15.455 +		return;
  15.456 +	}
  15.457 +	/* not on the kernel backing store: only handled when user_stack() accepts the context */
  15.458 +	if (!user_stack(current, regs)) {
  15.459 +		DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
  15.460 +		return;
  15.461 +	}
  15.462 +	/* recompute the slot relative to regs->ar_bspstore */
  15.463 +	bspstore = (unsigned long *)regs->ar_bspstore;
  15.464 +	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
  15.465 +	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
  15.466 +	addr    = ia64_rse_skip_regs(bsp, ridx);
  15.467 +
  15.468 +	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
  15.469 +	/* NOTE(review): the return values of ia64_poke()/ia64_peek() are not checked in this function. */
  15.470 +	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
  15.471 +
  15.472 +	rnat_addr = ia64_rse_rnat_addr(addr);
  15.473 +	/* read-modify-write of the RNAT word that covers addr */
  15.474 +	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
  15.475 +	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
  15.476 +	       (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
  15.477 +
  15.478 +	nat_mask = 1UL << ia64_rse_slot_num(addr);
  15.479 +	if (nat)
  15.480 +		rnats |=  nat_mask;
  15.481 +	else
  15.482 +		rnats &= ~nat_mask;
  15.483 +	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
  15.484 +
  15.485 +	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
  15.486 +}
  15.487 +
  15.488 +
  15.489 +static void
  15.490 +get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
  15.491 +{
  15.492 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
  15.493 +	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
  15.494 +	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
  15.495 +	unsigned long rnats, nat_mask;
  15.496 +	unsigned long on_kbs;
  15.497 +	long sof = (regs->cr_ifs) & 0x7f;
  15.498 +	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
  15.499 +	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
  15.500 +	long ridx = r1 - 32;
  15.501 +	/* Non-VTI variant: read stacked register r<R1> into *VAL and, if NAT is non-NULL, its NaT bit into *NAT. */
  15.502 +	if (ridx >= sof) {
  15.503 +		/* read of out-of-frame register returns an undefined value; 0 in our case.  */
  15.504 +		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
  15.505 +		goto fail;
  15.506 +	}
  15.507 +	/* indices below SOR are remapped through rotate_reg() using the rrb_gr base */
  15.508 +	if (ridx < sor)
  15.509 +		ridx = rotate_reg(sor, rrb_gr, ridx);
  15.510 +
  15.511 +	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
  15.512 +	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
  15.513 +
  15.514 +	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
  15.515 +	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
  15.516 +	if (addr >= kbs) {
  15.517 +		/* the register is on the kernel backing store: easy... */
  15.518 +		*val = *addr;
  15.519 +		if (nat) {
  15.520 +			rnat_addr = ia64_rse_rnat_addr(addr);
  15.521 +			if ((unsigned long) rnat_addr >= sw->ar_bspstore)
  15.522 +				rnat_addr = &sw->ar_rnat;
  15.523 +			nat_mask = 1UL << ia64_rse_slot_num(addr);
  15.524 +			*nat = (*rnat_addr & nat_mask) != 0;
  15.525 +		}
  15.526 +		return;
  15.527 +	}
  15.528 +	/* not on the kernel backing store: only handled when user_stack() accepts the context */
  15.529 +	if (!user_stack(current, regs)) {
  15.530 +		DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
  15.531 +		goto fail;
  15.532 +	}
  15.533 +	/* recompute the slot relative to regs->ar_bspstore */
  15.534 +	bspstore = (unsigned long *)regs->ar_bspstore;
  15.535 +	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
  15.536 +	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
  15.537 +	addr    = ia64_rse_skip_regs(bsp, ridx);
  15.538 +
  15.539 +	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
  15.540 +
  15.541 +	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
  15.542 +
  15.543 +	if (nat) {
  15.544 +		rnat_addr = ia64_rse_rnat_addr(addr);
  15.545 +		nat_mask = 1UL << ia64_rse_slot_num(addr);
  15.546 +		/* fetch the RNAT word before tracing it, so the trace never reads an unset local */
  15.547 +		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
  15.548 +
  15.549 +		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
  15.550 +		*nat = (rnats & nat_mask) != 0;
  15.551 +	}
  15.552 +	return;
  15.553 +
  15.554 +  fail:
  15.555 +	*val = 0;	/* failure paths report value 0 with a clear NaT bit */
  15.556 +	if (nat)
  15.557 +		*nat = 0;
  15.558 +	return;
  15.559 +}
  15.560 +#endif // CONFIG_VTI
  15.561 +
  15.562 +
  15.563 +#ifdef XEN
  15.564 +void
  15.565 +#else
  15.566 +static void
  15.567 +#endif
  15.568 +setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
  15.569 +{
  15.570 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
  15.571 +	unsigned long *unat;	/* UNAT word that holds the target's NaT bit */
  15.572 +	unsigned long slot;	/* address of the saved copy of the register */
  15.573 +	unsigned long nat_bit;
  15.574 +
  15.575 +	/*
  15.576 +	 * Write VAL, together with NaT bit NAT, into general register REGNUM of the
  15.577 +	 * context saved in REGS and in the switch_stack placed just below it.
  15.578 +	 *
  15.579 +	 * Stacked registers are delegated to set_rse_reg().
  15.580 +	 */
  15.581 +	if (regnum >= IA64_FIRST_STACKED_GR) {
  15.582 +		set_rse_reg(regs, regnum, val, nat);
  15.583 +		return;
  15.584 +	}
  15.585 +
  15.586 +	/*
  15.587 +	 * r0 never shows up as a target here: using it raises a General Exception fault,
  15.588 +	 * which has higher priority than the Unaligned Reference fault.
  15.589 +	 */
  15.590 +
  15.591 +	/*
  15.592 +	 * Registers in the [0-31] range: pick the structure that holds the saved copy
  15.593 +	 * and the UNAT word that goes with it
  15.594 +	 */
  15.595 +	if (!GR_IN_SW(regnum)) {
  15.596 +		slot = (unsigned long)regs;
  15.597 +#ifdef CONFIG_VTI
  15.598 +		unat = &regs->eml_unat;
  15.599 +#else //CONFIG_VTI
  15.600 +		unat = &sw->caller_unat;
  15.601 +#endif  //CONFIG_VTI
  15.602 +	} else {
  15.603 +		slot = (unsigned long)sw;
  15.604 +		unat = &sw->ar_unat;
  15.605 +	}
  15.606 +	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
  15.607 +	       slot, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
  15.608 +
  15.609 +	/*
  15.610 +	 * step from the base of the structure to the register's slot and store the value
  15.611 +	 */
  15.612 +	slot += GR_OFFS(regnum);
  15.613 +	*(unsigned long *)slot = val;
  15.614 +
  15.615 +	/*
  15.616 +	 * Set or clear the matching UNAT bit according to NAT to fully emulate the load
  15.617 +	 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
  15.618 +	 */
  15.619 +	nat_bit = 1UL << (slot >> 3 & 0x3f);
  15.620 +	DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", slot, val, nat, (void *) unat, *unat);
  15.621 +
  15.622 +	*unat = nat ? (*unat | nat_bit) : (*unat & ~nat_bit);
  15.623 +	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", slot, val, nat, (void *) unat,*unat);
  15.624 +}
  15.625 +
  15.626 +/*
  15.627 + * Map floating point register REGNUM (which must be in the range 32-127) to its rotated
  15.628 + * index in the range 0-95, using the rotation base held in bits 25-31 of regs->cr_ifs.
  15.629 + */
  15.630 +static inline unsigned long
  15.631 +fph_index (struct pt_regs *regs, long regnum)
  15.632 +{
  15.633 +	unsigned long unrotated = regnum - IA64_FIRST_ROTATING_FR;
  15.634 +	return rotate_reg(96, (regs->cr_ifs >> 25) & 0x7f, unrotated);
  15.635 +}
  15.636 +
  15.637 +static void
  15.638 +setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
  15.639 +{
  15.640 +	struct switch_stack *sw = (struct switch_stack *)regs - 1;
  15.641 +	struct ia64_fpreg *slot;
  15.642 +	unsigned long base;
  15.643 +
  15.644 +	/*
  15.645 +	 * Store *FPVAL into floating-point register REGNUM of the interrupted context.
  15.646 +	 * Registers from IA64_FIRST_ROTATING_FR up are written to the thread's fph array;
  15.647 +	 * lower ones are written to their saved copy in switch_stack or pt_regs.
  15.648 +	 *
  15.649 +	 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
  15.650 +	 * Fault. Thus, when we get here, we know the partition is enabled.
  15.651 +	 * To update f32-f127, there are three choices:
  15.652 +	 *
  15.653 +	 *	(1) save f32-f127 to thread.fph and update the values there
  15.654 +	 *	(2) use a gigantic switch statement to directly access the registers
  15.655 +	 *	(3) generate code on the fly to update the desired register
  15.656 +	 *
  15.657 +	 * For now, we are using approach (1).
  15.658 +	 */
  15.659 +	if (regnum >= IA64_FIRST_ROTATING_FR) {
  15.660 +		ia64_sync_fph(current);
  15.661 +#ifdef XEN
  15.662 +		current->arch._thread.fph[fph_index(regs, regnum)] = *fpval;
  15.663 +#else
  15.664 +		current->thread.fph[fph_index(regs, regnum)] = *fpval;
  15.665 +#endif
  15.666 +		return;
  15.667 +	}
  15.668 +
  15.669 +	/* low partition: the saved copy lives in switch_stack or in pt_regs */
  15.670 +	base = FR_IN_SW(regnum) ? (unsigned long)sw : (unsigned long)regs;
  15.671 +
  15.672 +	DPRINT("tmp_base=%lx offset=%d\n", base, FR_OFFS(regnum));
  15.673 +
  15.674 +	slot = (struct ia64_fpreg *) (base + FR_OFFS(regnum));
  15.675 +	*slot = *fpval;
  15.676 +
  15.677 +	/*
  15.678 +	 * mark the low partition as being used now
  15.679 +	 *
  15.680 +	 * It is highly unlikely that this bit is not already set, but
  15.681 +	 * let's do it for safety.
  15.682 +	 */
  15.683 +	regs->cr_ipsr |= IA64_PSR_MFL;
  15.684 +}
  15.685 +
  15.686 +/*
  15.687 + * These two inline helpers use ia64_stf_spill() to write the spilled form of the constant
  15.688 + * floating-point registers (numbers 0 and 1) into *final, for later use with stfX.
  15.689 + */
  15.690 +static inline void
  15.691 +float_spill_f0 (struct ia64_fpreg *final)
  15.692 +{
  15.693 +	ia64_stf_spill(final, 0);	/* register number 0 */
  15.694 +}
  15.695 +
  15.696 +static inline void
  15.697 +float_spill_f1 (struct ia64_fpreg *final)
  15.698 +{
  15.699 +	ia64_stf_spill(final, 1);	/* spill register number 1 into *final */
  15.700 +}
  15.701 +
  15.702 +static void
  15.703 +getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
  15.704 +{
  15.705 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
  15.706 +	unsigned long base;
  15.707 +
  15.708 +	/*
  15.709 +	 * Copy floating-point register REGNUM of the interrupted context into *FPVAL.
  15.710 +	 *
  15.711 +	 * From EAS-2.5: FPDisableFault has higher priority than
  15.712 +	 * Unaligned Fault. Thus, when we get here, we know the partition is
  15.713 +	 * enabled.
  15.714 +	 *
  15.715 +	 * When regnum > 31, the register is still live and we need to force a save
  15.716 +	 * to current->thread.fph to get access to it.  See discussion in setfpreg()
  15.717 +	 * for reasons and other ways of doing this.
  15.718 +	 */
  15.719 +	if (regnum >= IA64_FIRST_ROTATING_FR) {
  15.720 +		ia64_flush_fph(current);
  15.721 +#ifdef XEN
  15.722 +		*fpval = current->arch._thread.fph[fph_index(regs, regnum)];
  15.723 +#else
  15.724 +		*fpval = current->thread.fph[fph_index(regs, regnum)];
  15.725 +#endif
  15.726 +		return;
  15.727 +	}
  15.728 +
  15.729 +	/*
  15.730 +	 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
  15.731 +	 * not saved, we must generate their spilled form on the fly
  15.732 +	 */
  15.733 +	if (regnum == 0) {
  15.734 +		float_spill_f0(fpval);
  15.735 +		return;
  15.736 +	}
  15.737 +	if (regnum == 1) {
  15.738 +		float_spill_f1(fpval);
  15.739 +		return;
  15.740 +	}
  15.741 +
  15.742 +	/* everything else is saved either in switch_stack or in pt_regs */
  15.743 +	base = FR_IN_SW(regnum) ? (unsigned long)sw : (unsigned long)regs;
  15.744 +
  15.745 +	DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
  15.746 +	       FR_IN_SW(regnum), base, FR_OFFS(regnum));
  15.747 +
  15.748 +	base += FR_OFFS(regnum);
  15.749 +	*fpval = *(struct ia64_fpreg *)base;
  15.750 +}
  15.751 +
  15.752 +
  15.753 +#ifdef XEN
  15.754 +void
  15.755 +#else
  15.756 +static void
  15.757 +#endif
  15.758 +getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
  15.759 +{
  15.760 +	struct switch_stack *sw = (struct switch_stack *) regs - 1;
  15.761 +	unsigned long slot, *unat;
  15.762 +
  15.763 +	/*
  15.764 +	 * Fetch general register REGNUM of the interrupted context into *VAL and, when NAT
  15.765 +	 * is non-NULL, its NaT bit into *NAT.  Stacked registers go through get_rse_reg().
  15.766 +	 */
  15.767 +	if (regnum >= IA64_FIRST_STACKED_GR) {
  15.768 +		get_rse_reg(regs, regnum, val, nat);
  15.769 +		return;
  15.770 +	}
  15.771 +
  15.772 +	/* r0 is read-only and always evaluates to 0 */
  15.773 +	if (regnum == 0) {
  15.774 +		*val = 0;
  15.775 +		if (nat)
  15.776 +			*nat = 0;
  15.777 +		return;
  15.778 +	}
  15.779 +
  15.780 +	/*
  15.781 +	 * Remaining registers in the [0-31] range: pick the structure that holds the
  15.782 +	 * saved copy and the UNAT word that goes with it
  15.783 +	 */
  15.784 +	if (!GR_IN_SW(regnum)) {
  15.785 +		slot = (unsigned long)regs;
  15.786 +#ifdef  CONFIG_VTI
  15.787 +		unat = &regs->eml_unat;
  15.788 +#else   //CONFIG_VTI
  15.789 +		unat = &sw->caller_unat;
  15.790 +#endif  //CONFIG_VTI
  15.791 +	} else {
  15.792 +		slot = (unsigned long)sw;
  15.793 +		unat = &sw->ar_unat;
  15.794 +	}
  15.795 +
  15.796 +	DPRINT("addr_base=%lx offset=0x%x\n", slot,  GR_OFFS(regnum));
  15.797 +
  15.798 +	slot += GR_OFFS(regnum);
  15.799 +	*val  = *(unsigned long *)slot;
  15.800 +
  15.801 +	/* the NaT bit is only looked up when the caller asked for it */
  15.802 +	if (nat)
  15.803 +		*nat  = (*unat >> (slot >> 3 & 0x3f)) & 0x1UL;
  15.804 +}
  15.805 +
  15.806 +static void
  15.807 +emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
  15.808 +{
  15.809 +	/*
  15.810 +	 * Apply the base-register update of an emulated load: ld.r3 is rewritten with
  15.811 +	 * IFA plus either a 9-bit immediate (UPD_IMMEDIATE) or a register value (ld.m set).
  15.812 +	 *
  15.813 +	 * IMPORTANT: given the way we handle unaligned speculative loads, we should not
  15.814 +	 * get to this point in the code for them, but we keep this sanity check just in case.
  15.815 +	 */
  15.816 +	if (ld.x6_op == 1 || ld.x6_op == 3) {
  15.817 +		printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
  15.818 +		die_if_kernel("unaligned reference on speculative load with register update\n",
  15.819 +			      regs, 30);
  15.820 +	}
  15.821 +
  15.822 +
  15.823 +	/*
  15.824 +	 * at this point, we know that the base register to update is valid i.e.,
  15.825 +	 * it's not r0
  15.826 +	 */
  15.827 +	if (type == UPD_IMMEDIATE) {
  15.828 +		unsigned long imm;
  15.829 +
  15.830 +		/*
  15.831 +		 * Load +Imm: ldXZ r1=[r3],imm(9)
  15.832 +		 *
  15.833 +		 *
  15.834 +		 * form imm9: [13:19] contain the first 7 bits
  15.835 +		 */
  15.836 +		imm = ld.x << 7 | ld.imm;	/* ld.x supplies bit 7, ld.imm the low bits */
  15.837 +
  15.838 +		/*
  15.839 +		 * sign extend (1+8bits) if m set
  15.840 +		 */
  15.841 +		if (ld.m) imm |= SIGN_EXT9;
  15.842 +
  15.843 +		/*
  15.844 +		 * ifa == r3 and we know that the NaT bit on r3 was clear so
  15.845 +		 * we can directly use ifa.
  15.846 +		 */
  15.847 +		ifa += imm;
  15.848 +
  15.849 +		setreg(ld.r3, ifa, 0, regs);
  15.850 +
  15.851 +		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
  15.852 +
  15.853 +	} else if (ld.m) {
  15.854 +		unsigned long r2;
  15.855 +		int nat_r2;
  15.856 +
  15.857 +		/*
  15.858 +		 * Load +Reg Opcode: ldXZ r1=[r3],r2
  15.859 +		 *
  15.860 +		 * Note: that we update r3 even in the case of ldfX.a
  15.861 +		 * (where the load does not happen)
  15.862 +		 *
  15.863 +		 * The way the load algorithm works, we know that r3 does not
  15.864 +		 * have its NaT bit set (would have gotten NaT consumption
  15.865 +		 * before getting the unaligned fault). So we can use ifa
  15.866 +		 * which equals r3 at this point.
  15.867 +		 *
  15.868 +		 * IMPORTANT:
  15.869 +		 * The above statement holds ONLY because we know that we
  15.870 +		 * never reach this code when trying to do a ldX.s.
  15.871 +		 * If we ever got here on an ldX.s, r3's NaT bit could no longer be assumed clear.
  15.872 +		 */
  15.873 +		getreg(ld.imm, &r2, &nat_r2, regs);	/* the r2 register number is taken from the ld.imm field */
  15.874 +
  15.875 +		ifa += r2;
  15.876 +
  15.877 +		/*
  15.878 +		 * propagate Nat r2 -> r3
  15.879 +		 */
  15.880 +		setreg(ld.r3, ifa, nat_r2, regs);
  15.881 +
  15.882 +		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
  15.883 +	}
  15.884 +
  15.885 +
  15.886 +static int
  15.887 +emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
  15.888 +{
  15.889 +	unsigned int len = 1 << ld.x6_sz;	/* access size in bytes */
  15.890 +	unsigned long val = 0;	/* zeroed so that bytes beyond LEN stay 0 after the copy */
  15.891 +	/* Emulate an integer load from IFA into ld.r1; returns 0 on success, -1 on bad size or failed copy. */
  15.892 +	/*
  15.893 +	 * r0, as target, doesn't need to be checked because Illegal Instruction
  15.894 +	 * faults have higher priority than unaligned faults.
  15.895 +	 *
  15.896 +	 * r0 cannot be found as the base as it would never generate an
  15.897 +	 * unaligned reference.
  15.898 +	 */
  15.899 +
  15.900 +	/*
  15.901 +	 * ldX.a we will emulate load and also invalidate the ALAT entry.
  15.902 +	 * See comment below for explanation on how we handle ldX.a
  15.903 +	 */
  15.904 +
  15.905 +	if (len != 2 && len != 4 && len != 8) {	/* only 2-, 4- and 8-byte accesses are handled */
  15.906 +		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
  15.907 +		return -1;
  15.908 +	}
  15.909 +	/* this assumes little-endian byte-order: */
  15.910 +	if (copy_from_user(&val, (void __user *) ifa, len))
  15.911 +		return -1;
  15.912 +	setreg(ld.r1, val, 0, regs);	/* target register gets the value with a clear NaT bit */
  15.913 +
  15.914 +	/*
  15.915 +	 * check for updates on any kind of loads
  15.916 +	 */
  15.917 +	if (ld.op == 0x5 || ld.m)
  15.918 +		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
  15.919 +
  15.920 +	/*
  15.921 +	 * handling of various loads (based on EAS2.4):
  15.922 +	 *
  15.923 +	 * ldX.acq (ordered load):
  15.924 +	 *	- acquire semantics would have been used, so force fence instead.
  15.925 +	 *
  15.926 +	 * ldX.c.clr (check load and clear):
  15.927 +	 *	- if we get to this handler, it's because the entry was not in the ALAT.
  15.928 +	 *	  Therefore the operation reverts to a normal load
  15.929 +	 *
  15.930 +	 * ldX.c.nc (check load no clear):
  15.931 +	 *	- same as previous one
  15.932 +	 *
  15.933 +	 * ldX.c.clr.acq (ordered check load and clear):
  15.934 +	 *	- same as above for c.clr part. The load needs to have acquire semantics. So
  15.935 +	 *	  we use the fence semantics which is stronger and thus ensures correctness.
  15.936 +	 *
  15.937 +	 * ldX.a (advanced load):
  15.938 +	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
  15.939 +	 *	  address doesn't match requested size alignment. This means that we would
  15.940 +	 *	  possibly need more than one load to get the result.
  15.941 +	 *
  15.942 +	 *	  The load part can be handled just like a normal load, however the difficult
  15.943 +	 *	  part is to get the right thing into the ALAT. The critical piece of information
  15.944 +	 *	  is the base address of the load & size. To do that, a ld.a must be executed,
  15.945 +	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
  15.946 +	 *	  if we use the same target register, we will be okay for the check.a instruction.
  15.947 +	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
  15.948 +	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
  15.949 +	 *	  ALAT). If such an entry is found the entry is invalidated. But this is not good
  15.950 +	 *	  enough, take the following example:
  15.951 +	 *		r3=3
  15.952 +	 *		ld4.a r1=[r3]
  15.953 +	 *
  15.954 +	 *	  Could be emulated by doing:
  15.955 +	 *		ld1.a r1=[r3],1
  15.956 +	 *		store to temporary;
  15.957 +	 *		ld1.a r1=[r3],1
  15.958 +	 *		store & shift to temporary;
  15.959 +	 *		ld1.a r1=[r3],1
  15.960 +	 *		store & shift to temporary;
  15.961 +	 *		ld1.a r1=[r3]
  15.962 +	 *		store & shift to temporary;
  15.963 +	 *		r1=temporary
  15.964 +	 *
  15.965 +	 *	  So in this case, you would get the right value in r1 but the wrong info in
  15.966 +	 *	  the ALAT.  Notice that you could do it in reverse to finish with address 3
  15.967 +	 *	  but you would still get the size wrong.  To get the size right, one needs to
  15.968 +	 *	  execute exactly the same kind of load. You could do it from an aligned
  15.969 +	 *	  temporary location, but you would get the address wrong.
  15.970 +	 *
  15.971 +	 *	  So no matter what, it is not possible to emulate an advanced load
  15.972 +	 *	  correctly. But is that really critical ?
  15.973 +	 *
  15.974 +	 *	  We will always convert ld.a into a normal load with ALAT invalidated.  This
  15.975 +	 *	  will enable compiler to do optimization where certain code path after ld.a
  15.976 +	 *	  is not required to have ld.c/chk.a, e.g., code path with no intervening stores.
  15.977 +	 *
  15.978 +	 *	  If there is a store after the advanced load, one must either do a ld.c.* or
  15.979 +	 *	  chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
  15.980 +	 *	  entry found in ALAT), and that's perfectly ok because:
  15.981 +	 *
  15.982 +	 *		- ld.c.*, if the entry is not present a  normal load is executed
  15.983 +	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
  15.984 +	 *
  15.985 +	 *	  In either case, the load can be potentially retried in another form.
  15.986 +	 *
  15.987 +	 *	  ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
  15.988 +	 *	  up a stale entry later). The register base update MUST also be performed.
  15.989 +	 */
  15.990 +
  15.991 +	/*
  15.992 +	 * when the load has the .acq completer then
  15.993 +	 * use ordering fence.
  15.994 +	 */
  15.995 +	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
  15.996 +		mb();
  15.997 +
  15.998 +	/*
  15.999 +	 * invalidate ALAT entry in case of advanced load
 15.1000 +	 */
 15.1001 +	if (ld.x6_op == 0x2)
 15.1002 +		invala_gr(ld.r1);
 15.1003 +
 15.1004 +	return 0;
 15.1005 +}
 15.1006 +
 15.1007 +static int
 15.1008 +emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 15.1009 +{
 15.1010 +	unsigned long r2;
 15.1011 +	unsigned int len = 1 << ld.x6_sz;	/* access size in bytes */
 15.1012 +	/* Emulate an integer store of the source register to IFA; returns 0 on success, -1 on bad size or failed copy. */
 15.1013 +	/*
 15.1014 +	 * if we get to this handler, Nat bits on both r3 and r2 have already
 15.1015 +	 * been checked. so we don't need to do it
 15.1016 +	 *
 15.1017 +	 * extract the value to be stored
 15.1018 +	 */
 15.1019 +	getreg(ld.imm, &r2, NULL, regs);	/* source register number comes from ld.imm; NaT bit not requested */
 15.1020 +
 15.1021 +	/*
 15.1022 +	 * we rely on the macros in unaligned.h for now i.e., we let the compiler figure out
 15.1023 +	 * how to read memory gracefully.  We need this switch/case because the way the inline
 15.1024 +	 * function works. The code is optimized by the compiler and looks like a single
 15.1025 +	 * switch/case.
 15.1026 +	 *
 15.1027 +	 * NOTE(review): the code below uses copy_to_user() and has no switch/case; this comment looks stale.
 15.1028 +	 */
 15.1029 +	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
 15.1030 +
 15.1031 +	if (len != 2 && len != 4 && len != 8) {	/* only 2-, 4- and 8-byte accesses are handled */
 15.1032 +		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 15.1033 +		return -1;
 15.1034 +	}
 15.1035 +
 15.1036 +	/* this assumes little-endian byte-order: */
 15.1037 +	if (copy_to_user((void __user *) ifa, &r2, len))
 15.1038 +		return -1;
 15.1039 +
 15.1040 +	/*
 15.1041 +	 * stX [r3]=r2,imm(9)
 15.1042 +	 *
 15.1043 +	 * NOTE:
 15.1044 +	 * ld.r3 can never be r0, because r0 would not generate an
 15.1045 +	 * unaligned access.
 15.1046 +	 */
 15.1047 +	if (ld.op == 0x5) {
 15.1048 +		unsigned long imm;
 15.1049 +
 15.1050 +		/*
 15.1051 +		 * form imm9: [12:6] contain first 7bits
 15.1052 +		 */
 15.1053 +		imm = ld.x << 7 | ld.r1;	/* for stores the low immediate bits come from the ld.r1 field */
 15.1054 +		/*
 15.1055 +		 * sign extend (8bits) if m set
 15.1056 +		 */
 15.1057 +		if (ld.m) imm |= SIGN_EXT9;
 15.1058 +		/*
 15.1059 +		 * ifa == r3 (NaT is necessarily cleared)
 15.1060 +		 */
 15.1061 +		ifa += imm;
 15.1062 +
 15.1063 +		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
 15.1064 +
 15.1065 +		setreg(ld.r3, ifa, 0, regs);
 15.1066 +	}
 15.1067 +	/*
 15.1068 +	 * we don't have alat_invalidate_multiple() so we need
 15.1069 +	 * to do the complete flush :-<<
 15.1070 +	 */
 15.1071 +	ia64_invala();
 15.1072 +
 15.1073 +	/*
 15.1074 +	 * stX.rel: use fence instead of release
 15.1075 +	 */
 15.1076 +	if (ld.x6_op == 0xd)
 15.1077 +		mb();
 15.1078 +
 15.1079 +	return 0;
 15.1080 +}
 15.1081 +
 15.1082 +/*
 15.1083 + * floating point operation sizes in bytes, looked up with the instruction's x6_sz field
 15.1084 + */
 15.1085 +static const unsigned char float_fsz[4]={
 15.1086 +	10, /* extended precision (e) */
 15.1087 +	8,  /* integer (8)            */
 15.1088 +	4,  /* single precision (s)   */
 15.1089 +	8   /* double precision (d)   */
 15.1090 +};
 15.1091 +
 15.1092 +static inline void
 15.1093 +mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 15.1094 +{
 15.1095 +	ia64_ldfe(6, init);		/* load *init into FP register 6 (presumably as an extended-precision value) */
 15.1096 +	ia64_stop();			/* presumably an instruction-group stop between the load and the spill */
 15.1097 +	ia64_stf_spill(final, 6);	/* write register 6 to *final in spilled form */
 15.1098 +}
 15.1099 +
 15.1100 +static inline void
 15.1101 +mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 15.1102 +{
 15.1103 +	ia64_ldf8(6, init);		/* load *init into FP register 6 (presumably as an 8-byte integer) */
 15.1104 +	ia64_stop();			/* presumably an instruction-group stop between the load and the spill */
 15.1105 +	ia64_stf_spill(final, 6);	/* write register 6 to *final in spilled form */
 15.1106 +}
 15.1107 +
 15.1108 +static inline void
 15.1109 +mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
 15.1110 +{
 15.1111 +	ia64_ldfs(6, init);		/* load *init into FP register 6 (presumably as a single-precision value) */
 15.1112 +	ia64_stop();			/* presumably an instruction-group stop between the load and the spill */
 15.1113 +	ia64_stf_spill(final, 6);	/* write register 6 to *final in spilled form */
 15.1114 +}
 15.1115 +
 15.1116 +static inline void
 15.1117 +mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
 15.1118 +{
 15.1119 +	ia64_ldfd(6, init);		/* load *init into FP register 6 (presumably as a double-precision value) */
 15.1120 +	ia64_stop();			/* presumably an instruction-group stop between the load and the spill */
 15.1121 +	ia64_stf_spill(final, 6);	/* write register 6 to *final in spilled form */
 15.1122 +}
 15.1123 +
 15.1124 +static inline void
 15.1125 +float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 15.1126 +{
 15.1127 +	ia64_ldf_fill(6, init);		/* fill FP register 6 from the spilled image at *init */
 15.1128 +	ia64_stop();			/* presumably an instruction-group stop between the fill and the store */
 15.1129 +	ia64_stfe(final, 6);		/* store register 6 to *final (presumably in extended-precision memory format) */
 15.1130 +}
 15.1131 +
 15.1132 +static inline void
 15.1133 +float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 15.1134 +{
 15.1135 +	ia64_ldf_fill(6, init);		/* fill FP register 6 from the spilled image at *init */
 15.1136 +	ia64_stop();			/* presumably an instruction-group stop between the fill and the store */
 15.1137 +	ia64_stf8(final, 6);		/* store register 6 to *final (presumably as an 8-byte integer) */
 15.1138 +}
 15.1139 +
 15.1140 +static inline void	/* convert a spilled register image at *init into a single-precision memory image at *final */
 15.1141 +float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
 15.1142 +{
 15.1143 +	ia64_ldf_fill(6, init);		/* refill from the spilled image; the 6 is presumably FP register f6 -- TODO confirm */
 15.1144 +	ia64_stop();			/* presumably an instruction-group stop between load and store -- TODO confirm */
 15.1145 +	ia64_stfs(final, 6);		/* ordinary (non-spill) store of the same register number to *final */
 15.1146 +}
 15.1147 +
 15.1148 +static inline void	/* convert a spilled register image at *init into a double-precision memory image at *final */
 15.1149 +float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
 15.1150 +{
 15.1151 +	ia64_ldf_fill(6, init);		/* refill from the spilled image; the 6 is presumably FP register f6 -- TODO confirm */
 15.1152 +	ia64_stop();			/* presumably an instruction-group stop between load and store -- TODO confirm */
 15.1153 +	ia64_stfd(final, 6);		/* ordinary (non-spill) store of the same register number to *final */
 15.1154 +}
 15.1155 +
 15.1156 +static int	/* emulate an unaligned FP load pair (ldfpX) at ifa; returns 0 on success, -1 if reading user memory fails */
 15.1157 +emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 15.1158 +{
 15.1159 +	struct ia64_fpreg fpr_init[2];	/* raw memory images of the two operands */
 15.1160 +	struct ia64_fpreg fpr_final[2];	/* the same values after conversion by mem2float_*() */
 15.1161 +	unsigned long len = float_fsz[ld.x6_sz];	/* size in bytes of ONE operand */
 15.1162 +
 15.1163 +	/*
 15.1164 +	 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
 15.1165 +	 * higher priority than unaligned faults.
 15.1166 +	 *
 15.1167 +	 * r0 cannot be found as the base as it would never generate an unaligned
 15.1168 +	 * reference.
 15.1169 +	 */
 15.1170 +
 15.1171 +	/*
 15.1172 +	 * make sure we get clean buffers
 15.1173 +	 */
 15.1174 +	memset(&fpr_init, 0, sizeof(fpr_init));
 15.1175 +	memset(&fpr_final, 0, sizeof(fpr_final));
 15.1176 +
 15.1177 +	/*
 15.1178 +	 * ldfpX.a: we don't try to emulate anything but we must
 15.1179 +	 * invalidate the ALAT entry and execute updates, if any.
 15.1180 +	 */
 15.1181 +	if (ld.x6_op != 0x2) {
 15.1182 +		/*
 15.1183 +		 * This assumes little-endian byte-order.  Note that there is no "ldfpe"
 15.1184 +		 * instruction:
 15.1185 +		 */
 15.1186 +		if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
 15.1187 +		    || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
 15.1188 +			return -1;	/* could not read one of the two operands */
 15.1189 +
 15.1190 +		DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
 15.1191 +		DDUMP("fpr_init =", &fpr_init, 2*len);	/* label matches the variable and emulate_load_float() */
 15.1192 +		/*
 15.1193 +		 * XXX fixme
 15.1194 +		 * Could optimize inlines by using ldfpX & 2 spills
 15.1195 +		 */
 15.1196 +		switch( ld.x6_sz ) {
 15.1197 +			case 0:
 15.1198 +				mem2float_extended(&fpr_init[0], &fpr_final[0]);
 15.1199 +				mem2float_extended(&fpr_init[1], &fpr_final[1]);
 15.1200 +				break;
 15.1201 +			case 1:
 15.1202 +				mem2float_integer(&fpr_init[0], &fpr_final[0]);
 15.1203 +				mem2float_integer(&fpr_init[1], &fpr_final[1]);
 15.1204 +				break;
 15.1205 +			case 2:
 15.1206 +				mem2float_single(&fpr_init[0], &fpr_final[0]);
 15.1207 +				mem2float_single(&fpr_init[1], &fpr_final[1]);
 15.1208 +				break;
 15.1209 +			case 3:
 15.1210 +				mem2float_double(&fpr_init[0], &fpr_final[0]);
 15.1211 +				mem2float_double(&fpr_init[1], &fpr_final[1]);
 15.1212 +				break;
 15.1213 +		}
 15.1214 +		DDUMP("fpr_final =", &fpr_final, 2*len);
 15.1215 +		/*
 15.1216 +		 * XXX fixme
 15.1217 +		 *
 15.1218 +		 * A possible optimization would be to drop fpr_final and directly
 15.1219 +		 * use the storage from the saved context i.e., the actual final
 15.1220 +		 * destination (pt_regs, switch_stack or thread structure).
 15.1221 +		 */
 15.1222 +		setfpreg(ld.r1, &fpr_final[0], regs);	/* first destination register */
 15.1223 +		setfpreg(ld.imm, &fpr_final[1], regs);	/* second destination is encoded in the imm field */
 15.1224 +	}
 15.1225 +
 15.1226 +	/*
 15.1227 +	 * Check for updates: only immediate updates are available for this
 15.1228 +	 * instruction.
 15.1229 +	 */
 15.1230 +	if (ld.m) {
 15.1231 +		/*
 15.1232 +		 * the immediate is implicit given the ldsz of the operation:
 15.1233 +		 * single: 8 (2x4) and for all others it's 16 (2x8)
 15.1234 +		 */
 15.1235 +		ifa += len<<1;
 15.1236 +
 15.1237 +		/*
 15.1238 +		 * IMPORTANT:
 15.1239 +		 * the fact that we force the NaT of r3 to zero is ONLY valid
 15.1240 +		 * as long as we don't come here with a ldfpX.s.
 15.1241 +		 * For this reason we keep this sanity check
 15.1242 +		 */
 15.1243 +		if (ld.x6_op == 1 || ld.x6_op == 3)
 15.1244 +			printk(KERN_ERR "%s: register update on speculative load pair, error\n",
 15.1245 +			       __FUNCTION__);
 15.1246 +
 15.1247 +		setreg(ld.r3, ifa, 0, regs);	/* write back the post-incremented base, NaT cleared */
 15.1248 +	}
 15.1249 +
 15.1250 +	/*
 15.1251 +	 * Invalidate ALAT entries, if any, for both registers.
 15.1252 +	 */
 15.1253 +	if (ld.x6_op == 0x2) {
 15.1254 +		invala_fr(ld.r1);
 15.1255 +		invala_fr(ld.imm);
 15.1256 +	}
 15.1257 +	return 0;
 15.1258 +}
 15.1259 +
 15.1260 +
 15.1261 +static int	/* emulate an unaligned single FP load (ldfX) at ifa; returns 0 on success, -1 if reading user memory fails */
 15.1262 +emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 15.1263 +{
 15.1264 +	struct ia64_fpreg fpr_init;	/* raw memory image of the operand */
 15.1265 +	struct ia64_fpreg fpr_final;	/* the value after conversion by mem2float_*() */
 15.1266 +	unsigned long len = float_fsz[ld.x6_sz];	/* operand size in bytes */
 15.1267 +
 15.1268 +	/*
 15.1269 +	 * fr0 & fr1 don't need to be checked because Illegal Instruction
 15.1270 +	 * faults have higher priority than unaligned faults.
 15.1271 +	 *
 15.1272 +	 * r0 cannot be found as the base as it would never generate an
 15.1273 +	 * unaligned reference.
 15.1274 +	 */
 15.1275 +
 15.1276 +	/*
 15.1277 +	 * make sure we get clean buffers
 15.1278 +	 */
 15.1279 +	memset(&fpr_init,0, sizeof(fpr_init));
 15.1280 +	memset(&fpr_final,0, sizeof(fpr_final));
 15.1281 +
 15.1282 +	/*
 15.1283 +	 * ldfX.a: we don't try to emulate anything but we must
 15.1284 +	 * invalidate the ALAT entry.
 15.1285 +	 * See comments in ldX for descriptions on how the various loads are handled.
 15.1286 +	 */
 15.1287 +	if (ld.x6_op != 0x2) {
 15.1288 +		if (copy_from_user(&fpr_init, (void __user *) ifa, len))
 15.1289 +			return -1;	/* could not read the operand */
 15.1290 +
 15.1291 +		DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
 15.1292 +		DDUMP("fpr_init =", &fpr_init, len);
 15.1293 +		/*
 15.1294 +		 * we only do something for x6_op={0,8,9}
 15.1295 +		 */
 15.1296 +		switch( ld.x6_sz ) {
 15.1297 +			case 0:
 15.1298 +				mem2float_extended(&fpr_init, &fpr_final);
 15.1299 +				break;
 15.1300 +			case 1:
 15.1301 +				mem2float_integer(&fpr_init, &fpr_final);
 15.1302 +				break;
 15.1303 +			case 2:
 15.1304 +				mem2float_single(&fpr_init, &fpr_final);
 15.1305 +				break;
 15.1306 +			case 3:
 15.1307 +				mem2float_double(&fpr_init, &fpr_final);
 15.1308 +				break;
 15.1309 +		}
 15.1310 +		DDUMP("fpr_final =", &fpr_final, len);
 15.1311 +		/*
 15.1312 +		 * XXX fixme
 15.1313 +		 *
 15.1314 +		 * A possible optimization would be to drop fpr_final and directly
 15.1315 +		 * use the storage from the saved context i.e., the actual final
 15.1316 +		 * destination (pt_regs, switch_stack or thread structure).
 15.1317 +		 */
 15.1318 +		setfpreg(ld.r1, &fpr_final, regs);	/* r1 names the destination FP register */
 15.1319 +	}
 15.1320 +
 15.1321 +	/*
 15.1322 +	 * check for updates on any loads: op 0x7 selects the immediate form,
 15.1323 +	 * otherwise the m bit selects the register form
 15.1324 +	 */
 15.1324 +	if (ld.op == 0x7 || ld.m)
 15.1325 +		emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
 15.1326 +
 15.1327 +	/*
 15.1328 +	 * invalidate ALAT entry in case of advanced floating point loads
 15.1329 +	 */
 15.1330 +	if (ld.x6_op == 0x2)
 15.1331 +		invala_fr(ld.r1);
 15.1332 +
 15.1333 +	return 0;
 15.1334 +}
 15.1335 +
 15.1336 +
 15.1337 +static int	/* emulate an unaligned FP store (stfX) to ifa; returns 0 on success, -1 if writing user memory fails */
 15.1338 +emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 15.1339 +{
 15.1340 +	struct ia64_fpreg fpr_init;	/* spilled register image taken from the saved context */
 15.1341 +	struct ia64_fpreg fpr_final;	/* memory-format image that is copied out to ifa */
 15.1342 +	unsigned long len = float_fsz[ld.x6_sz];	/* operand size in bytes */
 15.1343 +
 15.1344 +	/*
 15.1345 +	 * make sure we get clean buffers
 15.1346 +	 */
 15.1347 +	memset(&fpr_init,0, sizeof(fpr_init));
 15.1348 +	memset(&fpr_final,0, sizeof(fpr_final));
 15.1349 +
 15.1350 +	/*
 15.1351 +	 * if we get to this handler, Nat bits on both r3 and r2 have already
 15.1352 +	 * been checked. so we don't need to do it
 15.1353 +	 *
 15.1354 +	 * extract the value to be stored
 15.1355 +	 */
 15.1356 +	getfpreg(ld.imm, &fpr_init, regs);	/* for stores the source register number is in the imm field */
 15.1357 +	/*
 15.1358 +	 * during this step, we extract the spilled registers from the saved
 15.1359 +	 * context i.e., we refill. Then we store (no spill) to temporary
 15.1360 +	 * aligned location
 15.1361 +	 */
 15.1362 +	switch( ld.x6_sz ) {
 15.1363 +		case 0:
 15.1364 +			float2mem_extended(&fpr_init, &fpr_final);
 15.1365 +			break;
 15.1366 +		case 1:
 15.1367 +			float2mem_integer(&fpr_init, &fpr_final);
 15.1368 +			break;
 15.1369 +		case 2:
 15.1370 +			float2mem_single(&fpr_init, &fpr_final);
 15.1371 +			break;
 15.1372 +		case 3:
 15.1373 +			float2mem_double(&fpr_init, &fpr_final);
 15.1374 +			break;
 15.1375 +	}
 15.1376 +	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
 15.1377 +	DDUMP("fpr_init =", &fpr_init, len);
 15.1378 +	DDUMP("fpr_final =", &fpr_final, len);
 15.1379 +
 15.1380 +	if (copy_to_user((void __user *) ifa, &fpr_final, len))
 15.1381 +		return -1;	/* could not write the operand */
 15.1382 +
 15.1383 +	/*
 15.1384 +	 * stfX [r3]=r2,imm(9)
 15.1385 +	 *
 15.1386 +	 * NOTE:
 15.1387 +	 * ld.r3 can never be r0, because r0 would not generate an
 15.1388 +	 * unaligned access.
 15.1389 +	 */
 15.1390 +	if (ld.op == 0x7) {
 15.1391 +		unsigned long imm;
 15.1392 +
 15.1393 +		/*
 15.1394 +		 * form imm9: [12:6] contain first 7bits
 15.1395 +		 * (the r1 field supplies them, the x bit becomes bit 7)
 15.1396 +		 */
 15.1396 +		imm = ld.x << 7 | ld.r1;
 15.1397 +		/*
 15.1398 +		 * sign extend the 8-bit value if m (the sign bit) is set
 15.1399 +		 */
 15.1400 +		if (ld.m)
 15.1401 +			imm |= SIGN_EXT9;
 15.1402 +		/*
 15.1403 +		 * ifa == r3 (NaT is necessarily cleared)
 15.1404 +		 */
 15.1405 +		ifa += imm;
 15.1406 +
 15.1407 +		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
 15.1408 +
 15.1409 +		setreg(ld.r3, ifa, 0, regs);	/* write back the updated base register */
 15.1410 +	}
 15.1411 +	/*
 15.1412 +	 * we don't have alat_invalidate_multiple() so we need
 15.1413 +	 * to do the complete flush :-<<
 15.1414 +	 */
 15.1415 +	ia64_invala();
 15.1416 +
 15.1417 +	return 0;
 15.1418 +}
 15.1419 +
 15.1420 +/*
 15.1421 + * Make sure we log the unaligned access, so that user/sysadmin can notice it and
 15.1422 + * eventually fix the program.  However, we don't want to do that for every access so we
 15.1423 + * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
 15.1424 + * either...
 15.1425 + */
 15.1426 +static int	/* returns 1 when the caller may log, 0 when it should stay quiet */
 15.1427 +within_logging_rate_limit (void)
 15.1428 +{
 15.1429 +	static unsigned long count, last_time;
 15.1430 +	unsigned long elapsed = jiffies - last_time;	/* ticks since the last permitted message */
 15.1431 +
 15.1432 +	if (elapsed > 5*HZ)
 15.1433 +		count = 0;		/* quiet for long enough: start a fresh burst */
 15.1434 +	count++;
 15.1435 +	if (count >= 5)
 15.1436 +		return 0;		/* burst budget used up */
 15.1437 +	last_time = jiffies;
 15.1438 +	return 1;
 15.1439 +}
 15.1440 +
 15.1441 +void	/* unaligned-reference handler: emulate the access at ifa or deliver SIGBUS; stubbed out when XEN is defined */
 15.1442 +ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 15.1443 +{
 15.1444 +#ifdef XEN
 15.1445 +printk("ia64_handle_unaligned: called, not working yet\n");
 15.1446 +#else
 15.1447 +	struct ia64_psr *ipsr = ia64_psr(regs);
 15.1448 +	mm_segment_t old_fs = get_fs();	/* restored at "done" */
 15.1449 +	unsigned long bundle[2];	/* the 16-byte bundle holding the faulting instruction */
 15.1450 +	unsigned long opcode;
 15.1451 +	struct siginfo si;
 15.1452 +	const struct exception_table_entry *eh = NULL;
 15.1453 +	union {
 15.1454 +		unsigned long l;
 15.1455 +		load_store_t insn;
 15.1456 +	} u;
 15.1457 +	int ret = -1;	/* stays -1 unless an emulation helper returns something else */
 15.1458 +
 15.1459 +	if (ia64_psr(regs)->be) {
 15.1460 +		/* we don't support big-endian accesses */
 15.1461 +		die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
 15.1462 +		goto force_sigbus;
 15.1463 +	}
 15.1464 +
 15.1465 +	/*
 15.1466 +	 * Treat kernel accesses for which there is an exception handler entry the same as
 15.1467 +	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
 15.1468 +	 * handler into reading arbitrary kernel addresses...
 15.1469 +	 */
 15.1470 +	if (!user_mode(regs))
 15.1471 +		eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
 15.1472 +	if (user_mode(regs) || eh) {
 15.1473 +		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
 15.1474 +			goto force_sigbus;
 15.1475 +
 15.1476 +		if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
 15.1477 +		    && within_logging_rate_limit())
 15.1478 +		{
 15.1479 +			char buf[200];	/* comm[] is at most 16 bytes... */
 15.1480 +			size_t len;
 15.1481 +
 15.1482 +			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
 15.1483 +				      "ip=0x%016lx\n\r", current->comm, current->pid,
 15.1484 +				      ifa, regs->cr_iip + ipsr->ri);
 15.1485 +			/*
 15.1486 +			 * Don't call tty_write_message() if we're in the kernel; we might
 15.1487 +			 * be holding locks...
 15.1488 +			 */
 15.1489 +			if (user_mode(regs))
 15.1490 +				tty_write_message(current->signal->tty, buf);
 15.1491 +			buf[len-1] = '\0';	/* drop '\r' */
 15.1492 +			printk(KERN_WARNING "%s", buf);	/* watch for command names containing %s */
 15.1493 +		}
 15.1494 +	} else {
 15.1495 +		if (within_logging_rate_limit())
 15.1496 +			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
 15.1497 +			       ifa, regs->cr_iip + ipsr->ri);
 15.1498 +		set_fs(KERNEL_DS);
 15.1499 +	}
 15.1500 +
 15.1501 +	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
 15.1502 +	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
 15.1503 +
 15.1504 +	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
 15.1505 +		goto failure;
 15.1506 +
 15.1507 +	/* extract the instruction from the bundle given the slot number; */
 15.1508 +	/* slot 3 does not exist, so refuse it rather than decode garbage */
 15.1509 +	switch (ipsr->ri) {
 15.1510 +	      case 0: u.l = (bundle[0] >>  5); break;
 15.1511 +	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
 15.1512 +	      case 2: u.l = (bundle[1] >> 23); break;
 15.1513 +	      default: goto failure;	/* u.l would otherwise be used uninitialized */
 15.1514 +	}
 15.1515 +	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
 15.1516 +
 15.1517 +	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
 15.1518 +	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
 15.1519 +	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
 15.1520 +
 15.1521 +	/*
 15.1522 +	 * IMPORTANT:
 15.1523 +	 * Notice that the switch statement DOES not cover all possible instructions
 15.1524 +	 * that DO generate unaligned references. This is made on purpose because for some
 15.1525 +	 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
 15.1526 +	 * is WRONG to try and emulate. Here is a list of instructions we don't emulate i.e.,
 15.1527 +	 * the program will get a signal and die:
 15.1528 +	 *
 15.1529 +	 *	load/store:
 15.1530 +	 *		- ldX.spill
 15.1531 +	 *		- stX.spill
 15.1532 +	 *	Reason: RNATs are based on addresses
 15.1533 +	 *
 15.1534 +	 *	synchronization:
 15.1535 +	 *		- cmpxchg
 15.1536 +	 *		- fetchadd
 15.1537 +	 *		- xchg
 15.1538 +	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
 15.1539 +	 *	        instructions.
 15.1540 +	 *
 15.1541 +	 *	speculative loads:
 15.1542 +	 *		- ldX.sZ
 15.1543 +	 *	Reason: side effects, code must be ready to deal with failure so simpler
 15.1544 +	 *		to let the load fail.
 15.1545 +	 * ---------------------------------------------------------------------------------
 15.1546 +	 * XXX fixme
 15.1547 +	 *
 15.1548 +	 * I would like to get rid of this switch case and do something
 15.1549 +	 * more elegant.
 15.1550 +	 */
 15.1551 +	switch (opcode) {
 15.1552 +	      case LDS_OP:
 15.1553 +	      case LDSA_OP:
 15.1554 +	      case LDS_IMM_OP:
 15.1555 +	      case LDSA_IMM_OP:
 15.1556 +	      case LDFS_OP:
 15.1557 +	      case LDFSA_OP:
 15.1558 +	      case LDFS_IMM_OP:
 15.1559 +		/*
 15.1560 +		 * The instruction will be retried with deferred exceptions turned on, and
 15.1561 +		 * we should get Nat bit installed
 15.1562 +		 *
 15.1563 +		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
 15.1564 +		 * are actually executed even though the operation failed. So we don't
 15.1565 +		 * need to take care of this.
 15.1566 +		 */
 15.1567 +		DPRINT("forcing PSR_ED\n");
 15.1568 +		regs->cr_ipsr |= IA64_PSR_ED;
 15.1569 +		goto done;
 15.1570 +
 15.1571 +	      case LD_OP:
 15.1572 +	      case LDA_OP:
 15.1573 +	      case LDBIAS_OP:
 15.1574 +	      case LDACQ_OP:
 15.1575 +	      case LDCCLR_OP:
 15.1576 +	      case LDCNC_OP:
 15.1577 +	      case LDCCLRACQ_OP:
 15.1578 +	      case LD_IMM_OP:
 15.1579 +	      case LDA_IMM_OP:
 15.1580 +	      case LDBIAS_IMM_OP:
 15.1581 +	      case LDACQ_IMM_OP:
 15.1582 +	      case LDCCLR_IMM_OP:
 15.1583 +	      case LDCNC_IMM_OP:
 15.1584 +	      case LDCCLRACQ_IMM_OP:
 15.1585 +		ret = emulate_load_int(ifa, u.insn, regs);
 15.1586 +		break;
 15.1587 +
 15.1588 +	      case ST_OP:
 15.1589 +	      case STREL_OP:
 15.1590 +	      case ST_IMM_OP:
 15.1591 +	      case STREL_IMM_OP:
 15.1592 +		ret = emulate_store_int(ifa, u.insn, regs);
 15.1593 +		break;
 15.1594 +
 15.1595 +	      case LDF_OP:
 15.1596 +	      case LDFA_OP:
 15.1597 +	      case LDFCCLR_OP:
 15.1598 +	      case LDFCNC_OP:
 15.1599 +	      case LDF_IMM_OP:
 15.1600 +	      case LDFA_IMM_OP:
 15.1601 +	      case LDFCCLR_IMM_OP:
 15.1602 +	      case LDFCNC_IMM_OP:
 15.1603 +		if (u.insn.x)	/* the x bit selects the load-pair form */
 15.1604 +			ret = emulate_load_floatpair(ifa, u.insn, regs);
 15.1605 +		else
 15.1606 +			ret = emulate_load_float(ifa, u.insn, regs);
 15.1607 +		break;
 15.1608 +
 15.1609 +	      case STF_OP:
 15.1610 +	      case STF_IMM_OP:
 15.1611 +		ret = emulate_store_float(ifa, u.insn, regs);
 15.1612 +		break;
 15.1613 +
 15.1614 +	      default:
 15.1615 +		goto failure;
 15.1616 +	}
 15.1617 +	DPRINT("ret=%d\n", ret);
 15.1618 +	if (ret)
 15.1619 +		goto failure;
 15.1620 +
 15.1621 +	/*
 15.1622 +	 * Step past the emulated instruction.  Leaving slot 2 moves to slot 0 of the next
 15.1623 +	 * bundle; given today's architecture that is not likely to happen because a memory
 15.1624 +	 * access instruction (M) can never be in the last slot of a bundle.
 15.1625 +	 */
 15.1626 +	if (ipsr->ri == 2)
 15.1627 +		regs->cr_iip += 16;
 15.1628 +	ipsr->ri = (ipsr->ri + 1) % 3;	/* slots are 0..2; "& 0x3" would yield the invalid slot 3 */
 15.1629 +
 15.1630 +	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
 15.1631 +  done:
 15.1632 +	set_fs(old_fs);		/* restore original address limit */
 15.1633 +	return;
 15.1634 +
 15.1635 +  failure:
 15.1636 +	/* something went wrong... */
 15.1637 +	if (!user_mode(regs)) {
 15.1638 +		if (eh) {
 15.1639 +			ia64_handle_exception(regs, eh);	/* resume at the registered fixup */
 15.1640 +			goto done;
 15.1641 +		}
 15.1642 +		die_if_kernel("error during unaligned kernel access\n", regs, ret);
 15.1643 +		/* NOT_REACHED */
 15.1644 +	}
 15.1645 +  force_sigbus:
 15.1646 +	si.si_signo = SIGBUS;
 15.1647 +	si.si_errno = 0;
 15.1648 +	si.si_code = BUS_ADRALN;
 15.1649 +	si.si_addr = (void __user *) ifa;
 15.1650 +	si.si_flags = 0;
 15.1651 +	si.si_isr = 0;
 15.1652 +	si.si_imm = 0;
 15.1653 +	force_sig_info(SIGBUS, &si, current);
 15.1654 +	goto done;
 15.1655 +#endif
 15.1656 +}
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xen/arch/ia64/linux/cmdline.c	Tue Aug 16 12:15:23 2005 +0800
    16.3 @@ -0,0 +1,120 @@
    16.4 +/*
    16.5 + * linux/lib/cmdline.c
    16.6 + * Helper functions generally used for parsing kernel command line
    16.7 + * and module options.
    16.8 + *
    16.9 + * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c.
   16.10 + *
   16.11 + * This source code is licensed under the GNU General Public License,
   16.12 + * Version 2.  See the file COPYING for more details.
   16.13 + *
   16.14 + * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs
   16.15 + *
   16.16 + */
   16.17 +
   16.18 +#include <linux/module.h>
   16.19 +#include <linux/kernel.h>
   16.20 +#include <linux/string.h>
   16.21 +
   16.22 +
   16.23 +/**
   16.24 + *	get_option - Parse integer from an option string
   16.25 + *	@str: option string
   16.26 + *	@pint: (output) integer value parsed from @str
   16.27 + *
   16.28 + *	Read an int from an option string; if available accept a subsequent
   16.29 + *	comma as well.
   16.30 + *
   16.31 + *	Return values:
   16.32 + *	0 : no int in string
   16.33 + *	1 : int found, no subsequent comma
   16.34 + *	2 : int found including a subsequent comma
   16.35 + */
   16.36 +
   16.37 +int get_option (char **str, int *pint)
   16.38 +{
   16.39 +	char *start = *str;	/* where this parse attempt begins */
   16.40 +
   16.41 +	if (!start || *start == '\0')
   16.42 +		return 0;	/* nothing to parse */
   16.43 +	/* the end pointer is written back through str */
   16.44 +	*pint = simple_strtol (start, str, 0);
   16.45 +	if (*str == start)
   16.46 +		return 0;	/* the end pointer did not move: no int here */
   16.47 +	if (**str != ',')
   16.48 +		return 1;	/* int found, no subsequent comma */
   16.49 +
   16.50 +	++*str;			/* step over the comma */
   16.51 +	return 2;
   16.52 +}
   16.53 +
   16.54 +/**
   16.55 + *	get_options - Parse a string into a list of integers
   16.56 + *	@str: String to be parsed
   16.57 + *	@nints: size of integer array
   16.58 + *	@ints: integer array
   16.59 + *
   16.60 + *	This function parses a string containing a comma-separated
   16.61 + *	list of integers.  The parse halts when the array is
   16.62 + *	full, or when no more numbers can be retrieved from the
   16.63 + *	string.
   16.64 + *
   16.65 + *	Return value is the character in the string which caused
   16.66 + *	the parse to end (typically a null terminator, if @str is
   16.67 + *	completely parseable).
   16.68 + */
   16.69 + 
   16.70 +char *get_options(const char *str, int nints, int *ints)
   16.71 +{
   16.72 +	int status, found = 0;	/* found = number of integers stored in ints[1..] */
   16.73 +
   16.74 +	while (found + 1 < nints) {
   16.75 +		status = get_option ((char **)&str, &ints[found + 1]);
   16.76 +		if (status == 0)
   16.77 +			break;		/* no integer at this position */
   16.78 +		found++;
   16.79 +		if (status == 1)
   16.80 +			break;		/* that integer had no comma after it */
   16.81 +	}
   16.82 +	ints[0] = found;		/* slot 0 reports how many values were parsed */
   16.83 +	return (char *)str;
   16.84 +}
   16.85 +
   16.86 +/**
   16.87 + *	memparse - parse a string with mem suffixes into a number
   16.88 + *	@ptr: Where parse begins
   16.89 + *	@retptr: (output) Pointer to next char after parse completes
   16.90 + *
   16.91 + *	Parses a string into a number.  The number stored at @ptr is
   16.92 + *	potentially suffixed with %K (for kilobytes, or 1024 bytes),
   16.93 + *	%M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
   16.94 + *	1073741824).  If the number is suffixed with K, M, or G, then
   16.95 + *	the return value is the number multiplied by one kilobyte, one
   16.96 + *	megabyte, or one gigabyte, respectively.
   16.97 + */
   16.98 +
   16.99 +unsigned long long memparse (char *ptr, char **retptr)
  16.100 +{
  16.101 +	unsigned long long value = simple_strtoull (ptr, retptr, 0);
  16.102 +	unsigned int shift;	/* log2 of the suffix multiplier */
  16.103 +	switch (**retptr) {
  16.104 +	case 'G': case 'g':
  16.105 +		shift = 30;	/* gigabytes */
  16.106 +		break;
  16.107 +	case 'M': case 'm':
  16.108 +		shift = 20;	/* megabytes */
  16.109 +		break;
  16.110 +	case 'K': case 'k':
  16.111 +		shift = 10;	/* kilobytes */
  16.112 +		break;
  16.113 +	default:
  16.114 +		return value;	/* no recognised suffix: *retptr is left alone */
  16.115 +	}
  16.116 +	(*retptr)++;		/* consume the suffix character */
  16.117 +	return value << shift;
  16.118 +}
  16.119 +
  16.120 +
  16.121 +EXPORT_SYMBOL(memparse);
  16.122 +EXPORT_SYMBOL(get_option);
  16.123 +EXPORT_SYMBOL(get_options);
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/xen/arch/ia64/linux/efi_stub.S	Tue Aug 16 12:15:23 2005 +0800
    17.3 @@ -0,0 +1,86 @@
    17.4 +/*
    17.5 + * EFI call stub.
    17.6 + *
    17.7 + * Copyright (C) 1999-2001 Hewlett-Packard Co
    17.8 + *	David Mosberger <davidm@hpl.hp.com>
    17.9 + *
   17.10 + * This stub allows us to make EFI calls in physical mode with interrupts
   17.11 + * turned off.  We need this because we can't call SetVirtualMap() until
   17.12 + * the kernel has booted far enough to allow allocation of struct vma_struct
   17.13 + * entries (which we would need to map stuff with memory attributes other
   17.14 + * than uncached or writeback...).  Since the GetTime() service gets called
   17.15 + * earlier than that, we need to be able to make physical mode EFI calls from
   17.16 + * the kernel.
   17.17 + */
   17.18 +
   17.19 +/*
   17.20 + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
   17.21 + * Abstraction Layer Specification", revision 2.6e).  Note that
   17.22 + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
   17.23 + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
   17.24 + * (the br.ia instruction fails unless psr.dfl and psr.dfh are
   17.25 + * cleared).  Fortunately, SAL promises not to touch the floating
   17.26 + * point regs, so at least we don't have to save f2-f127.
   17.27 + */
   17.28 +#define PSR_BITS_TO_CLEAR						\
   17.29 +	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |		\
   17.30 +	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	\
   17.31 +	 IA64_PSR_DFL | IA64_PSR_DFH)
   17.32 +
   17.33 +#define PSR_BITS_TO_SET							\
   17.34 +	(IA64_PSR_BN)
   17.35 +
   17.36 +#include <asm/processor.h>
   17.37 +#include <asm/asmmacro.h>
   17.38 +
   17.39 +/*
   17.40 + * Inputs:
   17.41 + *	in0 = address of function descriptor of EFI routine to call
   17.42 + *	in1..in7 = arguments to routine
   17.43 + *
   17.44 + * Outputs:
   17.45 + *	r8 = EFI_STATUS returned by called function
   17.46 + */
   17.47 +
   17.48 +GLOBAL_ENTRY(efi_call_phys)
   17.49 +	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
   17.50 +	alloc loc1=ar.pfs,8,7,7,0	// save ar.pfs in loc1; frame has 8 inputs, 7 locals, 7 outputs
   17.51 +	ld8 r2=[in0],8			// load EFI function's entry point; in0 then points at the gp word
   17.52 +	mov loc0=rp			// save return pointer
   17.53 +	.body
   17.54 +	;;
   17.55 +	mov loc2=gp			// save global pointer
   17.56 +	mov loc4=ar.rsc			// save RSE configuration
   17.57 +	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
   17.58 +	;;
   17.59 +	ld8 gp=[in0]			// load EFI function's global pointer
   17.60 +	movl r16=PSR_BITS_TO_CLEAR
   17.61 +	mov loc3=psr			// save processor status word
   17.62 +	movl r17=PSR_BITS_TO_SET
   17.63 +	;;
   17.64 +	or loc3=loc3,r17		// saved psr now also has PSR_BITS_TO_SET
   17.65 +	mov b6=r2			// b6 = EFI entry point, used by the br.call at .ret0's end
   17.66 +	;;
   17.67 +	andcm r16=loc3,r16		// r16 = psr with every PSR_BITS_TO_CLEAR bit (IT, DT, RT, ...) cleared
   17.68 +	br.call.sptk.many rp=ia64_switch_mode_phys	// presumably installs the psr in r16 -- TODO confirm
   17.69 +.ret0:	mov out4=in5			// pass in1..in7 on as out0..out6 (in0 was the descriptor)
   17.70 +	mov out0=in1
   17.71 +	mov out1=in2
   17.72 +	mov out2=in3
   17.73 +	mov out3=in4
   17.74 +	mov out5=in6
   17.75 +	mov out6=in7
   17.76 +	mov loc5=r19			// keep r19 and r20 across the EFI call;
   17.77 +	mov loc6=r20			// both are put back before ia64_switch_mode_virt
   17.78 +	br.call.sptk.many rp=b6		// call the EFI function
   17.79 +.ret1:	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
   17.80 +	mov r16=loc3			// r16 = saved psr (with PSR_BITS_TO_SET)
   17.81 +	mov r19=loc5
   17.82 +	mov r20=loc6
   17.83 +	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
   17.84 +.ret2:	mov ar.rsc=loc4			// restore RSE configuration
   17.85 +	mov ar.pfs=loc1			// restore ar.pfs saved by the alloc
   17.86 +	mov rp=loc0			// restore return pointer
   17.87 +	mov gp=loc2			// restore global pointer
   17.88 +	br.ret.sptk.many rp
   17.89 +END(efi_call_phys)
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/xen/arch/ia64/linux/extable.c	Tue Aug 16 12:15:23 2005 +0800
    18.3 @@ -0,0 +1,93 @@
    18.4 +/*
    18.5 + * Kernel exception handling table support.  Derived from arch/alpha/mm/extable.c.
    18.6 + *
    18.7 + * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co
    18.8 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    18.9 + */
   18.10 +
   18.11 +#include <linux/config.h>
   18.12 +
   18.13 +#include <asm/uaccess.h>
   18.14 +#include <asm/module.h>
   18.15 +
   18.16 +static inline int
   18.17 +compare_entries (struct exception_table_entry *l, struct exception_table_entry *r)
   18.18 +{
   18.19 +	/* addr is an offset from the field's own address; resolve both before comparing */
   18.20 +	u64 left_ip = (u64) &l->addr + l->addr;
   18.21 +	u64 right_ip = (u64) &r->addr + r->addr;
   18.22 +
   18.23 +	if (left_ip == right_ip)
   18.24 +		return 0;
   18.25 +	if (left_ip > right_ip)
   18.26 +		return 1;
   18.27 +	return -1;
   18.28 +}
   18.29 +
   18.30 +static inline void	/* exchange *l and *r, adjusting each location-relative offset for the slot it moves to */
   18.31 +swap_entries (struct exception_table_entry *l, struct exception_table_entry *r)
   18.32 +{
   18.33 +	u64 delta = (u64) r - (u64) l;	/* byte distance from l's slot to r's slot */
   18.34 +	struct exception_table_entry tmp;
   18.35 +
   18.36 +	tmp = *l;			/* l's raw offsets; only used below with delta applied */
   18.37 +	l->addr = r->addr + delta;	/* r's target, re-expressed relative to l's slot */
   18.38 +	l->cont = r->cont + delta;
   18.39 +	r->addr = tmp.addr - delta;	/* l's old target, re-expressed relative to r's slot */
   18.40 +	r->cont = tmp.cont - delta;
   18.41 +}
   18.42 +
   18.43 +/*
   18.44 + * Sort the exception table.  It's usually already sorted, but there may be unordered
   18.45 + * entries due to multiple text sections (such as the .init text section).  Note that the
   18.46 + * exception-table-entries contain location-relative addresses, which requires a bit of
   18.47 + * care during sorting to avoid overflows in the offset members (e.g., it would not be
   18.48 + * safe to make a temporary copy of an exception-table entry on the stack, because the
   18.49 + * stack may be more than 2GB away from the exception-table).
   18.50 + */
   18.51 +void
   18.52 +sort_extable (struct exception_table_entry *start, struct exception_table_entry *finish)
   18.53 +{
   18.54 +	struct exception_table_entry *sorted_end, *cur;
   18.55 +
   18.56 +	/* insertion sort: entries in [start, sorted_end) are already in order */
   18.57 +	for (sorted_end = start + 1; sorted_end < finish; sorted_end++)
   18.58 +		/* sink the new entry until its predecessor no longer sorts after it */
   18.59 +		for (cur = sorted_end; cur > start && compare_entries(cur, cur - 1) < 0; cur--)
   18.60 +			swap_entries(cur, cur - 1);
   18.61 +}
   18.62 +
   18.63 +const struct exception_table_entry *
   18.64 +search_extable (const struct exception_table_entry *first,
   18.65 +		const struct exception_table_entry *last,
   18.66 +		unsigned long ip)
   18.67 +{
   18.68 +	const struct exception_table_entry *mid;
   18.69 +	unsigned long mid_ip;
   18.70 +
   18.71 +	/* Binary search of [first, last]; returns the entry for ip or NULL.  Addresses are */
   18.72 +	/* compared as unsigned values: a signed difference mis-orders distant addresses.   */
   18.73 +	while (first <= last) {
   18.74 +		mid = &first[(last - first)/2];
   18.75 +		mid_ip = (u64) &mid->addr + mid->addr;	/* addr is relative to its own location */
   18.76 +		if (mid_ip == ip)
   18.77 +			return mid;
   18.78 +		if (mid_ip < ip)
   18.79 +			first = mid + 1;
   18.80 +		else
   18.81 +			last = mid - 1;
   18.82 +	}
   18.83 +	return NULL;
   18.84 +}
   18.85 +
   18.86 +void
   18.87 +ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e)
   18.88 +{
   18.89 +	long target = (u64) &e->cont + e->cont;	/* cont is relative to its own address */
   18.90 +
   18.91 +	regs->cr_iip = target & ~0xf;		/* continue in the 16-byte-aligned bundle ... */
   18.92 +	ia64_psr(regs)->ri = target & 0x3;	/* ... at the continuation slot number */
   18.93 +	regs->r8 = -EFAULT;
   18.94 +	if ((target & 4) != 0)			/* bit 2 of the fixup word: also zero r9 */
   18.95 +		regs->r9 = 0;
   18.96 +}
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/xen/arch/ia64/linux/hpsim.S	Tue Aug 16 12:15:23 2005 +0800
    19.3 @@ -0,0 +1,10 @@
    19.4 +#include <asm/asmmacro.h>
    19.5 +
    19.6 +/*
    19.7 + * Simulator system call.
    19.8 + */
    19.9 +GLOBAL_ENTRY(ia64_ssc)
   19.10 +	mov r15=r36			// r15 = r36, i.e. in4 if stacked inputs start at r32 -- presumably the request code; TODO confirm
   19.11 +	break 0x80001			// presumably the break immediate the simulator intercepts -- TODO confirm
   19.12 +	br.ret.sptk.many rp		// return to the caller
   19.13 +END(ia64_ssc)
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/xen/arch/ia64/linux/ia64_ksyms.c	Tue Aug 16 12:15:23 2005 +0800
    20.3 @@ -0,0 +1,127 @@
    20.4 +/*
    20.5 + * Architecture-specific kernel symbols
    20.6 + *
    20.7 + * Don't put any exports here unless it's defined in an assembler file.
    20.8 + * All other exports should be put directly after the definition.
    20.9 + */
   20.10 +
   20.11 +#include <linux/config.h>
   20.12 +#include <linux/module.h>
   20.13 +
   20.14 +#include <linux/string.h>
   20.15 +EXPORT_SYMBOL(memset);
   20.16 +EXPORT_SYMBOL(memchr);
   20.17 +EXPORT_SYMBOL(memcmp);
   20.18 +EXPORT_SYMBOL(memcpy);
   20.19 +EXPORT_SYMBOL(memmove);
   20.20 +EXPORT_SYMBOL(memscan);
   20.21 +EXPORT_SYMBOL(strcat);
   20.22 +EXPORT_SYMBOL(strchr);
   20.23 +EXPORT_SYMBOL(strcmp);
   20.24 +EXPORT_SYMBOL(strcpy);
   20.25 +EXPORT_SYMBOL(strlen);
   20.26 +EXPORT_SYMBOL(strncat);
   20.27 +EXPORT_SYMBOL(strncmp);
   20.28 +EXPORT_SYMBOL(strncpy);
   20.29 +EXPORT_SYMBOL(strnlen);
   20.30 +EXPORT_SYMBOL(strrchr);
   20.31 +EXPORT_SYMBOL(strstr);
   20.32 +EXPORT_SYMBOL(strpbrk);
   20.33 +
   20.34 +#include <asm/checksum.h>
   20.35 +EXPORT_SYMBOL(ip_fast_csum);		/* hand-coded assembly */
   20.36 +
   20.37 +#include <asm/semaphore.h>
   20.38 +EXPORT_SYMBOL(__down);
   20.39 +EXPORT_SYMBOL(__down_interruptible);
   20.40 +EXPORT_SYMBOL(__down_trylock);
   20.41 +EXPORT_SYMBOL(__up);
   20.42 +
   20.43 +#include <asm/page.h>
   20.44 +EXPORT_SYMBOL(clear_page);
   20.45 +
   20.46 +#ifdef CONFIG_VIRTUAL_MEM_MAP
   20.47 +#include <linux/bootmem.h>
   20.48 +EXPORT_SYMBOL(max_low_pfn);	/* defined by bootmem.c, but not exported by generic code */
   20.49 +#endif
   20.50 +
   20.51 +#include <asm/processor.h>
   20.52 +EXPORT_SYMBOL(per_cpu__cpu_info);
   20.53 +#ifdef CONFIG_SMP
   20.54 +EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
   20.55 +#endif
   20.56 +
   20.57 +#include <asm/uaccess.h>
   20.58 +EXPORT_SYMBOL(__copy_user);
   20.59 +EXPORT_SYMBOL(__do_clear_user);
   20.60 +EXPORT_SYMBOL(__strlen_user);
   20.61 +EXPORT_SYMBOL(__strncpy_from_user);
   20.62 +EXPORT_SYMBOL(__strnlen_user);
   20.63 +
   20.64 +#include <asm/unistd.h>
   20.65 +EXPORT_SYMBOL(__ia64_syscall);
   20.66 +
   20.67 +/* from arch/ia64/lib */
   20.68 +extern void __divsi3(void);
   20.69 +extern void __udivsi3(void);
   20.70 +extern void __modsi3(void);
   20.71 +extern void __umodsi3(void);
   20.72 +extern void __divdi3(void);
   20.73 +extern void __udivdi3(void);
   20.74 +extern void __moddi3(void);
   20.75 +extern void __umoddi3(void);
   20.76 +
   20.77 +EXPORT_SYMBOL(__divsi3);
   20.78 +EXPORT_SYMBOL(__udivsi3);
   20.79 +EXPORT_SYMBOL(__modsi3);
   20.80 +EXPORT_SYMBOL(__umodsi3);
   20.81 +EXPORT_SYMBOL(__divdi3);
   20.82 +EXPORT_SYMBOL(__udivdi3);
   20.83 +EXPORT_SYMBOL(__moddi3);
   20.84 +EXPORT_SYMBOL(__umoddi3);
   20.85 +
   20.86 +#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
   20.87 +extern void xor_ia64_2(void);
   20.88 +extern void xor_ia64_3(void);
   20.89 +extern void xor_ia64_4(void);
   20.90 +extern void xor_ia64_5(void);
   20.91 +
   20.92 +EXPORT_SYMBOL(xor_ia64_2);
   20.93 +EXPORT_SYMBOL(xor_ia64_3);
   20.94 +EXPORT_SYMBOL(xor_ia64_4);
   20.95 +EXPORT_SYMBOL(xor_ia64_5);
   20.96 +#endif
   20.97 +
   20.98 +#include <asm/pal.h>
   20.99 +EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
  20.100 +EXPORT_SYMBOL(ia64_pal_call_phys_static);
  20.101 +EXPORT_SYMBOL(ia64_pal_call_stacked);
  20.102 +EXPORT_SYMBOL(ia64_pal_call_static);
  20.103 +EXPORT_SYMBOL(ia64_load_scratch_fpregs);
  20.104 +EXPORT_SYMBOL(ia64_save_scratch_fpregs);
  20.105 +
  20.106 +#include <asm/unwind.h>
  20.107 +EXPORT_SYMBOL(unw_init_running);
  20.108 +
  20.109 +#ifdef ASM_SUPPORTED
  20.110 +# ifdef CONFIG_SMP
  20.111 +#  if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
  20.112 +/*
  20.113 + * This is not a normal routine and we don't want a function descriptor for it, so we use
  20.114 + * a fake declaration here.
  20.115 + */
  20.116 +extern char ia64_spinlock_contention_pre3_4;
  20.117 +EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4);
  20.118 +#  else
  20.119 +/*
  20.120 + * This is not a normal routine and we don't want a function descriptor for it, so we use
  20.121 + * a fake declaration here.
  20.122 + */
  20.123 +extern char ia64_spinlock_contention;
  20.124 +EXPORT_SYMBOL(ia64_spinlock_contention);
  20.125 +#  endif
  20.126 +# endif
  20.127 +#endif
  20.128 +
  20.129 +extern char ia64_ivt[];
  20.130 +EXPORT_SYMBOL(ia64_ivt);
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/xen/arch/ia64/linux/irq_lsapic.c	Tue Aug 16 12:15:23 2005 +0800
    21.3 @@ -0,0 +1,37 @@
    21.4 +/*
    21.5 + * LSAPIC Interrupt Controller
    21.6 + *
    21.7 + * This takes care of interrupts that are generated by the CPU's
    21.8 + * internal Local Streamlined Advanced Programmable Interrupt
    21.9 + * Controller (LSAPIC), such as the ITC and IPI interrupts.
   21.10 + *
   21.11 + * Copyright (C) 1999 VA Linux Systems
   21.12 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
   21.13 + * Copyright (C) 2000 Hewlett-Packard Co
   21.14 + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
   21.15 + */
   21.16 +
   21.17 +#include <linux/sched.h>
   21.18 +#include <linux/irq.h>
   21.19 +
   21.20 +static unsigned int
   21.21 +lsapic_noop_startup (unsigned int irq)	/* startup hook: irq is ignored */
   21.22 +{
   21.23 +	return 0;	/* nothing to start; always returns 0 */
   21.24 +}
   21.25 +
   21.26 +static void
   21.27 +lsapic_noop (unsigned int irq)	/* shared do-nothing hook: irq is ignored */
   21.28 +{
   21.29 +	/* nothing to do... */
   21.30 +}
   21.31 +
   21.32 +struct hw_interrupt_type irq_type_ia64_lsapic = {	/* every hook below is a no-op */
   21.33 +	.typename =	"LSAPIC",
   21.34 +	.startup =	lsapic_noop_startup,	/* always returns 0 */
   21.35 +	.shutdown =	lsapic_noop,
   21.36 +	.enable =	lsapic_noop,
   21.37 +	.disable =	lsapic_noop,
   21.38 +	.ack =		lsapic_noop,
   21.39 +	.end =		lsapic_noop
   21.40 +};
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/xen/arch/ia64/linux/lib/Makefile	Tue Aug 16 12:15:23 2005 +0800
    22.3 @@ -0,0 +1,44 @@
    22.4 +#
    22.5 +# Makefile for ia64-specific library routines..
    22.6 +#
    22.7 +
    22.8 +include $(BASEDIR)/Rules.mk
    22.9 +
   22.10 +OBJS := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o			\
   22.11 +	__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o			\
   22.12 +	bitop.o checksum.o clear_page.o csum_partial_copy.o copy_page.o	\
   22.13 +	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o	\
   22.14 +	flush.o ip_fast_csum.o do_csum.o copy_user.o			\
   22.15 +	memset.o strlen.o memcpy.o 
   22.16 +
   22.17 +default: $(OBJS)
   22.18 +	$(LD) -r -o ia64lib.o $(OBJS)
   22.19 +
   22.20 +AFLAGS += -I$(BASEDIR)/include -D__ASSEMBLY__
   22.21 +
   22.22 +__divdi3.o: idiv64.S
   22.23 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $<
   22.24 +
   22.25 +__udivdi3.o: idiv64.S
   22.26 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DUNSIGNED -o $@ $<
   22.27 +
   22.28 +__moddi3.o: idiv64.S
   22.29 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -o $@ $<
   22.30 +
   22.31 +__umoddi3.o: idiv64.S
   22.32 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -DUNSIGNED -o $@ $<
   22.33 +
   22.34 +__divsi3.o: idiv32.S
   22.35 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $<
   22.36 +
   22.37 +__udivsi3.o: idiv32.S
   22.38 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DUNSIGNED -o $@ $<
   22.39 +
   22.40 +__modsi3.o: idiv32.S
   22.41 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -o $@ $<
   22.42 +
   22.43 +__umodsi3.o: idiv32.S
   22.44 +	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -DUNSIGNED -o $@ $<
   22.45 +
   22.46 +clean:
   22.47 +	rm -f *.o *~
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/xen/arch/ia64/linux/lib/bitop.c	Tue Aug 16 12:15:23 2005 +0800
    23.3 @@ -0,0 +1,88 @@
    23.4 +#include <linux/compiler.h>
    23.5 +#include <linux/types.h>
    23.6 +#include <asm/intrinsics.h>
    23.7 +#include <linux/module.h>
    23.8 +#include <linux/bitops.h>
    23.9 +
   23.10 +/*
   23.11 + * Find next zero bit in a bitmap reasonably efficiently..
   23.12 + */
   23.13 +
   23.14 +int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset)
   23.15 +{
   23.16 +	unsigned long *p = ((unsigned long *) addr) + (offset >> 6);	/* 64-bit word holding bit 'offset' */
   23.17 +	unsigned long result = offset & ~63UL;	/* bit index of the first bit of *p */
   23.18 +	unsigned long tmp;
   23.19 +
   23.20 +	if (offset >= size)	/* nothing to search: "not found" is reported as size */
   23.21 +		return size;
   23.22 +	size -= result;		/* from here on, result + size stays equal to the original size */
   23.23 +	offset &= 63UL;		/* bit position inside the first word */
   23.24 +	if (offset) {
   23.25 +		tmp = *(p++);
   23.26 +		tmp |= ~0UL >> (64-offset);	/* force bits below 'offset' to 1 so they are skipped */
   23.27 +		if (size < 64)		/* first word is also the last (partial) word */
   23.28 +			goto found_first;
   23.29 +		if (~tmp)		/* a zero bit at or above 'offset' in this word */
   23.30 +			goto found_middle;
   23.31 +		size -= 64;
   23.32 +		result += 64;
   23.33 +	}
   23.34 +	while (size & ~63UL) {	/* scan whole 64-bit words */
   23.35 +		if (~(tmp = *(p++)))
   23.36 +			goto found_middle;
   23.37 +		result += 64;
   23.38 +		size -= 64;
   23.39 +	}
   23.40 +	if (!size)		/* ended on a word boundary: result equals the original size */
   23.41 +		return result;
   23.42 +	tmp = *p;		/* trailing partial word */
   23.43 +found_first:
   23.44 +	tmp |= ~0UL << size;	/* force bits at or beyond the end of the bitmap to 1 */
   23.45 +	if (tmp == ~0UL)		/* any bits zero? */
   23.46 +		return result + size;	/* nope */
   23.47 +found_middle:
   23.48 +	return result + ffz(tmp);	/* ffz is defined elsewhere; presumably the index of the lowest zero bit */
   23.49 +}
   23.50 +EXPORT_SYMBOL(__find_next_zero_bit);
   23.51 +
   23.52 +/*
   23.53 + * Find next bit in a bitmap reasonably efficiently..
   23.54 + */
   23.55 +int __find_next_bit(const void *addr, unsigned long size, unsigned long offset)
   23.56 +{
   23.57 +	unsigned long *p = ((unsigned long *) addr) + (offset >> 6);	/* 64-bit word holding bit 'offset' */
   23.58 +	unsigned long result = offset & ~63UL;	/* bit index of the first bit of *p */
   23.59 +	unsigned long tmp;
   23.60 +
   23.61 +	if (offset >= size)	/* nothing to search: "not found" is reported as size */
   23.62 +		return size;
   23.63 +	size -= result;		/* from here on, result + size stays equal to the original size */
   23.64 +	offset &= 63UL;		/* bit position inside the first word */
   23.65 +	if (offset) {
   23.66 +		tmp = *(p++);
   23.67 +		tmp &= ~0UL << offset;	/* clear bits below 'offset' so they are skipped */
   23.68 +		if (size < 64)		/* first word is also the last (partial) word */
   23.69 +			goto found_first;
   23.70 +		if (tmp)		/* a set bit at or above 'offset' in this word */
   23.71 +			goto found_middle;
   23.72 +		size -= 64;
   23.73 +		result += 64;
   23.74 +	}
   23.75 +	while (size & ~63UL) {	/* scan whole 64-bit words */
   23.76 +		if ((tmp = *(p++)))
   23.77 +			goto found_middle;
   23.78 +		result += 64;
   23.79 +		size -= 64;
   23.80 +	}
   23.81 +	if (!size)		/* ended on a word boundary: result equals the original size */
   23.82 +		return result;
   23.83 +	tmp = *p;		/* trailing partial word */
   23.84 +  found_first:
   23.85 +	tmp &= ~0UL >> (64-size);	/* clear bits at or beyond the end of the bitmap */
   23.86 +	if (tmp == 0UL)		/* Are any bits set? */
   23.87 +		return result + size; /* Nope. */
   23.88 +  found_middle:
   23.89 +	return result + __ffs(tmp);	/* __ffs is defined elsewhere; presumably the index of the lowest set bit */
   23.90 +}
   23.91 +EXPORT_SYMBOL(__find_next_bit);
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/xen/arch/ia64/linux/lib/carta_random.S	Tue Aug 16 12:15:23 2005 +0800
    24.3 @@ -0,0 +1,54 @@
    24.4 +/*
    24.5 + * Fast, simple, yet decent quality random number generator based on
    24.6 + * a paper by David G. Carta ("Two Fast Implementations of the
    24.7 + * `Minimal Standard' Random Number Generator," Communications of the
    24.8 + * ACM, January, 1990).
    24.9 + *
   24.10 + * Copyright (C) 2002 Hewlett-Packard Co
   24.11 + *	David Mosberger-Tang <davidm@hpl.hp.com>
   24.12 + */
   24.13 +
   24.14 +#include <asm/asmmacro.h>
   24.15 +
   24.16 +#define a	r2
   24.17 +#define m	r3
   24.18 +#define lo	r8
   24.19 +#define hi	r9
   24.20 +#define t0	r16
   24.21 +#define t1	r17
   24.22 +#define	seed	r32
   24.23 +
   24.24 +GLOBAL_ENTRY(carta_random32)
   24.25 +	movl	a = (16807 << 16) | 16807	// multiplier 16807 in both of the low 16-bit fields
   24.26 +	;;
   24.27 +	pmpyshr2.u t0 = a, seed, 0		// 16-bit parallel multiply of a and seed, shift count 0
   24.28 +	pmpyshr2.u t1 = a, seed, 16		// same multiply, shift count 16
   24.29 +	;;
   24.30 +	unpack2.l t0 = t1, t0			// combine the two partial results into t0
   24.31 +	dep	m = -1, r0, 0, 31		// m = 0x7fffffff (31 one-bits deposited into zero)
   24.32 +	;;
   24.33 +	zxt4	lo = t0				// lo = low 32 bits of t0
   24.34 +	shr.u	hi = t0, 32			// hi = high 32 bits of t0
   24.35 +	;;
   24.36 +	dep	t0 = 0, hi, 15, 49	// t0 = (hi & 0x7fff)
   24.37 +	;;
   24.38 +	shl	t0 = t0, 16		// t0 = (hi & 0x7fff) << 16
   24.39 +	shr	t1 = hi, 15		// t1 = (hi >> 15)
   24.40 +	;;
   24.41 +	add	lo = lo, t0		// lo += (hi & 0x7fff) << 16
   24.42 +	;;
   24.43 +	cmp.gtu	p6, p0 = lo, m		// p6 = (lo > m), unsigned
   24.44 +	;;
   24.45 +(p6)	and	lo = lo, m		// on overflow past m: keep the low 31 bits...
   24.46 +	;;
   24.47 +(p6)	add	lo = 1, lo		// ...and add 1
   24.48 +	;;
   24.49 +	add	lo = lo, t1		// lo += (hi >> 15)
   24.50 +	;;
   24.51 +	cmp.gtu p6, p0 = lo, m		// same overflow check and fix-up as above
   24.52 +	;;
   24.53 +(p6)	and	lo = lo, m
   24.54 +	;;
   24.55 +(p6)	add	lo = 1, lo
   24.56 +	br.ret.sptk.many rp		// result is left in lo (r8)
   24.57 +END(carta_random32)
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/xen/arch/ia64/linux/lib/checksum.c	Tue Aug 16 12:15:23 2005 +0800
    25.3 @@ -0,0 +1,102 @@
    25.4 +/*
    25.5 + * Network checksum routines
    25.6 + *
    25.7 + * Copyright (C) 1999, 2003 Hewlett-Packard Co
    25.8 + *	Stephane Eranian <eranian@hpl.hp.com>
    25.9 + *
   25.10 + * Most of the code coming from arch/alpha/lib/checksum.c
   25.11 + *
   25.12 + * This file contains network checksum routines that are better done
   25.13 + * in an architecture-specific manner due to speed..
   25.14 + */
   25.15 +
   25.16 +#include <linux/module.h>
   25.17 +#include <linux/string.h>
   25.18 +
   25.19 +#include <asm/byteorder.h>
   25.20 +
   25.21 +static inline unsigned short
   25.22 +from64to16 (unsigned long x)	/* fold a 64-bit sum down to 16 bits, adding the carries back in */
   25.23 +{
   25.24 +	/* add up 32-bit words for 33 bits */
   25.25 +	x = (x & 0xffffffff) + (x >> 32);
   25.26 +	/* add up 16-bit and 17-bit words for 17+c bits */
   25.27 +	x = (x & 0xffff) + (x >> 16);
   25.28 +	/* add up 16-bit and 2-bit for 16+c bit */
   25.29 +	x = (x & 0xffff) + (x >> 16);
   25.30 +	/* add up carry.. */
   25.31 +	x = (x & 0xffff) + (x >> 16);
   25.32 +	return x;	/* implicitly narrowed to unsigned short */
   25.33 +}
   25.34 +
   25.35 +/*
   25.36 + * computes the checksum of the TCP/UDP pseudo-header
   25.37 + * returns a 16-bit checksum, already complemented.
   25.38 + */
   25.39 +unsigned short int
   25.40 +csum_tcpudp_magic (unsigned long saddr, unsigned long daddr, unsigned short len,
   25.41 +		   unsigned short proto, unsigned int sum)
   25.42 +{	/* sum all terms in 64 bits, fold to 16 bits with from64to16(), then complement */
   25.43 +	return ~from64to16(saddr + daddr + sum + ((unsigned long) ntohs(len) << 16) +
   25.44 +			   ((unsigned long) proto << 8));
   25.45 +}
   25.46 +
   25.47 +EXPORT_SYMBOL(csum_tcpudp_magic);
   25.48 +
   25.49 +unsigned int
   25.50 +csum_tcpudp_nofold (unsigned long saddr, unsigned long daddr, unsigned short len,
   25.51 +		    unsigned short proto, unsigned int sum)
   25.52 +{	/* same 64-bit sum as csum_tcpudp_magic, but folded only to 32 bits and not complemented */
   25.53 +	unsigned long result;
   25.54 +
   25.55 +	result = (saddr + daddr + sum +
   25.56 +		  ((unsigned long) ntohs(len) << 16) +
   25.57 +		  ((unsigned long) proto << 8));
   25.58 +
   25.59 +	/* Fold down to 32-bits so we don't lose in the typedef-less network stack.  */
   25.60 +	/* 64 to 33 */
   25.61 +	result = (result & 0xffffffff) + (result >> 32);
   25.62 +	/* 33 to 32 */
   25.63 +	result = (result & 0xffffffff) + (result >> 32);
   25.64 +	return result;	/* implicitly narrowed to unsigned int */
   25.65 +}
   25.66 +
   25.67 +extern unsigned long do_csum (const unsigned char *, long);
   25.68 +
   25.69 +/*
   25.70 + * computes the checksum of a memory block at buff, length len,
   25.71 + * and adds in "sum" (32-bit)
   25.72 + *
   25.73 + * returns a 32-bit number suitable for feeding into itself
   25.74 + * or csum_tcpudp_magic
   25.75 + *
   25.76 + * this function must be called with even lengths, except
   25.77 + * for the last fragment, which may be odd
   25.78 + *
   25.79 + * it's best to have buff aligned on a 32-bit boundary
   25.80 + */
   25.81 +unsigned int
   25.82 +csum_partial (const unsigned char * buff, int len, unsigned int sum)
   25.83 +{
   25.84 +	unsigned long result = do_csum(buff, len);	/* do_csum is only declared here; it is defined elsewhere */
   25.85 +
   25.86 +	/* add in old sum, and carry.. */
   25.87 +	result += sum;
   25.88 +	/* 32+c bits -> 32 bits */
   25.89 +	result = (result & 0xffffffff) + (result >> 32);
   25.90 +	return result;	/* implicitly narrowed to unsigned int */
   25.91 +}
   25.92 +
   25.93 +EXPORT_SYMBOL(csum_partial);
   25.94 +
   25.95 +/*
   25.96 + * this routine is used for miscellaneous IP-like checksums, mainly
   25.97 + * in icmp.c
   25.98 + */
   25.99 +unsigned short
  25.100 +ip_compute_csum (unsigned char * buff, int len)
  25.101 +{
  25.102 +	return ~do_csum(buff,len);	/* complement of do_csum(), implicitly narrowed to unsigned short */
  25.103 +}
  25.104 +
  25.105 +EXPORT_SYMBOL(ip_compute_csum);
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/xen/arch/ia64/linux/lib/clear_page.S	Tue Aug 16 12:15:23 2005 +0800
    26.3 @@ -0,0 +1,77 @@
    26.4 +/*
    26.5 + * Copyright (C) 1999-2002 Hewlett-Packard Co
    26.6 + *	Stephane Eranian <eranian@hpl.hp.com>
    26.7 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    26.8 + * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
    26.9 + *
   26.10 + * 1/06/01 davidm	Tuned for Itanium.
   26.11 + * 2/12/02 kchen	Tuned for both Itanium and McKinley
   26.12 + * 3/08/02 davidm	Some more tweaking
   26.13 + */
   26.14 +#include <linux/config.h>
   26.15 +
   26.16 +#include <asm/asmmacro.h>
   26.17 +#include <asm/page.h>
   26.18 +
   26.19 +#ifdef CONFIG_ITANIUM
   26.20 +# define L3_LINE_SIZE	64	// Itanium L3 line size
   26.21 +# define PREFETCH_LINES	9	// magic number
   26.22 +#else
   26.23 +# define L3_LINE_SIZE	128	// McKinley L3 line size
   26.24 +# define PREFETCH_LINES	12	// magic number
   26.25 +#endif
   26.26 +
   26.27 +#define saved_lc	r2
   26.28 +#define dst_fetch	r3
   26.29 +#define dst1		r8
   26.30 +#define dst2		r9
   26.31 +#define dst3		r10
   26.32 +#define dst4		r11
   26.33 +
   26.34 +#define dst_last	r31
   26.35 +
   26.36 +GLOBAL_ENTRY(clear_page)
   26.37 +	.prologue
   26.38 +	.regstk 1,0,0,0
   26.39 +	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
   26.40 +	.save ar.lc, saved_lc
   26.41 +	mov saved_lc = ar.lc			// keep the caller's loop counter
   26.42 +
   26.43 +	.body
   26.44 +	mov ar.lc = (PREFETCH_LINES - 1)	// .fetch loop runs PREFETCH_LINES times
   26.45 +	mov dst_fetch = in0			// dst_fetch walks ahead of the main stores
   26.46 +	adds dst1 = 16, in0
   26.47 +	adds dst2 = 32, in0
   26.48 +	;;
   26.49 +.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE	// store f0 at the start of each of the first PREFETCH_LINES lines
   26.50 +	adds dst3 = 48, in0		// executing this multiple times is harmless
   26.51 +	br.cloop.sptk.few .fetch
   26.52 +	;;
   26.53 +	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch	// dst_last = in0 + PAGE_SIZE
   26.54 +	mov ar.lc = r16			// one L3 line per iteration
   26.55 +	adds dst4 = 64, in0
   26.56 +	;;
   26.57 +#ifdef CONFIG_ITANIUM
   26.58 +	// Optimized for Itanium
   26.59 +1:	stf.spill.nta [dst1] = f0, 64
   26.60 +	stf.spill.nta [dst2] = f0, 64
   26.61 +	cmp.lt p8,p0=dst_fetch, dst_last	// p8: dst_fetch is still inside the page
   26.62 +	;;
   26.63 +#else
   26.64 +	// Optimized for McKinley
   26.65 +1:	stf.spill.nta [dst1] = f0, 64
   26.66 +	stf.spill.nta [dst2] = f0, 64
   26.67 +	stf.spill.nta [dst3] = f0, 64
   26.68 +	stf.spill.nta [dst4] = f0, 128
   26.69 +	cmp.lt p8,p0=dst_fetch, dst_last	// p8: dst_fetch is still inside the page
   26.70 +	;;
   26.71 +	stf.spill.nta [dst1] = f0, 64
   26.72 +	stf.spill.nta [dst2] = f0, 64
   26.73 +#endif
   26.74 +	stf.spill.nta [dst3] = f0, 64
   26.75 +(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE	// keep running ahead only while inside the page
   26.76 +	br.cloop.sptk.few 1b
   26.77 +	;;
   26.78 +	mov ar.lc = saved_lc		// restore lc
   26.79 +	br.ret.sptk.many rp
   26.80 +END(clear_page)
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/xen/arch/ia64/linux/lib/clear_user.S	Tue Aug 16 12:15:23 2005 +0800
    27.3 @@ -0,0 +1,209 @@
    27.4 +/*
    27.5 + * This routine clears to zero a linear memory buffer in user space.
    27.6 + *
    27.7 + * Inputs:
    27.8 + *	in0:	address of buffer
    27.9 + *	in1:	length of buffer in bytes
   27.10 + * Outputs:
   27.11 + *	r8:	number of bytes that didn't get cleared due to a fault
   27.12 + *
   27.13 + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
   27.14 + *	Stephane Eranian <eranian@hpl.hp.com>
   27.15 + */
   27.16 +
   27.17 +#include <asm/asmmacro.h>
   27.18 +
   27.19 +//
   27.20 +// arguments
   27.21 +//
   27.22 +#define buf		r32
   27.23 +#define len		r33
   27.24 +
   27.25 +//
   27.26 +// local registers
   27.27 +//
   27.28 +#define cnt		r16
   27.29 +#define buf2		r17
   27.30 +#define saved_lc	r18
   27.31 +#define saved_pfs	r19
   27.32 +#define tmp		r20
   27.33 +#define len2		r21
   27.34 +#define len3		r22
   27.35 +
   27.36 +//
   27.37 +// Theory of operations:
   27.38 +//	- we check whether or not the buffer is small, i.e., less than 17
   27.39 +//	  in which case we do the byte by byte loop.
   27.40 +//
   27.41 +//	- Otherwise we go progressively from 1 byte store to 8byte store in
   27.42 +//	  the head part, the body is a 16byte store loop and we finish with the
   27.43 +//	  tail for the last 15 bytes.
   27.44 +//	  The good point about this breakdown is that the long buffer handling
   27.45 +//	  contains only 2 branches.
   27.46 +//
   27.47 +//	The reason for not using shifting & masking for both the head and the
   27.48 +//	tail is to stay semantically correct. This routine is not supposed
   27.49 +//	to write bytes outside of the buffer. While most of the time this would
   27.50 +//	be ok, we can't tolerate a mistake. A classical example is the case
   27.51 +//	of multithreaded code where the extra bytes touched are actually owned
   27.52 +//	by another thread which runs concurrently with ours. Another, less likely,
   27.53 +//	example is with device drivers where reading an I/O mapped location may
   27.54 +//	have side effects (same thing for writing).
   27.55 +//
   27.56 +
   27.57 +GLOBAL_ENTRY(__do_clear_user)
   27.58 +	.prologue
   27.59 +	.save ar.pfs, saved_pfs
   27.60 +	alloc	saved_pfs=ar.pfs,2,0,0,0
   27.61 +	cmp.eq p6,p0=r0,len		// check for zero length
   27.62 +	.save ar.lc, saved_lc
   27.63 +	mov saved_lc=ar.lc		// preserve ar.lc (slow)
   27.64 +	.body
   27.65 +	;;				// avoid WAW on CFM
   27.66 +	adds tmp=-1,len			// br.cloop is repeat/until
   27.67 +	mov ret0=len			// return value is length at this point
   27.68 +(p6)	br.ret.spnt.many rp		// len == 0: ar.lc has not been touched yet
   27.69 +	;;
   27.70 +	cmp.lt p6,p0=16,len		// if len > 16 then long memset
   27.71 +	mov ar.lc=tmp			// initialize lc for small count (also executed on the long path!)
   27.72 +(p6)	br.cond.dptk .long_do_clear
   27.73 +	;;				// WAR on ar.lc
   27.74 +	//
   27.75 +	// worst case 16 iterations, avg 8 iterations
   27.76 +	//
   27.77 +	// We could have played with the predicates to use the extra
   27.78 +	// M slot for 2 stores/iteration but the cost of initializing
   27.79 +	// the various counters compared to how long the loop is supposed
   27.80 +	// to last on average does not make this solution viable.
   27.81 +	//
   27.82 +1:
   27.83 +	EX( .Lexit1, st1 [buf]=r0,1 )
   27.84 +	adds len=-1,len			// countdown length using len
   27.85 +	br.cloop.dptk 1b
   27.86 +	;;				// avoid RAW on ar.lc
   27.87 +	//
   27.88 +	// .Lexit1: comes from byte by byte loop
   27.89 +	//	    len contains bytes left
   27.90 +.Lexit1:
   27.91 +	mov ret0=len			// faster than using ar.lc
   27.92 +	mov ar.lc=saved_lc
   27.93 +	br.ret.sptk.many rp		// end of short clear_user
   27.94 +
   27.95 +
   27.96 +	//
   27.97 +	// At this point we know we have more than 16 bytes to clear
   27.98 +	// so we focus on alignment (no branches required)
   27.99 +	//
  27.100 +	// The use of len/len2 for countdown of the number of bytes left
  27.101 +	// instead of ret0 is due to the fact that the exception code
  27.102 +	// changes the values of r8.
  27.103 +	//
  27.104 +.long_do_clear:
  27.105 +	tbit.nz p6,p0=buf,0		// odd alignment (for long_do_clear)
  27.106 +	;;
  27.107 +	EX( .Lexit3, (p6) st1 [buf]=r0,1 )	// 1-byte aligned
  27.108 +(p6)	adds len=-1,len;;		// sync because buf is modified
  27.109 +	tbit.nz p6,p0=buf,1
  27.110 +	;;
  27.111 +	EX( .Lexit3, (p6) st2 [buf]=r0,2 )	// 2-byte aligned
  27.112 +(p6)	adds len=-2,len;;
  27.113 +	tbit.nz p6,p0=buf,2
  27.114 +	;;
  27.115 +	EX( .Lexit3, (p6) st4 [buf]=r0,4 )	// 4-byte aligned
  27.116 +(p6)	adds len=-4,len;;
  27.117 +	tbit.nz p6,p0=buf,3
  27.118 +	;;
  27.119 +	EX( .Lexit3, (p6) st8 [buf]=r0,8 )	// 8-byte aligned
  27.120 +(p6)	adds len=-8,len;;
  27.121 +	shr.u cnt=len,4		// number of 128-bit (2x64bit) words
  27.122 +	;;
  27.123 +	cmp.eq p6,p0=r0,cnt
  27.124 +	adds tmp=-1,cnt
  27.125 +(p6)	br.cond.dpnt .dotail		// we have less than 16 bytes left
  27.126 +	;;
  27.127 +	adds buf2=8,buf			// setup second base pointer
  27.128 +	mov ar.lc=tmp
  27.129 +	;;
  27.130 +
  27.131 +	//
  27.132 +	// 16bytes/iteration core loop
  27.133 +	//
  27.134 +	// The second store can never generate a fault because
  27.135 +	// we come into the loop only when we are 16-byte aligned.
  27.136 +	// This means that if we cross a page then it will always be
  27.137 +	// in the first store and never in the second.
  27.138 +	//
  27.139 +	//
  27.140 +	// We need to keep track of the remaining length. A possible (optimistic)
  27.141 +	// way would be to use ar.lc and derive how many byte were left by
  27.142 +	// doing : left= 16*ar.lc + 16.  this would avoid the addition at
  27.143 +	// every iteration.
  27.144 +	// However we need to keep the synchronization point. A template
  27.145 +	// M;;MB does not exist and thus we can keep the addition at no
  27.146 +	// extra cycle cost (use a nop slot anyway). It also simplifies the
  27.147 +	// (unlikely)  error recovery code
  27.148 +	//
  27.149 +
  27.150 +2:	EX(.Lexit3, st8 [buf]=r0,16 )
  27.151 +	;;				// needed to get len correct when error
  27.152 +	st8 [buf2]=r0,16
  27.153 +	adds len=-16,len
  27.154 +	br.cloop.dptk 2b
  27.155 +	;;				// br.cloop's ar.lc update must precede the restore at .dotail
  27.156 +	//
  27.157 +	// tail correction based on len only
  27.158 +	//
  27.159 +	// We alternate the use of len3,len2 to allow parallelism and correct
  27.160 +	// error handling. We also reuse p6/p7 to return correct value.
  27.161 +	// The addition of len2/len3 does not cost anything more compared to
  27.162 +	// the regular memset as we had empty slots.
  27.163 +	//
  27.164 +.dotail:
  27.165 +	mov ar.lc=saved_lc		// restore lc here so the direct jump from the head (cnt == 0) restores it too
  27.166 +	mov len2=len			// for parallelization of error handling
  27.167 +	mov len3=len
  27.168 +	tbit.nz p6,p0=len,3
  27.169 +	;;
  27.170 +	EX( .Lexit2, (p6) st8 [buf]=r0,8 )	// at least 8 bytes
  27.171 +(p6)	adds len3=-8,len2
  27.172 +	tbit.nz p7,p6=len,2
  27.173 +	;;
  27.174 +	EX( .Lexit2, (p7) st4 [buf]=r0,4 )	// at least 4 bytes
  27.175 +(p7)	adds len2=-4,len3
  27.176 +	tbit.nz p6,p7=len,1
  27.177 +	;;
  27.178 +	EX( .Lexit2, (p6) st2 [buf]=r0,2 )	// at least 2 bytes
  27.179 +(p6)	adds len3=-2,len2
  27.180 +	tbit.nz p7,p6=len,0
  27.181 +	;;
  27.182 +	EX( .Lexit2, (p7) st1 [buf]=r0 )	// only 1 byte left
  27.183 +	mov ret0=r0				// success
  27.184 +	br.ret.sptk.many rp			// end of most likely path
  27.185 +
  27.186 +	//
  27.187 +	// Outlined error handling code
  27.188 +	//
  27.189 +
  27.190 +	//
  27.191 +	// .Lexit3: comes from the head or the core loop, need restore lc
  27.192 +	//	    len contains bytes left
  27.193 +	//
  27.194 +	//
  27.195 +	// .Lexit2:
  27.196 +	//	if p6 -> coming from st8 or st2 : len2 contains what's left
  27.197 +	//	if p7 -> coming from st4 or st1 : len3 contains what's left
  27.198 +	// lc was already restored at .dotail; .Lexit3 restores it again (harmless).
  27.199 +.Lexit2:
  27.200 +	.pred.rel "mutex", p6, p7
  27.201 +(p6)	mov len=len2
  27.202 +(p7)	mov len=len3
  27.203 +	;;
  27.204 +	//
  27.205 +	// .Lexit3: shared exit for faults in the head, the core loop and
  27.206 +	//	    (via .Lexit2) the tail; len contains bytes left
  27.207 +	//
  27.208 +.Lexit3:
  27.209 +	mov ret0=len
  27.210 +	mov ar.lc=saved_lc
  27.211 +	br.ret.sptk.many rp
  27.212 +END(__do_clear_user)
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/xen/arch/ia64/linux/lib/copy_page.S	Tue Aug 16 12:15:23 2005 +0800
    28.3 @@ -0,0 +1,98 @@
    28.4 +/*
    28.5 + *
    28.6 + * Optimized version of the standard copy_page() function
    28.7 + *
    28.8 + * Inputs:
    28.9 + *	in0:	address of target page
   28.10 + *	in1:	address of source page
   28.11 + * Output:
   28.12 + *	no return value
   28.13 + *
   28.14 + * Copyright (C) 1999, 2001 Hewlett-Packard Co
   28.15 + *	Stephane Eranian <eranian@hpl.hp.com>
   28.16 + *	David Mosberger <davidm@hpl.hp.com>
   28.17 + *
   28.18 + * 4/06/01 davidm	Tuned to make it perform well both for cached and uncached copies.
   28.19 + */
   28.20 +#include <asm/asmmacro.h>
   28.21 +#include <asm/page.h>
   28.22 +
   28.23 +#define PIPE_DEPTH	3
   28.24 +#define EPI		p[PIPE_DEPTH-1]
   28.25 +
   28.26 +#define lcount		r16
   28.27 +#define saved_pr	r17
   28.28 +#define saved_lc	r18
   28.29 +#define saved_pfs	r19
   28.30 +#define src1		r20
   28.31 +#define src2		r21
   28.32 +#define tgt1		r22
   28.33 +#define tgt2		r23
   28.34 +#define srcf		r24
   28.35 +#define tgtf		r25
   28.36 +#define tgt_last	r26
   28.37 +
   28.38 +#define Nrot		((8*PIPE_DEPTH+7)&~7)
   28.39 +
   28.40 +GLOBAL_ENTRY(copy_page)
   28.41 +	.prologue
   28.42 +	.save ar.pfs, saved_pfs
   28.43 +	alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot	// NOTE(review): declares 3 inputs although only in0/in1 are used -- confirm intent
   28.44 +
   28.45 +	.rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
   28.46 +	      t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
   28.47 +	.rotp p[PIPE_DEPTH]
   28.48 +
   28.49 +	.save ar.lc, saved_lc
   28.50 +	mov saved_lc=ar.lc		// keep the caller's loop counter
   28.51 +	mov ar.ec=PIPE_DEPTH		// epilogue count for the br.ctop loop below
   28.52 +
   28.53 +	mov lcount=PAGE_SIZE/64-1	// 64 bytes are loaded per iteration, -1=repeat/until
   28.54 +	.save pr, saved_pr
   28.55 +	mov saved_pr=pr			// keep the caller's predicates
   28.56 +	mov pr.rot=1<<16		// only bit 16 set in the rotating-predicate mask
   28.57 +
   28.58 +	.body
   28.59 +
   28.60 +	mov src1=in1			// src1/src2: two interleaved source pointers, 8 bytes apart
   28.61 +	adds src2=8,in1
   28.62 +	mov tgt_last = PAGE_SIZE
   28.63 +	;;
   28.64 +	adds tgt2=8,in0			// tgt1/tgt2: two interleaved target pointers, 8 bytes apart
   28.65 +	add srcf=512,in1		// srcf/tgtf start 512 bytes ahead; used only by the lfetches below
   28.66 +	mov ar.lc=lcount
   28.67 +	mov tgt1=in0
   28.68 +	add tgtf=512,in0
   28.69 +	add tgt_last = tgt_last, in0	// tgt_last = in0 + PAGE_SIZE
   28.70 +	;;
   28.71 +1:
   28.72 +(p[0])	ld8 t1[0]=[src1],16
   28.73 +(EPI)	st8 [tgt1]=t1[PIPE_DEPTH-1],16
   28.74 +(p[0])	ld8 t2[0]=[src2],16
   28.75 +(EPI)	st8 [tgt2]=t2[PIPE_DEPTH-1],16
   28.76 +	cmp.ltu p6,p0 = tgtf, tgt_last	// p6: prefetch pointer is still inside the target page
   28.77 +	;;
   28.78 +(p[0])	ld8 t3[0]=[src1],16
   28.79 +(EPI)	st8 [tgt1]=t3[PIPE_DEPTH-1],16
   28.80 +(p[0])	ld8 t4[0]=[src2],16
   28.81 +(EPI)	st8 [tgt2]=t4[PIPE_DEPTH-1],16
   28.82 +	;;
   28.83 +(p[0])	ld8 t5[0]=[src1],16
   28.84 +(EPI)	st8 [tgt1]=t5[PIPE_DEPTH-1],16
   28.85 +(p[0])	ld8 t6[0]=[src2],16
   28.86 +(EPI)	st8 [tgt2]=t6[PIPE_DEPTH-1],16
   28.87 +	;;
   28.88 +(p[0])	ld8 t7[0]=[src1],16
   28.89 +(EPI)	st8 [tgt1]=t7[PIPE_DEPTH-1],16
   28.90 +(p[0])	ld8 t8[0]=[src2],16
   28.91 +(EPI)	st8 [tgt2]=t8[PIPE_DEPTH-1],16
   28.92 +
   28.93 +(p6)	lfetch [srcf], 64		// prefetch the next 64 bytes of source...
   28.94 +(p6)	lfetch [tgtf], 64		// ...and of target
   28.95 +	br.ctop.sptk.few 1b
   28.96 +	;;
   28.97 +	mov pr=saved_pr,0xffffffffffff0000	// restore predicates
   28.98 +	mov ar.pfs=saved_pfs
   28.99 +	mov ar.lc=saved_lc
  28.100 +	br.ret.sptk.many rp
  28.101 +END(copy_page)
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/xen/arch/ia64/linux/lib/copy_page_mck.S	Tue Aug 16 12:15:23 2005 +0800
    29.3 @@ -0,0 +1,185 @@
    29.4 +/*
    29.5 + * McKinley-optimized version of copy_page().
    29.6 + *
    29.7 + * Copyright (C) 2002 Hewlett-Packard Co
    29.8 + *	David Mosberger <davidm@hpl.hp.com>
    29.9 + *
   29.10 + * Inputs:
   29.11 + *	in0:	address of target page
   29.12 + *	in1:	address of source page
   29.13 + * Output:
   29.14 + *	no return value
   29.15 + *
   29.16 + * General idea:
   29.17 + *	- use regular loads and stores to prefetch data to avoid consuming M-slot just for
   29.18 + *	  lfetches => good for in-cache performance
   29.19 + *	- avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single
   29.20 + *	  cycle
   29.21 + *
   29.22 + * Principle of operation:
   29.23 + *	First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes.
   29.24 + *	To avoid secondary misses in L2, we prefetch both source and destination with a line-size
   29.25 + *	of 128 bytes.  When both of these lines are in the L2 and the first half of the
   29.26 + *	source line is in L1, we start copying the remaining words.  The second half of the
   29.27 + *	source line is prefetched in an earlier iteration, so that by the time we start
   29.28 + *	accessing it, it's also present in the L1.
   29.29 + *
   29.30 + *	We use a software-pipelined loop to control the overall operation.  The pipeline
   29.31 + *	has 2*PREFETCH_DIST+K stages.  The first PREFETCH_DIST stages are used for prefetching
   29.32 + *	source cache-lines.  The second PREFETCH_DIST stages are used for prefetching destination
   29.33 + *	cache-lines, the last K stages are used to copy the cache-line words not copied by
   29.34 + *	the prefetches.  The four relevant points in the pipeline are called A, B, C, D:
   29.35 + *	p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line
   29.36 + *	should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought
   29.37 + *	into L1D and p[D] is TRUE if a cacheline needs to be copied.
   29.38 + *
   29.39 + *	This all sounds very complicated, but thanks to the modulo-scheduled loop support,
   29.40 + *	the resulting code is very regular and quite easy to follow (once you get the idea).
   29.41 + *
   29.42 + *	As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented
   29.43 + *	as the separate .prefetch_loop.  Logically, this loop performs exactly like the
   29.44 + *	main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed,
   29.45 + *	so that each loop iteration is faster (again, good for cached case).
   29.46 + *
   29.47 + *	When reading the code, it helps to keep the following picture in mind:
   29.48 + *
   29.49 + *	       word 0 word 1
   29.50 + *            +------+------+---
   29.51 + *	      |	v[x] | 	t1  | ^
   29.52 + *	      |	t2   |	t3  | |
   29.53 + *	      |	t4   |	t5  | |
   29.54 + *	      |	t6   |	t7  | | 128 bytes
   29.55 + *     	      |	n[y] | 	t9  | |	(L2 cache line)
   29.56 + *	      |	t10  | 	t11 | |
   29.57 + *	      |	t12  | 	t13 | |
   29.58 + *	      |	t14  | 	t15 | v
   29.59 + *	      +------+------+---
   29.60 + *
   29.61 + *	Here, v[x] is copied by the (memory) prefetch.  n[y] is loaded at p[C]
   29.62 + *	to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
   29.63 + *	an order that avoids bank conflicts.
   29.64 + */
   29.65 +#include <asm/asmmacro.h>
   29.66 +#include <asm/page.h>
   29.67 +
   29.68 +#define PREFETCH_DIST	8		// McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
   29.69 +
   29.70 +#define src0		r2	// load pointer 0 for the tX words (starts at in1 + 8)
   29.71 +#define src1		r3	// load pointer 1 for the tX words (starts at in1 + 3*8)
   29.72 +#define dst0		r9	// store pointer 0 for the tX words (starts at in0 + 8)
   29.73 +#define dst1		r10	// store pointer 1 for the tX words (starts at in0 + 3*8)
   29.74 +#define src_pre_mem	r11	// source pointer to word 0 of each line (advances by 128)
   29.75 +#define dst_pre_mem	r14	// destination pointer to word 0 of each line (advances by 128)
   29.76 +#define src_pre_l2	r15	// source pointer to word 8 of each line (starts at in1 + 8*8)
   29.77 +#define dst_pre_l2	r16	// destination pointer to word 8 of each line (starts at in0 + 8*8)
   29.78 +#define t1		r17	// tN = temporary holding word N of the line (see picture above)
   29.79 +#define t2		r18
   29.80 +#define t3		r19
   29.81 +#define t4		r20
   29.82 +#define t5		t1	// alias! (r17)
   29.83 +#define t6		t2	// alias! (r18)
   29.84 +#define t7		t3	// alias! (r19)
   29.85 +#define t9		t5	// alias! (r17)
   29.86 +#define t10		t4	// alias! (r20)
   29.87 +#define t11		t7	// alias! (r19)
   29.88 +#define t12		t6	// alias! (r18)
   29.89 +#define t14		t10	// alias! (r20)
   29.90 +#define t13		r21
   29.91 +#define t15		r22
   29.92 +
   29.93 +#define saved_lc	r23	// ar.lc on entry, restored before returning
   29.94 +#define saved_pr	r24	// predicates on entry, restored before returning
   29.95 +
   29.96 +#define	A	0			// stage 0: p[A] => prefetch a source line
   29.97 +#define B	(PREFETCH_DIST)		// stage 8: p[B] => prefetch a destination line
   29.98 +#define C	(B + PREFETCH_DIST)	// stage 16: p[C] => bring second half of the L2 line into L1D
   29.99 +#define D	(C + 3)			// stage 19: p[D] => copy the remaining words of the line
  29.100 +#define N	(D + 1)			// number of pipeline stages (20)
  29.101 +#define Nrot	((N + 7) & ~7)		// N rounded up to a multiple of 8 (24); rotating-register count for alloc
  29.102 +
  29.103 +GLOBAL_ENTRY(copy_page)			// in0 = destination page, in1 = source page
  29.104 +	.prologue
  29.105 +	alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot	// 2 inputs, Nrot-2 locals, 0 outputs, Nrot rotating registers
  29.106 +
  29.107 +	.rotr v[2*PREFETCH_DIST], n[D-C+1]	// v[]: word 0 of each line, n[]: word 8 of each line
  29.108 +	.rotp p[N]				// one rotating predicate per pipeline stage
  29.109 +
  29.110 +	.save ar.lc, saved_lc
  29.111 +	mov saved_lc = ar.lc			// restored before returning
  29.112 +	.save pr, saved_pr
  29.113 +	mov saved_pr = pr			// restored before returning
  29.114 +	.body
  29.115 +
  29.116 +	mov src_pre_mem = in1			// word 0 of the first source line
  29.117 +	mov pr.rot = 0x10000			// p16 = 1, all other rotating predicates = 0
  29.118 +	mov ar.ec = 1				// special unrolled loop
  29.119 +
  29.120 +	mov dst_pre_mem = in0			// word 0 of the first destination line
  29.121 +	mov ar.lc = 2*PREFETCH_DIST - 1		// .prefetch_loop runs 2*PREFETCH_DIST times
  29.122 +
  29.123 +	add src_pre_l2 = 8*8, in1		// word 8 (n[y]) of the first source line
  29.124 +	add dst_pre_l2 = 8*8, in0		// word 8 of the first destination line
  29.125 +	add src0 = 8, in1			// first t1 src
  29.126 +	add src1 = 3*8, in1			// first t3 src
  29.127 +	add dst0 = 8, in0			// first t1 dst
  29.128 +	add dst1 = 3*8, in0			// first t3 dst
  29.129 +	mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1	// ar.lc value for .line_copy (loaded below)
  29.130 +	nop.m 0
  29.131 +	nop.i 0
  29.132 +	;;
  29.133 +	// same as .line_copy loop, but with all predicated-off instructions removed:
  29.134 +.prefetch_loop:
  29.135 +(p[A])	ld8 v[A] = [src_pre_mem], 128		// M0 prefetch src from memory (loads word 0)
  29.136 +(p[B])	st8 [dst_pre_mem] = v[B], 128		// M2 prefetch dst from memory (stores word 0)
  29.137 +	br.ctop.sptk .prefetch_loop		// rotate registers; loop while ar.lc > 0
  29.138 +	;;
  29.139 +	cmp.eq p16, p0 = r0, r0			// reset p16 to 1 (br.ctop cleared it to zero)
  29.140 +	mov ar.lc = t1				// with 64KB pages, t1 is too big to fit in 8 bits!
  29.141 +	mov ar.ec = N				// # of stages in pipeline
  29.142 +	;;
  29.143 +.line_copy:
  29.144 +(p[D])	ld8 t2 = [src0], 3*8			// M0 load word 2
  29.145 +(p[D])	ld8 t4 = [src1], 3*8			// M1 load word 4
  29.146 +(p[B])	st8 [dst_pre_mem] = v[B], 128		// M2 prefetch dst from memory (stores word 0)
  29.147 +(p[D])	st8 [dst_pre_l2] = n[D-C], 128		// M3 prefetch dst from L2 (stores word 8)
  29.148 +	;;
  29.149 +(p[A])	ld8 v[A] = [src_pre_mem], 128		// M0 prefetch src from memory (loads word 0)
  29.150 +(p[C])	ld8 n[0] = [src_pre_l2], 128		// M1 prefetch src from L2 (loads word 8)
  29.151 +(p[D])	st8 [dst0] =  t1, 8			// M2 store word 1 (loaded by the previous iteration)
  29.152 +(p[D])	st8 [dst1] =  t3, 8			// M3 store word 3 (loaded by the previous iteration)
  29.153 +	;;
  29.154 +(p[D])	ld8  t5 = [src0], 8			// load word 5
  29.155 +(p[D])	ld8  t7 = [src1], 3*8			// load word 7
  29.156 +(p[D])	st8 [dst0] =  t2, 3*8			// store word 2
  29.157 +(p[D])	st8 [dst1] =  t4, 3*8			// store word 4
  29.158 +	;;
  29.159 +(p[D])	ld8  t6 = [src0], 3*8			// load word 6
  29.160 +(p[D])	ld8 t10 = [src1], 8			// load word 10
  29.161 +(p[D])	st8 [dst0] =  t5, 8			// store word 5
  29.162 +(p[D])	st8 [dst1] =  t7, 3*8			// store word 7
  29.163 +	;;
  29.164 +(p[D])	ld8  t9 = [src0], 3*8			// load word 9
  29.165 +(p[D])	ld8 t11 = [src1], 3*8			// load word 11
  29.166 +(p[D])	st8 [dst0] =  t6, 3*8			// store word 6
  29.167 +(p[D])	st8 [dst1] = t10, 8			// store word 10
  29.168 +	;;
  29.169 +(p[D])	ld8 t12 = [src0], 8			// load word 12
  29.170 +(p[D])	ld8 t14 = [src1], 8			// load word 14
  29.171 +(p[D])	st8 [dst0] =  t9, 3*8			// store word 9
  29.172 +(p[D])	st8 [dst1] = t11, 3*8			// store word 11
  29.173 +	;;
  29.174 +(p[D])	ld8 t13 = [src0], 4*8			// load word 13; src0 then points at word 1 of the next line
  29.175 +(p[D])	ld8 t15 = [src1], 4*8			// load word 15; src1 then points at word 3 of the next line
  29.176 +(p[D])	st8 [dst0] = t12, 8			// store word 12
  29.177 +(p[D])	st8 [dst1] = t14, 8			// store word 14
  29.178 +	;;
  29.179 +(p[D-1])ld8  t1 = [src0], 8			// load word 1 of the line copied in the next iteration
  29.180 +(p[D-1])ld8  t3 = [src1], 8			// load word 3 of the line copied in the next iteration
  29.181 +(p[D])	st8 [dst0] = t13, 4*8			// store word 13; dst0 then points at word 1 of the next line
  29.182 +(p[D])	st8 [dst1] = t15, 4*8			// store word 15; dst1 then points at word 3 of the next line
  29.183 +	br.ctop.sptk .line_copy			// rotate registers; ar.lc counts lines, ar.ec = N drains the pipeline
  29.184 +	;;
  29.185 +	mov ar.lc = saved_lc			// restore the entry value of ar.lc
  29.186 +	mov pr = saved_pr, -1			// restore all predicates (mask = -1)
  29.187 +	br.ret.sptk.many rp
  29.188 +END(copy_page)
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/xen/arch/ia64/linux/lib/copy_user.S	Tue Aug 16 12:15:23 2005 +0800
    30.3 @@ -0,0 +1,610 @@
    30.4 +/*
    30.5 + *
    30.6 + * Optimized version of the copy_user() routine.
    30.7 + * It is used to copy data across the kernel/user boundary.
    30.8 + *
    30.9 + * The source and destination are always on opposite side of
   30.10 + * the boundary. When reading from user space we must catch
   30.11 + * faults on loads. When writing to user space we must catch
   30.12 + * errors on stores. Note that because of the nature of the copy
   30.13 + * we don't need to worry about overlapping regions.
   30.14 + *
   30.15 + *
   30.16 + * Inputs:
   30.17 + *	in0	address of destination buffer
   30.18 + *	in1	address of source buffer
   30.19 + *	in2	number of bytes to copy
   30.20 + *
   30.21 + * Outputs:
   30.22 + *	ret0	0 in case of success. The number of bytes NOT copied in
   30.23 + *		case of error.
   30.24 + *
   30.25 + * Copyright (C) 2000-2001 Hewlett-Packard Co
   30.26 + *	Stephane Eranian <eranian@hpl.hp.com>
   30.27 + *
   30.28 + * Fixme:
   30.29 + *	- handle the case where we have more than 16 bytes and the alignment
   30.30 + *	  are different.
   30.31 + *	- more benchmarking
   30.32 + *	- fix extraneous stop bit introduced by the EX() macro.
   30.33 + */
   30.34 +
   30.35 +#include <asm/asmmacro.h>
   30.36 +
   30.37 +//
   30.38 +// Tuneable parameters
   30.39 +//
   30.40 +#define COPY_BREAK	16	// we do byte copy below (must be >=16)
   30.41 +#define PIPE_DEPTH	21	// pipe depth
   30.42 +
   30.43 +#define EPI		p[PIPE_DEPTH-1]	// predicate of the last pipeline stage (guards the stores)
   30.44 +
   30.45 +//
   30.46 +// arguments
   30.47 +//
   30.48 +#define dst		in0	// destination address
   30.49 +#define src		in1	// source address
   30.50 +#define len		in2	// number of bytes to copy
   30.51 +
   30.52 +//
   30.53 +// local registers
   30.54 +//
   30.55 +#define t1		r2	// rshift in bytes
   30.56 +#define t2		r3	// lshift in bytes
   30.57 +#define rshift		r14	// right shift in bits
   30.58 +#define lshift		r15	// left shift in bits
   30.59 +#define word1		r16	// number of head bytes copied byte-by-byte in .diff_align_copy_user
   30.60 +#define word2		r17
   30.61 +#define cnt		r18	// iteration count, loaded into ar.lc
   30.62 +#define len2		r19	// len - 1 (ar.lc value for the short byte loop)
   30.63 +#define saved_lc	r20	// ar.lc on entry
   30.64 +#define saved_pr	r21	// predicates on entry
   30.65 +#define tmp		r22	// scratch (src ^ dst alignment test, shrp result, cnt - 1)
   30.66 +#define val		r23
   30.67 +#define src1		r24	// working copy of src
   30.68 +#define dst1		r25	// working copy of dst
   30.69 +#define src2		r26	// src offset within an 8-byte word; later src1 + 8
   30.70 +#define dst2		r27	// dst offset within an 8-byte word; later dst1 + 8
   30.71 +#define len1		r28	// remaining length (working copy of len)
   30.72 +#define enddst		r29	// dst + len
   30.73 +#define endsrc		r30	// src + len
   30.74 +#define saved_pfs	r31	// ar.pfs on entry
   30.75 +
   30.76 +GLOBAL_ENTRY(__copy_user)
   30.77 +	.prologue
   30.78 +	.save ar.pfs, saved_pfs
   30.79 +	alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7)
   30.80 +
   30.81 +	.rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH]
   30.82 +	.rotp p[PIPE_DEPTH]
   30.83 +
   30.84 +	adds len2=-1,len	// br.ctop is repeat/until
   30.85 +	mov ret0=r0
   30.86 +
   30.87 +	;;			// RAW of cfm when len=0
   30.88 +	cmp.eq p8,p0=r0,len	// check for zero length
   30.89 +	.save ar.lc, saved_lc
   30.90 +	mov saved_lc=ar.lc	// preserve ar.lc (slow)
   30.91 +(p8)	br.ret.spnt.many rp	// empty mempcy()
   30.92 +	;;
   30.93 +	add enddst=dst,len	// first byte after end of destination
   30.94 +	add endsrc=src,len	// first byte after end of source
   30.95 +	.save pr, saved_pr
   30.96 +	mov saved_pr=pr		// preserve predicates
   30.97 +
   30.98 +	.body
   30.99 +
  30.100 +	mov dst1=dst		// copy because of rotation
  30.101 +	mov ar.ec=PIPE_DEPTH
  30.102 +	mov pr.rot=1<<16	// p16=true all others are false
  30.103 +
  30.104 +	mov src1=src		// copy because of rotation
  30.105 +	mov ar.lc=len2		// initialize lc for small count
  30.106 +	cmp.lt p10,p7=COPY_BREAK,len	// if len > COPY_BREAK then long copy
  30.107 +
  30.108 +	xor tmp=src,dst		// same alignment test prepare
  30.109 +(p10)	br.cond.dptk .long_copy_user
  30.110 +	;;			// RAW pr.rot/p16 ?
  30.111 +	//
  30.112 +	// Now we do the byte by byte loop with software pipeline
  30.113 +	//
  30.114 +	// p7 is necessarily false by now
  30.115 +1:
  30.116 +	EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
  30.117 +	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
  30.118 +	br.ctop.dptk.few 1b
  30.119 +	;;
  30.120 +	mov ar.lc=saved_lc
  30.121 +	mov pr=saved_pr,0xffffffffffff0000
  30.122 +	mov ar.pfs=saved_pfs		// restore ar.ec
  30.123 +	br.ret.sptk.many rp		// end of short memcpy
  30.124 +
  30.125 +	//
  30.126 +	// Not 8-byte aligned
  30.127 +	//
  30.128 +.diff_align_copy_user:
  30.129 +	// At this point we know we have more than 16 bytes to copy
  30.130 +	// and also that src and dest do _not_ have the same alignment.
  30.131 +	and src2=0x7,src1				// src offset
  30.132 +	and dst2=0x7,dst1				// dst offset
  30.133 +	;;
  30.134 +	// The basic idea is that we copy byte-by-byte at the head so
  30.135 +	// that we can reach 8-byte alignment for both src1 and dst1.
  30.136 +	// Then copy the body using software pipelined 8-byte copy,
  30.137 +	// shifting the two back-to-back words right and left, then copy
  30.138 +	// the tail by copying byte-by-byte.
  30.139 +	//
  30.140 +	// Fault handling. If the byte-by-byte at the head fails on the
  30.141 +	// load, then restart and finish the pipeline by copying zeros
  30.142 +	// to the dst1. Then copy zeros for the rest of dst1.
  30.143 +	// If 8-byte software pipeline fails on the load, do the same as
  30.144 +	// failure_in3 does. If the byte-by-byte at the tail fails, it is
  30.145 +	// handled simply by failure_in_pipe1.
  30.146 +	//
  30.147 +	// The case p14 represents the source has more bytes in
  30.148 +	// the first word (by the shifted part), whereas the p15 needs to
  30.149 +	// copy some bytes from the 2nd word of the source that has the
  30.150 +	// tail of the 1st of the destination.
  30.151 +	//
  30.152 +
  30.153 +	//
  30.154 +	// Optimization. If dst1 is 8-byte aligned (quite common), we don't need
  30.155 +	// to copy the head to dst1, to start 8-byte copy software pipeline.
  30.156 +	// We know src1 is not 8-byte aligned in this case.
  30.157 +	//
  30.158 +	cmp.eq p14,p15=r0,dst2
  30.159 +(p15)	br.cond.spnt 1f
  30.160 +	;;
  30.161 +	sub t1=8,src2
  30.162 +	mov t2=src2
  30.163 +	;;
  30.164 +	shl rshift=t2,3
  30.165 +	sub len1=len,t1					// set len1
  30.166 +	;;
  30.167 +	sub lshift=64,rshift
  30.168 +	;;
  30.169 +	br.cond.spnt .word_copy_user
  30.170 +	;;
  30.171 +1:
  30.172 +	cmp.leu	p14,p15=src2,dst2
  30.173 +	sub t1=dst2,src2
  30.174 +	;;
  30.175 +	.pred.rel "mutex", p14, p15
  30.176 +(p14)	sub word1=8,src2				// (8 - src offset)
  30.177 +(p15)	sub t1=r0,t1					// absolute value
  30.178 +(p15)	sub word1=8,dst2				// (8 - dst offset)
  30.179 +	;;
  30.180 +	// For the case p14, we don't need to copy the shifted part to
  30.181 +	// the 1st word of destination.
  30.182 +	sub t2=8,t1
  30.183 +(p14)	sub word1=word1,t1
  30.184 +	;;
  30.185 +	sub len1=len,word1				// resulting len
  30.186 +(p15)	shl rshift=t1,3					// in bits
  30.187 +(p14)	shl rshift=t2,3
  30.188 +	;;
  30.189 +(p14)	sub len1=len1,t1
  30.190 +	adds cnt=-1,word1
  30.191 +	;;
  30.192 +	sub lshift=64,rshift
  30.193 +	mov ar.ec=PIPE_DEPTH
  30.194 +	mov pr.rot=1<<16	// p16=true all others are false
  30.195 +	mov ar.lc=cnt
  30.196 +	;;
  30.197 +2:
  30.198 +	EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1)
  30.199 +	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
  30.200 +	br.ctop.dptk.few 2b
  30.201 +	;;
  30.202 +	clrrrb
  30.203 +	;;
  30.204 +.word_copy_user:
  30.205 +	cmp.gtu p9,p0=16,len1
  30.206 +(p9)	br.cond.spnt 4f			// if (16 > len1) skip 8-byte copy
  30.207 +	;;
  30.208 +	shr.u cnt=len1,3		// number of 64-bit words
  30.209 +	;;
  30.210 +	adds cnt=-1,cnt
  30.211 +	;;
  30.212 +	.pred.rel "mutex", p14, p15
  30.213 +(p14)	sub src1=src1,t2
  30.214 +(p15)	sub src1=src1,t1
  30.215 +	//
  30.216 +	// Now both src1 and dst1 point to an 8-byte aligned address. And
  30.217 +	// we have more than 8 bytes to copy.
  30.218 +	//
  30.219 +	mov ar.lc=cnt
  30.220 +	mov ar.ec=PIPE_DEPTH
  30.221 +	mov pr.rot=1<<16	// p16=true all others are false
  30.222 +	;;
  30.223 +3:
  30.224 +	//
  30.225 +	// The pipeline consists of 3 stages:
  30.226 +	// 1 (p16):	Load a word from src1
  30.227 +	// 2 (EPI_1):	Shift right pair, saving to tmp
  30.228 +	// 3 (EPI):	Store tmp to dst1
  30.229 +	//
  30.230 +	// To make it simple, use at least 2 (p16) loops to set up val1[n]
  30.231 +	// because we need 2 back-to-back val1[] to get tmp.
  30.232 +	// Note that this implies EPI_2 must be p18 or greater.
  30.233 +	//
  30.234 +
  30.235 +#define EPI_1		p[PIPE_DEPTH-2]
  30.236 +#define SWITCH(pred, shift)	cmp.eq pred,p0=shift,rshift
  30.237 +#define CASE(pred, shift)	\
  30.238 +	(pred)	br.cond.spnt .copy_user_bit##shift
  30.239 +#define BODY(rshift)						\
  30.240 +.copy_user_bit##rshift:						\
  30.241 +1:								\
  30.242 +	EX(.failure_out,(EPI) st8 [dst1]=tmp,8);		\
  30.243 +(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift;	\
  30.244 +	EX(3f,(p16) ld8 val1[1]=[src1],8);			\
  30.245 +(p16)	mov val1[0]=r0;						\
  30.246 +	br.ctop.dptk 1b;					\
  30.247 +	;;							\
  30.248 +	br.cond.sptk.many .diff_align_do_tail;			\
  30.249 +2:								\
  30.250 +(EPI)	st8 [dst1]=tmp,8;					\
  30.251 +(EPI_1)	shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift;	\
  30.252 +3:								\
  30.253 +(p16)	mov val1[1]=r0;						\
  30.254 +(p16)	mov val1[0]=r0;						\
  30.255 +	br.ctop.dptk 2b;					\
  30.256 +	;;							\
  30.257 +	br.cond.sptk.many .failure_in2
  30.258 +
  30.259 +	//
  30.260 +	// Since the instruction 'shrp' requires a fixed 128-bit value
  30.261 +	// specifying the bits to shift, we need to provide 7 cases
  30.262 +	// below.
  30.263 +	//
  30.264 +	SWITCH(p6, 8)
  30.265 +	SWITCH(p7, 16)
  30.266 +	SWITCH(p8, 24)
  30.267 +	SWITCH(p9, 32)
  30.268 +	SWITCH(p10, 40)
  30.269 +	SWITCH(p11, 48)
  30.270 +	SWITCH(p12, 56)
  30.271 +	;;
  30.272 +	CASE(p6, 8)
  30.273 +	CASE(p7, 16)
  30.274 +	CASE(p8, 24)
  30.275 +	CASE(p9, 32)
  30.276 +	CASE(p10, 40)
  30.277 +	CASE(p11, 48)
  30.278 +	CASE(p12, 56)
  30.279 +	;;
  30.280 +	BODY(8)
  30.281 +	BODY(16)
  30.282 +	BODY(24)
  30.283 +	BODY(32)
  30.284 +	BODY(40)
  30.285 +	BODY(48)
  30.286 +	BODY(56)
  30.287 +	;;
  30.288 +.diff_align_do_tail:
  30.289 +	.pred.rel "mutex", p14, p15
  30.290 +(p14)	sub src1=src1,t1
  30.291 +(p14)	adds dst1=-8,dst1
  30.292 +(p15)	sub dst1=dst1,t1
  30.293 +	;;
  30.294 +4:
  30.295 +	// Tail correction.
  30.296 +	//
  30.297 +	// The problem with this pipelined loop is that the last word is not
  30.298 +	// loaded and thus part of the last word written is not correct.
  30.299 +	// To fix that, we simply copy the tail byte by byte.
  30.300 +
  30.301 +	sub len1=endsrc,src1,1
  30.302 +	clrrrb
  30.303 +	;;
  30.304 +	mov ar.ec=PIPE_DEPTH
  30.305 +	mov pr.rot=1<<16	// p16=true all others are false
  30.306 +	mov ar.lc=len1
  30.307 +	;;
  30.308 +5:
  30.309 +	EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
  30.310 +	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
  30.311 +	br.ctop.dptk.few 5b
  30.312 +	;;
  30.313 +	mov ar.lc=saved_lc
  30.314 +	mov pr=saved_pr,0xffffffffffff0000
  30.315 +	mov ar.pfs=saved_pfs
  30.316 +	br.ret.sptk.many rp
  30.317 +
  30.318 +	//
  30.319 +	// Beginning of long memcpy (i.e. > 16 bytes)
  30.320 +	//
  30.321 +.long_copy_user:
  30.322 +	tbit.nz p6,p7=src1,0	// odd alignment
  30.323 +	and tmp=7,tmp
  30.324 +	;;
  30.325 +	cmp.eq p10,p8=r0,tmp
  30.326 +	mov len1=len		// copy because of rotation
  30.327 +(p8)	br.cond.dpnt .diff_align_copy_user
  30.328 +	;;
  30.329 +	// At this point we know we have more than 16 bytes to copy
  30.330 +	// and also that both src and dest have the same alignment
  30.331 +	// which may not be the one we want. So for now we must move
  30.332 +	// forward slowly until we reach 16byte alignment: no need to
  30.333 +	// worry about reaching the end of buffer.
  30.334 +	//
  30.335 +	EX(.failure_in1,(p6) ld1 val1[0]=[src1],1)	// 1-byte aligned
  30.336 +(p6)	adds len1=-1,len1;;
  30.337 +	tbit.nz p7,p0=src1,1
  30.338 +	;;
  30.339 +	EX(.failure_in1,(p7) ld2 val1[1]=[src1],2)	// 2-byte aligned
  30.340 +(p7)	adds len1=-2,len1;;
  30.341 +	tbit.nz p8,p0=src1,2
  30.342 +	;;
  30.343 +	//
  30.344 +	// Stop bit not required after ld4 because if we fail on ld4
  30.345 +	// we have never executed the ld1, therefore st1 is not executed.
  30.346 +	//
  30.347 +	EX(.failure_in1,(p8) ld4 val2[0]=[src1],4)	// 4-byte aligned
  30.348 +	;;
  30.349 +	EX(.failure_out,(p6) st1 [dst1]=val1[0],1)
  30.350 +	tbit.nz p9,p0=src1,3
  30.351 +	;;
  30.352 +	//
  30.353 +	// Stop bit not required after ld8 because if we fail on ld8
  30.354 +	// we have never executed the ld2, therefore st2 is not executed.
  30.355 +	//
  30.356 +	EX(.failure_in1,(p9) ld8 val2[1]=[src1],8)	// 8-byte aligned
  30.357 +	EX(.failure_out,(p7) st2 [dst1]=val1[1],2)
  30.358 +(p8)	adds len1=-4,len1
  30.359 +	;;
  30.360 +	EX(.failure_out, (p8) st4 [dst1]=val2[0],4)
  30.361 +(p9)	adds len1=-8,len1;;
  30.362 +	shr.u cnt=len1,4		// number of 128-bit (2x64bit) words
  30.363 +	;;
  30.364 +	EX(.failure_out, (p9) st8 [dst1]=val2[1],8)
  30.365 +	tbit.nz p6,p0=len1,3
  30.366 +	cmp.eq p7,p0=r0,cnt
  30.367 +	adds tmp=-1,cnt			// br.ctop is repeat/until
  30.368 +(p7)	br.cond.dpnt .dotail		// we have less than 16 bytes left
  30.369 +	;;
  30.370 +	adds src2=8,src1
  30.371 +	adds dst2=8,dst1
  30.372 +	mov ar.lc=tmp
  30.373 +	;;
  30.374 +	//
  30.375 +	// 16bytes/iteration
  30.376 +	//